Skip to content

Commit

Permalink
Fix git packager for git repo with submodules (#109)
Browse files Browse the repository at this point in the history
  • Loading branch information
hemildesai authored Nov 16, 2024
1 parent 070fe56 commit 27bccfa
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 20 deletions.
45 changes: 25 additions & 20 deletions src/nemo_run/core/packaging/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ class GitArchivePackager(Packager):
#: Can be a branch name or a commit ref like HEAD.
ref: str = "HEAD"

#: Include submodules in the archive.
include_submodules: bool = True

#: Include extra files in the archive that match include_pattern.
#: This str will be included in the command as: find {include_pattern} -type f to get the list of extra files to include in the archive
include_pattern: str = ""
Expand Down Expand Up @@ -109,40 +112,42 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
), "Your repo has untracked files. Please track your files via git or set check_untracked_files to False to proceed with packaging."

ctx = Context()
# We first add the git-tracked files to an uncompressed archive,
# then we append the submodule files to that archive,
# then we append any extra files matched by the include pattern,
# and finally we compress it (we cannot compress right away, because files cannot be appended to a compressed archive).
git_archive_cmd = (
f"git archive --format=tar --output={output_file}.tmp {self.ref}:{git_sub_path}"
)
git_submodule_cmd = f"""git submodule foreach --recursive \
'git archive --format=tar --prefix=$sm_path/ --output=$sha1.tmp HEAD && tar -Af {output_file}.tmp $sha1.tmp && rm $sha1.tmp'"""
with ctx.cd(git_base_path):
ctx.run(git_archive_cmd)
if self.include_submodules:
ctx.run(git_submodule_cmd)

if self.include_pattern:
include_pattern_relative_path = self.include_pattern_relative_path or shlex.quote(
str(git_base_path)
)
relative_include_pattern = os.path.relpath(
self.include_pattern, include_pattern_relative_path
)
# we first add git files into an uncompressed archive
# then we add an extra files from pattern to that archive
# finally we compress it (cannot compress right away, since adding files is not possible)
git_archive_cmd = (
f"git archive --format=tar --output={output_file}.tmp {self.ref}:{git_sub_path}"
)
include_pattern_cmd = f"find {relative_include_pattern} -type f | tar -cf {os.path.join(git_base_path, 'additional.tmp')} -T -"
tar_concatenate_cmd = f"tar -Af {output_file}.tmp additional.tmp"
gzip_cmd = f"gzip -c {output_file}.tmp > {output_file}"
rm_cmd = f"rm {output_file}.tmp additional.tmp"

with ctx.cd(git_base_path):
ctx.run(git_archive_cmd)
tar_concatenate_cmd = f"tar -Af {output_file}.tmp additional.tmp && rm additional.tmp"

with ctx.cd(include_pattern_relative_path):
ctx.run(include_pattern_cmd)

with ctx.cd(git_base_path):
ctx.run(tar_concatenate_cmd)
ctx.run(gzip_cmd)
ctx.run(rm_cmd)
else:
with ctx.cd(git_base_path):
git_archive_cmd = (
f"git archive --format=tar.gz --output={output_file} {self.ref}:{git_sub_path}"
)
ctx.run(git_archive_cmd)

gzip_cmd = f"gzip -c {output_file}.tmp > {output_file}"
rm_cmd = f"rm {output_file}.tmp"

with ctx.cd(git_base_path):
ctx.run(gzip_cmd)
ctx.run(rm_cmd)

return output_file

Expand Down
57 changes: 57 additions & 0 deletions test/core/packaging/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,3 +290,60 @@ def test_untracked_files_raises_exception(temp_repo):
f.write("Untracked file")
with pytest.raises(AssertionError, match="Your repo has untracked files"):
packager.package(temp_repo, str(temp_repo), "test")


@patch("nemo_run.core.packaging.git.Context", MockContext)
def test_package_with_include_submodules(packager, temp_repo):
    """Check that submodule files are archived when include_submodules=True.

    A nested git repository is created under ``temp_repo / "submodule"``,
    registered as a submodule of the parent repo and committed. The resulting
    package is extracted and its ``submodule`` directory is compared against
    the submodule working tree.

    The ``packager`` fixture is requested but not used; the test builds its
    own ``GitArchivePackager`` so the flag under test is explicit.
    """
    temp_repo = Path(temp_repo)

    # Create a standalone repository that will become the submodule. Commands
    # are pointed at it with ``cwd=`` instead of ``os.chdir`` so a failing git
    # call cannot leave the test process stranded inside the submodule.
    submodule_path = temp_repo / "submodule"
    submodule_path.mkdir()
    submodule_cwd = str(submodule_path)
    subprocess.check_call(["git", "init", "--initial-branch=main"], cwd=submodule_cwd)
    # write_text closes the file handle, unlike a bare open(...).write(...).
    (submodule_path / "submodule_file.txt").write_text("Submodule file")
    subprocess.check_call(["git", "add", "."], cwd=submodule_cwd)
    subprocess.check_call(
        ["git", "commit", "-m", "Initial submodule commit"], cwd=submodule_cwd
    )

    # The working directory is still switched to the parent repo, as the
    # original test did, in case packaging depends on it.
    os.chdir(str(temp_repo))
    subprocess.check_call(["git", "submodule", "add", str(submodule_path)])
    subprocess.check_call(["git", "commit", "-m", "Add submodule"])

    submodule_packager = GitArchivePackager(ref="HEAD", include_submodules=True)
    with tempfile.TemporaryDirectory() as job_dir:
        output_file = submodule_packager.package(temp_repo, job_dir, "test_package")
        assert os.path.exists(output_file)

        extract_dir = os.path.join(job_dir, "extracted_output")
        os.makedirs(extract_dir, exist_ok=True)
        # Argument list instead of a shlex-split f-string: paths containing
        # whitespace or quotes are passed through intact.
        subprocess.check_call(["tar", "-xvzf", str(output_file), "-C", extract_dir])

        cmp = filecmp.dircmp(
            os.path.join(temp_repo, "submodule"),
            os.path.join(extract_dir, "submodule"),
        )
        assert cmp.left_list == cmp.right_list
        assert not cmp.diff_files


@patch("nemo_run.core.packaging.git.Context", MockContext)
def test_package_without_include_submodules(packager, temp_repo):
    """Check that submodule files are left out when include_submodules=False.

    A nested git repository is created under ``temp_repo / "submodule"``,
    registered as a submodule of the parent repo and committed. The resulting
    package is extracted and its ``submodule`` directory is expected to exist
    but contain no entries.

    The ``packager`` fixture is requested but not used; the test builds its
    own ``GitArchivePackager`` so the flag under test is explicit.
    """
    temp_repo = Path(temp_repo)

    # Create a standalone repository that will become the submodule. Commands
    # are pointed at it with ``cwd=`` instead of ``os.chdir`` so a failing git
    # call cannot leave the test process stranded inside the submodule.
    submodule_path = temp_repo / "submodule"
    submodule_path.mkdir()
    submodule_cwd = str(submodule_path)
    subprocess.check_call(["git", "init", "--initial-branch=main"], cwd=submodule_cwd)
    # write_text closes the file handle, unlike a bare open(...).write(...).
    (submodule_path / "submodule_file.txt").write_text("Submodule file")
    subprocess.check_call(["git", "add", "."], cwd=submodule_cwd)
    subprocess.check_call(
        ["git", "commit", "-m", "Initial submodule commit"], cwd=submodule_cwd
    )

    # The working directory is still switched to the parent repo, as the
    # original test did, in case packaging depends on it.
    os.chdir(str(temp_repo))
    subprocess.check_call(["git", "submodule", "add", str(submodule_path)])
    subprocess.check_call(["git", "commit", "-m", "Add submodule"])

    plain_packager = GitArchivePackager(ref="HEAD", include_submodules=False)
    with tempfile.TemporaryDirectory() as job_dir:
        output_file = plain_packager.package(temp_repo, job_dir, "test_package")
        assert os.path.exists(output_file)

        extract_dir = os.path.join(job_dir, "extracted_output")
        os.makedirs(extract_dir, exist_ok=True)
        # Argument list instead of a shlex-split f-string: paths containing
        # whitespace or quotes are passed through intact.
        subprocess.check_call(["tar", "-xvzf", str(output_file), "-C", extract_dir])

        # The submodule directory must be present in the archive but empty.
        assert not os.listdir(os.path.join(extract_dir, "submodule"))

0 comments on commit 27bccfa

Please sign in to comment.