kiss: avoid looping many times during tar extraction.

Instead, sort the tarball's manifest and filter out duplicate
top-level directories. For most tarballs this now results in
one loop iteration.
This commit is contained in:
Dylan Araps 2021-07-17 18:55:11 +03:00
parent 0cdcdedfd5
commit 72db2d1fc1
No known key found for this signature in database
GPG Key ID: 13295DAC2CF13B5C
1 changed files with 6 additions and 5 deletions

11
kiss
View File

@ -398,14 +398,15 @@ pkg_source_tar_hack() {
tar xf "$_tmp_file_pre" ||
die "$repo_name" "Failed to extract $1"
tar tf "$_tmp_file_pre" > "$_tmp_file" ||
# The sort command filters out all duplicate top-level
# directories from the tarball's manifest. This is an optimization
# as we avoid looping (4000 times for Python(!)).
tar tf "$_tmp_file_pre" | sort -ut / -k1,1 > "$_tmp_file" ||
die "$repo_name" "Failed to extract manifest"
# Iterate over all directories in the first level of the
# tarball's manifest. Each directory is moved up a level.
while IFS=/ read -r dir _; do contains "$_seen" "${dir#.}" || {
_seen="$_seen $dir"
while IFS=/ read -r dir _; do case ${dir#.} in *?*)
# Move the parent directory to prevent naming conflicts
# with the to-be-moved children.
mv -f "$dir" "$KISS_PID-$dir"
@ -426,7 +427,7 @@ pkg_source_tar_hack() {
# transferred out of it. This can't be a simple 'rmdir'
# as we may leave files in here if any were copied.
rm -rf "$KISS_PID-$dir"
} done < "$_tmp_file"
esac done < "$_tmp_file"
# Remove the tarball now that we are done with it.
rm -f "$_tmp_file_pre"