Colin Walters 2023-10-20 12:49:25 -04:00
parent 1b6d5bbf07
commit 9776100e52
4 changed files with 6 additions and 193 deletions

1
.gitignore vendored
View File

@@ -102,3 +102,4 @@
/libostree-2023.4.tar.xz
/libostree-2023.5.tar.xz
/libostree-2023.6.tar.xz
/libostree-2023.7.tar.xz

View File

@@ -1,189 +0,0 @@
From 3b2fd6e9ff0a3a91a2b72f524492e4f198069dec Mon Sep 17 00:00:00 2001
From: "Owen W. Taylor" <otaylor@fishsoup.net>
Date: Fri, 29 Sep 2023 12:09:04 -0400
Subject: [PATCH] When exporting, use hardlinks for duplicated files
For ostree_repo_export_tree_to_archive(), and 'ostree export', when the
exported tree contains multiple files with the same checksum, write an
archive with hard links.
Without this, importing a tree, then exporting it again breaks
hardlinks.
As an example of savings: this reduces the (compressed) size of the
Fedora Flatpak Runtime image from 1345MiB to 712MiB.
Resolves: #2925
---
src/libostree/ostree-repo-libarchive.c | 50 ++++++++++++++++++++------
tests/archive-test.sh | 4 +--
tests/libtest.sh | 7 ++++
tests/test-composefs.sh | 2 +-
tests/test-export.sh | 10 +++++-
5 files changed, 58 insertions(+), 15 deletions(-)
diff --git a/src/libostree/ostree-repo-libarchive.c b/src/libostree/ostree-repo-libarchive.c
index d0f46883..65a30933 100644
--- a/src/libostree/ostree-repo-libarchive.c
+++ b/src/libostree/ostree-repo-libarchive.c
@@ -943,15 +943,10 @@ ostree_repo_write_archive_to_mtree_from_fd (OstreeRepo *self, int fd, OstreeMuta
#ifdef HAVE_LIBARCHIVE
-static gboolean
-file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path,
- GFileInfo *file_info, struct archive_entry *entry, GError **error)
+static char *
+file_to_pathstr (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path)
{
- gboolean ret = FALSE;
g_autofree char *pathstr = g_file_get_relative_path (root, path);
- g_autoptr (GVariant) xattrs = NULL;
- time_t ts = (time_t)opts->timestamp_secs;
-
if (opts->path_prefix && opts->path_prefix[0])
{
g_autofree char *old_pathstr = pathstr;
@@ -964,6 +959,18 @@ file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts,
pathstr = g_strdup (".");
}
+ return g_steal_pointer (&pathstr);
+}
+
+static gboolean
+file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path,
+ GFileInfo *file_info, struct archive_entry *entry, GError **error)
+{
+ gboolean ret = FALSE;
+ g_autofree char *pathstr = file_to_pathstr (root, opts, path);
+ g_autoptr (GVariant) xattrs = NULL;
+ time_t ts = (time_t)opts->timestamp_secs;
+
archive_entry_update_pathname_utf8 (entry, pathstr);
archive_entry_set_ctime (entry, ts, OSTREE_TIMESTAMP);
archive_entry_set_mtime (entry, ts, OSTREE_TIMESTAMP);
@@ -1021,7 +1028,8 @@ out:
static gboolean
write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchiveOptions *opts,
GFile *root, GFile *dir, struct archive *a,
- GCancellable *cancellable, GError **error)
+ GHashTable *seen_checksums, GCancellable *cancellable,
+ GError **error)
{
gboolean ret = FALSE;
g_autoptr (GFileInfo) dir_info = NULL;
@@ -1057,8 +1065,8 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive
/* First, handle directories recursively */
if (g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY)
{
- if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, cancellable,
- error))
+ if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, seen_checksums,
+ cancellable, error))
goto out;
/* Go to the next entry */
@@ -1086,9 +1094,27 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive
g_autoptr (GInputStream) file_in = NULL;
g_autoptr (GFileInfo) regular_file_info = NULL;
const char *checksum;
+ GFile *old_path;
checksum = ostree_repo_file_get_checksum ((OstreeRepoFile *)path);
+ old_path = g_hash_table_lookup (seen_checksums, checksum);
+ if (old_path)
+ {
+ g_autofree char *old_pathstr = file_to_pathstr (root, opts, old_path);
+
+ archive_entry_set_hardlink (entry, old_pathstr);
+ if (!write_header_free_entry (a, &entry, error))
+ goto out;
+
+ break;
+ }
+ else
+ {
+ /* The checksum is owned by path (an OstreeRepoFile) */
+ g_hash_table_insert (seen_checksums, (char *)checksum, g_object_ref (path));
+ }
+
if (!ostree_repo_load_file (self, checksum, &file_in, &regular_file_info, NULL,
cancellable, error))
goto out;
@@ -1168,9 +1194,11 @@ ostree_repo_export_tree_to_archive (OstreeRepo *self, OstreeRepoExportArchiveOpt
#ifdef HAVE_LIBARCHIVE
gboolean ret = FALSE;
struct archive *a = archive;
+ g_autoptr (GHashTable) seen_checksums
+ = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, g_object_unref);
if (!write_directory_to_libarchive_recurse (self, opts, (GFile *)root, (GFile *)root, a,
- cancellable, error))
+ seen_checksums, cancellable, error))
goto out;
ret = TRUE;
diff --git a/tests/archive-test.sh b/tests/archive-test.sh
index 6b45790e..f6bfd5fb 100644
--- a/tests/archive-test.sh
+++ b/tests/archive-test.sh
@@ -72,9 +72,9 @@ date > test-overlays/overlaid-file
$OSTREE commit ${COMMIT_ARGS} -b test-base --base test2 --owner-uid 42 --owner-gid 42 test-overlays/
$OSTREE ls -R test-base > ls.txt
if can_create_whiteout_devices; then
- assert_streq "$(wc -l < ls.txt)" 17
+ assert_streq "$(wc -l < ls.txt)" 22
else
- assert_streq "$(wc -l < ls.txt)" 14
+ assert_streq "$(wc -l < ls.txt)" 19
fi
assert_streq "$(grep '42.*42' ls.txt | wc -l)" 2
diff --git a/tests/libtest.sh b/tests/libtest.sh
index fa937827..d1c99eab 100755
--- a/tests/libtest.sh
+++ b/tests/libtest.sh
@@ -249,6 +249,13 @@ setup_test_repository () {
mkdir baz/another/
echo x > baz/another/y
+ mkdir baz/sub1
+ echo SAME_CONTENT > baz/sub1/duplicate_a
+ echo SAME_CONTENT > baz/sub1/duplicate_b
+
+ mkdir baz/sub2
+ echo SAME_CONTENT > baz/sub2/duplicate_c
+
# if we are running inside a container we cannot test
# the overlayfs whiteout marker passthrough
if ! test -n "${OSTREE_NO_WHITEOUTS:-}"; then
diff --git a/tests/test-export.sh b/tests/test-export.sh
index e490ae40..6b8de94c 100755
--- a/tests/test-export.sh
+++ b/tests/test-export.sh
@@ -28,7 +28,7 @@ fi
setup_test_repository "archive"
-echo '1..5'
+echo '1..6'
$OSTREE checkout test2 test2-co
$OSTREE commit --no-xattrs -b test2-noxattrs -s "test2 without xattrs" --tree=dir=test2-co
@@ -81,3 +81,11 @@ assert_file_empty diff.txt
rm test2.tar diff.txt t -rf
echo 'ok export import'
+
+cd ${test_tmpdir}
+${OSTREE} 'export' test2 -o test2.tar
+tar tvf test2.tar > test2.manifest
+assert_file_has_content test2.manifest 'baz/sub1/duplicate_b link to baz/sub1/duplicate_a'
+assert_file_has_content test2.manifest 'baz/sub2/duplicate_c link to baz/sub1/duplicate_a'
+
+echo 'ok export hard links'
--
2.41.0

View File

@@ -7,14 +7,12 @@
Summary: Tool for managing bootable, immutable filesystem trees
Name: ostree
Version: 2023.6
Version: 2023.7
Release: 2%{?dist}
Source0: https://github.com/ostreedev/%{name}/releases/download/v%{version}/libostree-%{version}.tar.xz
License: LGPL-2.0-or-later
URL: https://ostree.readthedocs.io/en/latest/
Patch0: 0001-When-exporting-use-hardlinks-for-duplicated-files.patch
BuildRequires: make
BuildRequires: git
# We always run autogen.sh
@@ -172,6 +170,9 @@ find %{buildroot} -name '*.la' -delete
%endif
%changelog
* Fri Oct 20 2023 Colin Walters <walters@verbum.org> - 2023.7-2
- https://github.com/ostreedev/ostree/releases/tag/v2023.7
* Fri Oct 06 2023 Colin Walters <walters@verbum.org> - 2023.6-2
- Cherry pick
https://github.com/ostreedev/ostree/pull/3060/commits/3b2fd6e9ff0a3a91a2b72f524492e4f198069dec

View File

@@ -1 +1 @@
SHA512 (libostree-2023.6.tar.xz) = ae803d41ed38ec713959cfa88dab3db94f63c14cc4ad7fc1b0941e3fcfe5128436e25a354c8a291f93ea0c59a3f440b57aecefa109996427463f9165331f78f1
SHA512 (libostree-2023.7.tar.xz) = 12398c1c30df11e899204e0b798ee650c6099d983b3e20e4f6da4a3a0e4c0b1fcee7c8e123919ccca8e45324102f2378f63a6597c1fd4c3bae14fb89241879b1