From e6d9f31ef4586b2c43118b4958a622d1637043b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lubom=C3=ADr=20Sedl=C3=A1=C5=99?=
Date: Tue, 13 Dec 2022 10:55:41 +0100
Subject: [PATCH] Add script for cleaning up the cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pungi would by default only ever add files to the cache. That would
eventually result in essentially a mirror of the Koji volume.

This patch adds a helper cleanup script. When called, it goes through
files in the cache and deletes anything that is not hardlinked from
elsewhere and with mtime not updated recently.

Cleaning up files that are hardlinked from some compose would not save
any space anyway.

The mtime check should account for cases like subpackage being
downloaded but not included in any compose. This would prevent it from
being downloaded over and over again.

When a compose fails or is aborted, there can be a stale lock file left
behind in the cache. This script cleans that up too.

Directories left empty by the cleanup are removed as well, except for
the cache directory itself.

Signed-off-by: Lubomír Sedlář
---
 pungi/scripts/cache_cleanup.py | 113 +++++++++++++++++++++++++++++++++
 setup.py                       |   1 +
 2 files changed, 114 insertions(+)
 create mode 100644 pungi/scripts/cache_cleanup.py

diff --git a/pungi/scripts/cache_cleanup.py b/pungi/scripts/cache_cleanup.py
new file mode 100644
index 00000000..1cc1e615
--- /dev/null
+++ b/pungi/scripts/cache_cleanup.py
@@ -0,0 +1,113 @@
+import argparse
+import errno
+import os
+import re
+import time
+
+from pungi.util import format_size
+
+
+# Matches paths ending in ".lock", optionally followed by one or more
+# "|<alphanumeric>" suffixes.
+LOCK_RE = re.compile(r".*\.lock(\|[A-Za-z0-9]+)*$")
+
+
+def should_be_cleaned_up(path, st, threshold):
+    """Tell whether a file in the cache is stale and should be deleted.
+
+    :param path: path to the file, used to recognize lock files
+    :param st: stat result for the file (st_nlink and st_mtime are read)
+    :param threshold: timestamp; only files modified before it are stale
+    :returns: True if the file should be removed, False otherwise
+    """
+    if st.st_mtime >= threshold:
+        # Modified recently enough, keep it no matter what it is.
+        return False
+
+    if st.st_nlink == 1:
+        # No other instances, older than limit
+        return True
+
+    if LOCK_RE.match(path):
+        # Suspiciously old lock
+        return True
+
+    return False
+
+
+def _try_remove(func, path, ignored_errnos):
+    """Call func(path) and tell whether it succeeded.
+
+    An OSError whose errno is listed in ignored_errnos is taken to mean that
+    the cache changed in the meantime; False is returned instead of raising.
+    """
+    try:
+        func(path)
+    except OSError as exc:
+        if exc.errno not in ignored_errnos:
+            raise
+        return False
+    return True
+
+
+def main():
+    """Command line entry point: delete stale files from a cache directory."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("CACHE_DIR")
+    parser.add_argument("-n", "--dry-run", action="store_true")
+    parser.add_argument("--verbose", action="store_true")
+    parser.add_argument(
+        "--max-age",
+        help="how old files should be considered for deletion",
+        default=7,
+        type=int,
+    )
+
+    args = parser.parse_args()
+
+    topdir = os.path.abspath(args.CACHE_DIR)
+    # --max-age is converted from days to seconds.
+    max_age = args.max_age * 24 * 3600
+
+    cleaned_up = 0
+    # Directories deleted so far (or that would be deleted in a dry run).
+    removed_dirs = set()
+
+    threshold = time.time() - max_age
+    # Walk bottom-up so that a directory emptied by this run can be removed
+    # right away, followed by its parents if they end up empty too.
+    for dirpath, dirnames, filenames in os.walk(topdir, topdown=False):
+        # Number of entries that remain in this directory after the cleanup.
+        kept = sum(
+            1 for d in dirnames if os.path.join(dirpath, d) not in removed_dirs
+        )
+        for f in filenames:
+            filepath = os.path.join(dirpath, f)
+            try:
+                st = os.stat(filepath)
+            except OSError as exc:
+                if exc.errno != errno.ENOENT:
+                    raise
+                # Deleted by another process since the directory was listed, or a
+                # dangling symlink. Count it as present and leave it alone.
+                kept += 1
+                continue
+            if not should_be_cleaned_up(filepath, st, threshold):
+                kept += 1
+                continue
+            if args.dry_run or _try_remove(os.remove, filepath, (errno.ENOENT,)):
+                if args.verbose:
+                    print("RM %s" % filepath)
+                cleaned_up += st.st_size
+        # The cache directory itself is never deleted.
+        if kept == 0 and dirpath != topdir:
+            not_empty = (errno.ENOTEMPTY, errno.EEXIST)
+            if args.dry_run or _try_remove(os.rmdir, dirpath, not_empty):
+                if args.verbose:
+                    print("RMDIR %s" % dirpath)
+                removed_dirs.add(dirpath)
+
+    if args.dry_run:
+        print("Would reclaim %s bytes." % format_size(cleaned_up))
+    else:
+        print("Reclaimed %s bytes." % format_size(cleaned_up))
diff --git a/setup.py b/setup.py
index 297ff596..97e35b84 100755
--- a/setup.py
+++ b/setup.py
@@ -41,6 +41,7 @@ setup(
             "pungi-gather = pungi.scripts.pungi_gather:cli_main",
             "pungi-config-dump = pungi.scripts.config_dump:cli_main",
             "pungi-config-validate = pungi.scripts.config_validate:cli_main",
+            "pungi-cache-cleanup = pungi.scripts.cache_cleanup:main",
         ]
     },
     scripts=["contrib/yum-dnf-compare/pungi-compare-depsolving"],