Add script for cleaning up the cache

Pungi would by default only ever add files to the cache. That would
eventually result in essentially a mirror of the Koji volume.

This patch adds a helper cleanup script. When called, it goes through
files in the cache and deletes anything that is not hardlinked from
elsewhere and whose mtime has not been updated recently.

Cleaning up files that are hardlinked from some compose would not save any
space anyway. The mtime check should account for cases like a subpackage
being downloaded but not included in any compose. This prevents it
from being downloaded over and over again.

When a compose fails or is aborted, there can be a stale lock file left
behind in the cache. This script cleans that up too.

Signed-off-by: Lubomír Sedlář <lsedlar@redhat.com>
This commit is contained in:
Lubomír Sedlář 2022-12-13 10:55:41 +01:00 committed by lsedlar
parent bf3e9bc53a
commit e6d9f31ef4
2 changed files with 64 additions and 0 deletions

View File

@ -0,0 +1,63 @@
import argparse
import os
import re
import time
from pungi.util import format_size
# Matches paths that end in ".lock", optionally followed by any number of
# "|<alphanumeric>" suffixes (e.g. "foo.lock" or "foo.lock|abc|123").
LOCK_RE = re.compile(r".*\.lock(\|[A-Za-z0-9]+)*$")
def should_be_cleaned_up(path, st, threshold):
    """Decide whether a cached file is eligible for removal.

    :param path: path of the file; only used to recognise lock files
    :param st: stat result for the file (``st_nlink`` and ``st_mtime`` are
        read)
    :param threshold: timestamp; only files with an mtime strictly below it
        are ever considered
    :returns: ``True`` if the file should be deleted, ``False`` otherwise
    """
    # A link count of one means nothing else references the same data.
    orphaned = st.st_nlink == 1
    if orphaned and st.st_mtime < threshold:
        return True

    # A lock file that has not been touched for a long time is suspicious,
    # regardless of how many links it has.
    stale_lock = bool(LOCK_RE.match(path)) and st.st_mtime < threshold
    return stale_lock
def main():
    """Command line entry point: remove stale files from a cache directory.

    Walks ``CACHE_DIR`` and deletes every file for which
    ``should_be_cleaned_up`` returns true, using ``now - max_age`` as the
    mtime threshold (``--max-age`` is given in days). Directories that were
    already empty when visited are removed as well; a directory emptied by
    this run is not removed until a later run, because the listing produced
    by ``os.walk`` predates the deletions.

    With ``--dry-run`` nothing is deleted and only the size that would be
    reclaimed is reported. With ``--verbose`` every removal is printed.

    Entries that disappear while the script is running (or dangling
    symlinks) are skipped instead of aborting the whole cleanup.
    """
    # Local import: only needed to tell benign races from real errors.
    import errno

    parser = argparse.ArgumentParser()
    parser.add_argument("CACHE_DIR")
    parser.add_argument("-n", "--dry-run", action="store_true")
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument(
        "--max-age",
        help="how old (in days) files should be considered for deletion",
        default=7,
        type=int,
    )
    args = parser.parse_args()

    topdir = os.path.abspath(args.CACHE_DIR)
    # Convert days to seconds so it can be compared against timestamps.
    max_age = args.max_age * 24 * 3600

    cleaned_up = 0
    threshold = time.time() - max_age

    for dirpath, dirnames, filenames in os.walk(topdir):
        for f in filenames:
            filepath = os.path.join(dirpath, f)
            try:
                st = os.stat(filepath)
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise
                # File vanished since the directory was listed, or it is a
                # dangling symlink; there is nothing to clean up here.
                continue

            if not should_be_cleaned_up(filepath, st, threshold):
                continue

            if args.verbose:
                print("RM %s" % filepath)
            if not args.dry_run:
                try:
                    os.remove(filepath)
                except OSError as exc:
                    if exc.errno != errno.ENOENT:
                        raise
                    # Somebody else removed it first; do not count it.
                    continue
            cleaned_up += st.st_size

        # Both lists reflect the state before any deletion above, so this
        # only matches directories that were empty to begin with.
        if not dirnames and not filenames:
            if args.verbose:
                print("RMDIR %s" % dirpath)
            if not args.dry_run:
                try:
                    os.rmdir(dirpath)
                except OSError as exc:
                    # Directory already gone, or populated again in the
                    # meantime (reported as ENOTEMPTY or EEXIST).
                    benign = (errno.ENOENT, errno.ENOTEMPTY, errno.EEXIST)
                    if exc.errno not in benign:
                        raise

    if args.dry_run:
        print("Would reclaim %s bytes." % format_size(cleaned_up))
    else:
        print("Reclaimed %s bytes." % format_size(cleaned_up))

View File

@ -41,6 +41,7 @@ setup(
"pungi-gather = pungi.scripts.pungi_gather:cli_main", "pungi-gather = pungi.scripts.pungi_gather:cli_main",
"pungi-config-dump = pungi.scripts.config_dump:cli_main", "pungi-config-dump = pungi.scripts.config_dump:cli_main",
"pungi-config-validate = pungi.scripts.config_validate:cli_main", "pungi-config-validate = pungi.scripts.config_validate:cli_main",
"pungi-cache-cleanup = pungi.scripts.cache_cleanup:main",
] ]
}, },
scripts=["contrib/yum-dnf-compare/pungi-compare-depsolving"], scripts=["contrib/yum-dnf-compare/pungi-compare-depsolving"],