From 116a0df8987406e373dcfa02a7a5762149529b68 Mon Sep 17 00:00:00 2001
From: Jesse Keating
Date: Tue, 15 Jul 2008 18:10:19 -0400
Subject: [PATCH] Don't read the entire file at once. This will run machines
 out of memory. Also, always use binary mode on the files, or else our
 hashes will be odd.

---
 src/pypungi/util.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/pypungi/util.py b/src/pypungi/util.py
index 4492d576..d566326c 100644
--- a/src/pypungi/util.py
+++ b/src/pypungi/util.py
@@ -88,28 +88,31 @@ def _ensuredir(target, logger, force=False, clean=False):
             sys.stderr(message)
             sys.exit(1)
 
-def _doCheckSum(path, hash, logger, binary=True):
+def _doCheckSum(path, hash, logger):
     """Generate a checksum hash from a provided path.
-       Set binary to false if the file is not binary.
        Return the hash"""
 
+    # Try to figure out what hash we want to do
     try:
-        func = getattr(hashlib, hash)
-    except AttributeError:
+        sum = hashlib.new(hash)
+    except ValueError:
         logger.error("Invalid hash type: %s" % hash)
         return False
 
-    if binary:
-        flags = 'rb'
-    else:
-        flags = 'r'
-
+    # Try to open the file, using binary flag.
     try:
         myfile = open(path, 'rb')
     except IOError, e:
         logger.error("Could not open file %s: %s" % (path, e))
         return False
 
-    sum = func(myfile.read()).hexdigest()
+    # Loop through the file reading chunks at a time as to not
+    # put the entire file in memory.  That would suck for DVDs
+    while True:
+        chunk = myfile.read(8192) # magic number!  Taking suggestions for better blocksize
+        if not chunk:
+            break # we're done with the file
+        sum.update(chunk)
+
     myfile.close()
-    return sum
+    return sum.hexdigest()
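
Reviewer note (not part of the patch): below is a minimal standalone sketch of
the chunked-hash pattern the diff introduces, for anyone who wants the new
logic in isolation. The checksum() wrapper name, the blocksize parameter, and
the with-statement are illustrative assumptions only; the patch itself keeps
the existing Python 2 idioms of util.py.

    import hashlib

    def checksum(path, hash_name, blocksize=8192):
        """Hash a file in fixed-size chunks so the whole file never
        sits in memory at once (which matters for DVD-sized images)."""
        # hashlib.new() raises ValueError on an unknown algorithm name,
        # which is why the patch traps ValueError rather than AttributeError.
        digest = hashlib.new(hash_name)
        with open(path, 'rb') as f:  # binary mode keeps the digest stable
            while True:
                chunk = f.read(blocksize)
                if not chunk:  # an empty read means end of file
                    break
                digest.update(chunk)
        return digest.hexdigest()

The memory behavior is the point: hashing a 4 GB ISO this way holds at most
one blocksize-worth of data at a time instead of the whole image.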