Don't read the entire file at once. This will run machines out of memory.

Also, always use binary mode on the files, or else our hashes will be odd.
Jesse Keating 2008-07-15 18:10:19 -04:00
parent 74e014707a
commit 116a0df898
1 changed file with 14 additions and 11 deletions

@@ -88,28 +88,31 @@ def _ensuredir(target, logger, force=False, clean=False):
         sys.stderr(message)
         sys.exit(1)
 
-def _doCheckSum(path, hash, logger, binary=True):
+def _doCheckSum(path, hash, logger):
     """Generate a checksum hash from a provided path.
-       Set binary to false if the file is not binary.
        Return the hash"""
 
     # Try to figure out what hash we want to do
     try:
-        func = getattr(hashlib, hash)
-    except AttributeError:
+        sum = hashlib.new(hash)
+    except ValueError:
         logger.error("Invalid hash type: %s" % hash)
         return False
 
-    if binary:
-        flags = 'rb'
-    else:
-        flags = 'r'
-
     # Try to open the file, using binary flag.
     try:
-        myfile = open(path, flags)
+        myfile = open(path, 'rb')
     except IOError, e:
         logger.error("Could not open file %s: %s" % (path, e))
         return False
 
-    sum = func(myfile.read()).hexdigest()
+    # Loop through the file reading chunks at a time as to not
+    # put the entire file in memory.  That would suck for DVDs
+    while True:
+        chunk = myfile.read(8192) # magic number!  Taking suggestions for better blocksize
+        if not chunk:
+            break # we're done with the file
+        sum.update(chunk)
+    myfile.close()
 
-    return sum
+    return sum.hexdigest()
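
For reference, here is a small standalone Python 3 sketch of the chunked-checksum pattern this commit introduces. It is not the committed pungi code: the 8192-byte block and the hashlib.new()/update()/hexdigest() flow mirror the diff above, while the function name, the None return on error, and the with-statement are illustrative modernizations.

import hashlib

def checksum_file(path, hash_name, block_size=8192):
    """Hash a file in fixed-size chunks so the whole file never sits in memory."""
    try:
        digest = hashlib.new(hash_name)  # raises ValueError for unknown hash names
    except ValueError:
        return None
    # Binary mode ('rb') avoids newline translation that would change the hash.
    with open(path, 'rb') as f:
        while True:
            chunk = f.read(block_size)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()

The 8192-byte read is the "magic number" the in-line comment questions; any reasonably sized block works, larger blocks simply trade a little memory for fewer read calls.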