From 116a0df8987406e373dcfa02a7a5762149529b68 Mon Sep 17 00:00:00 2001
From: Jesse Keating
Date: Tue, 15 Jul 2008 18:10:19 -0400
Subject: [PATCH] Don't read the entire file at once. This will run machines
 out of memory. Also, always use binary mode on the files, or else our
 hashes will be odd.

---
 src/pypungi/util.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/pypungi/util.py b/src/pypungi/util.py
index 4492d576..d566326c 100644
--- a/src/pypungi/util.py
+++ b/src/pypungi/util.py
@@ -88,28 +88,31 @@ def _ensuredir(target, logger, force=False, clean=False):
             sys.stderr(message)
             sys.exit(1)
 
-def _doCheckSum(path, hash, logger, binary=True):
+def _doCheckSum(path, hash, logger):
     """Generate a checksum hash from a provided path.
-       Set binary to false if the file is not binary.
        Return the hash"""
 
+    # Try to figure out what hash we want to do
     try:
-        func = getattr(hashlib, hash)
-    except AttributeError:
+        sum = hashlib.new(hash)
+    except ValueError:
         logger.error("Invalid hash type: %s" % hash)
         return False
 
-    if binary:
-        flags = 'rb'
-    else:
-        flags = 'r'
-
+    # Try to open the file, using binary flag.
     try:
         myfile = open(path, 'rb')
     except IOError, e:
         logger.error("Could not open file %s: %s" % (path, e))
         return False
 
-    sum = func(myfile.read()).hexdigest()
+    # Loop through the file reading chunks at a time as to not
+    # put the entire file in memory.  That would suck for DVDs
+    while True:
+        chunk = myfile.read(8192) # magic number!  Taking suggestions for better blocksize
+        if not chunk:
+            break # we're done with the file
+        sum.update(chunk)
+
     myfile.close()
-    return sum
+    return sum.hexdigest()
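
Reviewer note (not part of the patch): below is a minimal standalone sketch of
the chunked-hash pattern the diff introduces, for anyone who wants the new
logic in isolation. The checksum() wrapper name, the blocksize parameter, and
the with-statement are illustrative assumptions only; the patch itself keeps
the existing Python 2 idioms of util.py.

    import hashlib

    def checksum(path, hash_name, blocksize=8192):
        """Hash a file in fixed-size chunks so the whole file never
        sits in memory at once (which matters for DVD-sized images)."""
        # hashlib.new() raises ValueError on an unknown algorithm name,
        # which is why the patch traps ValueError rather than AttributeError.
        digest = hashlib.new(hash_name)
        with open(path, 'rb') as f:  # binary mode keeps the digest stable
            while True:
                chunk = f.read(blocksize)
                if not chunk:  # an empty read means end of file
                    break
                digest.update(chunk)
        return digest.hexdigest()

The memory behavior is the point: hashing a 4 GB ISO this way holds at most
one blocksize-worth of data at a time instead of the whole image.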