Print excessive files in the initrd diff

This commit is contained in:
Martin Gracik 2010-11-26 15:46:57 +01:00
parent d1653ce89e
commit 6bd104c627

View File

@@ -2,6 +2,8 @@ import sys
import os import os
import magic import magic
import difflib import difflib
import yum
import operator
def main(args): def main(args):
@@ -9,9 +11,7 @@ def main(args):
sourcedir, targetdir = args[1], args[2] sourcedir, targetdir = args[1], args[2]
except IndexError: except IndexError:
print("invalid argument count") print("invalid argument count")
print("usage: python {0} sourcetree targettree > output.diff".format( print("usage: python {0} sourcetree targettree".format(args[0]))
args[0]))
sys.exit(2) sys.exit(2)
if sourcedir.endswith("/"): if sourcedir.endswith("/"):
@@ -19,57 +19,93 @@ def main(args):
if targetdir.endswith("/"): if targetdir.endswith("/"):
targetdir = targetdir[:-1] targetdir = targetdir[:-1]
sourcetree = {} # parse sourcedir and targetdir
for root, dnames, fnames in os.walk(sourcedir): sourcetree, targettree = {}, {}
for fname in fnames: for tree, dir in [[sourcetree, sourcedir], [targettree, targetdir]]:
fpath = os.path.join(root, fname) for root, dnames, fnames in os.walk(dir):
rpath = fpath.replace(sourcedir, "", 1) for fname in fnames:
sourcetree[rpath] = fpath fpath = os.path.join(root, fname)
rpath = fpath.replace(dir, "", 1)
tree[rpath] = fpath
# set up magic
m = magic.open(magic.MAGIC_NONE) m = magic.open(magic.MAGIC_NONE)
m.load() m.load()
for root, dnames, fnames in os.walk(targetdir): # get files missing in source
for fname in fnames: sys.stderr.write("getting files missing in source\n")
fpath = os.path.join(root, fname) for rpath, fpath in targettree.items():
rpath = fpath.replace(targetdir, "", 1) targetfile = fpath
try:
sourcefile = sourcetree[rpath]
except KeyError:
sys.stdout.write('Missing: %s\n' % rpath)
continue
sys.stderr.write('processing "%s"\n' % rpath) # skip broken links
if os.path.islink(targetfile) and not os.path.exists(targetfile):
continue
targetfile = fpath # check stat
try: #sourcemode = os.stat(sourcefile).st_mode
sourcefile = sourcetree[rpath] #targetmode = os.stat(targetfile).st_mode
except KeyError: #if sourcemode != targetmode:
sys.stdout.write('Missing: %s\n' % rpath) # sys.stdout.write('Stat differ: %s\n' % rpath)
continue
# skip broken links # diff only text files
if os.path.islink(targetfile) and not os.path.exists(targetfile): ftype = m.file(fpath)
continue if ftype not in ["ASCII text"]:
continue
# check stat with open(targetfile, "r") as fobj:
#sourcemode = os.stat(sourcefile).st_mode target = fobj.readlines()
#targetmode = os.stat(targetfile).st_mode with open(sourcefile) as fobj:
#if sourcemode != targetmode: source = fobj.readlines()
# sys.stdout.write('Stat differ: %s\n' % rpath)
ftype = m.file(fpath) # do the file diff
for line in difflib.unified_diff(source, target,
fromfile=sourcefile,
tofile=targetfile):
# diff only text files sys.stdout.write(line)
if ftype not in ["ASCII text"]:
continue
with open(targetfile, "r") as fobj: # set up yum
target = fobj.readlines() yb = yum.YumBase()
with open(sourcefile) as fobj: yb.doSackSetup()
source = fobj.readlines()
# do a file diff # get excessive files in source
for line in difflib.unified_diff(source, target, sys.stderr.write("getting excessive files in source\n")
fromfile=sourcefile, sizedict, pkgdict = {}, {}
tofile=targetfile): for rpath, fpath in sourcetree.items():
# if file in target, skip it
if rpath in targettree:
continue
sys.stdout.write(line) # get file size
try:
sizeinbytes = os.path.getsize(fpath)
except OSError:
sizeinbytes = 0
# set link size to 0
islink = os.path.islink(fpath)
if islink:
sizeinbytes = 0
pkglist = yb.whatProvides(rpath, None, None)
pkglist = set(map(lambda pkgobj: pkgobj.name, pkglist))
for pkg in pkglist:
sizedict[pkg] = sizedict.get(pkg, 0) + sizeinbytes
pkgdict[pkg] = pkgdict.get(pkg, []) + \
[(rpath, sizeinbytes, islink)]
# sort by size
for pkg, size in sorted(sizedict.items(), key=operator.itemgetter(1),
reverse=True):
for item in sorted(pkgdict[pkg]):
sys.stdout.write("%s\t%s\n" % (pkg, item))
if __name__ == "__main__": if __name__ == "__main__":