pungi/pungi/linker.py

316 lines
11 KiB
Python

# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import errno
import os
import shutil
import kobo.log
from kobo.shortcuts import relative_path
from kobo.threads import WorkerThread, ThreadPool
from pungi.util import makedirs
class LinkerPool(ThreadPool):
def __init__(self, link_type="hardlink-or-copy", logger=None):
ThreadPool.__init__(self, logger)
self.link_type = link_type
self.linker = Linker()
class LinkerThread(WorkerThread):
def process(self, item, num):
src, dst = item
if (num % 100 == 0) or (num == self.pool.queue_total):
self.pool.log_debug("Linked %s out of %s packages" % (num, self.pool.queue_total))
self.pool.linker.link(src, dst, link_type=self.pool.link_type)
class Linker(kobo.log.LoggingBase):
def __init__(self, ignore_existing=False, always_copy=None, test=False, logger=None):
kobo.log.LoggingBase.__init__(self, logger=logger)
self.ignore_existing = ignore_existing
self.always_copy = always_copy or []
self.test = test
self._precache = {}
self._inode_map = {}
def _is_same_type(self, path1, path2):
if not os.path.islink(path1) == os.path.islink(path2):
return False
if not os.path.isdir(path1) == os.path.isdir(path2):
return False
if not os.path.isfile(path1) == os.path.isfile(path2):
return False
return True
def _is_same(self, path1, path2):
if self.ignore_existing:
return True
if path1 == path2:
return True
if os.path.islink(path2) and not os.path.exists(path2):
return True
if os.path.getsize(path1) != os.path.getsize(path2):
return False
if int(os.path.getmtime(path1)) != int(os.path.getmtime(path2)):
return False
return True
def symlink(self, src, dst, relative=True):
if src == dst:
return
old_src = src
if relative:
src = relative_path(src, dst)
msg = "Symlinking %s -> %s" % (dst, src)
if self.test:
self.log_info("TEST: %s" % msg)
return
self.log_info(msg)
try:
os.symlink(src, dst)
except OSError as ex:
if ex.errno != errno.EEXIST:
raise
if os.path.islink(dst) and self._is_same(old_src, dst):
if os.readlink(dst) != src:
raise
self.log_debug("The same file already exists, skipping symlink %s -> %s" % (dst, src))
else:
raise
def hardlink_on_dest(self, src, dst):
if src == dst:
return
if os.path.exists(src):
st = os.stat(src)
file_name = os.path.basename(src)
precache_key = (file_name, int(st.st_mtime), st.st_size)
if precache_key in self._precache:
self.log_warning("HIT %s" % str(precache_key))
cached_path = self._precache[precache_key]["path"]
self.hardlink(cached_path, dst)
return True
return False
def hardlink(self, src, dst):
if src == dst:
return
msg = "Hardlinking %s to %s" % (src, dst)
if self.test:
self.log_info("TEST: %s" % msg)
return
self.log_info(msg)
try:
os.link(src, dst)
except OSError as ex:
if ex.errno != errno.EEXIST:
raise
if self._is_same(src, dst):
if not self._is_same_type(src, dst):
self.log_error("File %s already exists but has different type than %s" % (dst, src))
raise
self.log_debug("The same file already exists, skipping hardlink %s to %s" % (src, dst))
else:
raise
def copy(self, src, dst):
if src == dst:
return True
if os.path.islink(src):
msg = "Copying symlink %s to %s" % (src, dst)
else:
msg = "Copying file %s to %s" % (src, dst)
if self.test:
self.log_info("TEST: %s" % msg)
return
self.log_info(msg)
if os.path.exists(dst):
if self._is_same(src, dst):
if not self._is_same_type(src, dst):
self.log_error("File %s already exists but has different type than %s" % (dst, src))
raise OSError(errno.EEXIST, "File exists")
self.log_debug("The same file already exists, skipping copy %s to %s" % (src, dst))
return
else:
raise OSError(errno.EEXIST, "File exists")
if os.path.islink(src):
if not os.path.islink(dst):
os.symlink(os.readlink(src), dst)
return
return
src_stat = os.stat(src)
src_key = (src_stat.st_dev, src_stat.st_ino)
if src_key in self._inode_map:
# (st_dev, st_ino) found in the mapping
self.log_debug("Harlink detected, hardlinking in destination %s to %s" % (self._inode_map[src_key], dst))
os.link(self._inode_map[src_key], dst)
return
# BEWARE: shutil.copy2 automatically *rewrites* existing files
shutil.copy2(src, dst)
self._inode_map[src_key] = dst
if not self._is_same(src, dst):
self.log_error("File %s doesn't match the copied file %s" % (src, dst))
# XXX:
raise OSError(errno.EEXIST, "File exists")
def _put_into_cache(self, path):
def get_stats(item):
return [item[i] for i in ("st_dev", "st_ino", "st_mtime", "st_size")]
filename = os.path.basename(path)
st = os.stat(path)
item = {
"st_dev": st.st_dev,
"st_ino": st.st_ino,
"st_mtime": int(st.st_mtime),
"st_size": st.st_size,
"path": path,
}
precache_key = (filename, int(st.st_mtime), st.st_size)
if precache_key in self._precache:
if get_stats(self._precache[precache_key]) != get_stats(item):
# Files have same mtime and size but device
# or/and inode is/are different.
self.log_debug("Caching failed, files are different: %s, %s"
% (path, self._precache[precache_key]["path"]))
return False
self._precache[precache_key] = item
return True
def scan(self, path):
"""Recursively scan a directory and populate the cache."""
msg = "Scanning directory: %s" % path
self.log_debug("[BEGIN] %s" % msg)
for dirpath, _, filenames in os.walk(path):
for filename in filenames:
path = os.path.join(dirpath, filename)
self._put_into_cache(path)
self.log_debug("[DONE ] %s" % msg)
def _link_file(self, src, dst, link_type):
if link_type == "hardlink":
if not self.hardlink_on_dest(src, dst):
self.hardlink(src, dst)
elif link_type == "copy":
self.copy(src, dst)
elif link_type in ("symlink", "abspath-symlink"):
if os.path.islink(src):
self.copy(src, dst)
else:
relative = link_type != "abspath-symlink"
self.symlink(src, dst, relative)
elif link_type == "hardlink-or-copy":
if not self.hardlink_on_dest(src, dst):
src_stat = os.stat(src)
dst_stat = os.stat(os.path.dirname(dst))
if src_stat.st_dev == dst_stat.st_dev:
self.hardlink(src, dst)
else:
self.copy(src, dst)
else:
raise ValueError("Unknown link_type: %s" % link_type)
def link(self, src, dst, link_type="hardlink-or-copy", scan=True):
"""Link directories recursively."""
if os.path.isfile(src) or os.path.islink(src):
self._link_file(src, dst, link_type)
return
if os.path.isfile(dst):
raise OSError(errno.EEXIST, "File exists")
if not self.test:
if not os.path.exists(dst):
makedirs(dst)
shutil.copystat(src, dst)
for i in os.listdir(src):
src_path = os.path.join(src, i)
dst_path = os.path.join(dst, i)
self.link(src_path, dst_path, link_type)
return
if scan:
self.scan(dst)
self.log_debug("Start linking")
src = os.path.abspath(src)
for dirpath, dirnames, filenames in os.walk(src):
rel_path = dirpath[len(src):].lstrip("/")
dst_path = os.path.join(dst, rel_path)
# Dir check and creation
if not os.path.isdir(dst_path):
if os.path.exists(dst_path):
# At destination there is a file with same name but
# it is not a directory.
self.log_error("Cannot create directory %s" % dst_path)
dirnames = [] # noqa
continue
os.mkdir(dst_path)
# Process all files in directory
for filename in filenames:
path = os.path.join(dirpath, filename)
st = os.stat(path)
# Check cache
# Same file already exists at a destination dir =>
# Create the new file by hardlink to the cached one.
precache_key = (filename, int(st.st_mtime), st.st_size)
full_dst_path = os.path.join(dst_path, filename)
if precache_key in self._precache:
# Cache hit
cached_path = self._precache[precache_key]["path"]
self.log_debug("Cache HIT for %s [%s]" % (path, cached_path))
if cached_path != full_dst_path:
self.hardlink(cached_path, full_dst_path)
else:
self.log_debug("Files are same, skip hardlinking")
continue
# Cache miss
# Copy the new file and put it to the cache.
try:
self.copy(path, full_dst_path)
except Exception as ex:
print(ex)
print(path, open(path, "r").read())
print(full_dst_path, open(full_dst_path, "r").read())
print(os.stat(path))
print(os.stat(full_dst_path))
os.utime(full_dst_path, (st.st_atime, int(st.st_mtime)))
self._put_into_cache(full_dst_path)