0d8ad9a111
Set 'hashed_directories = True' config option to enable the feature.
318 lines
11 KiB
Python
318 lines
11 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; version 2 of the License.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Library General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
|
|
import errno
|
|
import os
|
|
import shutil
|
|
|
|
import kobo.log
|
|
from kobo.shortcuts import relative_path
|
|
from kobo.threads import WorkerThread, ThreadPool
|
|
|
|
from pungi.util import makedirs
|
|
|
|
|
|
class LinkerPool(ThreadPool):
    """Thread pool that carries a link type and a single shared ``Linker``.

    The ``link_type`` and ``linker`` attributes set here are what
    ``LinkerThread.process`` reads from ``self.pool``.
    """

    def __init__(self, link_type="hardlink-or-copy", logger=None):
        """Initialise the pool.

        :param link_type: link type stored on the pool as ``link_type``
        :param logger: logger handed to ``ThreadPool.__init__``; note that
            the ``Linker`` below is created with its default arguments and
            does not receive this logger
        """
        ThreadPool.__init__(self, logger)
        # One Linker instance is created per pool and kept on the pool.
        self.linker = Linker()
        self.link_type = link_type
|
|
|
|
|
|
class LinkerThread(WorkerThread):
    """Worker that links one ``(src, dst)`` item using the pool's linker."""

    def process(self, item, num):
        """Link a single queue item.

        :param item: ``(src, dst)`` pair of paths
        :param num: number of this item; compared against the pool's
            ``queue_total`` to decide when to log progress
        """
        src, dst = item
        pool = self.pool

        # Log progress on every 100th item and on the last one.
        progress_due = (num % 100 == 0) or (num == pool.queue_total)
        if progress_due:
            pool.log_debug("Linked %s out of %s packages" % (num, pool.queue_total))

        # The destination's parent directory has to exist before linking.
        makedirs(os.path.dirname(dst))
        pool.linker.link(src, dst, link_type=pool.link_type)
|
|
|
|
|
|
class Linker(kobo.log.LoggingBase):
    """Link or copy files and directory trees.

    The ``link_type`` values understood by ``link`` / ``_link_file`` are
    ``"hardlink"``, ``"copy"``, ``"symlink"``, ``"abspath-symlink"`` and
    ``"hardlink-or-copy"``.

    Internal state:

    * ``_precache`` maps ``(basename, int(mtime), size)`` to a dict with the
      keys ``st_dev``, ``st_ino``, ``st_mtime``, ``st_size`` and ``path``.
      It is filled by ``scan`` / ``_put_into_cache`` and consulted by
      ``hardlink_on_dest``.
    * ``_inode_map`` maps the ``(st_dev, st_ino)`` of a copied source file to
      the destination it was copied to, so that a later source sharing the
      same inode is hardlinked at the destination instead of copied again.
    """

    def __init__(self, ignore_existing=False, always_copy=None, test=False, logger=None):
        """Create a linker.

        :param ignore_existing: when true, ``_is_same`` reports every pair of
            paths as identical, so existing destinations are accepted
        :param always_copy: stored as ``self.always_copy`` (empty list when
            not given); it is not read by any method of this class
        :param test: when true, ``symlink``, ``hardlink`` and ``copy`` only
            log what they would do (prefixed with ``TEST:``)
        :param logger: passed to ``kobo.log.LoggingBase.__init__``
        """
        kobo.log.LoggingBase.__init__(self, logger=logger)
        self.ignore_existing = ignore_existing
        self.always_copy = always_copy or []
        self.test = test
        self._precache = {}
        self._inode_map = {}

    def _is_same_type(self, path1, path2):
        """Return True when both paths agree on islink, isdir and isfile."""
        for check in (os.path.islink, os.path.isdir, os.path.isfile):
            if check(path1) != check(path2):
                return False
        return True

    def _is_same(self, path1, path2):
        """Return True when ``path2`` may be treated as identical to ``path1``.

        The answer is True when ``ignore_existing`` is set, when the paths
        are equal, or when ``path2`` is a dangling symlink. Otherwise the
        files must have the same size and the same mtime truncated to whole
        seconds.
        """
        if self.ignore_existing:
            return True
        if path1 == path2:
            return True
        if os.path.islink(path2) and not os.path.exists(path2):
            # path2 is a symlink whose target does not exist.
            return True
        if os.path.getsize(path1) != os.path.getsize(path2):
            return False
        if int(os.path.getmtime(path1)) != int(os.path.getmtime(path2)):
            return False
        return True

    def symlink(self, src, dst, relative=True):
        """Create symlink ``dst`` pointing to ``src``.

        :param src: link target
        :param dst: path of the symlink to create
        :param relative: when true, the stored target is
            ``relative_path(src, dst)`` instead of ``src`` itself
        :raises OSError: when ``os.symlink`` fails for a reason other than
            ``EEXIST``, or when ``dst`` exists and is not an equivalent
            symlink with the same stored target
        """
        if src == dst:
            return

        old_src = src
        if relative:
            src = relative_path(src, dst)

        msg = "Symlinking %s -> %s" % (dst, src)
        if self.test:
            self.log_info("TEST: %s" % msg)
            return
        self.log_info(msg)

        try:
            os.symlink(src, dst)
        except OSError as ex:
            if ex.errno != errno.EEXIST:
                raise
            # dst already exists: accept it only if it is a symlink that
            # compares as the same file and stores exactly the same target.
            if not (os.path.islink(dst) and self._is_same(old_src, dst)):
                raise
            if os.readlink(dst) != src:
                raise
            self.log_debug("The same file already exists, skipping symlink %s -> %s" % (dst, src))

    def hardlink_on_dest(self, src, dst):
        """Hardlink ``dst`` to a cached file that looks identical to ``src``.

        The cache lookup key is ``(basename, int(mtime), size)`` of ``src``.

        :returns: True when a cached file was found and ``hardlink`` was
            called with it, False otherwise (including ``src == dst`` and a
            non-existent ``src``)
        """
        if src == dst:
            # Previously a bare ``return`` (None); False keeps the same
            # truthiness for callers while making the result always a bool.
            return False

        if os.path.exists(src):
            st = os.stat(src)
            file_name = os.path.basename(src)
            precache_key = (file_name, int(st.st_mtime), st.st_size)
            if precache_key in self._precache:
                self.log_warning("HIT %s" % str(precache_key))
                cached_path = self._precache[precache_key]["path"]
                self.hardlink(cached_path, dst)
                return True
        return False

    def hardlink(self, src, dst):
        """Create hardlink ``dst`` to ``src``.

        An already existing ``dst`` is tolerated when ``_is_same`` and
        ``_is_same_type`` both accept it.

        :raises OSError: when ``os.link`` fails for a reason other than
            ``EEXIST``, or when the existing ``dst`` differs from ``src``
        """
        if src == dst:
            return

        msg = "Hardlinking %s to %s" % (src, dst)
        if self.test:
            self.log_info("TEST: %s" % msg)
            return
        self.log_info(msg)

        try:
            os.link(src, dst)
        except OSError as ex:
            if ex.errno != errno.EEXIST:
                raise
            if not self._is_same(src, dst):
                raise
            if not self._is_same_type(src, dst):
                self.log_error("File %s already exists but has different type than %s" % (dst, src))
                raise
            self.log_debug("The same file already exists, skipping hardlink %s to %s" % (src, dst))

    def copy(self, src, dst):
        """Copy ``src`` to ``dst``.

        A symlink source is recreated as a symlink with the same stored
        target. A regular file is copied with ``shutil.copy2``; if a file
        with the same ``(st_dev, st_ino)`` was copied earlier by this
        instance, ``dst`` is hardlinked to that earlier copy instead.

        :returns: True when ``src == dst`` (kept for backward compatibility),
            None in every other case
        :raises OSError: ``EEXIST`` when ``dst`` exists and differs from
            ``src`` in content check or type, or when the copied file does
            not compare as the same file afterwards
        """
        if src == dst:
            return True

        if os.path.islink(src):
            msg = "Copying symlink %s to %s" % (src, dst)
        else:
            msg = "Copying file %s to %s" % (src, dst)

        if self.test:
            self.log_info("TEST: %s" % msg)
            return
        self.log_info(msg)

        if os.path.exists(dst):
            if not self._is_same(src, dst):
                raise OSError(errno.EEXIST, "File exists")
            if not self._is_same_type(src, dst):
                self.log_error("File %s already exists but has different type than %s" % (dst, src))
                raise OSError(errno.EEXIST, "File exists")
            self.log_debug("The same file already exists, skipping copy %s to %s" % (src, dst))
            return

        if os.path.islink(src):
            # os.path.exists() is False for a dangling symlink, so dst may
            # still be a symlink here; in that case it is left untouched.
            if not os.path.islink(dst):
                os.symlink(os.readlink(src), dst)
            return

        src_stat = os.stat(src)
        src_key = (src_stat.st_dev, src_stat.st_ino)
        if src_key in self._inode_map:
            # (st_dev, st_ino) found in the mapping
            self.log_debug("Harlink detected, hardlinking in destination %s to %s" % (self._inode_map[src_key], dst))
            os.link(self._inode_map[src_key], dst)
            return

        # BEWARE: shutil.copy2 automatically *rewrites* existing files
        shutil.copy2(src, dst)
        self._inode_map[src_key] = dst

        if not self._is_same(src, dst):
            self.log_error("File %s doesn't match the copied file %s" % (src, dst))
            # XXX:
            raise OSError(errno.EEXIST, "File exists")

    def _put_into_cache(self, path):
        """Record ``path`` in ``_precache``.

        :returns: False when an entry with the same key already exists but
            has a different device or inode (the old entry is kept); True
            when the entry was stored
        """
        stat_fields = ("st_dev", "st_ino", "st_mtime", "st_size")

        def get_stats(entry):
            return [entry[field] for field in stat_fields]

        filename = os.path.basename(path)
        st = os.stat(path)
        item = {
            "st_dev": st.st_dev,
            "st_ino": st.st_ino,
            "st_mtime": int(st.st_mtime),
            "st_size": st.st_size,
            "path": path,
        }
        precache_key = (filename, int(st.st_mtime), st.st_size)
        if precache_key in self._precache:
            cached = self._precache[precache_key]
            if get_stats(cached) != get_stats(item):
                # Files have same mtime and size but device
                # or/and inode is/are different.
                self.log_debug("Caching failed, files are different: %s, %s"
                               % (path, cached["path"]))
                return False
        self._precache[precache_key] = item
        return True

    def scan(self, path):
        """Recursively scan a directory and populate the cache."""
        msg = "Scanning directory: %s" % path
        self.log_debug("[BEGIN] %s" % msg)
        for dirpath, _, filenames in os.walk(path):
            for filename in filenames:
                # A separate name is used so the ``path`` argument is not
                # rebound inside the loop.
                file_path = os.path.join(dirpath, filename)
                self._put_into_cache(file_path)
        self.log_debug("[DONE ] %s" % msg)

    def _link_file(self, src, dst, link_type):
        """Link the single file ``src`` to ``dst`` according to ``link_type``.

        :raises ValueError: for an unknown ``link_type``
        """
        if link_type == "hardlink":
            if not self.hardlink_on_dest(src, dst):
                self.hardlink(src, dst)
        elif link_type == "copy":
            self.copy(src, dst)
        elif link_type in ("symlink", "abspath-symlink"):
            if os.path.islink(src):
                # A symlink source is copied (recreated), not symlinked to.
                self.copy(src, dst)
            else:
                relative = link_type != "abspath-symlink"
                self.symlink(src, dst, relative)
        elif link_type == "hardlink-or-copy":
            if not self.hardlink_on_dest(src, dst):
                src_stat = os.stat(src)
                dst_stat = os.stat(os.path.dirname(dst))
                # Hardlinks cannot cross devices; copy in that case.
                if src_stat.st_dev == dst_stat.st_dev:
                    self.hardlink(src, dst)
                else:
                    self.copy(src, dst)
        else:
            raise ValueError("Unknown link_type: %s" % link_type)

    def link(self, src, dst, link_type="hardlink-or-copy", scan=True):
        """Link directories recursively.

        A file or symlink ``src`` is handed to ``_link_file``. For a
        directory, ``dst`` is created when missing, the stat information of
        ``src`` is copied onto it (both skipped in test mode) and every
        entry of ``src`` is linked recursively.

        :param src: source file, symlink or directory
        :param dst: destination path
        :param link_type: see ``_link_file``
        :param scan: accepted for backward compatibility only; it has no
            effect (the code that used it was unreachable and was removed)
        :raises OSError: ``EEXIST`` when ``src`` is a directory and ``dst``
            is an existing regular file
        """
        if os.path.isfile(src) or os.path.islink(src):
            self._link_file(src, dst, link_type)
            return

        if os.path.isfile(dst):
            raise OSError(errno.EEXIST, "File exists")

        if not self.test:
            if not os.path.exists(dst):
                makedirs(dst)
            shutil.copystat(src, dst)

        for name in os.listdir(src):
            src_path = os.path.join(src, name)
            dst_path = os.path.join(dst, name)
            self.link(src_path, dst_path, link_type)
|