createrepo_c/SOURCES/0002-Add-zstd-compression-support.patch

493 lines
20 KiB
Diff

From 126a79f7e313090c0bb09993f7bace43a7d05e7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ale=C5=A1=20Mat=C4=9Bj?= <amatej@redhat.com>
Date: Tue, 17 Aug 2021 14:27:47 +0200
Subject: [PATCH 1/4] Add zstd compression support
---
CMakeLists.txt | 9 ++
README.md | 1 +
createrepo_c.spec | 9 +-
doc/createrepo_c.8 | 2 +-
src/CMakeLists.txt | 1 +
src/cmd_parser.c | 6 +-
src/compression_wrapper.c | 192 ++++++++++++++++++++++++++++
src/compression_wrapper.h | 1 +
src/error.h | 2 +
src/python/createrepo_c/__init__.py | 3 +
src/python/createrepo_cmodule.c | 1 +
11 files changed, 223 insertions(+), 4 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b016960..40d43b6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,8 @@ if(NOT BUILD_LIBCREATEREPO_C_SHARED)
set(CMAKE_POSITION_INDEPENDENT_CODE 1)
endif()
+option(WITH_ZSTD "Build with zstd support" ON)
+
option(CREATEREPO_C_INSTALL_DEVELOPMENT "Install createrepo_c development files." ON)
option(CREATEREPO_C_INSTALL_MANPAGES "Install createrepo_c man-pages." ON)
@@ -100,6 +102,13 @@ IF (WITH_LIBMODULEMD)
SET (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DWITH_LIBMODULEMD")
ENDIF (WITH_LIBMODULEMD)
+# Optional zstd support. libzstd is located through pkg-config and is
+# mandatory once WITH_ZSTD is enabled (REQUIRED aborts configuration if the
+# module cannot be found).
+if (WITH_ZSTD)
+ pkg_check_modules(ZSTD REQUIRED libzstd)
+ include_directories(${ZSTD_INCLUDE_DIRS})
+ # -DWITH_ZSTD enables the "#ifdef WITH_ZSTD" code paths in the C sources.
+ SET (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWITH_ZSTD")
+ SET (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DWITH_ZSTD")
+endif()
+
# Threaded XZ Compression
# Note: This option is disabled by default, because Createrepo_c
# parallelize a lot of tasks (including compression) by default, this
diff --git a/README.md b/README.md
index de2cd01..95b03ce 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ Package build requires - Pkg name in Fedora/Ubuntu:
* xz (http://tukaani.org/xz/) - xz-devel/liblzma-dev
* zchunk (https://github.com/zchunk/zchunk) - zchunk-devel/
* zlib (http://www.zlib.net/) - zlib-devel/zlib1g-dev
+* libzstd (http://facebook.github.io/zstd/) - libzstd-devel/libzstd-dev
* *Documentation:* doxygen (http://doxygen.org/) - doxygen/doxygen
* *Documentation:* sphinx (http://sphinx-doc.org/) - python3-sphinx/python3-sphinx
* **Test requires:** check (http://check.sourceforge.net/) - check-devel/check
diff --git a/createrepo_c.spec b/createrepo_c.spec
index 0105a71..afa45f6 100644
--- a/createrepo_c.spec
+++ b/createrepo_c.spec
@@ -18,8 +18,11 @@
%if 0%{?rhel} && 0%{?rhel} < 8
%bcond_with libmodulemd
+# dnf supports zstd since 8.4: https://bugzilla.redhat.com/show_bug.cgi?id=1914876
+%bcond_with zstd
%else
%bcond_without libmodulemd
+%bcond_without zstd
%endif
%if 0%{?rhel} && 0%{?rhel} <= 8
@@ -65,6 +68,9 @@ Requires: rpm >= 4.9.0
%if %{with drpm}
BuildRequires: drpm-devel >= 0.4.0
%endif
+%if %{with zstd}
+BuildRequires: pkgconfig(libzstd)
+%endif
%if 0%{?fedora} || 0%{?rhel} > 7
Obsoletes: createrepo < 0.11.0
@@ -114,7 +120,8 @@ pushd build-py3
-DWITH_ZCHUNK=%{?with_zchunk:ON}%{!?with_zchunk:OFF} \
-DWITH_LIBMODULEMD=%{?with_libmodulemd:ON}%{!?with_libmodulemd:OFF} \
-DWITH_LEGACY_HASHES=%{?with_legacy_hashes:ON}%{!?with_legacy_hashes:OFF} \
- -DENABLE_DRPM=%{?with_drpm:ON}%{!?with_drpm:OFF}
+ -DENABLE_DRPM=%{?with_drpm:ON}%{!?with_drpm:OFF} \
+ -DWITH_ZSTD=%{?with_zstd:ON}%{!?with_zstd:OFF}
make %{?_smp_mflags} RPM_OPT_FLAGS="%{optflags}"
# Build C documentation
make doc-c
diff --git a/doc/createrepo_c.8 b/doc/createrepo_c.8
index 86d0dc7..bf9862b 100644
--- a/doc/createrepo_c.8
+++ b/doc/createrepo_c.8
@@ -161,7 +161,7 @@ Number of workers to spawn to read rpms.
Use xz for repodata compression.
.SS \-\-compress\-type COMPRESSION_TYPE
.sp
-Which compression type to use.
+Which compression type to use. Supported compressions are: bzip2, gzip, zck, zstd, xz.
.SS \-\-general\-compress\-type COMPRESSION_TYPE
.sp
Which compression type to use (even for primary, filelists and other xml).
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d118750..1689d5d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -80,6 +80,7 @@ TARGET_LINK_LIBRARIES(libcreaterepo_c ${SQLITE3_LIBRARIES})
TARGET_LINK_LIBRARIES(libcreaterepo_c ${ZLIB_LIBRARY})
TARGET_LINK_LIBRARIES(libcreaterepo_c ${ZCK_LIBRARIES})
TARGET_LINK_LIBRARIES(libcreaterepo_c ${DRPM_LIBRARIES})
+TARGET_LINK_LIBRARIES(libcreaterepo_c ${ZSTD_LIBRARIES})
SET_TARGET_PROPERTIES(libcreaterepo_c PROPERTIES
OUTPUT_NAME "createrepo_c"
diff --git a/src/cmd_parser.c b/src/cmd_parser.c
index 0e79b40..f8b027e 100644
--- a/src/cmd_parser.c
+++ b/src/cmd_parser.c
@@ -158,9 +158,9 @@ static GOptionEntry cmd_entries[] =
{ "xz", 0, 0, G_OPTION_ARG_NONE, &(_cmd_options.xz_compression),
"Use xz for repodata compression.", NULL },
{ "compress-type", 0, 0, G_OPTION_ARG_STRING, &(_cmd_options.compress_type),
- "Which compression type to use.", "COMPRESSION_TYPE" },
+ "Which compression type to use for additional metadata files (comps, updateinfo, etc). Supported compressions are: bzip2, gzip, zck, zstd, xz.", "COMPRESSION_TYPE" },
{ "general-compress-type", 0, 0, G_OPTION_ARG_STRING, &(_cmd_options.general_compress_type),
- "Which compression type to use (even for primary, filelists and other xml).",
+ "Which compression type to use (even for primary, filelists and other xml). Supported compressions are: bzip2, gzip, zck, zstd, xz.",
"COMPRESSION_TYPE" },
#ifdef WITH_ZCHUNK
{ "zck", 0, 0, G_OPTION_ARG_NONE, &(_cmd_options.zck_compression),
@@ -284,6 +284,8 @@ check_and_set_compression_type(const char *type_str,
*type = CR_CW_BZ2_COMPRESSION;
} else if (!strcmp(compress_str->str, "xz")) {
*type = CR_CW_XZ_COMPRESSION;
+ } else if (!strcmp(compress_str->str, "zstd")) {
+ *type = CR_CW_ZSTD_COMPRESSION;
} else {
g_set_error(err, ERR_DOMAIN, CRE_BADARG,
"Unknown/Unsupported compression type \"%s\"", type_str);
diff --git a/src/compression_wrapper.c b/src/compression_wrapper.c
index b23c345..9100222 100644
--- a/src/compression_wrapper.c
+++ b/src/compression_wrapper.c
@@ -35,6 +35,9 @@
#endif // WITH_ZCHUNK
#include "error.h"
#include "compression_wrapper.h"
+#ifdef WITH_ZSTD
+#include <zstd.h>
+#endif
#define ERR_DOMAIN CREATEREPO_C_ERROR
@@ -118,6 +121,17 @@ typedef struct {
unsigned char buffer[XZ_BUFFER_SIZE];
} XzFile;
+#ifdef WITH_ZSTD
+// Compression level handed to ZSTD_CCtx_setParameter() when a file is opened for writing.
+#define CR_CW_ZSTD_COMPRESSION_LEVEL 9
+// Per-file state of one zstd stream; a pointer to it is stored in CR_FILE->FILE.
+typedef struct {
+ // Staging buffer for compressed bytes (compressor output when writing,
+ // raw file input when reading).
+ void *buffer;
+ // Size of buffer: ZSTD_CStreamOutSize() when writing, ZSTD_DStreamInSize() when reading.
+ size_t buffer_size;
+ // Read mode: cursor over the compressed data currently held in buffer.
+ ZSTD_inBuffer zib;
+ // Write mode: cursor over buffer while it is filled with compressed output.
+ ZSTD_outBuffer zob;
+ // ZSTD_CCtx * in write mode, ZSTD_DCtx * in read mode.
+ void * context; //ZSTD_{C,D}Ctx
+} ZstdFile;
+#endif
+
cr_CompressionType
cr_detect_compression(const char *filename, GError **err)
{
@@ -151,6 +165,9 @@ cr_detect_compression(const char *filename, GError **err)
} else if (g_str_has_suffix(filename, ".zck"))
{
return CR_CW_ZCK_COMPRESSION;
+ } else if (g_str_has_suffix(filename, ".zst"))
+ {
+ return CR_CW_ZSTD_COMPRESSION;
} else if (g_str_has_suffix(filename, ".xml") ||
g_str_has_suffix(filename, ".tar") ||
g_str_has_suffix(filename, ".yaml") ||
@@ -192,6 +209,11 @@ cr_detect_compression(const char *filename, GError **err)
type = CR_CW_GZ_COMPRESSION;
}
+ else if (g_str_has_prefix(mime_type, "application/zstd"))
+ {
+ type = CR_CW_ZSTD_COMPRESSION;
+ }
+
else if (g_str_has_prefix(mime_type, "application/x-bzip2") ||
g_str_has_prefix(mime_type, "application/x-bz2") ||
g_str_has_prefix(mime_type, "application/bzip2") ||
@@ -255,6 +277,8 @@ cr_compression_type(const char *name)
type = CR_CW_XZ_COMPRESSION;
if (!g_strcmp0(name_lower, "zck"))
type = CR_CW_ZCK_COMPRESSION;
+ if (!g_strcmp0(name_lower, "zstd"))
+ type = CR_CW_ZSTD_COMPRESSION;
g_free(name_lower);
return type;
@@ -272,6 +296,8 @@ cr_compression_suffix(cr_CompressionType comtype)
return ".xz";
case CR_CW_ZCK_COMPRESSION:
return ".zck";
+ case CR_CW_ZSTD_COMPRESSION:
+ return ".zst";
default:
return NULL;
}
@@ -413,6 +439,56 @@ cr_sopen(const char *filename,
}
break;
+        case (CR_CW_ZSTD_COMPRESSION): { // ------------------------------------
+#ifdef WITH_ZSTD
+            // Open the plain file first; the zstd stream is layered on top of it.
+            FILE *f = fopen(filename, mode_str);
+
+            if (!f) {
+                g_set_error(err, ERR_DOMAIN, CRE_IO, "fopen(): %s", g_strerror(errno));
+                break;
+            }
+
+            ZstdFile *zstd_file = g_malloc0(sizeof(ZstdFile));
+
+            if (mode == CR_CW_MODE_WRITE) {
+                if ((zstd_file->context = (void *) ZSTD_createCCtx()) == NULL) {
+                    g_set_error(err, ERR_DOMAIN, CRE_ZSTD, "%s",
+                                "Failed to create ZSTD context.");
+                    g_free(zstd_file);
+                    fclose(f);
+                    break;
+                }
+                size_t zret = ZSTD_CCtx_setParameter(zstd_file->context, ZSTD_c_compressionLevel, CR_CW_ZSTD_COMPRESSION_LEVEL);
+                if (ZSTD_isError(zret)) {
+                    g_set_error(err, ERR_DOMAIN, CRE_ZSTD, "%s",
+                                ZSTD_getErrorName(zret));
+                    // The context was created successfully above, so it must
+                    // be released here as well or it leaks.
+                    ZSTD_freeCCtx(zstd_file->context);
+                    g_free(zstd_file);
+                    fclose(f);
+                    break;
+                }
+                // The staging buffer receives compressed output before it is written to f.
+                zstd_file->buffer_size = ZSTD_CStreamOutSize();
+            } else {
+                if ((zstd_file->context = (void *) ZSTD_createDCtx()) == NULL) {
+                    g_free(zstd_file);
+                    fclose(f);
+                    // Same failure class as the compression-context case: a zstd error.
+                    g_set_error(err, ERR_DOMAIN, CRE_ZSTD, "%s",
+                                "Failed to create ZSTD context.");
+                    break;
+                }
+                // The staging buffer holds compressed input read from f.
+                zstd_file->buffer_size = ZSTD_DStreamInSize();
+            }
+            zstd_file->buffer = g_malloc(zstd_file->buffer_size);
+
+            // Publish both handles only after setup fully succeeded, so a
+            // failed open never leaves a pointer to an already closed FILE.
+            file->INNERFILE = f;
+            file->FILE = (void *) zstd_file;
+
+            break;
+#else
+            g_set_error(err, ERR_DOMAIN, CRE_IO, "createrepo_c wasn't compiled with zstd support");
+            break;
+#endif // WITH_ZSTD
+        }
+
case (CR_CW_BZ2_COMPRESSION): { // ------------------------------------
FILE *f = fopen(filename, mode_str);
file->INNERFILE = f;
@@ -769,6 +845,43 @@ cr_close(CR_FILE *cr_file, GError **err)
}
break;
+        case (CR_CW_ZSTD_COMPRESSION): { // ------------------------------------
+            // Braces are required: before C23 a declaration may not directly
+            // follow a case label.
+#ifdef WITH_ZSTD
+            ZstdFile * zstd = (ZstdFile *) cr_file->FILE;
+            // Result of the zstd teardown; stays CRE_OK unless flushing or
+            // closing the output fails.
+            int zstd_rc = CRE_OK;
+
+            if (cr_file->mode == CR_CW_MODE_READ) {
+                ZSTD_freeDCtx(zstd->context);
+                fclose(cr_file->INNERFILE);
+            } else {
+                size_t remaining;
+                // No more new input, just finish flushing compression data
+                ZSTD_inBuffer zip = { NULL, 0, 0 };
+                do {
+                    zstd->zob.dst = zstd->buffer;
+                    zstd->zob.size = zstd->buffer_size;
+                    zstd->zob.pos = 0;
+
+                    remaining = ZSTD_compressStream2(zstd->context, &zstd->zob , &zip, ZSTD_e_end);
+                    if (ZSTD_isError(remaining)) {
+                        g_set_error(err, ERR_DOMAIN, CRE_ZSTD, "%s", ZSTD_getErrorName(remaining));
+                        zstd_rc = CRE_ZSTD;
+                        break;
+                    } else if (zstd->zob.pos != fwrite(zstd->buffer, 1, zstd->zob.pos, cr_file->INNERFILE)) {
+                        g_set_error(err, ERR_DOMAIN, CRE_IO, "cr_close ZSTD fwrite failed");
+                        zstd_rc = CRE_IO;
+                        break;
+                    }
+                } while(remaining != 0);
+                ZSTD_freeCCtx(zstd->context);
+
+                // fclose() flushes stdio buffers; a failure here means the
+                // compressed file is incomplete. Only report it when no
+                // earlier error was set, so *err is never overwritten.
+                if (fclose(cr_file->INNERFILE) != 0 && zstd_rc == CRE_OK) {
+                    g_set_error(err, ERR_DOMAIN, CRE_IO, "fclose(): %s", g_strerror(errno));
+                    zstd_rc = CRE_IO;
+                }
+            }
+
+            // Resources are released regardless of the outcome.
+            g_free(zstd->buffer);
+            g_free(cr_file->FILE);
+
+            ret = zstd_rc;
+            break;
+#else
+            g_set_error(err, ERR_DOMAIN, CRE_IO, "createrepo_c wasn't compiled with zstd support");
+            break;
+#endif // WITH_ZSTD
+        }
+
case (CR_CW_BZ2_COMPRESSION): // --------------------------------------
if (cr_file->mode == CR_CW_MODE_READ)
BZ2_bzReadClose(&rc, (BZFILE *) cr_file->FILE);
@@ -980,6 +1093,43 @@ cr_read(CR_FILE *cr_file, void *buffer, unsigned int len, GError **err)
}
break;
+        case (CR_CW_ZSTD_COMPRESSION): { // -------------------------------------
+            // Braces are required: before C23 a declaration may not directly
+            // follow a case label.
+#ifdef WITH_ZSTD
+            ZstdFile * zstd = (ZstdFile *) cr_file->FILE;
+
+            // Decompressed bytes go straight into the caller's buffer.
+            ZSTD_outBuffer zob = {buffer, len, 0};
+            // Tracked locally so failures are detected even when err == NULL.
+            gboolean failed = FALSE;
+
+            while (zob.pos < zob.size) {
+                // Re-fill compressed data buffer
+                if (zstd->zib.pos >= zstd->zib.size) {
+                    zstd->zib.size = fread(zstd->buffer, 1, zstd->buffer_size, cr_file->INNERFILE);
+                    if (zstd->zib.size == 0) {
+                        // fread() returns 0 for both EOF and I/O errors.
+                        if (ferror(cr_file->INNERFILE)) {
+                            g_set_error(err, ERR_DOMAIN, CRE_IO, "cr_read zstd read failed");
+                            failed = TRUE;
+                        }
+                        break; //EOF or read error
+                    }
+                    zstd->zib.src = zstd->buffer;
+                    zstd->zib.pos = 0;
+                }
+
+                // Decompress chunk; the return value is a size_t (error code or hint).
+                size_t decomp_ret = ZSTD_decompressStream(zstd->context, &zob, &zstd->zib);
+                if (ZSTD_isError(decomp_ret)) {
+                    g_set_error(err, ERR_DOMAIN, CRE_ZSTD, "%s", ZSTD_getErrorName(decomp_ret));
+                    failed = TRUE;
+                    break;
+                }
+
+            }
+
+            // Number of decompressed bytes delivered, or CR_CW_ERR on failure.
+            if (failed) {
+                ret = CR_CW_ERR;
+            } else {
+                ret = zob.pos;
+            }
+
+            break;
+#else
+            g_set_error(err, ERR_DOMAIN, CRE_IO, "createrepo_c wasn't compiled with zstd support");
+            break;
+#endif // WITH_ZSTD
+        }
+
case (CR_CW_BZ2_COMPRESSION): // --------------------------------------
ret = BZ2_bzRead(&bzerror, (BZFILE *) cr_file->FILE, buffer, len);
if (!ret && bzerror == BZ_SEQUENCE_ERROR)
@@ -1214,6 +1364,44 @@ cr_write(CR_FILE *cr_file, const void *buffer, unsigned int len, GError **err)
}
break;
+        case (CR_CW_ZSTD_COMPRESSION): { // -------------------------------------
+            // Braces are required: before C23 a declaration may not directly
+            // follow a case label.
+#ifdef WITH_ZSTD
+            ZstdFile * zstd = (ZstdFile *) cr_file->FILE;
+            // Cursor over the caller's uncompressed data.
+            ZSTD_inBuffer zib = {buffer, len, 0};
+            // Tracked locally so failures are detected even when err == NULL.
+            gboolean failed = FALSE;
+
+            while (zib.pos < zib.size) {
+                // Reset the staging buffer for this round of compressed output.
+                zstd->zob.dst = zstd->buffer;
+                zstd->zob.size = zstd->buffer_size;
+                zstd->zob.pos = 0;
+
+                // Compress chunk into buffer
+                size_t remaining = ZSTD_compressStream2(zstd->context, &zstd->zob , &zib, ZSTD_e_continue);
+                if (ZSTD_isError(remaining)) {
+                    g_set_error(err, ERR_DOMAIN, CRE_ZSTD, "%s", ZSTD_getErrorName(remaining));
+                    failed = TRUE;
+                    break;
+                }
+
+                // Write compressed buffer
+                if (zstd->zob.pos > 0) {
+                    size_t nw = fwrite(zstd->buffer, 1, zstd->zob.pos, cr_file->INNERFILE);
+                    if (nw != zstd->zob.pos) {
+                        g_set_error(err, ERR_DOMAIN, CRE_IO, "cr_write zstd write failed");
+                        failed = TRUE;
+                        break;
+                    }
+                }
+
+            }
+
+            // Number of input bytes consumed, or CR_CW_ERR on failure.
+            if (failed) {
+                ret = CR_CW_ERR;
+            } else {
+                ret = zib.pos;
+            }
+
+            break;
+#else
+            g_set_error(err, ERR_DOMAIN, CRE_IO, "createrepo_c wasn't compiled with zstd support");
+            break;
+#endif // WITH_ZSTD
+        }
+
case (CR_CW_BZ2_COMPRESSION): // --------------------------------------
BZ2_bzWrite(&bzerror, (BZFILE *) cr_file->FILE, (void *) buffer, len);
if (bzerror == BZ_OK) {
@@ -1361,6 +1549,7 @@ cr_puts(CR_FILE *cr_file, const char *str, GError **err)
case (CR_CW_BZ2_COMPRESSION): // --------------------------------------
case (CR_CW_XZ_COMPRESSION): // ---------------------------------------
case (CR_CW_ZCK_COMPRESSION): // --------------------------------------
+ case (CR_CW_ZSTD_COMPRESSION): // --------------------------------------
len = strlen(str);
ret = cr_write(cr_file, str, len, err);
if (ret != (int) len)
@@ -1398,6 +1587,7 @@ cr_end_chunk(CR_FILE *cr_file, GError **err)
case (CR_CW_GZ_COMPRESSION): // ---------------------------------------
case (CR_CW_BZ2_COMPRESSION): // --------------------------------------
case (CR_CW_XZ_COMPRESSION): // ---------------------------------------
+ case (CR_CW_ZSTD_COMPRESSION): // ---------------------------------------
break;
case (CR_CW_ZCK_COMPRESSION): { // ------------------------------------
#ifdef WITH_ZCHUNK
@@ -1450,6 +1640,7 @@ cr_set_autochunk(CR_FILE *cr_file, gboolean auto_chunk, GError **err)
case (CR_CW_GZ_COMPRESSION): // ---------------------------------------
case (CR_CW_BZ2_COMPRESSION): // --------------------------------------
case (CR_CW_XZ_COMPRESSION): // ---------------------------------------
+ case (CR_CW_ZSTD_COMPRESSION): // ---------------------------------------
break;
case (CR_CW_ZCK_COMPRESSION): { // ------------------------------------
#ifdef WITH_ZCHUNK
@@ -1524,6 +1715,7 @@ cr_printf(GError **err, CR_FILE *cr_file, const char *format, ...)
case (CR_CW_BZ2_COMPRESSION): // --------------------------------------
case (CR_CW_XZ_COMPRESSION): // ---------------------------------------
case (CR_CW_ZCK_COMPRESSION): // --------------------------------------
+ case (CR_CW_ZSTD_COMPRESSION): // --------------------------------------
tmp_ret = cr_write(cr_file, buf, ret, err);
if (tmp_ret != (int) ret)
ret = CR_CW_ERR;
diff --git a/src/compression_wrapper.h b/src/compression_wrapper.h
index 72e0078..32936d0 100644
--- a/src/compression_wrapper.h
+++ b/src/compression_wrapper.h
@@ -42,6 +42,7 @@ typedef enum {
CR_CW_BZ2_COMPRESSION, /*!< BZip2 compression */
CR_CW_XZ_COMPRESSION, /*!< XZ compression */
CR_CW_ZCK_COMPRESSION, /*!< ZCK compression */
+ CR_CW_ZSTD_COMPRESSION, /*!< ZSTD compression */
CR_CW_COMPRESSION_SENTINEL, /*!< Sentinel of the list */
} cr_CompressionType;
diff --git a/src/error.h b/src/error.h
index b925bc7..8738032 100644
--- a/src/error.h
+++ b/src/error.h
@@ -98,6 +98,8 @@ typedef enum {
(34) ZCK library related error */
CRE_MODULEMD, /*!<
(35) modulemd related error */
+ CRE_ZSTD, /*!<
+ (36) Zstd library related error */
CRE_SENTINEL, /*!<
(XX) Sentinel */
} cr_Error;
diff --git a/src/python/createrepo_c/__init__.py b/src/python/createrepo_c/__init__.py
index 440e559..21f6f74 100644
--- a/src/python/createrepo_c/__init__.py
+++ b/src/python/createrepo_c/__init__.py
@@ -59,6 +59,9 @@ XZ = _createrepo_c.XZ_COMPRESSION
#: Zchunk compression alias
ZCK = _createrepo_c.ZCK_COMPRESSION
+#: Zstd compression alias
+ZSTD = _createrepo_c.ZSTD_COMPRESSION
+
HT_KEY_DEFAULT = _createrepo_c.HT_KEY_DEFAULT #: Default key (hash)
HT_KEY_HASH = _createrepo_c.HT_KEY_HASH #: Package hash as a key
HT_KEY_NAME = _createrepo_c.HT_KEY_NAME #: Package name as a key
diff --git a/src/python/createrepo_cmodule.c b/src/python/createrepo_cmodule.c
index ba6cad6..64ac4ec 100644
--- a/src/python/createrepo_cmodule.c
+++ b/src/python/createrepo_cmodule.c
@@ -264,6 +264,7 @@ PyInit__createrepo_c(void)
PyModule_AddIntConstant(m, "BZ2_COMPRESSION", CR_CW_BZ2_COMPRESSION);
PyModule_AddIntConstant(m, "XZ_COMPRESSION", CR_CW_XZ_COMPRESSION);
PyModule_AddIntConstant(m, "ZCK_COMPRESSION", CR_CW_ZCK_COMPRESSION);
+ PyModule_AddIntConstant(m, "ZSTD_COMPRESSION", CR_CW_ZSTD_COMPRESSION);
/* Zchunk support */
#ifdef WITH_ZCHUNK
--
2.48.1