Backport fixes and add udev rules

This commit is contained in:
Tulio Magno Quites Machado Filho 2022-04-14 11:51:14 -03:00 committed by Tulio Magno Quites Machado Filho
parent 23d8d2295c
commit 134949a2b9
4 changed files with 979 additions and 2 deletions

View File

@ -1,18 +1,25 @@
Name: libnxz
Version: 0.63
Release: 1%{?dist}
Release: 2%{?dist}
Summary: Zlib implementation for POWER processors
License: ASL 2.0 or GPLv2+
Url: https://github.com/libnxz/power-gzip
BuildRequires: zlib-devel
Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz
Source1: nx-gzip.udev
# https://github.com/libnxz/power-gzip/pull/150
Patch0: pr150.patch
# https://github.com/libnxz/power-gzip/pull/155
Patch1: pr155.patch
# Be explicit about the soname in order to avoid unintentional changes.
%global soname libnxz.so.0
ExclusiveArch: ppc64le
BuildRequires: gcc
BuildRequires: make
BuildRequires: make
BuildRequires: systemd-rpm-macros
%description
libnxz is a zlib-compatible library that uses the NX GZIP Engine available on
@ -52,6 +59,10 @@ fi
%install
%make_install
install -Dm 644 %{SOURCE1} %{buildroot}%{_udevrulesdir}/90-nx-gzip.rules
%pre
%{_sbindir}/groupadd -r -f nx-gzip
%files
%{_libdir}/%{soname}
@ -59,6 +70,7 @@ fi
%license %{_docdir}/%{name}/APACHE-2.0.txt
%license %{_docdir}/%{name}/gpl-2.0.txt
%doc README.md
%{_udevrulesdir}/90-nx-gzip.rules
%files devel
%{_includedir}/libnxz.h
@ -69,6 +81,10 @@ fi
%{_libdir}/libnxz.la
%changelog
* Thu Apr 14 2022 Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> - 0.63-2
- Backport fixes from upstream.
- Create the nx-gzip group and add udev rules.
* Fri Mar 04 2022 Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> - 0.63-1
- Update to libnxz 0.63.
- Fix the soname to the right string.

1
nx-gzip.udev Normal file
View File

@ -0,0 +1 @@
KERNEL=="nx-gzip", GROUP="nx-gzip", MODE="0660"

907
pr150.patch Normal file
View File

@ -0,0 +1,907 @@
From 943a7f434b10c19f8e8e865c3cc40685b9903822 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Date: Thu, 24 Mar 2022 17:32:43 -0300
Subject: [PATCH 1/6] Provide a maximum job length depending on the
virtualization
Identify if a system is running on baremetal or PowerVM and provide
a maximum job length adapted to each case.
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
---
lib/nx_inflate.c | 5 +++--
lib/nx_zlib.c | 25 +++++++++++++++++++++++--
lib/nx_zlib.h | 4 ++++
3 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
index ec32b4c..77ad33c 100644
--- a/lib/nx_inflate.c
+++ b/lib/nx_inflate.c
@@ -945,8 +945,9 @@ static int nx_inflate_(nx_streamp s, int flush)
uint32_t write_sz, source_sz, target_sz;
long loop_cnt = 0, loop_max = 0xffff;
- /* inflate benefits from large jobs; memcopies must be amortized */
- uint32_t inflate_per_job_len = 64 * nx_config.per_job_len;
+ /** \brief inflate benefits from large jobs; memcopies must be
+ * amortized. */
+ const uint32_t inflate_per_job_len = nx_config.per_job_len;
/* nx hardware */
uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc;
diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c
index 28ea482..a50d6f7 100644
--- a/lib/nx_zlib.c
+++ b/lib/nx_zlib.c
@@ -64,6 +64,18 @@
#include "nx_utils.h"
#include "nx_zlib.h"
+/* Use the following values as maximum length of NX jobs when the OS doesn't
+ provide the value itself, which is the default behavior until Linux 5.17 */
+
+/** \brief Maximum job length on baremetal
+ *
+ * While the system does allow up to 2 GiB as the maximum job length, restrict
+ * it to 64 MiB.
+ */
+#define DEFAULT_MAX_JOB_BAREMETAL 64 * 1024 * 1024
+/** \brief Maximum job length on PowerVM */
+#define DEFAULT_MAX_JOB_POWERVM 1024 * 1024
+
struct nx_config_t nx_config;
static struct nx_dev_t nx_devices[NX_DEVICES_MAX];
static int nx_dev_count = 0;
@@ -639,8 +651,13 @@ static int nx_query_job_limits()
}
}
- /* On error return default value of 1 MB */
- return (1024 * 1024);
+ /* On error return default value. */
+ switch (nx_config.virtualization) {
+ case BAREMETAL:
+ return DEFAULT_MAX_JOB_BAREMETAL;
+ default:
+ return DEFAULT_MAX_JOB_POWERVM;
+ }
}
/*
@@ -659,6 +676,9 @@ static int nx_enumerate_engines()
int count = 0;
size_t n;
+ /* Assume baremetal by default. */
+ nx_config.virtualization = BAREMETAL;
+
d = opendir(DEVICE_TREE);
if (d == NULL){
prt_err("open device tree dir failed.\n");
@@ -712,6 +732,7 @@ static int nx_enumerate_engines()
}
/* On PowerVM, there is no concept of multiple NX engines. */
if (strncmp(de->d_name, "ibm,powervm", 11) == 0){
+ nx_config.virtualization = POWERVM;
closedir(d);
return 1;
}
diff --git a/lib/nx_zlib.h b/lib/nx_zlib.h
index e84bd7e..fa73b01 100644
--- a/lib/nx_zlib.h
+++ b/lib/nx_zlib.h
@@ -129,6 +129,8 @@ void nx_print_dde(nx_dde_t *ddep, const char *msg);
#define zlib_version zlibVersion()
extern const char *zlibVersion OF((void));
+enum virtualization {BAREMETAL=0, POWERVM=1};
+
/* common config variables for all streams */
struct nx_config_t {
long page_sz;
@@ -158,6 +160,8 @@ struct nx_config_t {
* dynamic huffman */
struct selector mode; /** mode selector: selects between software
* and hardware compression. */
+ uint8_t virtualization; /** Indicate the virtualization type being
+ * used. */
};
typedef struct nx_config_t *nx_configp_t;
extern struct nx_config_t nx_config;
From b22eb7bffe61e36f70661921a689e44370d3c7e5 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Date: Thu, 24 Mar 2022 18:03:28 -0300
Subject: [PATCH 2/6] inflate: Move code that initializes the DDE to their own
functions
Create functions nx_reset_dde() and nx_init_dde() based on previous code
helping to reduce the size of nx_inflate_() and making it easier to
understand the code.
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
---
lib/nx_inflate.c | 197 ++++++++++++++++++++++++++++-------------------
1 file changed, 116 insertions(+), 81 deletions(-)
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
index 77ad33c..f1d9adb 100644
--- a/lib/nx_inflate.c
+++ b/lib/nx_inflate.c
@@ -934,6 +934,120 @@ static int copy_data_to_fifo_in(nx_streamp s) {
return Z_OK;
}
+/** \brief Reset DDE to initial values.
+ *
+ * @param s nx_streamp to be processed.
+ * @return Function code as passed to CRB. The function will set the CRB and
+ * return the value here.
+ */
+static int nx_reset_dde(nx_streamp s) {
+ nx_gzip_crb_cpb_t *cmdp = s->nxcmdp;
+ uint32_t fc;
+
+ /* address/len lists */
+ clearp_dde(s->ddl_in);
+ clearp_dde(s->ddl_out);
+
+ /* FC, CRC, HistLen, Table 6-6 */
+ if (s->resuming || (s->dict_len > 0)) {
+ /* Resuming a partially decompressed input. */
+ fc = GZIP_FC_DECOMPRESS_RESUME;
+ } else {
+ /* First decompress job */
+ fc = GZIP_FC_DECOMPRESS;
+
+ /* We use the most recently measured compression ratio
+ as a heuristic to estimate the input and output
+ sizes. If we give too much input, the target buffer
+ overflows and NX cycles are wasted, and then we
+ must retry with smaller input size. 1000 is 100% */
+ s->last_comp_ratio = 1000UL;
+ }
+
+ /* clear then copy fc to the crb */
+ cmdp->crb.gzip_fc = 0;
+ putnn(cmdp->crb, gzip_fc, fc);
+
+ return fc;
+}
+
+/** \brief Initialize DDE, appending a dictionary, if necessary.
+ *
+ * @param s nx_streamp to be processed.
+ * @return The history length
+ */
+static int nx_init_dde(nx_streamp s) {
+ nx_gzip_crb_cpb_t *cmdp = s->nxcmdp;
+ int nx_history_len = s->history_len;
+
+ /* FC, CRC, HistLen, Table 6-6 */
+ if (s->resuming || (s->dict_len > 0)) {
+ /* Resuming a partially decompressed input. The key
+ to resume is supplying the max 32KB dictionary
+ (history) to NX, which is basically the last 32KB
+ or less of the output earlier produced. And also
+ make sure partial checksums are carried forward
+ */
+
+ /* Crc of prev job passed to the job to be resumed */
+ put32(cmdp->cpb, in_crc, s->crc32);
+ put32(cmdp->cpb, in_adler, s->adler32);
+
+ /* Round up the sizes to quadword. Section 2.10
+ Rounding up will not segfault because
+ nx_alloc_buffer has padding at the beginning */
+
+ if (s->dict_len > 0) {
+ /* lays dict on top of hist */
+ nx_history_len = nx_amend_history_with_dict(s);
+
+ if (s->wrap == HEADER_ZLIB) {
+ /* in the raw mode pass crc as is; in the zlib
+ mode initialize them */
+ put32(cmdp->cpb, in_crc, INIT_CRC );
+ put32(cmdp->cpb, in_adler, INIT_ADLER);
+ put32(cmdp->cpb, out_crc, INIT_CRC );
+ put32(cmdp->cpb, out_adler, INIT_ADLER);
+ }
+ print_dbg_info(s, __LINE__);
+ } else {
+ /* no dictionary here */
+ ASSERT( s->dict_len == 0 );
+ nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ;
+ putnn(cmdp->cpb, in_histlen, nx_history_len);
+ /* convert to bytes */
+ nx_history_len = nx_history_len * NXQWSZ;
+
+ if (nx_history_len > 0) {
+ /* deflate history goes in first */
+ ASSERT(s->cur_out >= nx_history_len);
+ nx_append_dde(s->ddl_in,
+ s->fifo_out + (s->cur_out
+ - nx_history_len),
+ nx_history_len);
+ }
+ print_dbg_info(s, __LINE__);
+ }
+ } else {
+ nx_history_len = s->history_len = 0;
+ /* writing a 0 clears out subc as well */
+ cmdp->cpb.in_histlen = 0;
+
+ /* initialize the crc values */
+ put32(cmdp->cpb, in_crc, INIT_CRC );
+ put32(cmdp->cpb, in_adler, INIT_ADLER);
+ put32(cmdp->cpb, out_crc, INIT_CRC );
+ put32(cmdp->cpb, out_adler, INIT_ADLER);
+ }
+
+ /* We use the most recently measured compression ratio as a heuristic
+ to estimate the input and output sizes. If we give too much input,
+ the target buffer overflows and NX cycles are wasted, and then we
+ must retry with smaller input size. 1000 is 100% */
+ s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L);
+ return nx_history_len;
+}
+
/** \brief Internal implementation of inflate.
*
* @param s nx_streamp to be processed.
@@ -1075,87 +1189,8 @@ static int nx_inflate_(nx_streamp s, int flush)
/* NX decompresses input data */
- /* address/len lists */
- clearp_dde(ddl_in);
- clearp_dde(ddl_out);
-
- nx_history_len = s->history_len;
-
- /* FC, CRC, HistLen, Table 6-6 */
- if (s->resuming || (s->dict_len > 0)) {
- /* Resuming a partially decompressed input. The key
- to resume is supplying the max 32KB dictionary
- (history) to NX, which is basically the last 32KB
- or less of the output earlier produced. And also
- make sure partial checksums are carried forward
- */
- fc = GZIP_FC_DECOMPRESS_RESUME;
-
- /* Crc of prev job passed to the job to be resumed */
- put32(cmdp->cpb, in_crc, s->crc32);
- put32(cmdp->cpb, in_adler, s->adler32);
-
- /* Round up the sizes to quadword. Section 2.10
- Rounding up will not segfault because
- nx_alloc_buffer has padding at the beginning */
-
- if (s->dict_len > 0) {
- /* lays dict on top of hist */
- nx_history_len = nx_amend_history_with_dict(s);
-
- if (s->wrap == HEADER_ZLIB) {
- /* in the raw mode pass crc as is; in the zlib mode
- initialize them */
- put32(cmdp->cpb, in_crc, INIT_CRC );
- put32(cmdp->cpb, in_adler, INIT_ADLER);
- put32(cmdp->cpb, out_crc, INIT_CRC );
- put32(cmdp->cpb, out_adler, INIT_ADLER);
- }
-
- s->last_comp_ratio = NX_MAX( NX_MIN(1000UL, s->last_comp_ratio), 100L );
-
- print_dbg_info(s, __LINE__);
- }
- else {
- /* no dictionary here */
- ASSERT( s->dict_len == 0 );
- nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ;
- putnn(cmdp->cpb, in_histlen, nx_history_len);
- nx_history_len = nx_history_len * NXQWSZ; /* convert to bytes */
-
- if (nx_history_len > 0) {
- /* deflate history goes in first */
- ASSERT(s->cur_out >= nx_history_len);
- nx_append_dde(ddl_in, s->fifo_out + (s->cur_out - nx_history_len), nx_history_len);
- }
- print_dbg_info(s, __LINE__);
- }
- }
- else {
- /* First decompress job */
- fc = GZIP_FC_DECOMPRESS;
-
- nx_history_len = s->history_len = 0;
- /* writing a 0 clears out subc as well */
- cmdp->cpb.in_histlen = 0;
-
- /* initialize the crc values */
- put32(cmdp->cpb, in_crc, INIT_CRC );
- put32(cmdp->cpb, in_adler, INIT_ADLER);
- put32(cmdp->cpb, out_crc, INIT_CRC );
- put32(cmdp->cpb, out_adler, INIT_ADLER);
-
- /* We use the most recently measured compression ratio
- as a heuristic to estimate the input and output
- sizes. If we give too much input, the target buffer
- overflows and NX cycles are wasted, and then we
- must retry with smaller input size. 1000 is 100% */
- s->last_comp_ratio = 1000UL;
- }
-
- /* clear then copy fc to the crb */
- cmdp->crb.gzip_fc = 0;
- putnn(cmdp->crb, gzip_fc, fc);
+ fc = nx_reset_dde(s);
+ nx_history_len = nx_init_dde(s);
/*
* NX source buffers
From e376d92fa704108f1258e3a41fc1ffcf551d1c5b Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Date: Fri, 25 Mar 2022 09:57:32 -0300
Subject: [PATCH 3/6] Fix the total amount of pages being touched
Fix an error in nx_touch_pages_dde() that was causing the function to
touch a different number of pages than requested.
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
---
lib/nx_zlib.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c
index a50d6f7..bf2a6fc 100644
--- a/lib/nx_zlib.c
+++ b/lib/nx_zlib.c
@@ -398,7 +398,7 @@ int nx_touch_pages_dde(nx_dde_t *ddep, long buf_sz, long page_sz, int wr)
/* touching fewer pages than encoded in the ddebc */
if ( total > buf_sz) {
- buf_len = NX_MIN(buf_len, total - buf_sz);
+ buf_len = buf_sz - (total - buf_len);
nx_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
prt_trace("touch loop break len 0x%x ddead %p\n", buf_len, (void *)buf_addr);
break;
From 1f3dc128a476c9bbbb1b503d2fc8f54365101ebf Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Date: Thu, 24 Mar 2022 18:18:34 -0300
Subject: [PATCH 4/6] inflate: Move code that sets the DDE
Create functions nx_set_dde_in() and nx_set_dde_out() based on old code.
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
---
lib/nx_inflate.c | 64 +++++++++++++++++++++++++++++++++---------------
1 file changed, 44 insertions(+), 20 deletions(-)
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
index f1d9adb..a9671b2 100644
--- a/lib/nx_inflate.c
+++ b/lib/nx_inflate.c
@@ -1048,6 +1048,41 @@ static int nx_init_dde(nx_streamp s) {
return nx_history_len;
}
+/** \brief Append input data to DDE
+ *
+ * @param s nx_streamp to be processed.
+ *
+ * @return The total amount of bytes appended to DDE
+ */
+static uint32_t nx_set_dde_in(nx_streamp s) {
+ /* Buffered user input is next */
+ if (s->fifo_in != NULL)
+ nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in);
+ /* Then current user input. */
+ nx_append_dde(s->ddl_in, s->next_in, s->avail_in);
+ /* Total bytes going in to engine. */
+ return getp32(s->ddl_in, ddebc);
+}
+
+/** \brief Append output data to DDE
+ *
+ * @param s nx_streamp to be processed.
+ *
+ * @return The total amount of bytes appended to DDE
+ */
+static uint32_t nx_set_dde_out(nx_streamp s) {
+ /* Decompress to user buffer first. */
+ nx_append_dde(s->ddl_out, s->next_out, s->avail_out);
+
+ /* Overflow to fifo_out.
+ used_out == 0 required by definition. */
+ ASSERT(s->used_out == 0);
+ nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out,
+ s->len_out - s->cur_out);
+
+ return s->avail_out + s->len_out - s->cur_out;
+}
+
/** \brief Internal implementation of inflate.
*
* @param s nx_streamp to be processed.
@@ -1195,28 +1230,13 @@ static int nx_inflate_(nx_streamp s, int flush)
/*
* NX source buffers
*/
- /* buffered user input is next */
- if (s->fifo_in != NULL)
- nx_append_dde(ddl_in, s->fifo_in + s->cur_in, s->used_in);
- /* then current user input */
- nx_append_dde(ddl_in, s->next_in, s->avail_in);
- source_sz = getp32(ddl_in, ddebc); /* total bytes going in to engine */
- ASSERT( source_sz > nx_history_len );
+ source_sz = nx_set_dde_in(s);
+ ASSERT(source_sz > nx_history_len);
/*
* NX target buffers
*/
- ASSERT(s->used_out == 0);
-
- uint32_t len_next_out = s->avail_out;
- nx_append_dde(ddl_out, s->next_out, len_next_out); /* decomp in to user buffer */
-
- /* overflow, used_out == 0 required by definition, +used_out below is unnecessary */
- nx_append_dde(ddl_out, s->fifo_out + s->cur_out + s->used_out, s->len_out - s->cur_out - s->used_out);
- target_sz = len_next_out + s->len_out - s->cur_out - s->used_out;
-
- prt_info("len_next_out %d len_out %d cur_out %d used_out %d source_sz %d history_len %d\n",
- len_next_out, s->len_out, s->cur_out, s->used_out, source_sz, nx_history_len);
+ target_sz = nx_set_dde_out(s);
/* We want exactly the History size amount of 32KB to overflow
in to fifo_out. If overflow is less, the history spans
@@ -1228,6 +1248,7 @@ static int nx_inflate_(nx_streamp s, int flush)
these copies (memcpy) for performance. Therefore, the
heuristic here will estimate the source size for the
desired target size */
+ uint32_t len_next_out = s->avail_out;
/* avail_out plus 32 KB history plus a bit of overhead */
uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
@@ -1240,11 +1261,14 @@ static int nx_inflate_(nx_streamp s, int flush)
prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len);
+ prt_info("%s:%d len_next_out %d len_out %d cur_out %d"
+ " used_out %d source_sz %d history_len %d\n",
+ __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out,
+ s->used_out, source_sz, nx_history_len);
+
/* do not include input side history in the estimation */
source_sz = source_sz - nx_history_len;
-
ASSERT(source_sz > 0);
-
source_sz = NX_MIN(source_sz, source_sz_expected);
/* add the history back */
From eb6cb7b01fe1fa337979353e905e3ad96514b233 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Date: Thu, 24 Mar 2022 18:37:27 -0300
Subject: [PATCH 5/6] inflate: cosmetic improvements
- Add source code comments.
- Improve indentation.
- Break long lines.
- Fix error and information messages.
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
---
lib/nx_inflate.c | 51 ++++++++++++++++++++++++++++++++++++------------
1 file changed, 38 insertions(+), 13 deletions(-)
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
index a9671b2..a6070bd 100644
--- a/lib/nx_inflate.c
+++ b/lib/nx_inflate.c
@@ -1090,14 +1090,31 @@ static uint32_t nx_set_dde_out(nx_streamp s) {
*/
static int nx_inflate_(nx_streamp s, int flush)
{
- /* queuing, file ops, byte counting */
- uint32_t write_sz, source_sz, target_sz;
+ /** \brief Sum of the bytes that may be used by NX as input
+ *
+ * Total amount of bytes sent to the NX to be used as input,
+ * i.e. sum of the bytes in next_in and fifo_in. */
+ uint32_t source_sz;
+
+ /** \brief Sum of the bytes that may be used by NX as output
+ *
+ * Maximum amount of bytes available by the NX to be used as output,
+ * i.e. sum of the bytes available in next_out and fifo_out. */
+ uint32_t target_sz;
+
+ uint32_t write_sz;
long loop_cnt = 0, loop_max = 0xffff;
/** \brief inflate benefits from large jobs; memcopies must be
* amortized. */
const uint32_t inflate_per_job_len = nx_config.per_job_len;
+ /** \brief Estimated value for target_sz. Used to calculate
+ * source_sz_expected. */
+ uint32_t target_sz_expected;
+ /** \brief Estimated value for source_sz. */
+ uint32_t source_sz_expected;
+
/* nx hardware */
uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc;
@@ -1251,16 +1268,20 @@ static int nx_inflate_(nx_streamp s, int flush)
uint32_t len_next_out = s->avail_out;
/* avail_out plus 32 KB history plus a bit of overhead */
- uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
+ target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len);
/* e.g. if we want 100KB at the output and if the compression
ratio is 10% we want 10KB if input */
- uint32_t source_sz_expected = (uint32_t)(((uint64_t)target_sz_expected * s->last_comp_ratio + 1000L)/1000UL);
+ source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected
+ * s->last_comp_ratio + 1000L)/1000UL);
- prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len);
+ prt_info("%s:%d target_sz_expected %d source_sz_expected %d"
+ " source_sz %d last_comp_ratio %d nx_history_len %d\n",
+ __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected,
+ source_sz, s->last_comp_ratio, nx_history_len);
prt_info("%s:%d len_next_out %d len_out %d cur_out %d"
" used_out %d source_sz %d history_len %d\n",
__FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out,
@@ -1280,8 +1301,11 @@ static int nx_inflate_(nx_streamp s, int flush)
/* fault in pages */
nx_touch_pages_dde(ddl_in, source_sz, nx_config.page_sz, 0);
- nx_touch_pages_dde(ddl_out, target_sz, nx_config.page_sz, 1);
- nx_touch_pages( (void *)cmdp, sizeof(nx_gzip_crb_cpb_t), nx_config.page_sz, 0);
+ nx_touch_pages_dde(ddl_out,
+ target_sz,
+ nx_config.page_sz, 1);
+ nx_touch_pages((void *) cmdp, sizeof(nx_gzip_crb_cpb_t),
+ nx_config.page_sz, 0);
/*
* send job to NX
@@ -1298,9 +1322,9 @@ static int nx_inflate_(nx_streamp s, int flush)
faulting address to fsaddr */
print_dbg_info(s, __LINE__);
- prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d ",
- (void *)cmdp->crb.csb.fsaddr, source_sz);
- prt_warn("target_sz %d\n", target_sz);
+ prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d "
+ "target_sz %d\n", (void *)cmdp->crb.csb.fsaddr,
+ source_sz, target_sz);
#ifdef NX_LOG_SOURCE_TARGET
nx_print_dde(ddl_in, "source");
nx_print_dde(ddl_out, "target");
@@ -1339,8 +1363,8 @@ static int nx_inflate_(nx_streamp s, int flush)
if (ticks_total > (timeout_pgfaults * nx_get_freq())) {
/* TODO what to do when page faults are too many?
* Kernel MM would have killed the process. */
- prt_err("Cannot make progress; too many page");
- prt_err(" faults cc= %d\n", cc);
+ prt_err("Cannot make progress; too many page"
+ " faults cc= %d\n", cc);
}
else {
prt_warn("ERR_NX_AT_FAULT: more retry\n");
@@ -1397,7 +1421,8 @@ static int nx_inflate_(nx_streamp s, int flush)
cover the max expansion of INF_MIN_INPUT_LEN
bytes */
- prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data src %d hist %d\n", source_sz, nx_history_len);
+ prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data"
+ " src %d hist %d\n", source_sz, nx_history_len);
goto restart_nx;
case ERR_NX_OK:
From 806bf8e3ed1d0ae8a21bc6b2035df390f1062c26 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Date: Mon, 28 Mar 2022 18:15:37 -0300
Subject: [PATCH 6/6] inflate: Limit the amount of data added to the DDE
Stop adding all input and output data to the DDE and limit based on the
calculated value for source_sz_expected and target_sz_expected.
By limiting these values, we end up better estimating the amount of
pages that need to be touched, reducing the amount of time spent
touching pages that might not be used.
Reported-by: Puvichakravarthy Ramachandran <puvichakravarthy@in.ibm.com>
Reported-by: Poorna Chandra Vemula <Poorna.Chandra.Vemula@ibm.com>
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
---
lib/nx_inflate.c | 180 +++++++++++++++++++++++++++++------------------
1 file changed, 111 insertions(+), 69 deletions(-)
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
index a6070bd..b30cdf6 100644
--- a/lib/nx_inflate.c
+++ b/lib/nx_inflate.c
@@ -955,13 +955,6 @@ static int nx_reset_dde(nx_streamp s) {
} else {
/* First decompress job */
fc = GZIP_FC_DECOMPRESS;
-
- /* We use the most recently measured compression ratio
- as a heuristic to estimate the input and output
- sizes. If we give too much input, the target buffer
- overflows and NX cycles are wasted, and then we
- must retry with smaller input size. 1000 is 100% */
- s->last_comp_ratio = 1000UL;
}
/* clear then copy fc to the crb */
@@ -1051,15 +1044,24 @@ static int nx_init_dde(nx_streamp s) {
/** \brief Append input data to DDE
*
* @param s nx_streamp to be processed.
+ * @param source_sz_expected The total amount of bytes expected as input. It
+ * does not include dictionary or history.
*
* @return The total amount of bytes appended to DDE
*/
-static uint32_t nx_set_dde_in(nx_streamp s) {
+static uint32_t nx_set_dde_in(nx_streamp s, uint32_t source_sz_expected) {
+ uint32_t tmp = 0;
+
/* Buffered user input is next */
- if (s->fifo_in != NULL)
- nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in);
- /* Then current user input. */
- nx_append_dde(s->ddl_in, s->next_in, s->avail_in);
+ if (s->fifo_in != NULL) {
+ tmp = NX_MIN(s->used_in, source_sz_expected);
+ nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, tmp);
+ }
+ if (tmp < source_sz_expected) {
+ tmp = NX_MIN(s->avail_in, source_sz_expected - tmp);
+ /* Then current user input. */
+ nx_append_dde(s->ddl_in, s->next_in, tmp);
+ }
/* Total bytes going in to engine. */
return getp32(s->ddl_in, ddebc);
}
@@ -1067,20 +1069,32 @@ static uint32_t nx_set_dde_in(nx_streamp s) {
/** \brief Append output data to DDE
*
* @param s nx_streamp to be processed.
+ * @param target_sz_expected The total amount of bytes expected as output.
*
* @return The total amount of bytes appended to DDE
*/
-static uint32_t nx_set_dde_out(nx_streamp s) {
+static uint32_t nx_set_dde_out(nx_streamp s, uint32_t target_sz_expected) {
+ uint32_t tmp;
+ uint32_t ret;
+
+ ret = NX_MIN(s->avail_out, target_sz_expected);
+
/* Decompress to user buffer first. */
- nx_append_dde(s->ddl_out, s->next_out, s->avail_out);
+ nx_append_dde(s->ddl_out, s->next_out, ret);
+
+ if (ret < target_sz_expected) {
+ tmp = NX_MIN(s->len_out - s->cur_out,
+ target_sz_expected - ret);
+
+ /* Overflow to fifo_out.
+ used_out == 0 required by definition. */
+ ASSERT(s->used_out == 0);
+ nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out, tmp);
- /* Overflow to fifo_out.
- used_out == 0 required by definition. */
- ASSERT(s->used_out == 0);
- nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out,
- s->len_out - s->cur_out);
+ ret += tmp;
+ }
- return s->avail_out + s->len_out - s->cur_out;
+ return ret;
}
/** \brief Internal implementation of inflate.
@@ -1094,7 +1108,7 @@ static int nx_inflate_(nx_streamp s, int flush)
*
* Total amount of bytes sent to the NX to be used as input,
* i.e. sum of the bytes in next_in and fifo_in. */
- uint32_t source_sz;
+ uint32_t source_sz = 0;
/** \brief Sum of the bytes that may be used by NX as output
*
@@ -1242,46 +1256,69 @@ static int nx_inflate_(nx_streamp s, int flush)
/* NX decompresses input data */
fc = nx_reset_dde(s);
- nx_history_len = nx_init_dde(s);
- /*
- * NX source buffers
- */
- source_sz = nx_set_dde_in(s);
- ASSERT(source_sz > nx_history_len);
+ /** Estimate the amount of data sent to the NX. Ideally, we want
+ * exactly the history size amount of 32 KiB to overflow in to fifo_out
+ * in order to minimize copies of memory.
+ * If overflow is less than 32 KiB, the history spans next_out and
+ * fifo_out and must be copied in to fifo_out to setup history for the
+ * next job. The fifo_out fraction is also copied back to user's
+ * next_out before the next job.
+ * If overflow is more, all the overflow must be copied back
+ * to user's next_out before the next job.
+ * If overflow is much more, we may get an ERR_NX_TARGET_SPACE, forcing
+ * us to reduce the source before trying again. A retry in this case
+ * will probably require NX to process much more than 32 KiB, which
+ * requires more time than copying 32 KiB of data.
+ *
+ * With that said, we want to minimize unnecessary work (i.e. memcpy
+ * and retrying NX jobs) for performance. Therefore, the heuristic
+ * here will estimate the source size for the desired target size, but
+ * it prioritizes avoiding ERR_NX_TARGET_SPACE. */
- /*
- * NX target buffers
- */
- target_sz = nx_set_dde_out(s);
-
- /* We want exactly the History size amount of 32KB to overflow
- in to fifo_out. If overflow is less, the history spans
- next_out and fifo_out and must be copied in to fifo_out to
- setup history for the next job, and the fifo_out fraction is
- also copied back to user's next_out before the next job.
- If overflow is more, all the overflow must be copied back
- to user's next_out before the next job. We want to minimize
- these copies (memcpy) for performance. Therefore, the
- heuristic here will estimate the source size for the
- desired target size */
uint32_t len_next_out = s->avail_out;
+ s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L);
- /* avail_out plus 32 KB history plus a bit of overhead */
+ /* avail_out plus 32 KiB history plus a bit of overhead */
target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
-
target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len);
- /* e.g. if we want 100KB at the output and if the compression
- ratio is 10% we want 10KB if input */
+ /** Calculate source_sz_expected based on target_sz_expected and the
+ * last compression ratio, e.g. if we want 100KB at the output and if
+ * the compression ratio is 10% we want 10KB of input */
source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected
* s->last_comp_ratio + 1000L)/1000UL);
+ /** After calculating source_sz_expected, try to provide extra
+ * target_sz_expected in order to avoid an ERR_NX_TARGET_SPACE. */
+ target_sz_expected = NX_MIN(len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2),
+ 4 * inflate_per_job_len);
+ prt_info("%s:%d target_sz_expected %d source_sz_expected %d"
+ " source_sz %d last_comp_ratio %d\n",
+ __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected,
+ source_sz, s->last_comp_ratio);
+
+
+init_dde:
+ nx_history_len = nx_init_dde(s);
+
+ /*
+ * NX source buffers
+ */
+ source_sz = nx_set_dde_in(s, source_sz_expected);
+ ASSERT(source_sz > nx_history_len);
+ ASSERT(source_sz <= source_sz_expected + nx_history_len);
prt_info("%s:%d target_sz_expected %d source_sz_expected %d"
" source_sz %d last_comp_ratio %d nx_history_len %d\n",
__FUNCTION__, __LINE__, target_sz_expected, source_sz_expected,
source_sz, s->last_comp_ratio, nx_history_len);
+
+ /*
+ * NX target buffers
+ */
+ target_sz = nx_set_dde_out(s, target_sz_expected);
+
prt_info("%s:%d len_next_out %d len_out %d cur_out %d"
" used_out %d source_sz %d history_len %d\n",
__FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out,
@@ -1345,19 +1382,22 @@ static int nx_inflate_(nx_streamp s, int flush)
that is about 2 pages minimum for source and
and 6 pages for target; if the system does not
have 8 free pages then the loop will last forever */
- source_sz = source_sz - nx_history_len;
- if (source_sz > (2 * INF_MIN_INPUT_LEN))
- source_sz = (source_sz + 1) / 2;
- else if (source_sz > INF_MIN_INPUT_LEN)
- source_sz = INF_MIN_INPUT_LEN;
-
- /* else if caller gave fewer source bytes, keep it as is */
- source_sz = source_sz + nx_history_len;
-
- if (target_sz > (2 * INF_MAX_EXPANSION_BYTES))
- target_sz = (target_sz + 1) / 2;
- else if (target_sz > INF_MAX_EXPANSION_BYTES)
- target_sz = INF_MAX_EXPANSION_BYTES;
+ source_sz_expected = source_sz - nx_history_len;
+ if (source_sz_expected > (2 * INF_MIN_INPUT_LEN))
+ source_sz_expected
+ = (source_sz_expected + 1) / 2;
+ else if (source_sz_expected > INF_MIN_INPUT_LEN)
+ source_sz_expected = INF_MIN_INPUT_LEN;
+
+ /* else if caller gave fewer source bytes, keep it as
+ is. */
+ source_sz = source_sz_expected + nx_history_len;
+
+ if (target_sz_expected > (2 * INF_MAX_EXPANSION_BYTES))
+ target_sz_expected
+ = (target_sz_expected + 1) / 2;
+ else if (target_sz_expected > INF_MAX_EXPANSION_BYTES)
+ target_sz_expected = INF_MAX_EXPANSION_BYTES;
ticks_total = nx_wait_ticks(500, ticks_total, 0);
if (ticks_total > (timeout_pgfaults * nx_get_freq())) {
@@ -1368,7 +1408,8 @@ static int nx_inflate_(nx_streamp s, int flush)
}
else {
prt_warn("ERR_NX_AT_FAULT: more retry\n");
- goto restart_nx;
+ fc = nx_reset_dde(s);
+ goto init_dde;
}
}
@@ -1403,18 +1444,17 @@ static int nx_inflate_(nx_streamp s, int flush)
/* Target buffer not large enough; retry smaller input
data; give at least 1 byte. SPBC/TPBC are not valid */
ASSERT( source_sz > nx_history_len );
- source_sz = ((source_sz - nx_history_len + 1) / 2) + nx_history_len;
+ source_sz_expected = (source_sz - nx_history_len + 1) / 2;
- source_sz = source_sz - nx_history_len;
/* reduce large source down to minimum viable; if
source is already small don't change it */
- if (source_sz > (2 * INF_MIN_INPUT_LEN))
- source_sz = (source_sz + 1) / 2;
- else if (source_sz > INF_MIN_INPUT_LEN)
- source_sz = INF_MIN_INPUT_LEN;
+ if (source_sz_expected > (2 * INF_MIN_INPUT_LEN))
+ source_sz_expected = (source_sz_expected + 1) / 2;
+ else if (source_sz_expected > INF_MIN_INPUT_LEN)
+ source_sz_expected = INF_MIN_INPUT_LEN;
/* else if caller gave fewer source bytes, keep it as is */
- source_sz = source_sz + nx_history_len;
+ source_sz = source_sz_expected + nx_history_len;
/* do not change target size because we allocated a
minimum of INF_MAX_EXPANSION_BYTES which should
@@ -1422,8 +1462,10 @@ static int nx_inflate_(nx_streamp s, int flush)
bytes */
prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data"
- " src %d hist %d\n", source_sz, nx_history_len);
- goto restart_nx;
+ " source_sz_expected %d nx_history_len %d\n",
+ source_sz_expected, nx_history_len);
+ fc = nx_reset_dde(s);
+ goto init_dde;
case ERR_NX_OK:

53
pr155.patch Normal file
View File

@ -0,0 +1,53 @@
From e639fe97eabe40445ad6c1308125ce8c2687ec6f Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Date: Thu, 7 Apr 2022 16:40:37 -0300
Subject: [PATCH] Increase the value for max_vas_reuse_count
While running tests on PowerVM with 1MB files, a huge delay has been
noticed when processing the 100th stream.
The penalty for reopening the VAS window is still too high when paid every 100
executions, so max_vas_reuse_count is being increased to 10000 in order to keep
the reopening time below 10% of the time spent executing those 10000 streams.
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
---
lib/nx_zlib.c | 13 ++++++++++++-
test/nx-zlib.conf | 4 ++--
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c
index fa7000e..b3737a2 100644
--- a/lib/nx_zlib.c
+++ b/lib/nx_zlib.c
@@ -982,7 +982,18 @@ void nx_hw_init(void)
nx_config.max_byte_count_current = (1UL<<30);
nx_config.max_source_dde_count = MAX_DDE_COUNT;
nx_config.max_target_dde_count = MAX_DDE_COUNT;
- nx_config.max_vas_reuse_count = 100;
+ /** On PowerVM, reopening a VAS window takes 300x the time used to
+ * process a single stream with the maximum per job length allowed,
+ * causing serious performance issues when max_vas_reuse_count is too
+ * low. In order to avoid these issues, it has been decided the
+ * average impact of reopening a VAS window should be less than 10% of
+ * the time spent processing max_vas_reuse_count streams at
+ * maximum per job length. That means 3000, which we rounded up.
+ * Notice that setting this number too high may impact the performance
+ * of multithreaded processes with a massive amount of threads. For
+ * those cases, it's recommended to use a lower value in the config
+ * file. */
+ nx_config.max_vas_reuse_count = 10000;
nx_config.per_job_len = nx_query_job_limits(); /* less than suspend limit */
nx_config.strm_def_bufsz = (1024 * 1024); /* affect the deflate fifo_out */
nx_config.soft_copy_threshold = 1024; /* choose memcpy or hwcopy */
diff --git a/test/nx-zlib.conf b/test/nx-zlib.conf
index 444d966..cb30c36 100644
--- a/test/nx-zlib.conf
+++ b/test/nx-zlib.conf
@@ -59,5 +59,5 @@ logfile = ./nx.log
#nx_ratio_s = 100
# Maximum number of times deflateInit/inflateInit can reuse an already-open VAS
-# window
-# max_vas_reuse_count = 100
+# window. Default: 10000
+# max_vas_reuse_count = 10000