From 134949a2b9cd7cc778977acc1cbcf5496170b12c Mon Sep 17 00:00:00 2001 From: Tulio Magno Quites Machado Filho Date: Thu, 14 Apr 2022 11:51:14 -0300 Subject: [PATCH] Backport fixes and add udev rules --- libnxz.spec | 20 +- nx-gzip.udev | 1 + pr150.patch | 907 +++++++++++++++++++++++++++++++++++++++++++++++++++ pr155.patch | 53 +++ 4 files changed, 979 insertions(+), 2 deletions(-) create mode 100644 nx-gzip.udev create mode 100644 pr150.patch create mode 100644 pr155.patch diff --git a/libnxz.spec b/libnxz.spec index 5cd1fff..37347d7 100644 --- a/libnxz.spec +++ b/libnxz.spec @@ -1,18 +1,25 @@ Name: libnxz Version: 0.63 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Zlib implementation for POWER processors License: ASL 2.0 or GPLv2+ Url: https://github.com/libnxz/power-gzip BuildRequires: zlib-devel Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz +Source1: nx-gzip.udev + +# https://github.com/libnxz/power-gzip/pull/150 +Patch0: pr150.patch +# https://github.com/libnxz/power-gzip/pull/155 +Patch1: pr155.patch # Be explicit about the soname in order to avoid unintentional changes. %global soname libnxz.so.0 ExclusiveArch: ppc64le BuildRequires: gcc -BuildRequires: make +BuildRequires: make +BuildRequires: systemd-rpm-macros %description libnxz is a zlib-compatible library that uses the NX GZIP Engine available on @@ -52,6 +59,10 @@ fi %install %make_install +install -Dm 644 %{SOURCE1} %{buildroot}%{_udevrulesdir}/90-nx-gzip.rules + +%pre +%{_sbindir}/groupadd -r -f nx-gzip %files %{_libdir}/%{soname} @@ -59,6 +70,7 @@ fi %license %{_docdir}/%{name}/APACHE-2.0.txt %license %{_docdir}/%{name}/gpl-2.0.txt %doc README.md +%{_udevrulesdir}/90-nx-gzip.rules %files devel %{_includedir}/libnxz.h @@ -69,6 +81,10 @@ fi %{_libdir}/libnxz.la %changelog +* Thu Apr 14 2022 Tulio Magno Quites Machado Filho - 0.63-2 +- Backport fixes from upstream. +- Create the nx-gzip group and add udev rules. 
+ * Fri Mar 04 2022 Tulio Magno Quites Machado Filho - 0.63-1 - Update to libnxz 0.63. - Fix the soname to the right string. diff --git a/nx-gzip.udev b/nx-gzip.udev new file mode 100644 index 0000000..8d37f22 --- /dev/null +++ b/nx-gzip.udev @@ -0,0 +1 @@ +KERNEL=="nx-gzip", GROUP="nx-gzip", MODE="0660" diff --git a/pr150.patch b/pr150.patch new file mode 100644 index 0000000..a4e5542 --- /dev/null +++ b/pr150.patch @@ -0,0 +1,907 @@ +From 943a7f434b10c19f8e8e865c3cc40685b9903822 Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Thu, 24 Mar 2022 17:32:43 -0300 +Subject: [PATCH 1/6] Provide a maximum job length depending on the + virtualization + +Identify if a system is running on baremetal or PowerVM and provide +a maximum job length adapted to each case. + +Signed-off-by: Tulio Magno Quites Machado Filho +--- + lib/nx_inflate.c | 5 +++-- + lib/nx_zlib.c | 25 +++++++++++++++++++++++-- + lib/nx_zlib.h | 4 ++++ + 3 files changed, 30 insertions(+), 4 deletions(-) + +diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c +index ec32b4c..77ad33c 100644 +--- a/lib/nx_inflate.c ++++ b/lib/nx_inflate.c +@@ -945,8 +945,9 @@ static int nx_inflate_(nx_streamp s, int flush) + uint32_t write_sz, source_sz, target_sz; + long loop_cnt = 0, loop_max = 0xffff; + +- /* inflate benefits from large jobs; memcopies must be amortized */ +- uint32_t inflate_per_job_len = 64 * nx_config.per_job_len; ++ /** \brief inflate benefits from large jobs; memcopies must be ++ * amortized. 
*/ ++ const uint32_t inflate_per_job_len = nx_config.per_job_len; + + /* nx hardware */ + uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc; +diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c +index 28ea482..a50d6f7 100644 +--- a/lib/nx_zlib.c ++++ b/lib/nx_zlib.c +@@ -64,6 +64,18 @@ + #include "nx_utils.h" + #include "nx_zlib.h" + ++/* Use the following values as maximum length of NX jobs when the OS doesn't ++ provide the value itself, which is the default behavior until Linux 5.17 */ ++ ++/** \brief Maximum job length on baremetal ++ * ++ * While the system does allow up-to 2 GiB as the maximum job length, restrict ++ * it to 64 MiB. ++ */ ++#define DEFAULT_MAX_JOB_BAREMETAL 64 * 1024 * 1024 ++/** \brief Maximum job length on PowerVM */ ++#define DEFAULT_MAX_JOB_POWERVM 1024 * 1024 ++ + struct nx_config_t nx_config; + static struct nx_dev_t nx_devices[NX_DEVICES_MAX]; + static int nx_dev_count = 0; +@@ -639,8 +651,13 @@ static int nx_query_job_limits() + } + } + +- /* On error return default value of 1 MB */ +- return (1024 * 1024); ++ /* On error return default value. */ ++ switch (nx_config.virtualization) { ++ case BAREMETAL: ++ return DEFAULT_MAX_JOB_BAREMETAL; ++ default: ++ return DEFAULT_MAX_JOB_POWERVM; ++ } + } + + /* +@@ -659,6 +676,9 @@ static int nx_enumerate_engines() + int count = 0; + size_t n; + ++ /* Assume baremetal by default. */ ++ nx_config.virtualization = BAREMETAL; ++ + d = opendir(DEVICE_TREE); + if (d == NULL){ + prt_err("open device tree dir failed.\n"); +@@ -712,6 +732,7 @@ static int nx_enumerate_engines() + } + /* On PowerVM, there is no concept of multiple NX engines. 
*/ + if (strncmp(de->d_name, "ibm,powervm", 11) == 0){ ++ nx_config.virtualization = POWERVM; + closedir(d); + return 1; + } +diff --git a/lib/nx_zlib.h b/lib/nx_zlib.h +index e84bd7e..fa73b01 100644 +--- a/lib/nx_zlib.h ++++ b/lib/nx_zlib.h +@@ -129,6 +129,8 @@ void nx_print_dde(nx_dde_t *ddep, const char *msg); + #define zlib_version zlibVersion() + extern const char *zlibVersion OF((void)); + ++enum virtualization {BAREMETAL=0, POWERVM=1}; ++ + /* common config variables for all streams */ + struct nx_config_t { + long page_sz; +@@ -158,6 +160,8 @@ struct nx_config_t { + * dynamic huffman */ + struct selector mode; /** mode selector: selects between software + * and hardware compression. */ ++ uint8_t virtualization; /** Indicate the virtualization type being ++ * used. */ + }; + typedef struct nx_config_t *nx_configp_t; + extern struct nx_config_t nx_config; + +From b22eb7bffe61e36f70661921a689e44370d3c7e5 Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Thu, 24 Mar 2022 18:03:28 -0300 +Subject: [PATCH 2/6] inflate: Move code that initializes the DDE to their own + functions + +Create functions nx_reset_dde() and nx_init_dde() based on previous code +helping to reduce the size of nx_inflate_() and making it easier to +understand the code. + +Signed-off-by: Tulio Magno Quites Machado Filho +--- + lib/nx_inflate.c | 197 ++++++++++++++++++++++++++++------------------- + 1 file changed, 116 insertions(+), 81 deletions(-) + +diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c +index 77ad33c..f1d9adb 100644 +--- a/lib/nx_inflate.c ++++ b/lib/nx_inflate.c +@@ -934,6 +934,120 @@ static int copy_data_to_fifo_in(nx_streamp s) { + return Z_OK; + } + ++/** \brief Reset DDE to initial values. ++ * ++ * @param s nx_streamp to be processed. ++ * @return Function code as passed to CRB. The function will set the CRB and ++ * return the value here. 
++ */ ++static int nx_reset_dde(nx_streamp s) { ++ nx_gzip_crb_cpb_t *cmdp = s->nxcmdp; ++ uint32_t fc; ++ ++ /* address/len lists */ ++ clearp_dde(s->ddl_in); ++ clearp_dde(s->ddl_out); ++ ++ /* FC, CRC, HistLen, Table 6-6 */ ++ if (s->resuming || (s->dict_len > 0)) { ++ /* Resuming a partially decompressed input. */ ++ fc = GZIP_FC_DECOMPRESS_RESUME; ++ } else { ++ /* First decompress job */ ++ fc = GZIP_FC_DECOMPRESS; ++ ++ /* We use the most recently measured compression ratio ++ as a heuristic to estimate the input and output ++ sizes. If we give too much input, the target buffer ++ overflows and NX cycles are wasted, and then we ++ must retry with smaller input size. 1000 is 100% */ ++ s->last_comp_ratio = 1000UL; ++ } ++ ++ /* clear then copy fc to the crb */ ++ cmdp->crb.gzip_fc = 0; ++ putnn(cmdp->crb, gzip_fc, fc); ++ ++ return fc; ++} ++ ++/** \brief Initialize DDE, appending a dictionary, if necessary. ++ * ++ * @param s nx_streamp to be processed. ++ * @return The history length ++ */ ++static int nx_init_dde(nx_streamp s) { ++ nx_gzip_crb_cpb_t *cmdp = s->nxcmdp; ++ int nx_history_len = s->history_len; ++ ++ /* FC, CRC, HistLen, Table 6-6 */ ++ if (s->resuming || (s->dict_len > 0)) { ++ /* Resuming a partially decompressed input. The key ++ to resume is supplying the max 32KB dictionary ++ (history) to NX, which is basically the last 32KB ++ or less of the output earlier produced. And also ++ make sure partial checksums are carried forward ++ */ ++ ++ /* Crc of prev job passed to the job to be resumed */ ++ put32(cmdp->cpb, in_crc, s->crc32); ++ put32(cmdp->cpb, in_adler, s->adler32); ++ ++ /* Round up the sizes to quadword. 
Section 2.10 ++ Rounding up will not segfault because ++ nx_alloc_buffer has padding at the beginning */ ++ ++ if (s->dict_len > 0) { ++ /* lays dict on top of hist */ ++ nx_history_len = nx_amend_history_with_dict(s); ++ ++ if (s->wrap == HEADER_ZLIB) { ++ /* in the raw mode pass crc as is; in the zlib ++ mode initialize them */ ++ put32(cmdp->cpb, in_crc, INIT_CRC ); ++ put32(cmdp->cpb, in_adler, INIT_ADLER); ++ put32(cmdp->cpb, out_crc, INIT_CRC ); ++ put32(cmdp->cpb, out_adler, INIT_ADLER); ++ } ++ print_dbg_info(s, __LINE__); ++ } else { ++ /* no dictionary here */ ++ ASSERT( s->dict_len == 0 ); ++ nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ; ++ putnn(cmdp->cpb, in_histlen, nx_history_len); ++ /* convert to bytes */ ++ nx_history_len = nx_history_len * NXQWSZ; ++ ++ if (nx_history_len > 0) { ++ /* deflate history goes in first */ ++ ASSERT(s->cur_out >= nx_history_len); ++ nx_append_dde(s->ddl_in, ++ s->fifo_out + (s->cur_out ++ - nx_history_len), ++ nx_history_len); ++ } ++ print_dbg_info(s, __LINE__); ++ } ++ } else { ++ nx_history_len = s->history_len = 0; ++ /* writing a 0 clears out subc as well */ ++ cmdp->cpb.in_histlen = 0; ++ ++ /* initialize the crc values */ ++ put32(cmdp->cpb, in_crc, INIT_CRC ); ++ put32(cmdp->cpb, in_adler, INIT_ADLER); ++ put32(cmdp->cpb, out_crc, INIT_CRC ); ++ put32(cmdp->cpb, out_adler, INIT_ADLER); ++ } ++ ++ /* We use the most recently measured compression ratio as a heuristic ++ to estimate the input and output sizes. If we give too much input, ++ the target buffer overflows and NX cycles are wasted, and then we ++ must retry with smaller input size. 1000 is 100% */ ++ s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L); ++ return nx_history_len; ++} ++ + /** \brief Internal implementation of inflate. + * + * @param s nx_streamp to be processed. 
+@@ -1075,87 +1189,8 @@ static int nx_inflate_(nx_streamp s, int flush) + + /* NX decompresses input data */ + +- /* address/len lists */ +- clearp_dde(ddl_in); +- clearp_dde(ddl_out); +- +- nx_history_len = s->history_len; +- +- /* FC, CRC, HistLen, Table 6-6 */ +- if (s->resuming || (s->dict_len > 0)) { +- /* Resuming a partially decompressed input. The key +- to resume is supplying the max 32KB dictionary +- (history) to NX, which is basically the last 32KB +- or less of the output earlier produced. And also +- make sure partial checksums are carried forward +- */ +- fc = GZIP_FC_DECOMPRESS_RESUME; +- +- /* Crc of prev job passed to the job to be resumed */ +- put32(cmdp->cpb, in_crc, s->crc32); +- put32(cmdp->cpb, in_adler, s->adler32); +- +- /* Round up the sizes to quadword. Section 2.10 +- Rounding up will not segfault because +- nx_alloc_buffer has padding at the beginning */ +- +- if (s->dict_len > 0) { +- /* lays dict on top of hist */ +- nx_history_len = nx_amend_history_with_dict(s); +- +- if (s->wrap == HEADER_ZLIB) { +- /* in the raw mode pass crc as is; in the zlib mode +- initialize them */ +- put32(cmdp->cpb, in_crc, INIT_CRC ); +- put32(cmdp->cpb, in_adler, INIT_ADLER); +- put32(cmdp->cpb, out_crc, INIT_CRC ); +- put32(cmdp->cpb, out_adler, INIT_ADLER); +- } +- +- s->last_comp_ratio = NX_MAX( NX_MIN(1000UL, s->last_comp_ratio), 100L ); +- +- print_dbg_info(s, __LINE__); +- } +- else { +- /* no dictionary here */ +- ASSERT( s->dict_len == 0 ); +- nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ; +- putnn(cmdp->cpb, in_histlen, nx_history_len); +- nx_history_len = nx_history_len * NXQWSZ; /* convert to bytes */ +- +- if (nx_history_len > 0) { +- /* deflate history goes in first */ +- ASSERT(s->cur_out >= nx_history_len); +- nx_append_dde(ddl_in, s->fifo_out + (s->cur_out - nx_history_len), nx_history_len); +- } +- print_dbg_info(s, __LINE__); +- } +- } +- else { +- /* First decompress job */ +- fc = GZIP_FC_DECOMPRESS; +- +- nx_history_len = 
s->history_len = 0; +- /* writing a 0 clears out subc as well */ +- cmdp->cpb.in_histlen = 0; +- +- /* initialize the crc values */ +- put32(cmdp->cpb, in_crc, INIT_CRC ); +- put32(cmdp->cpb, in_adler, INIT_ADLER); +- put32(cmdp->cpb, out_crc, INIT_CRC ); +- put32(cmdp->cpb, out_adler, INIT_ADLER); +- +- /* We use the most recently measured compression ratio +- as a heuristic to estimate the input and output +- sizes. If we give too much input, the target buffer +- overflows and NX cycles are wasted, and then we +- must retry with smaller input size. 1000 is 100% */ +- s->last_comp_ratio = 1000UL; +- } +- +- /* clear then copy fc to the crb */ +- cmdp->crb.gzip_fc = 0; +- putnn(cmdp->crb, gzip_fc, fc); ++ fc = nx_reset_dde(s); ++ nx_history_len = nx_init_dde(s); + + /* + * NX source buffers + +From e376d92fa704108f1258e3a41fc1ffcf551d1c5b Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Fri, 25 Mar 2022 09:57:32 -0300 +Subject: [PATCH 3/6] Fix the total amount of pages being touched + +Fix an error in nx_touch_pages_dde() that was causing the function to +touch a different number of pages than requested. 
+ +Signed-off-by: Tulio Magno Quites Machado Filho +--- + lib/nx_zlib.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c +index a50d6f7..bf2a6fc 100644 +--- a/lib/nx_zlib.c ++++ b/lib/nx_zlib.c +@@ -398,7 +398,7 @@ int nx_touch_pages_dde(nx_dde_t *ddep, long buf_sz, long page_sz, int wr) + + /* touching fewer pages than encoded in the ddebc */ + if ( total > buf_sz) { +- buf_len = NX_MIN(buf_len, total - buf_sz); ++ buf_len = buf_sz - (total - buf_len); + nx_touch_pages((void *)buf_addr, buf_len, page_sz, wr); + prt_trace("touch loop break len 0x%x ddead %p\n", buf_len, (void *)buf_addr); + break; + +From 1f3dc128a476c9bbbb1b503d2fc8f54365101ebf Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Thu, 24 Mar 2022 18:18:34 -0300 +Subject: [PATCH 4/6] inflate: Move code that sets the DDE + +Create functions nx_set_dde_in() and nx_set_dde_out() based on old code. + +Signed-off-by: Tulio Magno Quites Machado Filho +--- + lib/nx_inflate.c | 64 +++++++++++++++++++++++++++++++++--------------- + 1 file changed, 44 insertions(+), 20 deletions(-) + +diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c +index f1d9adb..a9671b2 100644 +--- a/lib/nx_inflate.c ++++ b/lib/nx_inflate.c +@@ -1048,6 +1048,41 @@ static int nx_init_dde(nx_streamp s) { + return nx_history_len; + } + ++/** \brief Append input data to DDE ++ * ++ * @param s nx_streamp to be processed. ++ * ++ * @return The total amount of bytes appended to DDE ++ */ ++static uint32_t nx_set_dde_in(nx_streamp s) { ++ /* Buffered user input is next */ ++ if (s->fifo_in != NULL) ++ nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in); ++ /* Then current user input. */ ++ nx_append_dde(s->ddl_in, s->next_in, s->avail_in); ++ /* Total bytes going in to engine. */ ++ return getp32(s->ddl_in, ddebc); ++} ++ ++/** \brief Append output data to DDE ++ * ++ * @param s nx_streamp to be processed. 
++ * ++ * @return The total amount of bytes appended to DDE ++ */ ++static uint32_t nx_set_dde_out(nx_streamp s) { ++ /* Decompress to user buffer first. */ ++ nx_append_dde(s->ddl_out, s->next_out, s->avail_out); ++ ++ /* Overflow to fifo_out. ++ used_out == 0 required by definition. */ ++ ASSERT(s->used_out == 0); ++ nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out, ++ s->len_out - s->cur_out); ++ ++ return s->avail_out + s->len_out - s->cur_out; ++} ++ + /** \brief Internal implementation of inflate. + * + * @param s nx_streamp to be processed. +@@ -1195,28 +1230,13 @@ static int nx_inflate_(nx_streamp s, int flush) + /* + * NX source buffers + */ +- /* buffered user input is next */ +- if (s->fifo_in != NULL) +- nx_append_dde(ddl_in, s->fifo_in + s->cur_in, s->used_in); +- /* then current user input */ +- nx_append_dde(ddl_in, s->next_in, s->avail_in); +- source_sz = getp32(ddl_in, ddebc); /* total bytes going in to engine */ +- ASSERT( source_sz > nx_history_len ); ++ source_sz = nx_set_dde_in(s); ++ ASSERT(source_sz > nx_history_len); + + /* + * NX target buffers + */ +- ASSERT(s->used_out == 0); +- +- uint32_t len_next_out = s->avail_out; +- nx_append_dde(ddl_out, s->next_out, len_next_out); /* decomp in to user buffer */ +- +- /* overflow, used_out == 0 required by definition, +used_out below is unnecessary */ +- nx_append_dde(ddl_out, s->fifo_out + s->cur_out + s->used_out, s->len_out - s->cur_out - s->used_out); +- target_sz = len_next_out + s->len_out - s->cur_out - s->used_out; +- +- prt_info("len_next_out %d len_out %d cur_out %d used_out %d source_sz %d history_len %d\n", +- len_next_out, s->len_out, s->cur_out, s->used_out, source_sz, nx_history_len); ++ target_sz = nx_set_dde_out(s); + + /* We want exactly the History size amount of 32KB to overflow + in to fifo_out. If overflow is less, the history spans +@@ -1228,6 +1248,7 @@ static int nx_inflate_(nx_streamp s, int flush) + these copies (memcpy) for performance. 
Therefore, the + heuristic here will estimate the source size for the + desired target size */ ++ uint32_t len_next_out = s->avail_out; + + /* avail_out plus 32 KB history plus a bit of overhead */ + uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2); +@@ -1240,11 +1261,14 @@ static int nx_inflate_(nx_streamp s, int flush) + + prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len); + ++ prt_info("%s:%d len_next_out %d len_out %d cur_out %d" ++ " used_out %d source_sz %d history_len %d\n", ++ __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out, ++ s->used_out, source_sz, nx_history_len); ++ + /* do not include input side history in the estimation */ + source_sz = source_sz - nx_history_len; +- + ASSERT(source_sz > 0); +- + source_sz = NX_MIN(source_sz, source_sz_expected); + + /* add the history back */ + +From eb6cb7b01fe1fa337979353e905e3ad96514b233 Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Thu, 24 Mar 2022 18:37:27 -0300 +Subject: [PATCH 5/6] inflate: cosmetic improvements + +- Add source code comments. +- Improve indentation. +- Break long lines. +- Fix error and information messages. + +Signed-off-by: Tulio Magno Quites Machado Filho +--- + lib/nx_inflate.c | 51 ++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 38 insertions(+), 13 deletions(-) + +diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c +index a9671b2..a6070bd 100644 +--- a/lib/nx_inflate.c ++++ b/lib/nx_inflate.c +@@ -1090,14 +1090,31 @@ static uint32_t nx_set_dde_out(nx_streamp s) { + */ + static int nx_inflate_(nx_streamp s, int flush) + { +- /* queuing, file ops, byte counting */ +- uint32_t write_sz, source_sz, target_sz; ++ /** \brief Sum of the bytes that may be used by NX as input ++ * ++ * Total amount of bytes sent to the NX to be used as input, ++ * i.e. 
sum of the bytes in next_in and fifo_in. */ ++ uint32_t source_sz; ++ ++ /** \brief Sum of the bytes that may be used by NX as output ++ * ++ * Maximum amount of bytes available by the NX to be used as output, ++ * i.e. sum of the bytes available in next_out and fifo_out. */ ++ uint32_t target_sz; ++ ++ uint32_t write_sz; + long loop_cnt = 0, loop_max = 0xffff; + + /** \brief inflate benefits from large jobs; memcopies must be + * amortized. */ + const uint32_t inflate_per_job_len = nx_config.per_job_len; + ++ /** \brief Estimated value for target_sz. Used to calculate ++ * source_sz_expected. */ ++ uint32_t target_sz_expected; ++ /** \brief Estimated value for source_sz. */ ++ uint32_t source_sz_expected; ++ + /* nx hardware */ + uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc; + +@@ -1251,16 +1268,20 @@ static int nx_inflate_(nx_streamp s, int flush) + uint32_t len_next_out = s->avail_out; + + /* avail_out plus 32 KB history plus a bit of overhead */ +- uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2); ++ target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2); + + target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len); + + /* e.g. 
if we want 100KB at the output and if the compression + ratio is 10% we want 10KB if input */ +- uint32_t source_sz_expected = (uint32_t)(((uint64_t)target_sz_expected * s->last_comp_ratio + 1000L)/1000UL); ++ source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected ++ * s->last_comp_ratio + 1000L)/1000UL); + +- prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len); + ++ prt_info("%s:%d target_sz_expected %d source_sz_expected %d" ++ " source_sz %d last_comp_ratio %d nx_history_len %d\n", ++ __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected, ++ source_sz, s->last_comp_ratio, nx_history_len); + prt_info("%s:%d len_next_out %d len_out %d cur_out %d" + " used_out %d source_sz %d history_len %d\n", + __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out, +@@ -1280,8 +1301,11 @@ static int nx_inflate_(nx_streamp s, int flush) + + /* fault in pages */ + nx_touch_pages_dde(ddl_in, source_sz, nx_config.page_sz, 0); +- nx_touch_pages_dde(ddl_out, target_sz, nx_config.page_sz, 1); +- nx_touch_pages( (void *)cmdp, sizeof(nx_gzip_crb_cpb_t), nx_config.page_sz, 0); ++ nx_touch_pages_dde(ddl_out, ++ target_sz, ++ nx_config.page_sz, 1); ++ nx_touch_pages((void *) cmdp, sizeof(nx_gzip_crb_cpb_t), ++ nx_config.page_sz, 0); + + /* + * send job to NX +@@ -1298,9 +1322,9 @@ static int nx_inflate_(nx_streamp s, int flush) + faulting address to fsaddr */ + print_dbg_info(s, __LINE__); + +- prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d ", +- (void *)cmdp->crb.csb.fsaddr, source_sz); +- prt_warn("target_sz %d\n", target_sz); ++ prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d " ++ "target_sz %d\n", (void *)cmdp->crb.csb.fsaddr, ++ source_sz, target_sz); + #ifdef NX_LOG_SOURCE_TARGET + nx_print_dde(ddl_in, "source"); + nx_print_dde(ddl_out, "target"); +@@ -1339,8 +1363,8 @@ static int 
nx_inflate_(nx_streamp s, int flush) + if (ticks_total > (timeout_pgfaults * nx_get_freq())) { + /* TODO what to do when page faults are too many? + * Kernel MM would have killed the process. */ +- prt_err("Cannot make progress; too many page"); +- prt_err(" faults cc= %d\n", cc); ++ prt_err("Cannot make progress; too many page" ++ " faults cc= %d\n", cc); + } + else { + prt_warn("ERR_NX_AT_FAULT: more retry\n"); +@@ -1397,7 +1421,8 @@ static int nx_inflate_(nx_streamp s, int flush) + cover the max expansion of INF_MIN_INPUT_LEN + bytes */ + +- prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data src %d hist %d\n", source_sz, nx_history_len); ++ prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data" ++ " src %d hist %d\n", source_sz, nx_history_len); + goto restart_nx; + + case ERR_NX_OK: + +From 806bf8e3ed1d0ae8a21bc6b2035df390f1062c26 Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Mon, 28 Mar 2022 18:15:37 -0300 +Subject: [PATCH 6/6] inflate: Limit the amount of data added to the DDE + +Stop adding all input and output data to the DDE and limit based on the +calculated value for source_sz_expected and target_sz_expected. +By limiting these values, we end up better estimating the amount of +pages that need to be touched, reducing the amount of time spent +touching pages that might not be used. + +Reported-by: Puvichakravarthy Ramachandran +Reported-by: Poorna Chandra Vemula +Signed-off-by: Tulio Magno Quites Machado Filho +--- + lib/nx_inflate.c | 180 +++++++++++++++++++++++++++++------------------ + 1 file changed, 111 insertions(+), 69 deletions(-) + +diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c +index a6070bd..b30cdf6 100644 +--- a/lib/nx_inflate.c ++++ b/lib/nx_inflate.c +@@ -955,13 +955,6 @@ static int nx_reset_dde(nx_streamp s) { + } else { + /* First decompress job */ + fc = GZIP_FC_DECOMPRESS; +- +- /* We use the most recently measured compression ratio +- as a heuristic to estimate the input and output +- sizes. 
If we give too much input, the target buffer +- overflows and NX cycles are wasted, and then we +- must retry with smaller input size. 1000 is 100% */ +- s->last_comp_ratio = 1000UL; + } + + /* clear then copy fc to the crb */ +@@ -1051,15 +1044,24 @@ static int nx_init_dde(nx_streamp s) { + /** \brief Append input data to DDE + * + * @param s nx_streamp to be processed. ++ * @param source_sz_expected The total amount of bytes expected as input. It ++ * does not include dictionary or history. + * + * @return The total amount of bytes appended to DDE + */ +-static uint32_t nx_set_dde_in(nx_streamp s) { ++static uint32_t nx_set_dde_in(nx_streamp s, uint32_t source_sz_expected) { ++ uint32_t tmp = 0; ++ + /* Buffered user input is next */ +- if (s->fifo_in != NULL) +- nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in); +- /* Then current user input. */ +- nx_append_dde(s->ddl_in, s->next_in, s->avail_in); ++ if (s->fifo_in != NULL) { ++ tmp = NX_MIN(s->used_in, source_sz_expected); ++ nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, tmp); ++ } ++ if (tmp < source_sz_expected) { ++ tmp = NX_MIN(s->avail_in, source_sz_expected - tmp); ++ /* Then current user input. */ ++ nx_append_dde(s->ddl_in, s->next_in, tmp); ++ } + /* Total bytes going in to engine. */ + return getp32(s->ddl_in, ddebc); + } +@@ -1067,20 +1069,32 @@ static uint32_t nx_set_dde_in(nx_streamp s) { + /** \brief Append output data to DDE + * + * @param s nx_streamp to be processed. ++ * @param target_sz_expected The total amount of bytes expected as output. + * + * @return The total amount of bytes appended to DDE + */ +-static uint32_t nx_set_dde_out(nx_streamp s) { ++static uint32_t nx_set_dde_out(nx_streamp s, uint32_t target_sz_expected) { ++ uint32_t tmp; ++ uint32_t ret; ++ ++ ret = NX_MIN(s->avail_out, target_sz_expected); ++ + /* Decompress to user buffer first. 
*/ +- nx_append_dde(s->ddl_out, s->next_out, s->avail_out); ++ nx_append_dde(s->ddl_out, s->next_out, ret); ++ ++ if (ret < target_sz_expected) { ++ tmp = NX_MIN(s->len_out - s->cur_out, ++ target_sz_expected - ret); ++ ++ /* Overflow to fifo_out. ++ used_out == 0 required by definition. */ ++ ASSERT(s->used_out == 0); ++ nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out, tmp); + +- /* Overflow to fifo_out. +- used_out == 0 required by definition. */ +- ASSERT(s->used_out == 0); +- nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out, +- s->len_out - s->cur_out); ++ ret += tmp; ++ } + +- return s->avail_out + s->len_out - s->cur_out; ++ return ret; + } + + /** \brief Internal implementation of inflate. +@@ -1094,7 +1108,7 @@ static int nx_inflate_(nx_streamp s, int flush) + * + * Total amount of bytes sent to the NX to be used as input, + * i.e. sum of the bytes in next_in and fifo_in. */ +- uint32_t source_sz; ++ uint32_t source_sz = 0; + + /** \brief Sum of the bytes that may be used by NX as output + * +@@ -1242,46 +1256,69 @@ static int nx_inflate_(nx_streamp s, int flush) + /* NX decompresses input data */ + + fc = nx_reset_dde(s); +- nx_history_len = nx_init_dde(s); + +- /* +- * NX source buffers +- */ +- source_sz = nx_set_dde_in(s); +- ASSERT(source_sz > nx_history_len); ++ /** Estimate the amount of data sent to the NX. Ideally, we want ++ * exactly the history size amount of 32 KiB to overflow in to fifo_out ++ * in order to minimize copies of memory. ++ * If overflow is less than 32 KiB, the history spans next_out and ++ * fifo_out and must be copied in to fifo_out to setup history for the ++ * next job. The fifo_out fraction is also copied back to user's ++ * next_out before the next job. ++ * If overflow is more, all the overflow must be copied back ++ * to user's next_out before the next job. ++ * If overflow is much more, we may get an ERR_NX_TARGET_SPACE, forcing ++ * us to reduce the source before trying again. 
A retry in this case ++ * will probably require NX to process much more than 32 KiB, which ++ * requires more time than copying 32 KiB of data. ++ * ++ * With that said, we want to minimize unnecessary work (i.e. memcpy ++ * and retrying NX jobs) for performance. Therefore, the heuristic ++ * here will estimate the source size for the desired target size, but ++ * it prioritizes avoiding ERR_NX_TARGET_SPACE. */ + +- /* +- * NX target buffers +- */ +- target_sz = nx_set_dde_out(s); +- +- /* We want exactly the History size amount of 32KB to overflow +- in to fifo_out. If overflow is less, the history spans +- next_out and fifo_out and must be copied in to fifo_out to +- setup history for the next job, and the fifo_out fraction is +- also copied back to user's next_out before the next job. +- If overflow is more, all the overflow must be copied back +- to user's next_out before the next job. We want to minimize +- these copies (memcpy) for performance. Therefore, the +- heuristic here will estimate the source size for the +- desired target size */ + uint32_t len_next_out = s->avail_out; ++ s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L); + +- /* avail_out plus 32 KB history plus a bit of overhead */ ++ /* avail_out plus 32 KiB history plus a bit of overhead */ + target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2); +- + target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len); + +- /* e.g. if we want 100KB at the output and if the compression +- ratio is 10% we want 10KB if input */ ++ /** Calculate source_sz_expected based on target_sz_expected and the ++ * last compression ratio, e.g. if we want 100KB at the output and if ++ * the compression ratio is 10% we want 10KB of input */ + source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected + * s->last_comp_ratio + 1000L)/1000UL); + ++ /** After calculating source_sz_expected, try to provide extra ++ * target_sz_expected in order to avoid an ERR_NX_TARGET_SPACE. 
*/ ++ target_sz_expected = NX_MIN(len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2), ++ 4 * inflate_per_job_len); ++ prt_info("%s:%d target_sz_expected %d source_sz_expected %d" ++ " source_sz %d last_comp_ratio %d\n", ++ __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected, ++ source_sz, s->last_comp_ratio); ++ ++ ++init_dde: ++ nx_history_len = nx_init_dde(s); ++ ++ /* ++ * NX source buffers ++ */ ++ source_sz = nx_set_dde_in(s, source_sz_expected); ++ ASSERT(source_sz > nx_history_len); ++ ASSERT(source_sz <= source_sz_expected + nx_history_len); + + prt_info("%s:%d target_sz_expected %d source_sz_expected %d" + " source_sz %d last_comp_ratio %d nx_history_len %d\n", + __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected, + source_sz, s->last_comp_ratio, nx_history_len); ++ ++ /* ++ * NX target buffers ++ */ ++ target_sz = nx_set_dde_out(s, target_sz_expected); ++ + prt_info("%s:%d len_next_out %d len_out %d cur_out %d" + " used_out %d source_sz %d history_len %d\n", + __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out, +@@ -1345,19 +1382,22 @@ static int nx_inflate_(nx_streamp s, int flush) + that is about 2 pages minimum for source and + and 6 pages for target; if the system does not + have 8 free pages then the loop will last forever */ +- source_sz = source_sz - nx_history_len; +- if (source_sz > (2 * INF_MIN_INPUT_LEN)) +- source_sz = (source_sz + 1) / 2; +- else if (source_sz > INF_MIN_INPUT_LEN) +- source_sz = INF_MIN_INPUT_LEN; +- +- /* else if caller gave fewer source bytes, keep it as is */ +- source_sz = source_sz + nx_history_len; +- +- if (target_sz > (2 * INF_MAX_EXPANSION_BYTES)) +- target_sz = (target_sz + 1) / 2; +- else if (target_sz > INF_MAX_EXPANSION_BYTES) +- target_sz = INF_MAX_EXPANSION_BYTES; ++ source_sz_expected = source_sz - nx_history_len; ++ if (source_sz_expected > (2 * INF_MIN_INPUT_LEN)) ++ source_sz_expected ++ = (source_sz_expected + 1) / 2; ++ else if (source_sz_expected > INF_MIN_INPUT_LEN) ++ 
source_sz_expected = INF_MIN_INPUT_LEN; ++ ++ /* else if caller gave fewer source bytes, keep it as ++ is. */ ++ source_sz = source_sz_expected + nx_history_len; ++ ++ if (target_sz_expected > (2 * INF_MAX_EXPANSION_BYTES)) ++ target_sz_expected ++ = (target_sz_expected + 1) / 2; ++ else if (target_sz_expected > INF_MAX_EXPANSION_BYTES) ++ target_sz_expected = INF_MAX_EXPANSION_BYTES; + + ticks_total = nx_wait_ticks(500, ticks_total, 0); + if (ticks_total > (timeout_pgfaults * nx_get_freq())) { +@@ -1368,7 +1408,8 @@ static int nx_inflate_(nx_streamp s, int flush) + } + else { + prt_warn("ERR_NX_AT_FAULT: more retry\n"); +- goto restart_nx; ++ fc = nx_reset_dde(s); ++ goto init_dde; + } + } + +@@ -1403,18 +1444,17 @@ static int nx_inflate_(nx_streamp s, int flush) + /* Target buffer not large enough; retry smaller input + data; give at least 1 byte. SPBC/TPBC are not valid */ + ASSERT( source_sz > nx_history_len ); +- source_sz = ((source_sz - nx_history_len + 1) / 2) + nx_history_len; ++ source_sz_expected = (source_sz - nx_history_len + 1) / 2; + +- source_sz = source_sz - nx_history_len; + /* reduce large source down to minimum viable; if + source is already small don't change it */ +- if (source_sz > (2 * INF_MIN_INPUT_LEN)) +- source_sz = (source_sz + 1) / 2; +- else if (source_sz > INF_MIN_INPUT_LEN) +- source_sz = INF_MIN_INPUT_LEN; ++ if (source_sz_expected > (2 * INF_MIN_INPUT_LEN)) ++ source_sz_expected = (source_sz_expected + 1) / 2; ++ else if (source_sz_expected > INF_MIN_INPUT_LEN) ++ source_sz_expected = INF_MIN_INPUT_LEN; + + /* else if caller gave fewer source bytes, keep it as is */ +- source_sz = source_sz + nx_history_len; ++ source_sz = source_sz_expected + nx_history_len; + + /* do not change target size because we allocated a + minimum of INF_MAX_EXPANSION_BYTES which should +@@ -1422,8 +1462,10 @@ static int nx_inflate_(nx_streamp s, int flush) + bytes */ + + prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data" +- " src %d hist 
%d\n", source_sz, nx_history_len); +- goto restart_nx; ++ " source_sz_expected %d nx_history_len %d\n", ++ source_sz_expected, nx_history_len); ++ fc = nx_reset_dde(s); ++ goto init_dde; + + case ERR_NX_OK: + diff --git a/pr155.patch b/pr155.patch new file mode 100644 index 0000000..8f48d58 --- /dev/null +++ b/pr155.patch @@ -0,0 +1,53 @@ +From e639fe97eabe40445ad6c1308125ce8c2687ec6f Mon Sep 17 00:00:00 2001 +From: Tulio Magno Quites Machado Filho +Date: Thu, 7 Apr 2022 16:40:37 -0300 +Subject: [PATCH] Increase the value for max_vas_reuse_count + +While running tests on PowerVM with 1MB files, it has been noticed a +huge delay when processing the 100th stream. +The penalty for reopening the VAS window is still too high for 100 +executions and is being increased to 10000 in order to keep its execution less +than 10% of the time spent executing those 10000 streams. + +Signed-off-by: Tulio Magno Quites Machado Filho +--- + lib/nx_zlib.c | 13 ++++++++++++- + test/nx-zlib.conf | 4 ++-- + 2 files changed, 14 insertions(+), 3 deletions(-) + +diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c +index fa7000e..b3737a2 100644 +--- a/lib/nx_zlib.c ++++ b/lib/nx_zlib.c +@@ -982,7 +982,18 @@ void nx_hw_init(void) + nx_config.max_byte_count_current = (1UL<<30); + nx_config.max_source_dde_count = MAX_DDE_COUNT; + nx_config.max_target_dde_count = MAX_DDE_COUNT; +- nx_config.max_vas_reuse_count = 100; ++ /** On PowerVM, reopening a VAS window takes 300x the time used to ++ * process a single stream with the maximum per job length allowed, ++ * causing serious performance issues when max_vas_reuse_count is too ++ * low. In order to avoid these issues, it has been decided the ++ * average impact of reopening a VAS window should be less than 10% of ++ * the time spent processing max_vas_reuse_count streams at ++ * maximum per job length. That means 3000, which we rounded up. 
++ * Notice that setting this number too high may impact the performance ++ * of multithreaded processes with a massive amount of threads. For ++ * those cases, it's recommended to use a lower value in the config ++ * file. */ ++ nx_config.max_vas_reuse_count = 10000; + nx_config.per_job_len = nx_query_job_limits(); /* less than suspend limit */ + nx_config.strm_def_bufsz = (1024 * 1024); /* affect the deflate fifo_out */ + nx_config.soft_copy_threshold = 1024; /* choose memcpy or hwcopy */ +diff --git a/test/nx-zlib.conf b/test/nx-zlib.conf +index 444d966..cb30c36 100644 +--- a/test/nx-zlib.conf ++++ b/test/nx-zlib.conf +@@ -59,5 +59,5 @@ logfile = ./nx.log + #nx_ratio_s = 100 + + # Maximum number of times deflateInit/inflateInit can reuse an already-open VAS +-# window +-# max_vas_reuse_count = 100 ++# window. Default: 10000 ++# max_vas_reuse_count = 10000