From b327bc1e96f619371643e579100c21f74c8ced9f Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 5 Nov 2019 15:55:22 -0500 Subject: [PATCH] import libfabric-1.8.0-2.el8 --- .gitignore | 2 +- .libfabric.metadata | 2 +- ...ment-fault-issue-for-linux-container.patch | 52 ++++++++ ...-Fix-scalalble-endpoint-handling-in-.patch | 40 +++++++ ...-Clean-up-connection-state-in-fi_av_.patch | 112 ++++++++++++++++++ SPECS/libfabric.spec | 18 ++- 6 files changed, 222 insertions(+), 4 deletions(-) create mode 100644 SOURCES/0001-Fix-segment-fault-issue-for-linux-container.patch create mode 100644 SOURCES/0001-Revert-prov-psm2-Fix-scalalble-endpoint-handling-in-.patch create mode 100644 SOURCES/0002-Revert-prov-psm2-Clean-up-connection-state-in-fi_av_.patch diff --git a/.gitignore b/.gitignore index a4a9ef3..4818fab 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/libfabric-1.6.2.tar.bz2 +SOURCES/libfabric-1.8.0.tar.bz2 diff --git a/.libfabric.metadata b/.libfabric.metadata index 8558b4f..275f75d 100644 --- a/.libfabric.metadata +++ b/.libfabric.metadata @@ -1 +1 @@ -cb22495577b80d649e9e1307ab6e772e02937215 SOURCES/libfabric-1.6.2.tar.bz2 +5ed296b739f50e7c97f9575e9f8b2cfbe3c69ec7 SOURCES/libfabric-1.8.0.tar.bz2 diff --git a/SOURCES/0001-Fix-segment-fault-issue-for-linux-container.patch b/SOURCES/0001-Fix-segment-fault-issue-for-linux-container.patch new file mode 100644 index 0000000..688d39c --- /dev/null +++ b/SOURCES/0001-Fix-segment-fault-issue-for-linux-container.patch @@ -0,0 +1,52 @@ +From b091a17b1ec7a5b546c2450bbd24bd26716c2f67 Mon Sep 17 00:00:00 2001 +From: Honggang Li +Date: Sun, 4 Aug 2019 21:26:04 -0400 +Subject: [PATCH] Fix segment fault issue for linux container + +While running openmpi/mpirun with Linux containers, libfabric failed +with a segmentation fault:
+ + Signal: Segmentation fault (11) + Signal code: Address not mapped (1) + Failing at address: 0xfffffffffffffff0 + [ 0] /lib64/libpthread.so.0(+0x12d80)[0x14feb5d4dd80] + [ 1] /lib64/libfabric.so.1(+0x23cd1)[0x14fea8105cd1] + [ 2] /lib64/libfabric.so.1(+0x18240)[0x14fea80fa240] + [ 3] /lib64/libfabric.so.1(fi_getinfo+0x695)[0x14fea80faea5] + [ 4] /lib64/libfabric.so.1(fi_getinfo+0x4e)[0x14fea80ffe9e] + [ 5] /usr/lib64/openmpi/lib/openmpi/mca_btl_usnic.so(+0xdf4e)[0x14fea8445f4e] + [ 6] /usr/lib64/openmpi/lib/libopen-pal.so.40(mca_btl_base_select+0xed)[0x14feb547815d] + [ 7] /usr/lib64/openmpi/lib/openmpi/mca_bml_r2.so(mca_bml_r2_component_init+0x16)[0x14fea9fab2f6] + [ 8] /usr/lib64/openmpi/lib/libmpi.so.40(mca_bml_base_init+0xa4)[0x14feb5ffef94] + [ 9] /usr/lib64/openmpi/lib/libmpi.so.40(ompi_mpi_init+0x654)[0x14feb5fac474] + [10] /usr/lib64/openmpi/lib/libmpi.so.40(MPI_Init+0x72)[0x14feb5fdc6b2] + [11] /home/mpi/ring[0x4009ad] + [12] /lib64/libc.so.6(__libc_start_main+0xf3)[0x14feb599a813] + [13] /home/mpi/ring[0x4008be] + +The 'scandir' function called by 'ofi_mem_init' returned -1 with errno +set to ENOENT, so 'while (n--)' saw a non-zero count and indexed 'pglist' with a negative value. 
+ +Fixes: 8ce14923ba67 (core/mem: Obtain a list of available huge pages in system) + +Signed-off-by: Honggang Li +--- + src/mem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/mem.c b/src/mem.c +index 91836a79c..23617a0a4 100644 +--- a/src/mem.c ++++ b/src/mem.c +@@ -84,7 +84,7 @@ void ofi_mem_init(void) + num_page_sizes = 1; + } + +- while (n--) { ++ while (n-- > 0) { + if (sscanf(pglist[n]->d_name, "hugepages-%zukB", &hpsize) == 1) { + hpsize *= 1024; + if (hpsize != page_sizes[OFI_DEF_HUGEPAGE_SIZE]) +-- +2.20.1 + diff --git a/SOURCES/0001-Revert-prov-psm2-Fix-scalalble-endpoint-handling-in-.patch b/SOURCES/0001-Revert-prov-psm2-Fix-scalalble-endpoint-handling-in-.patch new file mode 100644 index 0000000..d768f95 --- /dev/null +++ b/SOURCES/0001-Revert-prov-psm2-Fix-scalalble-endpoint-handling-in-.patch @@ -0,0 +1,40 @@ +From 0e3df6e32ba46fda98979e2e4fb1997d17f04b6e Mon Sep 17 00:00:00 2001 +From: Honggang Li +Date: Thu, 4 Jul 2019 03:40:09 -0400 +Subject: [PATCH 1/2] Revert "prov/psm2: Fix scalalble endpoint handling in + fi_av_remove()" + +This reverts commit 2bb4bcba5a78db20bfc3f3505763e1a3b03dd353. 
+--- + prov/psm2/src/psmx2_av.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/prov/psm2/src/psmx2_av.c b/prov/psm2/src/psmx2_av.c +index bb4fd615c..2dabf93bb 100644 +--- a/prov/psm2/src/psmx2_av.c ++++ b/prov/psm2/src/psmx2_av.c +@@ -678,19 +678,16 @@ STATIC int psmx2_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count, + av_priv->conn_info[j].epaddrs[idx] = NULL; + } + } else { +- if (!av_priv->sep_info[idx].epids) +- continue; +- + for (j = 0; j < av_priv->max_trx_ctxt; j++) { + if (!av_priv->conn_info[j].trx_ctxt) + continue; + +- if (!av_priv->conn_info[j].sepaddrs[idx]) ++ if (!av_priv->conn_info[j].sepaddrs) + continue; + + for (k = 0; k < av_priv->sep_info[idx].ctxt_cnt; k++) { + err = psmx2_av_disconnect_addr( +- j, av_priv->sep_info[idx].epids[k], ++ j, av_priv->table[idx].epid, + av_priv->conn_info[j].sepaddrs[idx][k]); + if (!err) + av_priv->conn_info[j].sepaddrs[idx][k] = NULL; +-- +2.20.1 + diff --git a/SOURCES/0002-Revert-prov-psm2-Clean-up-connection-state-in-fi_av_.patch b/SOURCES/0002-Revert-prov-psm2-Clean-up-connection-state-in-fi_av_.patch new file mode 100644 index 0000000..0119275 --- /dev/null +++ b/SOURCES/0002-Revert-prov-psm2-Clean-up-connection-state-in-fi_av_.patch @@ -0,0 +1,112 @@ +From 8bafb0be08d93743db66398471723fe49983df1b Mon Sep 17 00:00:00 2001 +From: Honggang Li +Date: Thu, 4 Jul 2019 03:48:39 -0400 +Subject: [PATCH 2/2] Revert "prov/psm2: Clean up connection state in + fi_av_remove" + +This reverts commit 5b892bd43c5a824d1e5709c3c1f686e48ee4e373. + + Conflicts: + prov/psm2/src/psmx2_av.c + +Simple context conflict. 
+--- + prov/psm2/src/psmx2_av.c | 78 ---------------------------------------- + 1 file changed, 78 deletions(-) + +diff --git a/prov/psm2/src/psmx2_av.c b/prov/psm2/src/psmx2_av.c +index 2dabf93bb..aaa4624e4 100644 +--- a/prov/psm2/src/psmx2_av.c ++++ b/prov/psm2/src/psmx2_av.c +@@ -616,88 +616,10 @@ out: + return ret; + } + +-static int psmx2_av_disconnect_addr(int trx_ctxt_id, psm2_epid_t epid, +- psm2_epaddr_t epaddr) +-{ +- struct psmx2_epaddr_context *epaddr_context; +- psm2_error_t errors; +- int err; +- +- if (!epaddr) +- return 0; +- +- FI_INFO(&psmx2_prov, FI_LOG_AV, +- "trx_ctxt_id %d epid %lx epaddr %p\n", trx_ctxt_id, epid, epaddr); +- +- epaddr_context = psm2_epaddr_getctxt(epaddr); +- if (!epaddr_context) +- return -FI_EINVAL; +- +- if (trx_ctxt_id != epaddr_context->trx_ctxt->id) +- return -FI_EINVAL; +- +- if (epid != epaddr_context->epid) +- return -FI_EINVAL; +- +- err = psm2_ep_disconnect2(epaddr_context->trx_ctxt->psm2_ep, 1, &epaddr, +- NULL, &errors, PSM2_EP_DISCONNECT_FORCE, 0); +- +- return psmx2_errno(err); +-} +- + DIRECT_FN + STATIC int psmx2_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count, + uint64_t flags) + { +- struct psmx2_fid_av *av_priv; +- int idx, i, j, k; +- int err; +- +- av_priv = container_of(av, struct psmx2_fid_av, av); +- +- av_priv->domain->av_lock_fn(&av_priv->lock, 1); +- +- for (i = 0; i < count; i++) { +- idx = PSMX2_ADDR_IDX(fi_addr[i]); +- if (idx >= av_priv->hdr->last) { +- FI_WARN(&psmx2_prov, FI_LOG_AV, +- "AV index out of range: fi_addr %lx idx %d last %ld\n", +- fi_addr[i], idx, av_priv->hdr->last); +- continue; +- } +- +- if (av_priv->table[idx].type == PSMX2_EP_REGULAR) { +- for (j = 0; j < av_priv->max_trx_ctxt; j++) { +- if (!av_priv->conn_info[j].trx_ctxt) +- continue; +- +- err = psmx2_av_disconnect_addr( +- j, av_priv->table[idx].epid, +- av_priv->conn_info[j].epaddrs[idx]); +- if (!err) +- av_priv->conn_info[j].epaddrs[idx] = NULL; +- } +- } else { +- for (j = 0; j < av_priv->max_trx_ctxt; 
j++) { +- if (!av_priv->conn_info[j].trx_ctxt) +- continue; +- +- if (!av_priv->conn_info[j].sepaddrs) +- continue; +- +- for (k = 0; k < av_priv->sep_info[idx].ctxt_cnt; k++) { +- err = psmx2_av_disconnect_addr( +- j, av_priv->table[idx].epid, +- av_priv->conn_info[j].sepaddrs[idx][k]); +- if (!err) +- av_priv->conn_info[j].sepaddrs[idx][k] = NULL; +- } +- } +- } +- } +- +- av_priv->domain->av_unlock_fn(&av_priv->lock, 1); +- + return 0; + } + +-- +2.20.1 + diff --git a/SPECS/libfabric.spec b/SPECS/libfabric.spec index bb5c443..b95c6a6 100644 --- a/SPECS/libfabric.spec +++ b/SPECS/libfabric.spec @@ -1,11 +1,14 @@ Name: libfabric -Version: 1.6.2 -Release: 1%{?dist} +Version: 1.8.0 +Release: 2%{?dist} Summary: Open Fabric Interfaces License: BSD or GPLv2 URL: http://ofiwg.github.io/libfabric/ Source0: https://github.com/ofiwg/libfabric/releases/download/v%{version}/libfabric-%{version}.tar.bz2 +Patch1: 0001-Revert-prov-psm2-Fix-scalalble-endpoint-handling-in-.patch +Patch2: 0002-Revert-prov-psm2-Clean-up-connection-state-in-fi_av_.patch +Patch3: 0001-Fix-segment-fault-issue-for-linux-container.patch BuildRequires: libnl3-devel # RDMA not available on 32-bit ARM: #1484155 @@ -46,6 +49,9 @@ developing applications that use %{name}. %prep %setup -q +%patch1 -p1 +%patch2 -p1 +%patch3 -p1 %build %configure --disable-static --disable-silent-rules @@ -83,6 +89,14 @@ find %{buildroot} -name '*.la' -exec rm -f {} ';' %changelog +* Wed Aug 14 2019 Honggang Li - 1.8.0-2 +- Fix segment fault issue for linux container +- Resolves: bz1731749 + +* Fri Jul 12 2019 Honggang Li - 1.8.0-1 +- Rebase to upstream release v1.8.0 +- Resolves: bz1660621 + * Mon Dec 10 2018 Honggang Li - 1.6.2-1 - Rebase to upstream release v1.6.2 - Resolves: bz1654870