import libfabric-1.8.0-2.el8

This commit is contained in:
CentOS Sources 2019-11-05 15:55:22 -05:00 committed by Andrew Lukoshko
parent 847fed366d
commit b327bc1e96
6 changed files with 222 additions and 4 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
SOURCES/libfabric-1.6.2.tar.bz2
SOURCES/libfabric-1.8.0.tar.bz2

View File

@ -1 +1 @@
cb22495577b80d649e9e1307ab6e772e02937215 SOURCES/libfabric-1.6.2.tar.bz2
5ed296b739f50e7c97f9575e9f8b2cfbe3c69ec7 SOURCES/libfabric-1.8.0.tar.bz2

View File

@ -0,0 +1,52 @@
From b091a17b1ec7a5b546c2450bbd24bd26716c2f67 Mon Sep 17 00:00:00 2001
From: Honggang Li <honli@redhat.com>
Date: Sun, 4 Aug 2019 21:26:04 -0400
Subject: [PATCH] Fix segment fault issue for linux container
When running openmpi/mpirun inside Linux containers, libfabric crashed
with a segmentation fault:
Signal: Segmentation fault (11)
Signal code: Address not mapped (1)
Failing at address: 0xfffffffffffffff0
[ 0] /lib64/libpthread.so.0(+0x12d80)[0x14feb5d4dd80]
[ 1] /lib64/libfabric.so.1(+0x23cd1)[0x14fea8105cd1]
[ 2] /lib64/libfabric.so.1(+0x18240)[0x14fea80fa240]
[ 3] /lib64/libfabric.so.1(fi_getinfo+0x695)[0x14fea80faea5]
[ 4] /lib64/libfabric.so.1(fi_getinfo+0x4e)[0x14fea80ffe9e]
[ 5] /usr/lib64/openmpi/lib/openmpi/mca_btl_usnic.so(+0xdf4e)[0x14fea8445f4e]
[ 6] /usr/lib64/openmpi/lib/libopen-pal.so.40(mca_btl_base_select+0xed)[0x14feb547815d]
[ 7] /usr/lib64/openmpi/lib/openmpi/mca_bml_r2.so(mca_bml_r2_component_init+0x16)[0x14fea9fab2f6]
[ 8] /usr/lib64/openmpi/lib/libmpi.so.40(mca_bml_base_init+0xa4)[0x14feb5ffef94]
[ 9] /usr/lib64/openmpi/lib/libmpi.so.40(ompi_mpi_init+0x654)[0x14feb5fac474]
[10] /usr/lib64/openmpi/lib/libmpi.so.40(MPI_Init+0x72)[0x14feb5fdc6b2]
[11] /home/mpi/ring[0x4009ad]
[12] /lib64/libc.so.6(__libc_start_main+0xf3)[0x14feb599a813]
[13] /home/mpi/ring[0x4008be]
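The 'scandir' function called by 'ofi_mem_init' returned -1 with errno
set to ENOENT. The loop condition 'while (n--)' is true for a negative
n, so the loop body indexed pglist with a negative value. Test
'n-- > 0' instead so the loop is skipped when scandir fails.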
Fixes: 8ce14923ba67 (core/mem: Obtain a list of available huge pages in system)
Signed-off-by: Honggang Li <honli@redhat.com>
---
src/mem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mem.c b/src/mem.c
index 91836a79c..23617a0a4 100644
--- a/src/mem.c
+++ b/src/mem.c
@@ -84,7 +84,7 @@ void ofi_mem_init(void)
num_page_sizes = 1;
}
- while (n--) {
+ while (n-- > 0) {
if (sscanf(pglist[n]->d_name, "hugepages-%zukB", &hpsize) == 1) {
hpsize *= 1024;
if (hpsize != page_sizes[OFI_DEF_HUGEPAGE_SIZE])
--
2.20.1

View File

@ -0,0 +1,40 @@
From 0e3df6e32ba46fda98979e2e4fb1997d17f04b6e Mon Sep 17 00:00:00 2001
From: Honggang Li <honli@redhat.com>
Date: Thu, 4 Jul 2019 03:40:09 -0400
Subject: [PATCH 1/2] Revert "prov/psm2: Fix scalalble endpoint handling in
fi_av_remove()"
This reverts commit 2bb4bcba5a78db20bfc3f3505763e1a3b03dd353.
---
prov/psm2/src/psmx2_av.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/prov/psm2/src/psmx2_av.c b/prov/psm2/src/psmx2_av.c
index bb4fd615c..2dabf93bb 100644
--- a/prov/psm2/src/psmx2_av.c
+++ b/prov/psm2/src/psmx2_av.c
@@ -678,19 +678,16 @@ STATIC int psmx2_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count,
av_priv->conn_info[j].epaddrs[idx] = NULL;
}
} else {
- if (!av_priv->sep_info[idx].epids)
- continue;
-
for (j = 0; j < av_priv->max_trx_ctxt; j++) {
if (!av_priv->conn_info[j].trx_ctxt)
continue;
- if (!av_priv->conn_info[j].sepaddrs[idx])
+ if (!av_priv->conn_info[j].sepaddrs)
continue;
for (k = 0; k < av_priv->sep_info[idx].ctxt_cnt; k++) {
err = psmx2_av_disconnect_addr(
- j, av_priv->sep_info[idx].epids[k],
+ j, av_priv->table[idx].epid,
av_priv->conn_info[j].sepaddrs[idx][k]);
if (!err)
av_priv->conn_info[j].sepaddrs[idx][k] = NULL;
--
2.20.1

View File

@ -0,0 +1,112 @@
From 8bafb0be08d93743db66398471723fe49983df1b Mon Sep 17 00:00:00 2001
From: Honggang Li <honli@redhat.com>
Date: Thu, 4 Jul 2019 03:48:39 -0400
Subject: [PATCH 2/2] Revert "prov/psm2: Clean up connection state in
fi_av_remove"
This reverts commit 5b892bd43c5a824d1e5709c3c1f686e48ee4e373.
Conflicts:
prov/psm2/src/psmx2_av.c
Simple context conflict.
---
prov/psm2/src/psmx2_av.c | 78 ----------------------------------------
1 file changed, 78 deletions(-)
diff --git a/prov/psm2/src/psmx2_av.c b/prov/psm2/src/psmx2_av.c
index 2dabf93bb..aaa4624e4 100644
--- a/prov/psm2/src/psmx2_av.c
+++ b/prov/psm2/src/psmx2_av.c
@@ -616,88 +616,10 @@ out:
return ret;
}
-static int psmx2_av_disconnect_addr(int trx_ctxt_id, psm2_epid_t epid,
- psm2_epaddr_t epaddr)
-{
- struct psmx2_epaddr_context *epaddr_context;
- psm2_error_t errors;
- int err;
-
- if (!epaddr)
- return 0;
-
- FI_INFO(&psmx2_prov, FI_LOG_AV,
- "trx_ctxt_id %d epid %lx epaddr %p\n", trx_ctxt_id, epid, epaddr);
-
- epaddr_context = psm2_epaddr_getctxt(epaddr);
- if (!epaddr_context)
- return -FI_EINVAL;
-
- if (trx_ctxt_id != epaddr_context->trx_ctxt->id)
- return -FI_EINVAL;
-
- if (epid != epaddr_context->epid)
- return -FI_EINVAL;
-
- err = psm2_ep_disconnect2(epaddr_context->trx_ctxt->psm2_ep, 1, &epaddr,
- NULL, &errors, PSM2_EP_DISCONNECT_FORCE, 0);
-
- return psmx2_errno(err);
-}
-
DIRECT_FN
STATIC int psmx2_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count,
uint64_t flags)
{
- struct psmx2_fid_av *av_priv;
- int idx, i, j, k;
- int err;
-
- av_priv = container_of(av, struct psmx2_fid_av, av);
-
- av_priv->domain->av_lock_fn(&av_priv->lock, 1);
-
- for (i = 0; i < count; i++) {
- idx = PSMX2_ADDR_IDX(fi_addr[i]);
- if (idx >= av_priv->hdr->last) {
- FI_WARN(&psmx2_prov, FI_LOG_AV,
- "AV index out of range: fi_addr %lx idx %d last %ld\n",
- fi_addr[i], idx, av_priv->hdr->last);
- continue;
- }
-
- if (av_priv->table[idx].type == PSMX2_EP_REGULAR) {
- for (j = 0; j < av_priv->max_trx_ctxt; j++) {
- if (!av_priv->conn_info[j].trx_ctxt)
- continue;
-
- err = psmx2_av_disconnect_addr(
- j, av_priv->table[idx].epid,
- av_priv->conn_info[j].epaddrs[idx]);
- if (!err)
- av_priv->conn_info[j].epaddrs[idx] = NULL;
- }
- } else {
- for (j = 0; j < av_priv->max_trx_ctxt; j++) {
- if (!av_priv->conn_info[j].trx_ctxt)
- continue;
-
- if (!av_priv->conn_info[j].sepaddrs)
- continue;
-
- for (k = 0; k < av_priv->sep_info[idx].ctxt_cnt; k++) {
- err = psmx2_av_disconnect_addr(
- j, av_priv->table[idx].epid,
- av_priv->conn_info[j].sepaddrs[idx][k]);
- if (!err)
- av_priv->conn_info[j].sepaddrs[idx][k] = NULL;
- }
- }
- }
- }
-
- av_priv->domain->av_unlock_fn(&av_priv->lock, 1);
-
return 0;
}
--
2.20.1

View File

@ -1,11 +1,14 @@
Name: libfabric
Version: 1.6.2
Release: 1%{?dist}
Version: 1.8.0
Release: 2%{?dist}
Summary: Open Fabric Interfaces
License: BSD or GPLv2
URL: http://ofiwg.github.io/libfabric/
Source0: https://github.com/ofiwg/libfabric/releases/download/v%{version}/libfabric-%{version}.tar.bz2
Patch1: 0001-Revert-prov-psm2-Fix-scalalble-endpoint-handling-in-.patch
Patch2: 0002-Revert-prov-psm2-Clean-up-connection-state-in-fi_av_.patch
Patch3: 0001-Fix-segment-fault-issue-for-linux-container.patch
BuildRequires: libnl3-devel
# RDMA not available on 32-bit ARM: #1484155
@ -46,6 +49,9 @@ developing applications that use %{name}.
%prep
%setup -q
%patch1 -p1
%patch2 -p1
%patch3 -p1
%build
%configure --disable-static --disable-silent-rules
@ -83,6 +89,14 @@ find %{buildroot} -name '*.la' -exec rm -f {} ';'
%changelog
* Wed Aug 14 2019 Honggang Li <honli@redhat.com> - 1.8.0-2
- Fix segment fault issue for linux container
- Resolves: bz1731749
* Fri Jul 12 2019 Honggang Li <honli@redhat.com> - 1.8.0-1
- Rebase to upstream release v1.8.0
- Resolves: bz1660621
* Mon Dec 10 2018 Honggang Li <honli@redhat.com> - 1.6.2-1
- Rebase to upstream release v1.6.2
- Resolves: bz1654870