Add upstream patch to fix race/hang on 32bit machines
This commit is contained in:
parent
f830453701
commit
3923dafb55
103
openmpi-vader.patch
Normal file
103
openmpi-vader.patch
Normal file
@ -0,0 +1,103 @@
|
||||
diff -up openmpi-v1.8.4-134-g9ad2aa8/ompi/mca/btl/vader/btl_vader_endpoint.h.vader openmpi-v1.8.4-134-g9ad2aa8/ompi/mca/btl/vader/btl_vader_endpoint.h
|
||||
--- openmpi-v1.8.4-134-g9ad2aa8/ompi/mca/btl/vader/btl_vader_endpoint.h.vader 2015-03-24 19:21:54.000000000 -0600
|
||||
+++ openmpi-v1.8.4-134-g9ad2aa8/ompi/mca/btl/vader/btl_vader_endpoint.h 2015-03-30 16:35:41.411110838 -0600
|
||||
@@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
|
||||
- * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
|
||||
+ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@@ -48,14 +48,16 @@ typedef struct mca_btl_base_endpoint_t {
|
||||
/* per peer buffers */
|
||||
struct {
|
||||
unsigned char *buffer; /**< starting address of peer's fast box out */
|
||||
- unsigned int start, seq;
|
||||
uint32_t *startp;
|
||||
+ unsigned int start;
|
||||
+ uint16_t seq;
|
||||
} fbox_in;
|
||||
|
||||
struct {
|
||||
unsigned char *buffer; /**< starting address of peer's fast box in */
|
||||
- unsigned int start, end, seq;
|
||||
uint32_t *startp; /**< pointer to location storing start offset */
|
||||
+ unsigned int start, end;
|
||||
+ uint16_t seq;
|
||||
} fbox_out;
|
||||
|
||||
int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing
|
||||
diff -up openmpi-v1.8.4-134-g9ad2aa8/ompi/mca/btl/vader/btl_vader_fbox.h.vader openmpi-v1.8.4-134-g9ad2aa8/ompi/mca/btl/vader/btl_vader_fbox.h
|
||||
--- openmpi-v1.8.4-134-g9ad2aa8/ompi/mca/btl/vader/btl_vader_fbox.h.vader 2015-03-24 19:21:54.000000000 -0600
|
||||
+++ openmpi-v1.8.4-134-g9ad2aa8/ompi/mca/btl/vader/btl_vader_fbox.h 2015-03-30 16:35:41.412110833 -0600
|
||||
@@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
- * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
+ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@@ -18,9 +18,16 @@
|
||||
|
||||
typedef union mca_btl_vader_fbox_hdr_t {
|
||||
struct {
|
||||
+ /* NTH: on 32-bit platforms loading/unloading the header may be completed
|
||||
+ * in multiple instructions. To ensure that seq is never loaded before tag
|
||||
+ * and the tag is never read before seq put them in the same 32-bits of the
|
||||
+ * header. */
|
||||
+ /** message tag */
|
||||
uint16_t tag;
|
||||
- uint16_t size;
|
||||
- uint32_t seq;
|
||||
+ /** sequence number */
|
||||
+ uint16_t seq;
|
||||
+ /** message size */
|
||||
+ uint32_t size;
|
||||
} data;
|
||||
uint64_t ival;
|
||||
} mca_btl_vader_fbox_hdr_t;
|
||||
@@ -40,6 +47,13 @@ typedef union mca_btl_vader_fbox_hdr_t {
|
||||
|
||||
void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, mca_btl_base_endpoint_t *ep);
|
||||
|
||||
+static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr, uint16_t tag,
|
||||
+ uint16_t seq, uint32_t size)
|
||||
+{
|
||||
+ mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag, .seq = seq, .size = size}};
|
||||
+ hdr->ival = tmp.ival;
|
||||
+}
|
||||
+
|
||||
/* attempt to reserve a contiguous segment from the remote ep */
|
||||
static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t *ep, size_t size)
|
||||
{
|
||||
@@ -88,12 +102,10 @@ static inline unsigned char *mca_btl_vad
|
||||
/* if this is the end of the buffer and the fragment doesn't fit then mark the remaining buffer space to
|
||||
* be skipped and check if the fragment can be written at the beginning of the buffer. */
|
||||
if (OPAL_UNLIKELY(buffer_free > 0 && buffer_free < size && start <= end)) {
|
||||
- mca_btl_vader_fbox_hdr_t tmp = {.data = {.size = buffer_free - sizeof (mca_btl_vader_fbox_hdr_t),
|
||||
- .seq = ep->fbox_out.seq++, .tag = 0xff}};
|
||||
-
|
||||
BTL_VERBOSE(("message will not fit in remaining buffer space. skipping to beginning"));
|
||||
|
||||
- MCA_BTL_VADER_FBOX_HDR(dst)->ival = tmp.ival;
|
||||
+ mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), 0xff, ep->fbox_out.seq++,
|
||||
+ buffer_free - sizeof (mca_btl_vader_fbox_hdr_t));
|
||||
|
||||
end = MCA_BTL_VADER_FBOX_ALIGNMENT;
|
||||
/* toggle the high bit */
|
||||
@@ -114,11 +126,7 @@ static inline unsigned char *mca_btl_vad
|
||||
(unsigned int) size, end, start, end, hbs, buffer_free));
|
||||
|
||||
/* write out part of the header now. the tag will be written when the data is available */
|
||||
- {
|
||||
- mca_btl_vader_fbox_hdr_t tmp = {.data = {.size = data_size, .tag = 0, .seq = ep->fbox_out.seq++}};
|
||||
-
|
||||
- MCA_BTL_VADER_FBOX_HDR(dst)->ival = tmp.ival;
|
||||
- }
|
||||
+ mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), 0, ep->fbox_out.seq++, data_size);
|
||||
|
||||
end += size;
|
||||
|
@ -40,6 +40,9 @@ Patch1: openmpi-ltdl.patch
|
||||
# Fix typo in liboshmem name
|
||||
# https://github.com/open-mpi/ompi/pull/221
|
||||
Patch2: openmpi-oshmem.patch
|
||||
# Upstream patch to fix race/hang on 32-bit
|
||||
# https://github.com/open-mpi/ompi/pull/503
|
||||
Patch3: openmpi-vader.patch
|
||||
|
||||
BuildRequires: gcc-gfortran
|
||||
#sparc64 don't have valgrind
|
||||
@ -120,6 +123,7 @@ Contains development wrapper for compiling Java with openmpi.
|
||||
%patch0 -p1 -b .atomic
|
||||
%patch1 -p1 -b .ltdl
|
||||
%patch2 -p1 -b .oshmem
|
||||
%patch3 -p1 -b .vader
|
||||
# Make sure we don't use the local libltdl library
|
||||
rm -r opal/libltdl
|
||||
|
||||
@ -244,6 +248,9 @@ make check
|
||||
|
||||
|
||||
%changelog
|
||||
* Mon Mar 30 2015 Orion Poplawski <orion@cora.nwra.com> 1.8.4-7.20150324gitg9ad2aa8
|
||||
- Add upstream patch to fix race/hang on 32bit machines
|
||||
|
||||
* Fri Mar 27 2015 Orion Poplawski <orion@cora.nwra.com> 1.8.4-6.20150324gitg9ad2aa8
|
||||
- Update to latest 1.8.4 snapshot
|
||||
- Add upstream patch to fix atomics on 32bit
|
||||
|
Loading…
Reference in New Issue
Block a user