Enable AVX2 on x86_64

Enable NEON on aarch64
Clean up precision list
Fix for OpenMPI build with < 4 processors
Fix building with no enabled MPI types
Enable single precision Altivec on PPC
Enable CNTVCT_EL0 cycle counter support on aarch64

Signed-off-by: David Cantrell <dcantrell@redhat.com>
This commit is contained in:
David Cantrell 2023-03-27 10:31:36 -04:00
parent 424a4518bf
commit eed891dc9d

102
fftw.spec
View File

@ -13,7 +13,7 @@
Name: fftw Name: fftw
Version: 3.3.10 Version: 3.3.10
Release: 5%{?dist} Release: 6%{?dist}
Summary: A Fast Fourier Transform library Summary: A Fast Fourier Transform library
License: GPLv2+ License: GPLv2+
URL: http://www.fftw.org URL: http://www.fftw.org
@ -28,6 +28,16 @@ BuildRequires: gcc-gfortran
%global quad 1 %global quad 1
%endif %endif
# Names of precisions to (maybe) build
%global prec_names prec_name[0]=single;prec_name[1]=double;prec_name[2]=long;prec_name[3]=quad
# Number of precisions to build; sometimes quad is not possible
%global nprec 3
%if %{quad}
%global nprec 4
%endif
# Number of precisions to build for MPI
%global nmpiprec 3
# For check phase # For check phase
BuildRequires: time BuildRequires: time
BuildRequires: perl-interpreter BuildRequires: perl-interpreter
@ -278,10 +288,7 @@ BASEFLAGS="--enable-shared --disable-dependency-tracking --enable-threads"
BASEFLAGS+=" --enable-openmp" BASEFLAGS+=" --enable-openmp"
# Precisions to build # Precisions to build
prec_name[0]=single %prec_names
prec_name[1]=double
prec_name[2]=long
prec_name[3]=quad
# Corresponding flags # Corresponding flags
prec_flags[0]=--enable-single prec_flags[0]=--enable-single
@ -292,31 +299,27 @@ prec_flags[3]=--enable-quad-precision
%ifarch x86_64 %ifarch x86_64
# Enable SSE2 and AVX support for x86_64 # Enable SSE2, AVX and AVX2 support for x86_64
for ((i=0; i<2; i++)) ; do for ((i=0; i<2; i++)) ; do
prec_flags[i]+=" --enable-sse2 --enable-avx" prec_flags[i]+=" --enable-sse2 --enable-avx --enable-avx2"
done done
%endif %endif
# No NEON run time detection, not all ARM SoCs have NEON %ifarch %{arm64}
#%ifarch %{arm} # Compile support for NEON instructions
## Compile support for NEON instructions for ((i=0; i<2; i++)) ; do
#for ((i=0; i<2; i++)) ; do prec_flags[i]+=" --enable-neon"
# prec_flags[i]+=" --enable-neon" done
#done BASEFLAGS+=" --enable-armv8-cntvct-el0"
#%endif %endif
#%ifarch ppc ppc64 %ifarch ppc ppc64
## Compile support for Altivec instructions # Compile support for Altivec instructions; only supported for single precision
#for ((i=0; i<2; i++)) ; do for ((i=0; i<1; i++)) ; do
# prec_flags[i]+=" --enable-altivec" prec_flags[i]+=" --enable-altivec"
#done done
#%endif %endif
# Loop over precisions # Loop over precisions
%if %{quad} for ((iprec=0; iprec<%{nprec}; iprec++)) ; do
for ((iprec=0; iprec<4; iprec++)) ; do
%else
for ((iprec=0; iprec<3; iprec++)) ; do
%endif
mkdir ${prec_name[iprec]}${ver_name[iver]} mkdir ${prec_name[iprec]}${ver_name[iver]}
cd ${prec_name[iprec]}${ver_name[iver]} cd ${prec_name[iprec]}${ver_name[iver]}
ln -s ../configure . ln -s ../configure .
@ -328,16 +331,19 @@ for ((iprec=0; iprec<3; iprec++)) ; do
done done
# MPI Builds - this duplicates the non-mpi builds, but oh well # MPI Builds - this duplicates the non-mpi builds, but oh well
for mpi in %{mpi_list} ; do for mpi in %{?mpi_list} ; do
module load mpi/${mpi}-%{_arch} module load mpi/${mpi}-%{_arch}
# Loop over precisions - no quad precision support with MPI # Loop over precisions - no quad precision support with MPI
for((iprec=0;iprec<3;iprec++)) ; do for((iprec=0;iprec<%{nmpiprec};iprec++)) ; do
mkdir ${mpi}-${prec_name[iprec]}${ver_name[iver]} mkdir ${mpi}-${prec_name[iprec]}${ver_name[iver]}
cd ${mpi}-${prec_name[iprec]}${ver_name[iver]} cd ${mpi}-${prec_name[iprec]}${ver_name[iver]}
ln -s ../configure . ln -s ../configure .
# Force linking the _mpi.so libraries with the mpi libs. This works because # Force linking the _mpi.so libraries with the mpi libs. This works because
# we get rid of all of the non-mpi components of these builds # we get rid of all of the non-mpi components of these builds
export CC=mpicc export CC=mpicc
if [ $mpi = "openmpi" ]; then
export MPIRUN="mpirun --oversubscribe"
fi
%{configure} ${BASEFLAGS} ${prec_flags[iprec]} \ %{configure} ${BASEFLAGS} ${prec_flags[iprec]} \
--enable-mpi \ --enable-mpi \
--libdir=%{_libdir}/$mpi/lib \ --libdir=%{_libdir}/$mpi/lib \
@ -352,23 +358,21 @@ for mpi in %{mpi_list} ; do
done done
%install %install
%prec_names
# Explicitly load shell support for the environment-modules package, used # Explicitly load shell support for the environment-modules package, used
# below via 'module' pseudo-command. # below via 'module' pseudo-command.
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
%if %{quad} for((iprec=0;iprec<%{nprec};iprec++)) ; do
for ver in single double long quad ; do %make_install -C ${prec_name[iprec]}
%else
for ver in single double long ; do
%endif
%make_install -C $ver
done done
# MPI # MPI
for mpi in %{mpi_list} ; do for mpi in %{?mpi_list} ; do
module load mpi/${mpi}-%{_arch} module load mpi/${mpi}-%{_arch}
for ver in single double long ; do for((iprec=0;iprec<%{nmpiprec};iprec++)) ; do
%make_install -C ${mpi}-${ver} %make_install -C ${mpi}-${prec_name[iprec]}
# Remove duplicated non-mpi libraries, binaries, and data # Remove duplicated non-mpi libraries, binaries, and data
find %{buildroot}%{_libdir}/${mpi}/lib -name libfftw\* -a \! -name \*_mpi.\* -delete find %{buildroot}%{_libdir}/${mpi}/lib -name libfftw\* -a \! -name \*_mpi.\* -delete
rm -r %{buildroot}%{_libdir}/${mpi}/{bin,share} rm -r %{buildroot}%{_libdir}/${mpi}/{bin,share}
@ -380,28 +384,25 @@ rm -f %{buildroot}%{_infodir}/dir
find %{buildroot} -name \*.la -delete find %{buildroot} -name \*.la -delete
%check %check
%prec_names
# Explicitly load shell support for the environment-modules package, used # Explicitly load shell support for the environment-modules package, used
# below via 'module' pseudo-command. # below via 'module' pseudo-command.
. /etc/profile.d/modules.sh . /etc/profile.d/modules.sh
bdir=$(pwd) bdir=$(pwd)
%if %{quad} for((iprec=0;iprec<%{nprec};iprec++)) ; do
for ver in single double long quad ; do export LD_LIBRARY_PATH=$bdir/${prec_name[iprec]}/.libs:$bdir/${prec_name[iprec]}/threads/.libs
%else %make_build -C ${prec_name[iprec]} check
for ver in single double long ; do
%endif
export LD_LIBRARY_PATH=$bdir/$ver/.libs:$bdir/$ver/threads/.libs
%make_build -C $ver check
done done
# MPI # MPI
# Allow oversubscription with openmpi # Allow oversubscription with openmpi
export OMPI_MCA_rmaps_base_oversubscribe=1 export OMPI_MCA_rmaps_base_oversubscribe=1
for mpi in %{mpi_list} ; do for mpi in %{?mpi_list} ; do
module load mpi/${mpi}-%{_arch} module load mpi/${mpi}-%{_arch}
for ver in single double long ; do for((iprec=0;iprec<%{nmpiprec};iprec++)) ; do
export LD_LIBRARY_PATH=$bdir/$ver/.libs:$bdir/$ver/threads/.libs export LD_LIBRARY_PATH=$bdir/${prec_name[iprec]}/.libs:$bdir/${prec_name[iprec]}/threads/.libs
%make_build -C ${mpi}-${ver}/mpi check %make_build -C ${mpi}-${prec_name[iprec]}/mpi check
done done
module unload mpi/${mpi}-%{_arch} module unload mpi/${mpi}-%{_arch}
done done
@ -525,6 +526,15 @@ done
%endif %endif
%changelog %changelog
* Mon Mar 27 2023 David Cantrell <dcantrell@redhat.com> - 3.3.10-6
- Enable AVX2 on x86_64
- Enable NEON on aarch64
- Clean up precision list
- Fix for OpenMPI build with < 4 processors
- Fix building with no enabled MPI types
- Enable single precision Altivec on PPC
- Enable CNTVCT_EL0 cycle counter support on aarch64
* Thu Mar 02 2023 Orion Poplawski <orion@nwra.com> - 3.3.10-5 * Thu Mar 02 2023 Orion Poplawski <orion@nwra.com> - 3.3.10-5
- Use make macros - Use make macros
- Drop openmpi vader workaround - Drop openmpi vader workaround