From ef682559d1acb92b897412647d4468486aa4f0fc Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Wed, 12 Feb 2025 09:21:24 +0100 Subject: [PATCH 1/2] Intel x86 asm: Add endbr[64|32] to all symbols Suggested-by: Daiki Ueno Signed-off-by: Stephan Mueller --- CHANGES.md | 2 ++ internal/api/assembler_support.h | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/internal/api/assembler_support.h b/internal/api/assembler_support.h index f90fa4ba..54f33f6f 100644 --- a/internal/api/assembler_support.h +++ b/internal/api/assembler_support.h @@ -20,6 +20,14 @@ #ifndef ASSEMBLER_SUPPORT_H #define ASSEMBLER_SUPPORT_H +#ifdef __x86_64__ +#define LC_ENDBR "endbr64" +#elif defined(__i386__) +#define LC_ENDBR "endbr32" +#else +#define LC_ENDBR +#endif + #ifdef LINUX_KERNEL #include @@ -121,7 +129,8 @@ # define SYM_FUNC_START(name) \ .hidden SYM_FUNC(name) ; \ .global SYM_FUNC(name) ; \ - SYM_FUNC(name): + SYM_FUNC(name): \ + LC_ENDBR # define SYM_FUNC_ENTER(name) -- 2.48.1 From 07cbd3c7f28901f26604b3e8803b99d8ec755fdb Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Wed, 12 Feb 2025 09:50:31 +0100 Subject: [PATCH 2/2] Fix gcc build error Clang can handle instructions enclosed with quotation marks, GCC cannot... Signed-off-by: Stephan Mueller --- internal/api/assembler_support.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/api/assembler_support.h b/internal/api/assembler_support.h index 54f33f6f..de333805 100644 --- a/internal/api/assembler_support.h +++ b/internal/api/assembler_support.h @@ -21,9 +21,9 @@ #define ASSEMBLER_SUPPORT_H #ifdef __x86_64__ -#define LC_ENDBR "endbr64" +#define LC_ENDBR endbr64 #elif defined(__i386__) -#define LC_ENDBR "endbr32" +#define LC_ENDBR endbr32 #else #define LC_ENDBR #endif -- 2.48.1 From ef49175f0c2f89df027ca9a40075e290ba77d850 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 6 Feb 2025 08:39:07 +0100 Subject: [PATCH] prevent compiler warnings Signed-off-by: Stephan Mueller --- hash/src/asm/ARMv8_2x/keccakx2_armce.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hash/src/asm/ARMv8_2x/keccakx2_armce.S b/hash/src/asm/ARMv8_2x/keccakx2_armce.S index 7d4772e5..0cf8fddb 100644 --- a/hash/src/asm/ARMv8_2x/keccakx2_armce.S +++ b/hash/src/asm/ARMv8_2x/keccakx2_armce.S @@ -45,8 +45,12 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wundef" +#ifndef __APPLE__ +#define __APPLE__ 0 +#endif +#ifndef __ARM_FEATURE_SHA3 +#define __ARM_FEATURE_SHA3 0 +#endif #if (__APPLE__ && __ARM_FEATURE_CRYPTO) || (__ARM_FEATURE_SHA3) #include "assembler_support.h" @@ -191,4 +195,3 @@ loop: SYM_FUNC_END(keccak_f1600x2_armce) #endif -#pragma GCC diagnostic pop -- 2.48.1 From 038e226c8a4b024f2feda157989952843292c21c Mon Sep 17 00:00:00 2001 From: Daiki Ueno Date: Thu, 13 Feb 2025 08:43:35 +0900 Subject: [PATCH] x86: embed .note.gnu.property for Intel CET in assembly files Instead of generating the note section with a linker option, this embeds it in each .S file through a preprocessor macro. The main motivation behind this is to simplify the build process when a shared library is statically linking to leancrypto. As a bonus, this mechanism could be used to support PACBTI on AArch64 in a future extension. Signed-off-by: Daiki Ueno --- curve25519/src/armv7/x25519-cortex-m4-gcc.S | 2 ++ curve25519/src/armv8/X25519-AArch64.S | 2 ++ curve25519/src/avx/curve25519_avx_asm.S | 2 ++ hash/src/asm/ARMv8A/KeccakP-1600-armv8a-ce.S | 2 ++ .../src/asm/ARMv8A/KeccakP-1600-armv8a-neon.S | 4 +++ hash/src/asm/ARMv8A/sha2-256-ARMv8.S | 2 ++ hash/src/asm/ARMv8A/sha2-512-ARMv8.S | 2 ++ hash/src/asm/ARMv8_2x/keccakx2_armce.S | 2 ++ hash/src/asm/AVX2/KeccakP-1600-AVX2.S | 2 ++ hash/src/asm/AVX2/sha2-256-AVX2.S | 2 ++ hash/src/asm/AVX2/sha2-512-AVX2.S | 2 ++ hash/src/asm/AVX512/KeccakP-1600-AVX512.S | 2 ++ hash/src/asm/riscv32/keccakf1600_asm.S | 2 ++ hash/src/asm/riscv64/fips202_rv64im.S | 2 ++ hash/src/asm/riscv64/fips202_rv64imb.S | 2 ++ hash/src/asm/riscv64/sha2-256-riscv.S | 2 ++ hash/src/asm/riscv64/sha2-512-riscv.S | 2 ++ internal/api/assembler_support.h | 33 +++++++++++++++++++ meson.build | 6 ---- ml-dsa/src/armv7/dilithium_ntt_armv7.S | 2 ++ .../armv7/dilithium_pointwise_smull_armv7.S | 2 ++ ml-dsa/src/armv7/dilithium_poly_armv7.S | 2 ++ ml-dsa/src/armv8/dilithium_intt_armv8.S | 2 ++ ml-dsa/src/armv8/dilithium_ntt_armv8.S | 2 ++ ml-dsa/src/armv8/dilithium_poly_armv8.S | 2 ++ ml-dsa/src/avx2/dilithium_invntt_avx2.S | 2 ++ ml-dsa/src/avx2/dilithium_ntt_avx2.S | 2 ++ ml-dsa/src/avx2/dilithium_pointwise_avx2.S | 2 ++ ml-dsa/src/avx2/dilithium_shuffle_avx2.S | 2 ++ .../riscv64/ntt_8l_dualissue_plant_rv64im.S | 2 ++ ml-dsa/src/riscv64/ntt_rvv.S | 2 ++ ml-kem/src/armv7/kyber_fastinvntt_armv7.S | 2 ++ ml-kem/src/armv7/kyber_fastntt_armv7.S | 2 ++ ml-kem/src/armv7/kyber_poly_armv7.S | 2 ++ ml-kem/src/armv8/kyber_add_armv8.S | 2 ++ ml-kem/src/armv8/kyber_basemul_armv8.S | 2 ++ ml-kem/src/armv8/kyber_cbd_armv8.S | 2 ++ ml-kem/src/armv8/kyber_inv_ntt_armv8.S | 2 ++ ml-kem/src/armv8/kyber_ntt_armv8_asm.S | 2 ++ ml-kem/src/armv8/kyber_poly_armv8_asm.S | 2 ++ ml-kem/src/armv8/kyber_reduce_armv8.S | 2 ++ ml-kem/src/avx2/kyber_basemul_avx2.S | 2 ++ ml-kem/src/avx2/kyber_fq_avx2.S | 2 ++ ml-kem/src/avx2/kyber_invntt_avx2.S | 2 ++ ml-kem/src/avx2/kyber_ntt_avx2.S | 2 ++ ml-kem/src/avx2/kyber_shuffle_avx2.S | 2 ++ .../src/riscv64/ntt_dualissue_plant_rv64im.S | 2 ++ ml-kem/src/riscv64/ntt_rvv_vlen128.S | 2 ++ ml-kem/src/riscv64/ntt_rvv_vlen256.S | 2 ++ ml-kem/src/riscv64/riscv_rvv_selector_test.S | 2 ++ sym/src/asm/AESNI_x86_64/aes_aesni_x86_64.S | 2 ++ sym/src/asm/ARMv8/aes_armv8_ce.S | 2 ++ sym/src/asm/riscv64/riscv64_aes_asm.S | 2 ++ sym/src/asm/riscv64/riscv64_aes_zkn_asm.S | 2 ++ 54 files changed, 139 insertions(+), 6 deletions(-) diff --git a/curve25519/src/armv7/x25519-cortex-m4-gcc.S b/curve25519/src/armv7/x25519-cortex-m4-gcc.S index 9151c401..c19cff26 100644 --- a/curve25519/src/armv7/x25519-cortex-m4-gcc.S +++ b/curve25519/src/armv7/x25519-cortex-m4-gcc.S @@ -1040,3 +1040,5 @@ SYM_FUNC_ENTER(crypto_scalarmult_curve25519_armv7) // in total for whole function 548 873 cycles SYM_FUNC_END(crypto_scalarmult_curve25519_armv7) + +ASM_END diff --git a/curve25519/src/armv8/X25519-AArch64.S b/curve25519/src/armv8/X25519-AArch64.S index a04cce4d..2d8e9985 100644 --- a/curve25519/src/armv8/X25519-AArch64.S +++ b/curve25519/src/armv8/X25519-AArch64.S @@ -1641,3 +1641,5 @@ invtable: .hword 50| (2<<9) .hword 5| (1<<9) .hword 0| (0<<9) + +ASM_END diff --git a/curve25519/src/avx/curve25519_avx_asm.S b/curve25519/src/avx/curve25519_avx_asm.S index e6c79b64..be16ca3c 100644 --- a/curve25519/src/avx/curve25519_avx_asm.S +++ b/curve25519/src/avx/curve25519_avx_asm.S @@ -37,3 +37,5 @@ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif + +ASM_END diff --git a/hash/src/asm/ARMv8A/KeccakP-1600-armv8a-ce.S b/hash/src/asm/ARMv8A/KeccakP-1600-armv8a-ce.S index 21299298..98e74d85 100644 --- a/hash/src/asm/ARMv8A/KeccakP-1600-armv8a-ce.S +++ b/hash/src/asm/ARMv8A/KeccakP-1600-armv8a-ce.S @@ -1039,3 +1039,5 @@ SYM_FUNC_ENTER(lc_keccak_squeeze_arm_ce) SYM_FUNC_END(lc_keccak_squeeze_arm_ce) .byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 + +ASM_END diff --git a/hash/src/asm/ARMv8A/KeccakP-1600-armv8a-neon.S b/hash/src/asm/ARMv8A/KeccakP-1600-armv8a-neon.S index fa9aff40..5cb2de6d 100644 --- a/hash/src/asm/ARMv8A/KeccakP-1600-armv8a-neon.S +++ b/hash/src/asm/ARMv8A/KeccakP-1600-armv8a-neon.S @@ -31,6 +31,8 @@ // This implementation comes with KeccakP-1600-SnP.h in the same folder. // Please refer to LowLevel.build for the exact list of other files it must be combined with. +#include "assembler_support.h" + // INFO: Tested on Cortex-A53(odroid-c2), using gcc. // WARNING: These functions work only on little endian CPU with ARMv8a + NEON architecture // WARNING: State must be 512 bit (64 bytes) aligned. @@ -560,3 +562,5 @@ KeccakP1600_Permute_RoundLoop: KeccakP1600_Permute_Exit: StoreState ret + +ASM_END diff --git a/hash/src/asm/ARMv8A/sha2-256-ARMv8.S b/hash/src/asm/ARMv8A/sha2-256-ARMv8.S index ada570b0..1b9b23e1 100644 --- a/hash/src/asm/ARMv8A/sha2-256-ARMv8.S +++ b/hash/src/asm/ARMv8A/sha2-256-ARMv8.S @@ -2016,3 +2016,5 @@ SYM_FUNC_ENTER(sha256_block_neon) add sp,sp,#16*4+2*__SIZEOF_POINTER__ RET SYM_FUNC_END(sha256_block_neon) + +ASM_END diff --git a/hash/src/asm/ARMv8A/sha2-512-ARMv8.S b/hash/src/asm/ARMv8A/sha2-512-ARMv8.S index 88372b41..5990ad76 100644 --- a/hash/src/asm/ARMv8A/sha2-512-ARMv8.S +++ b/hash/src/asm/ARMv8A/sha2-512-ARMv8.S @@ -1565,3 +1565,5 @@ SYM_FUNC_ENTER(sha512_block_armv8ce) RET SYM_FUNC_END(sha512_block_armv8ce) + +ASM_END diff --git a/hash/src/asm/ARMv8_2x/keccakx2_armce.S b/hash/src/asm/ARMv8_2x/keccakx2_armce.S index 0cf8fddb..04c44c29 100644 --- a/hash/src/asm/ARMv8_2x/keccakx2_armce.S +++ b/hash/src/asm/ARMv8_2x/keccakx2_armce.S @@ -194,4 +194,6 @@ loop: SYM_FUNC_END(keccak_f1600x2_armce) +ASM_END + #endif diff --git a/hash/src/asm/AVX2/KeccakP-1600-AVX2.S b/hash/src/asm/AVX2/KeccakP-1600-AVX2.S index f59cd1b5..26f965df 100644 --- a/hash/src/asm/AVX2/KeccakP-1600-AVX2.S +++ b/hash/src/asm/AVX2/KeccakP-1600-AVX2.S @@ -1032,3 +1032,5 @@ mask6_17: .quad ALLON, ALLON, 0, 0 .asciz "Keccak-1600 for AVX2, CRYPTOGAMS by " + +ASM_END diff --git a/hash/src/asm/AVX2/sha2-256-AVX2.S b/hash/src/asm/AVX2/sha2-256-AVX2.S index e79b733f..e0b77dcc 100644 --- a/hash/src/asm/AVX2/sha2-256-AVX2.S +++ b/hash/src/asm/AVX2/sha2-256-AVX2.S @@ -5495,3 +5495,5 @@ K256: .long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + +ASM_END diff --git a/hash/src/asm/AVX2/sha2-512-AVX2.S b/hash/src/asm/AVX2/sha2-512-AVX2.S index d4a9d931..52c77715 100644 --- a/hash/src/asm/AVX2/sha2-512-AVX2.S +++ b/hash/src/asm/AVX2/sha2-512-AVX2.S @@ -5760,3 +5760,5 @@ K512_nodup: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +ASM_END diff --git a/hash/src/asm/AVX512/KeccakP-1600-AVX512.S b/hash/src/asm/AVX512/KeccakP-1600-AVX512.S index 70e8e4f7..83cc26a3 100644 --- a/hash/src/asm/AVX512/KeccakP-1600-AVX512.S +++ b/hash/src/asm/AVX512/KeccakP-1600-AVX512.S @@ -971,3 +971,5 @@ iotas: .quad 0x8000000080008008 iotas_end: .asciz "Keccak-1600 for AVX-512F, CRYPTOGAMS by " + +ASM_END diff --git a/hash/src/asm/riscv32/keccakf1600_asm.S b/hash/src/asm/riscv32/keccakf1600_asm.S index 2a1c7df6..1dc9b0d3 100644 --- a/hash/src/asm/riscv32/keccakf1600_asm.S +++ b/hash/src/asm/riscv32/keccakf1600_asm.S @@ -540,3 +540,5 @@ SYM_FUNC_START(lc_keccakf1600_riscv) RET SYM_FUNC_END(lc_keccakf1600_riscv) + +ASM_END diff --git a/hash/src/asm/riscv64/fips202_rv64im.S b/hash/src/asm/riscv64/fips202_rv64im.S index 8a01dcc9..13c434c2 100644 --- a/hash/src/asm/riscv64/fips202_rv64im.S +++ b/hash/src/asm/riscv64/fips202_rv64im.S @@ -451,3 +451,5 @@ loop: addi sp, sp, 8*19 RET SYM_FUNC_END(KeccakF1600_StatePermute_RV64ASM) + +ASM_END diff --git a/hash/src/asm/riscv64/fips202_rv64imb.S b/hash/src/asm/riscv64/fips202_rv64imb.S index e17e474c..b5a87ea1 100644 --- a/hash/src/asm/riscv64/fips202_rv64imb.S +++ b/hash/src/asm/riscv64/fips202_rv64imb.S @@ -355,3 +355,5 @@ rounds1_loop_start: addi sp, sp, 8*18 RET SYM_FUNC_END(KeccakF1600_StatePermute_RV64ZBB) + +ASM_END diff --git a/hash/src/asm/riscv64/sha2-256-riscv.S b/hash/src/asm/riscv64/sha2-256-riscv.S index 6aef4c83..75365bff 100644 --- a/hash/src/asm/riscv64/sha2-256-riscv.S +++ b/hash/src/asm/riscv64/sha2-256-riscv.S @@ -2927,3 +2927,5 @@ K256: .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 .string "SHA256 for RISC-V, CRYPTOGAMS by @dot-asm" .align 5 + +ASM_END diff --git a/hash/src/asm/riscv64/sha2-512-riscv.S b/hash/src/asm/riscv64/sha2-512-riscv.S index 28735e31..3137d5bb 100644 --- a/hash/src/asm/riscv64/sha2-512-riscv.S +++ b/hash/src/asm/riscv64/sha2-512-riscv.S @@ -3143,3 +3143,5 @@ K512: .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 .string "SHA512 for RISC-V, CRYPTOGAMS by @dot-asm" .align 5 + +ASM_END diff --git a/internal/api/assembler_support.h b/internal/api/assembler_support.h index de333805..7a683989 100644 --- a/internal/api/assembler_support.h +++ b/internal/api/assembler_support.h @@ -44,6 +44,8 @@ # define FRAME_END #endif +# define ASM_END + # define SYM_FUNC_ENTER(name) # define SYM_FUNC(name) name @@ -73,6 +75,7 @@ # define ANNOTATE_INTRA_FUNCTION_CALL # ifdef __APPLE__ +# define ASM_END # define SYM_FUNC(name) _##name # define SYM_TYPE_OBJ(name) # define SYM_TYPE_FUNC(name) @@ -91,6 +94,8 @@ # elif (defined(__CYGWIN__) || defined(_WIN32)) +# define ASM_END + # define SYM_FUNC(name) name # define SYM_TYPE_OBJ(name) \ @@ -115,6 +120,34 @@ # else /* __APPLE__ */ +# if defined __ELF__ && defined __CET__ +# ifdef __x86_64__ +# define ASM_X86_MARK_CET_ALIGN 3 +# else +# define ASM_X86_MARK_CET_ALIGN 2 +# endif +# define ASM_END \ + .pushsection ".note.gnu.property", "a"; \ + .p2align ASM_X86_MARK_CET_ALIGN; \ + .long 1f - 0f; \ + .long 4f - 1f; \ + .long 5; \ +0: \ + .asciz "GNU"; \ +1: \ + .p2align ASM_X86_MARK_CET_ALIGN; \ + .long 0xc0000002; \ + .long 3f - 2f; \ +2: \ + .long 3; \ +3: \ + .p2align ASM_X86_MARK_CET_ALIGN; \ +4: \ + .popsection +# else +# define ASM_END +# endif + # define SYM_FUNC(name) name # define SYM_TYPE_OBJ(name) \ diff --git a/ml-dsa/src/armv7/dilithium_ntt_armv7.S b/ml-dsa/src/armv7/dilithium_ntt_armv7.S index 878a2623..100eeb67 100644 --- a/ml-dsa/src/armv7/dilithium_ntt_armv7.S +++ b/ml-dsa/src/armv7/dilithium_ntt_armv7.S @@ -589,3 +589,5 @@ inv_ntt_asm_smull_q: .align 2 inv_ntt_asm_smull_64: .word 64 + +ASM_END diff --git a/ml-dsa/src/armv7/dilithium_pointwise_smull_armv7.S b/ml-dsa/src/armv7/dilithium_pointwise_smull_armv7.S index b84a7fac..dcd2706b 100644 --- a/ml-dsa/src/armv7/dilithium_pointwise_smull_armv7.S +++ b/ml-dsa/src/armv7/dilithium_pointwise_smull_armv7.S @@ -166,3 +166,5 @@ SYM_FUNC_ENTER(armv7_poly_pointwise_acc_invmontgomery_asm_smull) pop.w {r4-r11, pc} SYM_FUNC_END(armv7_poly_pointwise_acc_invmontgomery_asm_smull) + +ASM_END diff --git a/ml-dsa/src/armv7/dilithium_poly_armv7.S b/ml-dsa/src/armv7/dilithium_poly_armv7.S index e0a62a49..fa0994f2 100644 --- a/ml-dsa/src/armv7/dilithium_poly_armv7.S +++ b/ml-dsa/src/armv7/dilithium_poly_armv7.S @@ -131,3 +131,5 @@ end: bx lr SYM_FUNC_END(armv7_rej_uniform_asm) + +ASM_END diff --git a/ml-dsa/src/armv8/dilithium_intt_armv8.S b/ml-dsa/src/armv8/dilithium_intt_armv8.S index ddcae06e..1b10e466 100644 --- a/ml-dsa/src/armv8/dilithium_intt_armv8.S +++ b/ml-dsa/src/armv8/dilithium_intt_armv8.S @@ -545,3 +545,5 @@ SYM_FUNC_ENTER(intt_SIMD_bot_armv8) br lr SYM_FUNC_END(intt_SIMD_bot_armv8) + +ASM_END diff --git a/ml-dsa/src/armv8/dilithium_ntt_armv8.S b/ml-dsa/src/armv8/dilithium_ntt_armv8.S index 06fdc42a..2d89b9d8 100644 --- a/ml-dsa/src/armv8/dilithium_ntt_armv8.S +++ b/ml-dsa/src/armv8/dilithium_ntt_armv8.S @@ -300,3 +300,5 @@ SYM_FUNC_ENTER(ntt_SIMD_bot_armv8) br lr SYM_FUNC_END(ntt_SIMD_bot_armv8) + +ASM_END diff --git a/ml-dsa/src/armv8/dilithium_poly_armv8.S b/ml-dsa/src/armv8/dilithium_poly_armv8.S index 3fd814fb..fe28cda1 100644 --- a/ml-dsa/src/armv8/dilithium_poly_armv8.S +++ b/ml-dsa/src/armv8/dilithium_poly_armv8.S @@ -918,3 +918,5 @@ SYM_FUNC_ENTER(polyvecl_pointwise_acc_montgomery_armv8) br lr SYM_FUNC_END(polyvecl_pointwise_acc_montgomery_armv8) + +ASM_END diff --git a/ml-dsa/src/avx2/dilithium_invntt_avx2.S b/ml-dsa/src/avx2/dilithium_invntt_avx2.S index 893c95e9..9de0a07a 100644 --- a/ml-dsa/src/avx2/dilithium_invntt_avx2.S +++ b/ml-dsa/src/avx2/dilithium_invntt_avx2.S @@ -238,3 +238,5 @@ levels6t7 3 RET SYM_FUNC_END(dilithium_invntt_avx) + +ASM_END diff --git a/ml-dsa/src/avx2/dilithium_ntt_avx2.S b/ml-dsa/src/avx2/dilithium_ntt_avx2.S index e9e3c520..d9c1f391 100644 --- a/ml-dsa/src/avx2/dilithium_ntt_avx2.S +++ b/ml-dsa/src/avx2/dilithium_ntt_avx2.S @@ -196,3 +196,5 @@ levels2t7 3 RET SYM_FUNC_END(dilithium_ntt_avx) + +ASM_END diff --git a/ml-dsa/src/avx2/dilithium_pointwise_avx2.S b/ml-dsa/src/avx2/dilithium_pointwise_avx2.S index 58911a4c..bed14608 100644 --- a/ml-dsa/src/avx2/dilithium_pointwise_avx2.S +++ b/ml-dsa/src/avx2/dilithium_pointwise_avx2.S @@ -208,3 +208,5 @@ jb _looptop2 RET SYM_FUNC_END(dilithium_pointwise_acc_avx) + +ASM_END diff --git a/ml-dsa/src/avx2/dilithium_shuffle_avx2.S b/ml-dsa/src/avx2/dilithium_shuffle_avx2.S index 5b76acbf..028a4c27 100644 --- a/ml-dsa/src/avx2/dilithium_shuffle_avx2.S +++ b/ml-dsa/src/avx2/dilithium_shuffle_avx2.S @@ -50,3 +50,5 @@ add $256,%rdi nttunpack128_avx RET SYM_FUNC_END(dilithium_nttunpack_avx) + +ASM_END diff --git a/ml-dsa/src/riscv64/ntt_8l_dualissue_plant_rv64im.S b/ml-dsa/src/riscv64/ntt_8l_dualissue_plant_rv64im.S index a86380b2..142764aa 100644 --- a/ml-dsa/src/riscv64/ntt_8l_dualissue_plant_rv64im.S +++ b/ml-dsa/src/riscv64/ntt_8l_dualissue_plant_rv64im.S @@ -1168,3 +1168,5 @@ poly_reduce_rv64im_loop: bne a3, a0, poly_reduce_rv64im_loop RET SYM_FUNC_END(dilithium_poly_reduce_rv64im) + +ASM_END diff --git a/ml-dsa/src/riscv64/ntt_rvv.S b/ml-dsa/src/riscv64/ntt_rvv.S index 8254fca9..116a8c3b 100644 --- a/ml-dsa/src/riscv64/ntt_rvv.S +++ b/ml-dsa/src/riscv64/ntt_rvv.S @@ -1588,3 +1588,5 @@ poly_reduce_rvv_loop: bnez a3, poly_reduce_rvv_loop RET SYM_FUNC_END(dilithium_poly_reduce_rvv) + +ASM_END diff --git a/ml-kem/src/armv7/kyber_fastinvntt_armv7.S b/ml-kem/src/armv7/kyber_fastinvntt_armv7.S index 8b163a66..b3862483 100644 --- a/ml-kem/src/armv7/kyber_fastinvntt_armv7.S +++ b/ml-kem/src/armv7/kyber_fastinvntt_armv7.S @@ -272,3 +272,5 @@ SYM_FUNC_ENTER(kyber_invntt_armv7) pop.w {r4-r11, pc} SYM_FUNC_END(kyber_invntt_armv7) + +ASM_END diff --git a/ml-kem/src/armv7/kyber_fastntt_armv7.S b/ml-kem/src/armv7/kyber_fastntt_armv7.S index 9b8580c9..988a4cff 100644 --- a/ml-kem/src/armv7/kyber_fastntt_armv7.S +++ b/ml-kem/src/armv7/kyber_fastntt_armv7.S @@ -243,3 +243,5 @@ SYM_FUNC_ENTER(kyber_ntt_armv7) pop.w {r4-r11, pc} SYM_FUNC_END(kyber_ntt_armv7) + +ASM_END diff --git a/ml-kem/src/armv7/kyber_poly_armv7.S b/ml-kem/src/armv7/kyber_poly_armv7.S index 8810555f..08582c35 100644 --- a/ml-kem/src/armv7/kyber_poly_armv7.S +++ b/ml-kem/src/armv7/kyber_poly_armv7.S @@ -283,3 +283,5 @@ SYM_FUNC_ENTER(kyber_basemul_armv7) .unreq qinv SYM_FUNC_END(kyber_basemul_armv7) + +ASM_END diff --git a/ml-kem/src/armv8/kyber_add_armv8.S b/ml-kem/src/armv8/kyber_add_armv8.S index c2cb39ed..487be1ff 100644 --- a/ml-kem/src/armv8/kyber_add_armv8.S +++ b/ml-kem/src/armv8/kyber_add_armv8.S @@ -79,3 +79,5 @@ SYM_FUNC_ENTER(kyber_add_armv8) RET SYM_FUNC_END(kyber_add_armv8) + +ASM_END diff --git a/ml-kem/src/armv8/kyber_basemul_armv8.S b/ml-kem/src/armv8/kyber_basemul_armv8.S index 7bec9b9c..4058ccac 100644 --- a/ml-kem/src/armv8/kyber_basemul_armv8.S +++ b/ml-kem/src/armv8/kyber_basemul_armv8.S @@ -99,3 +99,5 @@ SYM_FUNC_ENTER(kyber_basemul_armv8) RET SYM_FUNC_END(kyber_basemul_armv8) + +ASM_END diff --git a/ml-kem/src/armv8/kyber_cbd_armv8.S b/ml-kem/src/armv8/kyber_cbd_armv8.S index 28a1724f..287078a9 100644 --- a/ml-kem/src/armv8/kyber_cbd_armv8.S +++ b/ml-kem/src/armv8/kyber_cbd_armv8.S @@ -263,3 +263,5 @@ SYM_FUNC_ENTER(kyber_cbd3_armv8) // RET SYM_FUNC_END(kyber_cbd3_armv8) + +ASM_END diff --git a/ml-kem/src/armv8/kyber_inv_ntt_armv8.S b/ml-kem/src/armv8/kyber_inv_ntt_armv8.S index 4264445f..f0238c65 100644 --- a/ml-kem/src/armv8/kyber_inv_ntt_armv8.S +++ b/ml-kem/src/armv8/kyber_inv_ntt_armv8.S @@ -239,3 +239,5 @@ SYM_FUNC_ENTER(kyber_inv_ntt_armv8) RET SYM_FUNC_END(kyber_inv_ntt_armv8) + +ASM_END diff --git a/ml-kem/src/armv8/kyber_ntt_armv8_asm.S b/ml-kem/src/armv8/kyber_ntt_armv8_asm.S index ae4eb3a5..da2cbff9 100644 --- a/ml-kem/src/armv8/kyber_ntt_armv8_asm.S +++ b/ml-kem/src/armv8/kyber_ntt_armv8_asm.S @@ -217,3 +217,5 @@ SYM_FUNC_ENTER(kyber_ntt_armv8) RET SYM_FUNC_END(kyber_ntt_armv8) + +ASM_END diff --git a/ml-kem/src/armv8/kyber_poly_armv8_asm.S b/ml-kem/src/armv8/kyber_poly_armv8_asm.S index d9ee3819..8a9bb6ec 100644 --- a/ml-kem/src/armv8/kyber_poly_armv8_asm.S +++ b/ml-kem/src/armv8/kyber_poly_armv8_asm.S @@ -143,3 +143,5 @@ SYM_FUNC_ENTER(kyber_poly_frombytes_armv8) RET SYM_FUNC_END(kyber_poly_frombytes_armv8) + +ASM_END diff --git a/ml-kem/src/armv8/kyber_reduce_armv8.S b/ml-kem/src/armv8/kyber_reduce_armv8.S index 47f1b7bf..2a341b8f 100644 --- a/ml-kem/src/armv8/kyber_reduce_armv8.S +++ b/ml-kem/src/armv8/kyber_reduce_armv8.S @@ -261,3 +261,5 @@ SYM_FUNC_ENTER(kyber_add_add_reduce_armv8) RET SYM_FUNC_END(kyber_add_add_reduce_armv8) + +ASM_END diff --git a/ml-kem/src/avx2/kyber_basemul_avx2.S b/ml-kem/src/avx2/kyber_basemul_avx2.S index 0b81bdd3..fc1ec793 100644 --- a/ml-kem/src/avx2/kyber_basemul_avx2.S +++ b/ml-kem/src/avx2/kyber_basemul_avx2.S @@ -106,3 +106,5 @@ mov %r8,%rsp RET SYM_FUNC_END(kyber_basemul_avx) STACK_FRAME_NON_STANDARD kyber_basemul_avx + +ASM_END diff --git a/ml-kem/src/avx2/kyber_fq_avx2.S b/ml-kem/src/avx2/kyber_fq_avx2.S index fa0409b5..94ff5b0e 100644 --- a/ml-kem/src/avx2/kyber_fq_avx2.S +++ b/ml-kem/src/avx2/kyber_fq_avx2.S @@ -86,3 +86,5 @@ add $256,%rdi tomont128_avx RET SYM_FUNC_END(tomont_avx) + +ASM_END diff --git a/ml-kem/src/avx2/kyber_invntt_avx2.S b/ml-kem/src/avx2/kyber_invntt_avx2.S index 4090c5d7..66cc61cd 100644 --- a/ml-kem/src/avx2/kyber_invntt_avx2.S +++ b/ml-kem/src/avx2/kyber_invntt_avx2.S @@ -193,3 +193,5 @@ intt_level6 0 intt_level6 1 RET SYM_FUNC_END(kyber_invntt_avx) + +ASM_END diff --git a/ml-kem/src/avx2/kyber_ntt_avx2.S b/ml-kem/src/avx2/kyber_ntt_avx2.S index c4962fc5..b3633613 100644 --- a/ml-kem/src/avx2/kyber_ntt_avx2.S +++ b/ml-kem/src/avx2/kyber_ntt_avx2.S @@ -189,3 +189,5 @@ levels1t6 1 RET SYM_FUNC_END(kyber_ntt_avx) + +ASM_END diff --git a/ml-kem/src/avx2/kyber_shuffle_avx2.S b/ml-kem/src/avx2/kyber_shuffle_avx2.S index ffb642dc..0319e129 100644 --- a/ml-kem/src/avx2/kyber_shuffle_avx2.S +++ b/ml-kem/src/avx2/kyber_shuffle_avx2.S @@ -252,3 +252,5 @@ add $192,%rsi kyber_nttfrombytes128_avx RET SYM_FUNC_END(kyber_nttfrombytes_avx) + +ASM_END diff --git a/ml-kem/src/riscv64/ntt_dualissue_plant_rv64im.S b/ml-kem/src/riscv64/ntt_dualissue_plant_rv64im.S index 7eab10be..f509806e 100644 --- a/ml-kem/src/riscv64/ntt_dualissue_plant_rv64im.S +++ b/ml-kem/src/riscv64/ntt_dualissue_plant_rv64im.S @@ -1969,3 +1969,5 @@ SYM_FUNC_START(kyber_poly_toplant_rv64im) addi sp, sp, 8*1 RET SYM_FUNC_END(kyber_poly_toplant_rv64im) + +ASM_END diff --git a/ml-kem/src/riscv64/ntt_rvv_vlen128.S b/ml-kem/src/riscv64/ntt_rvv_vlen128.S index 2d3afc5d..d113d39f 100644 --- a/ml-kem/src/riscv64/ntt_rvv_vlen128.S +++ b/ml-kem/src/riscv64/ntt_rvv_vlen128.S @@ -1196,3 +1196,5 @@ cbd3_rvv_vlen128_loop: bnez a4, cbd3_rvv_vlen128_loop RET SYM_FUNC_END(kyber_cbd3_rvv_vlen128) + +ASM_END diff --git a/ml-kem/src/riscv64/ntt_rvv_vlen256.S b/ml-kem/src/riscv64/ntt_rvv_vlen256.S index 16b5aa2f..7f827f53 100644 --- a/ml-kem/src/riscv64/ntt_rvv_vlen256.S +++ b/ml-kem/src/riscv64/ntt_rvv_vlen256.S @@ -1199,3 +1199,5 @@ cbd3_rvv_vlen256_loop: bnez a4, cbd3_rvv_vlen256_loop RET SYM_FUNC_END(kyber_cbd3_rvv_vlen256) + +ASM_END diff --git a/ml-kem/src/riscv64/riscv_rvv_selector_test.S b/ml-kem/src/riscv64/riscv_rvv_selector_test.S index a420ffea..9f8aa904 100644 --- a/ml-kem/src/riscv64/riscv_rvv_selector_test.S +++ b/ml-kem/src/riscv64/riscv_rvv_selector_test.S @@ -24,3 +24,5 @@ SYM_FUNC_START(kyber_rvv_selector) sext.w a0,a0 RET SYM_FUNC_END(kyber_rvv_selector) + +ASM_END diff --git a/sym/src/asm/AESNI_x86_64/aes_aesni_x86_64.S b/sym/src/asm/AESNI_x86_64/aes_aesni_x86_64.S index d53a97a3..a79cc977 100644 --- a/sym/src/asm/AESNI_x86_64/aes_aesni_x86_64.S +++ b/sym/src/asm/AESNI_x86_64/aes_aesni_x86_64.S @@ -4568,3 +4568,5 @@ SYM_FUNC_END(aesni_set_encrypt_key) .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 + +ASM_END diff --git a/sym/src/asm/ARMv8/aes_armv8_ce.S b/sym/src/asm/ARMv8/aes_armv8_ce.S index 89f61553..894645fc 100644 --- a/sym/src/asm/ARMv8/aes_armv8_ce.S +++ b/sym/src/asm/ARMv8/aes_armv8_ce.S @@ -3215,3 +3215,5 @@ Lxts_dec_abort: Lxts_dec_final_abort: RET SYM_FUNC_END(aes_v8_xts_decrypt) + +ASM_END diff --git a/sym/src/asm/riscv64/riscv64_aes_asm.S b/sym/src/asm/riscv64/riscv64_aes_asm.S index 7a7ff426..4dc92bdd 100644 --- a/sym/src/asm/riscv64/riscv64_aes_asm.S +++ b/sym/src/asm/riscv64/riscv64_aes_asm.S @@ -1900,3 +1900,5 @@ AES_rcon: .word 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U .word 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U .word 0x0000001BU, 0x00000036U + +ASM_END diff --git a/sym/src/asm/riscv64/riscv64_aes_zkn_asm.S b/sym/src/asm/riscv64/riscv64_aes_zkn_asm.S index 2d8eac75..4f9f887d 100644 --- a/sym/src/asm/riscv64/riscv64_aes_zkn_asm.S +++ b/sym/src/asm/riscv64/riscv64_aes_zkn_asm.S @@ -732,3 +732,5 @@ rv64i_zknd_set_decrypt_key: ld x8,0(sp) addi sp,sp,16 RET + +ASM_END -- 2.48.1