From 809c8760436a5ee318d65d30f415c28838fee10b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Forr=C3=B3?= Date: Mon, 29 Apr 2019 11:28:51 +0200 Subject: [PATCH] x86 SIMD: Add endbr32/endbr64 instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow for indirect branch tracking with Intel CET (Control-Flow Enforcement Technology) [1], by making all exported routines a possible target for an indirect jump. Signed-off-by: Nikola Forró [1] https://software.intel.com/sites/default/files/managed/4d/2a/control-flow-enforcement-technology-preview.pdf --- simd/i386/jccolext-avx2.asm | 1 + simd/i386/jccolext-mmx.asm | 1 + simd/i386/jccolext-sse2.asm | 1 + simd/i386/jcgryext-avx2.asm | 1 + simd/i386/jcgryext-mmx.asm | 1 + simd/i386/jcgryext-sse2.asm | 1 + simd/i386/jchuff-sse2.asm | 1 + simd/i386/jcphuff-sse2.asm | 2 ++ simd/i386/jcsample-avx2.asm | 2 ++ simd/i386/jcsample-mmx.asm | 2 ++ simd/i386/jcsample-sse2.asm | 2 ++ simd/i386/jdcolext-avx2.asm | 1 + simd/i386/jdcolext-mmx.asm | 1 + simd/i386/jdcolext-sse2.asm | 1 + simd/i386/jdmrgext-avx2.asm | 2 ++ simd/i386/jdmrgext-mmx.asm | 2 ++ simd/i386/jdmrgext-sse2.asm | 2 ++ simd/i386/jdsample-avx2.asm | 4 ++++ simd/i386/jdsample-mmx.asm | 4 ++++ simd/i386/jdsample-sse2.asm | 4 ++++ simd/i386/jfdctflt-3dn.asm | 1 + simd/i386/jfdctflt-sse.asm | 1 + simd/i386/jfdctfst-mmx.asm | 1 + simd/i386/jfdctfst-sse2.asm | 1 + simd/i386/jfdctint-avx2.asm | 1 + simd/i386/jfdctint-mmx.asm | 1 + simd/i386/jfdctint-sse2.asm | 1 + simd/i386/jidctflt-3dn.asm | 1 + simd/i386/jidctflt-sse.asm | 1 + simd/i386/jidctflt-sse2.asm | 1 + simd/i386/jidctfst-mmx.asm | 1 + simd/i386/jidctfst-sse2.asm | 1 + simd/i386/jidctint-avx2.asm | 1 + simd/i386/jidctint-mmx.asm | 1 + simd/i386/jidctint-sse2.asm | 1 + simd/i386/jidctred-mmx.asm | 2 ++ simd/i386/jidctred-sse2.asm | 2 ++ simd/i386/jquant-3dn.asm | 2 ++ simd/i386/jquant-mmx.asm | 2 ++ simd/i386/jquant-sse.asm | 2 ++ simd/i386/jquantf-sse2.asm | 2 ++ 
simd/i386/jquanti-avx2.asm | 2 ++ simd/i386/jquanti-sse2.asm | 2 ++ simd/nasm/jsimdext.inc | 8 ++++++++ simd/x86_64/jccolext-avx2.asm | 1 + simd/x86_64/jccolext-sse2.asm | 1 + simd/x86_64/jcgryext-avx2.asm | 1 + simd/x86_64/jcgryext-sse2.asm | 1 + simd/x86_64/jchuff-sse2.asm | 1 + simd/x86_64/jcphuff-sse2.asm | 2 ++ simd/x86_64/jcsample-avx2.asm | 2 ++ simd/x86_64/jcsample-sse2.asm | 2 ++ simd/x86_64/jdcolext-avx2.asm | 1 + simd/x86_64/jdcolext-sse2.asm | 1 + simd/x86_64/jdmrgext-avx2.asm | 2 ++ simd/x86_64/jdmrgext-sse2.asm | 2 ++ simd/x86_64/jdsample-avx2.asm | 4 ++++ simd/x86_64/jdsample-sse2.asm | 4 ++++ simd/x86_64/jfdctflt-sse.asm | 1 + simd/x86_64/jfdctfst-sse2.asm | 1 + simd/x86_64/jfdctint-avx2.asm | 1 + simd/x86_64/jfdctint-sse2.asm | 1 + simd/x86_64/jidctflt-sse2.asm | 1 + simd/x86_64/jidctfst-sse2.asm | 1 + simd/x86_64/jidctint-avx2.asm | 1 + simd/x86_64/jidctint-sse2.asm | 1 + simd/x86_64/jidctred-sse2.asm | 2 ++ simd/x86_64/jquantf-sse2.asm | 2 ++ simd/x86_64/jquanti-avx2.asm | 2 ++ simd/x86_64/jquanti-sse2.asm | 2 ++ 70 files changed, 116 insertions(+) diff --git a/simd/i386/jccolext-avx2.asm b/simd/i386/jccolext-avx2.asm index c46d684..7dc6e08 100644 --- a/simd/i386/jccolext-avx2.asm +++ b/simd/i386/jccolext-avx2.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2) EXTN(jsimd_rgb_ycc_convert_avx2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jccolext-mmx.asm b/simd/i386/jccolext-mmx.asm index 6357a42..8048abb 100644 --- a/simd/i386/jccolext-mmx.asm +++ b/simd/i386/jccolext-mmx.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_mmx) EXTN(jsimd_rgb_ycc_convert_mmx): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jccolext-sse2.asm b/simd/i386/jccolext-sse2.asm index c6c8085..5307ddc 100644 --- a/simd/i386/jccolext-sse2.asm +++ b/simd/i386/jccolext-sse2.asm @@ -41,6 +41,7 @@ GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2) 
EXTN(jsimd_rgb_ycc_convert_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jcgryext-avx2.asm b/simd/i386/jcgryext-avx2.asm index 3fa7973..27a0e11 100644 --- a/simd/i386/jcgryext-avx2.asm +++ b/simd/i386/jcgryext-avx2.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2) EXTN(jsimd_rgb_gray_convert_avx2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jcgryext-mmx.asm b/simd/i386/jcgryext-mmx.asm index 8af42e5..dda0e05 100644 --- a/simd/i386/jcgryext-mmx.asm +++ b/simd/i386/jcgryext-mmx.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_mmx) EXTN(jsimd_rgb_gray_convert_mmx): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jcgryext-sse2.asm b/simd/i386/jcgryext-sse2.asm index c9d6ff1..f8835bb 100644 --- a/simd/i386/jcgryext-sse2.asm +++ b/simd/i386/jcgryext-sse2.asm @@ -41,6 +41,7 @@ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2) EXTN(jsimd_rgb_gray_convert_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jchuff-sse2.asm b/simd/i386/jchuff-sse2.asm index 79f0ca5..91e6e1d 100644 --- a/simd/i386/jchuff-sse2.asm +++ b/simd/i386/jchuff-sse2.asm @@ -180,6 +180,7 @@ EXTN(jconst_huff_encode_one_block): GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2) EXTN(jsimd_huff_encode_one_block_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jcphuff-sse2.asm b/simd/i386/jcphuff-sse2.asm index 8b73178..87190d1 100644 --- a/simd/i386/jcphuff-sse2.asm +++ b/simd/i386/jcphuff-sse2.asm @@ -281,6 +281,7 @@ GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2) EXTN(jsimd_encode_mcu_AC_first_prepare_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 @@ -460,6 +461,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2): GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2) 
EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jcsample-avx2.asm b/simd/i386/jcsample-avx2.asm index 0a20802..46eba8c 100644 --- a/simd/i386/jcsample-avx2.asm +++ b/simd/i386/jcsample-avx2.asm @@ -43,6 +43,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2) EXTN(jsimd_h2v1_downsample_avx2): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused @@ -216,6 +217,7 @@ EXTN(jsimd_h2v1_downsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2) EXTN(jsimd_h2v2_downsample_avx2): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused diff --git a/simd/i386/jcsample-mmx.asm b/simd/i386/jcsample-mmx.asm index 2c223ee..b2b8ded 100644 --- a/simd/i386/jcsample-mmx.asm +++ b/simd/i386/jcsample-mmx.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_downsample_mmx) EXTN(jsimd_h2v1_downsample_mmx): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused @@ -185,6 +186,7 @@ EXTN(jsimd_h2v1_downsample_mmx): GLOBAL_FUNCTION(jsimd_h2v2_downsample_mmx) EXTN(jsimd_h2v2_downsample_mmx): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused diff --git a/simd/i386/jcsample-sse2.asm b/simd/i386/jcsample-sse2.asm index 4fea60d..4c22b40 100644 --- a/simd/i386/jcsample-sse2.asm +++ b/simd/i386/jcsample-sse2.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2) EXTN(jsimd_h2v1_downsample_sse2): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused @@ -198,6 +199,7 @@ EXTN(jsimd_h2v1_downsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2) EXTN(jsimd_h2v2_downsample_sse2): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused diff --git a/simd/i386/jdcolext-avx2.asm b/simd/i386/jdcolext-avx2.asm index 015be04..b076765 100644 --- a/simd/i386/jdcolext-avx2.asm +++ b/simd/i386/jdcolext-avx2.asm @@ -43,6 +43,7 @@ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2) EXTN(jsimd_ycc_rgb_convert_avx2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jdcolext-mmx.asm 
b/simd/i386/jdcolext-mmx.asm index 5813cfc..150f5b6 100644 --- a/simd/i386/jdcolext-mmx.asm +++ b/simd/i386/jdcolext-mmx.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_mmx) EXTN(jsimd_ycc_rgb_convert_mmx): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jdcolext-sse2.asm b/simd/i386/jdcolext-sse2.asm index d5572b3..cd3ac70 100644 --- a/simd/i386/jdcolext-sse2.asm +++ b/simd/i386/jdcolext-sse2.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2) EXTN(jsimd_ycc_rgb_convert_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jdmrgext-avx2.asm b/simd/i386/jdmrgext-avx2.asm index e35f728..0db0aa4 100644 --- a/simd/i386/jdmrgext-avx2.asm +++ b/simd/i386/jdmrgext-avx2.asm @@ -43,6 +43,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2) EXTN(jsimd_h2v1_merged_upsample_avx2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 @@ -523,6 +524,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2) EXTN(jsimd_h2v2_merged_upsample_avx2): + _endbr32 push ebp mov ebp, esp push ebx diff --git a/simd/i386/jdmrgext-mmx.asm b/simd/i386/jdmrgext-mmx.asm index eb3e36b..6427a1a 100644 --- a/simd/i386/jdmrgext-mmx.asm +++ b/simd/i386/jdmrgext-mmx.asm @@ -40,6 +40,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_mmx) EXTN(jsimd_h2v1_merged_upsample_mmx): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 @@ -408,6 +409,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx): GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_mmx) EXTN(jsimd_h2v2_merged_upsample_mmx): + _endbr32 push ebp mov ebp, esp push ebx diff --git a/simd/i386/jdmrgext-sse2.asm b/simd/i386/jdmrgext-sse2.asm index c113dc4..6897fa1 100644 --- a/simd/i386/jdmrgext-sse2.asm +++ b/simd/i386/jdmrgext-sse2.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2) EXTN(jsimd_h2v1_merged_upsample_sse2): + _endbr32 push ebp mov eax, esp ; eax = 
original ebp sub esp, byte 4 @@ -465,6 +466,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2) EXTN(jsimd_h2v2_merged_upsample_sse2): + _endbr32 push ebp mov ebp, esp push ebx diff --git a/simd/i386/jdsample-avx2.asm b/simd/i386/jdsample-avx2.asm index a800c35..7d52708 100644 --- a/simd/i386/jdsample-avx2.asm +++ b/simd/i386/jdsample-avx2.asm @@ -60,6 +60,7 @@ PW_EIGHT times 16 dw 8 GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2) EXTN(jsimd_h2v1_fancy_upsample_avx2): + _endbr32 push ebp mov ebp, esp pushpic ebx @@ -227,6 +228,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2) EXTN(jsimd_h2v2_fancy_upsample_avx2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 @@ -570,6 +572,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2) EXTN(jsimd_h2v1_upsample_avx2): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused @@ -669,6 +672,7 @@ EXTN(jsimd_h2v1_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2) EXTN(jsimd_h2v2_upsample_avx2): + _endbr32 push ebp mov ebp, esp push ebx diff --git a/simd/i386/jdsample-mmx.asm b/simd/i386/jdsample-mmx.asm index 12c49f0..7f2ab40 100644 --- a/simd/i386/jdsample-mmx.asm +++ b/simd/i386/jdsample-mmx.asm @@ -59,6 +59,7 @@ PW_EIGHT times 4 dw 8 GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_mmx) EXTN(jsimd_h2v1_fancy_upsample_mmx): + _endbr32 push ebp mov ebp, esp pushpic ebx @@ -217,6 +218,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx): GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_mmx) EXTN(jsimd_h2v2_fancy_upsample_mmx): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 @@ -541,6 +543,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx): GLOBAL_FUNCTION(jsimd_h2v1_upsample_mmx) EXTN(jsimd_h2v1_upsample_mmx): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused @@ -640,6 +643,7 @@ EXTN(jsimd_h2v1_upsample_mmx): GLOBAL_FUNCTION(jsimd_h2v2_upsample_mmx) EXTN(jsimd_h2v2_upsample_mmx): + _endbr32 push ebp mov 
ebp, esp push ebx diff --git a/simd/i386/jdsample-sse2.asm b/simd/i386/jdsample-sse2.asm index 4e28d2f..3311b25 100644 --- a/simd/i386/jdsample-sse2.asm +++ b/simd/i386/jdsample-sse2.asm @@ -59,6 +59,7 @@ PW_EIGHT times 8 dw 8 GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2) EXTN(jsimd_h2v1_fancy_upsample_sse2): + _endbr32 push ebp mov ebp, esp pushpic ebx @@ -216,6 +217,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2) EXTN(jsimd_h2v2_fancy_upsample_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 @@ -538,6 +540,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2) EXTN(jsimd_h2v1_upsample_sse2): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused @@ -635,6 +638,7 @@ EXTN(jsimd_h2v1_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2) EXTN(jsimd_h2v2_upsample_sse2): + _endbr32 push ebp mov ebp, esp push ebx diff --git a/simd/i386/jfdctflt-3dn.asm b/simd/i386/jfdctflt-3dn.asm index 322ab16..109e36e 100644 --- a/simd/i386/jfdctflt-3dn.asm +++ b/simd/i386/jfdctflt-3dn.asm @@ -56,6 +56,7 @@ PD_1_306 times 2 dd 1.306562964876376527856643 GLOBAL_FUNCTION(jsimd_fdct_float_3dnow) EXTN(jsimd_fdct_float_3dnow): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jfdctflt-sse.asm b/simd/i386/jfdctflt-sse.asm index 86952c6..b1e0576 100644 --- a/simd/i386/jfdctflt-sse.asm +++ b/simd/i386/jfdctflt-sse.asm @@ -67,6 +67,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643 GLOBAL_FUNCTION(jsimd_fdct_float_sse) EXTN(jsimd_fdct_float_sse): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jfdctfst-mmx.asm b/simd/i386/jfdctfst-mmx.asm index 80645a5..be84fdb 100644 --- a/simd/i386/jfdctfst-mmx.asm +++ b/simd/i386/jfdctfst-mmx.asm @@ -81,6 +81,7 @@ PW_F1306 times 4 dw F_1_306 << CONST_SHIFT GLOBAL_FUNCTION(jsimd_fdct_ifast_mmx) EXTN(jsimd_fdct_ifast_mmx): + _endbr32 push ebp mov eax, esp ; eax = 
original ebp sub esp, byte 4 diff --git a/simd/i386/jfdctfst-sse2.asm b/simd/i386/jfdctfst-sse2.asm index 446fa7a..945f9cf 100644 --- a/simd/i386/jfdctfst-sse2.asm +++ b/simd/i386/jfdctfst-sse2.asm @@ -82,6 +82,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2) EXTN(jsimd_fdct_ifast_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jfdctint-avx2.asm b/simd/i386/jfdctint-avx2.asm index 97de230..165c52a 100644 --- a/simd/i386/jfdctint-avx2.asm +++ b/simd/i386/jfdctint-avx2.asm @@ -260,6 +260,7 @@ PW_1_NEG1 times 8 dw 1 GLOBAL_FUNCTION(jsimd_fdct_islow_avx2) EXTN(jsimd_fdct_islow_avx2): + _endbr32 push ebp mov ebp, esp pushpic ebx diff --git a/simd/i386/jfdctint-mmx.asm b/simd/i386/jfdctint-mmx.asm index 3ade9d4..983d263 100644 --- a/simd/i386/jfdctint-mmx.asm +++ b/simd/i386/jfdctint-mmx.asm @@ -102,6 +102,7 @@ PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS - 1) GLOBAL_FUNCTION(jsimd_fdct_islow_mmx) EXTN(jsimd_fdct_islow_mmx): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jfdctint-sse2.asm b/simd/i386/jfdctint-sse2.asm index 71b684c..3451f52 100644 --- a/simd/i386/jfdctint-sse2.asm +++ b/simd/i386/jfdctint-sse2.asm @@ -103,6 +103,7 @@ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1) GLOBAL_FUNCTION(jsimd_fdct_islow_sse2) EXTN(jsimd_fdct_islow_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jidctflt-3dn.asm b/simd/i386/jidctflt-3dn.asm index 8795191..eb49902 100644 --- a/simd/i386/jidctflt-3dn.asm +++ b/simd/i386/jidctflt-3dn.asm @@ -65,6 +65,7 @@ PB_CENTERJSAMP times 8 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_float_3dnow) EXTN(jsimd_idct_float_3dnow): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jidctflt-sse.asm b/simd/i386/jidctflt-sse.asm index b27ecfd..ffe54f8 100644 --- a/simd/i386/jidctflt-sse.asm +++ b/simd/i386/jidctflt-sse.asm @@ 
-75,6 +75,7 @@ PB_CENTERJSAMP times 8 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_float_sse) EXTN(jsimd_idct_float_sse): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jidctflt-sse2.asm b/simd/i386/jidctflt-sse2.asm index c646eae..fd1fe35 100644 --- a/simd/i386/jidctflt-sse2.asm +++ b/simd/i386/jidctflt-sse2.asm @@ -75,6 +75,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_float_sse2) EXTN(jsimd_idct_float_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jidctfst-mmx.asm b/simd/i386/jidctfst-mmx.asm index 24622d4..00940b8 100644 --- a/simd/i386/jidctfst-mmx.asm +++ b/simd/i386/jidctfst-mmx.asm @@ -96,6 +96,7 @@ PB_CENTERJSAMP times 8 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_ifast_mmx) EXTN(jsimd_idct_ifast_mmx): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jidctfst-sse2.asm b/simd/i386/jidctfst-sse2.asm index 19704ff..1f4af33 100644 --- a/simd/i386/jidctfst-sse2.asm +++ b/simd/i386/jidctfst-sse2.asm @@ -94,6 +94,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_ifast_sse2) EXTN(jsimd_idct_ifast_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jidctint-avx2.asm b/simd/i386/jidctint-avx2.asm index c371985..23633ae 100644 --- a/simd/i386/jidctint-avx2.asm +++ b/simd/i386/jidctint-avx2.asm @@ -296,6 +296,7 @@ PW_1_NEG1 times 8 dw 1 GLOBAL_FUNCTION(jsimd_idct_islow_avx2) EXTN(jsimd_idct_islow_avx2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jidctint-mmx.asm b/simd/i386/jidctint-mmx.asm index 4f07f56..6cf6153 100644 --- a/simd/i386/jidctint-mmx.asm +++ b/simd/i386/jidctint-mmx.asm @@ -109,6 +109,7 @@ PB_CENTERJSAMP times 8 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_islow_mmx) EXTN(jsimd_idct_islow_mmx): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff 
--git a/simd/i386/jidctint-sse2.asm b/simd/i386/jidctint-sse2.asm index e442fdd..dae5b7d 100644 --- a/simd/i386/jidctint-sse2.asm +++ b/simd/i386/jidctint-sse2.asm @@ -107,6 +107,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_islow_sse2) EXTN(jsimd_idct_islow_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 diff --git a/simd/i386/jidctred-mmx.asm b/simd/i386/jidctred-mmx.asm index e2307e1..cb43106 100644 --- a/simd/i386/jidctred-mmx.asm +++ b/simd/i386/jidctred-mmx.asm @@ -117,6 +117,7 @@ PB_CENTERJSAMP times 8 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_4x4_mmx) EXTN(jsimd_idct_4x4_mmx): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 @@ -504,6 +505,7 @@ EXTN(jsimd_idct_4x4_mmx): GLOBAL_FUNCTION(jsimd_idct_2x2_mmx) EXTN(jsimd_idct_2x2_mmx): + _endbr32 push ebp mov ebp, esp push ebx diff --git a/simd/i386/jidctred-sse2.asm b/simd/i386/jidctred-sse2.asm index 6e56494..2a61b9e 100644 --- a/simd/i386/jidctred-sse2.asm +++ b/simd/i386/jidctred-sse2.asm @@ -115,6 +115,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_4x4_sse2) EXTN(jsimd_idct_4x4_sse2): + _endbr32 push ebp mov eax, esp ; eax = original ebp sub esp, byte 4 @@ -425,6 +426,7 @@ EXTN(jsimd_idct_4x4_sse2): GLOBAL_FUNCTION(jsimd_idct_2x2_sse2) EXTN(jsimd_idct_2x2_sse2): + _endbr32 push ebp mov ebp, esp push ebx diff --git a/simd/i386/jquant-3dn.asm b/simd/i386/jquant-3dn.asm index 5cb60ca..a0599eb 100644 --- a/simd/i386/jquant-3dn.asm +++ b/simd/i386/jquant-3dn.asm @@ -36,6 +36,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_float_3dnow) EXTN(jsimd_convsamp_float_3dnow): + _endbr32 push ebp mov ebp, esp push ebx @@ -138,6 +139,7 @@ EXTN(jsimd_convsamp_float_3dnow): GLOBAL_FUNCTION(jsimd_quantize_float_3dnow) EXTN(jsimd_quantize_float_3dnow): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused diff --git a/simd/i386/jquant-mmx.asm b/simd/i386/jquant-mmx.asm index 61305c6..080021b 100644 --- a/simd/i386/jquant-mmx.asm +++ 
b/simd/i386/jquant-mmx.asm @@ -36,6 +36,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_mmx) EXTN(jsimd_convsamp_mmx): + _endbr32 push ebp mov ebp, esp push ebx @@ -145,6 +146,7 @@ EXTN(jsimd_convsamp_mmx): GLOBAL_FUNCTION(jsimd_quantize_mmx) EXTN(jsimd_quantize_mmx): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused diff --git a/simd/i386/jquant-sse.asm b/simd/i386/jquant-sse.asm index 218adc9..cacd2a9 100644 --- a/simd/i386/jquant-sse.asm +++ b/simd/i386/jquant-sse.asm @@ -36,6 +36,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_float_sse) EXTN(jsimd_convsamp_float_sse): + _endbr32 push ebp mov ebp, esp push ebx @@ -138,6 +139,7 @@ EXTN(jsimd_convsamp_float_sse): GLOBAL_FUNCTION(jsimd_quantize_float_sse) EXTN(jsimd_quantize_float_sse): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused diff --git a/simd/i386/jquantf-sse2.asm b/simd/i386/jquantf-sse2.asm index a881ab5..6f4789c 100644 --- a/simd/i386/jquantf-sse2.asm +++ b/simd/i386/jquantf-sse2.asm @@ -36,6 +36,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_float_sse2) EXTN(jsimd_convsamp_float_sse2): + _endbr32 push ebp mov ebp, esp push ebx @@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_float_sse2): GLOBAL_FUNCTION(jsimd_quantize_float_sse2) EXTN(jsimd_quantize_float_sse2): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused diff --git a/simd/i386/jquanti-avx2.asm b/simd/i386/jquanti-avx2.asm index 5ed6bec..efcddd2 100644 --- a/simd/i386/jquanti-avx2.asm +++ b/simd/i386/jquanti-avx2.asm @@ -37,6 +37,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_avx2) EXTN(jsimd_convsamp_avx2): + _endbr32 push ebp mov ebp, esp push ebx @@ -130,6 +131,7 @@ EXTN(jsimd_convsamp_avx2): GLOBAL_FUNCTION(jsimd_quantize_avx2) EXTN(jsimd_quantize_avx2): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused diff --git a/simd/i386/jquanti-sse2.asm b/simd/i386/jquanti-sse2.asm index 0a50940..98d39e0 100644 --- a/simd/i386/jquanti-sse2.asm +++ b/simd/i386/jquanti-sse2.asm @@ -36,6 +36,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_sse2) EXTN(jsimd_convsamp_sse2): + _endbr32 push ebp mov ebp, esp 
push ebx @@ -121,6 +122,7 @@ EXTN(jsimd_convsamp_sse2): GLOBAL_FUNCTION(jsimd_quantize_sse2) EXTN(jsimd_quantize_sse2): + _endbr32 push ebp mov ebp, esp ; push ebx ; unused diff --git a/simd/nasm/jsimdext.inc b/simd/nasm/jsimdext.inc index 9930d80..c9babc5 100644 --- a/simd/nasm/jsimdext.inc +++ b/simd/nasm/jsimdext.inc @@ -471,6 +471,14 @@ const_base: %endif +%imacro _endbr32 0 + dd 0xfb1e0ff3 ; raw ENDBR32 opcode: dd is stored little-endian, so the emitted bytes are F3 0F 1E FB (presumably hand-encoded so assemblers lacking the mnemonic still build; confirm) +%endmacro + +%imacro _endbr64 0 + dd 0xfa1e0ff3 ; raw ENDBR64 opcode: dd is stored little-endian, so the emitted bytes are F3 0F 1E FA +%endmacro + ; -------------------------------------------------------------------------- ; Defines picked up from the C headers ; diff --git a/simd/x86_64/jccolext-avx2.asm b/simd/x86_64/jccolext-avx2.asm index 10d2834..878fe70 100644 --- a/simd/x86_64/jccolext-avx2.asm +++ b/simd/x86_64/jccolext-avx2.asm @@ -39,6 +39,7 @@ GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2) EXTN(jsimd_rgb_ycc_convert_avx2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jccolext-sse2.asm b/simd/x86_64/jccolext-sse2.asm index 2c914d3..9cc65d0 100644 --- a/simd/x86_64/jccolext-sse2.asm +++ b/simd/x86_64/jccolext-sse2.asm @@ -38,6 +38,7 @@ GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2) EXTN(jsimd_rgb_ycc_convert_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jcgryext-avx2.asm b/simd/x86_64/jcgryext-avx2.asm index 175b60d..ab7dc6d 100644 --- a/simd/x86_64/jcgryext-avx2.asm +++ b/simd/x86_64/jcgryext-avx2.asm @@ -39,6 +39,7 @@ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2) EXTN(jsimd_rgb_gray_convert_avx2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jcgryext-sse2.asm b/simd/x86_64/jcgryext-sse2.asm index 873be80..7e95b80 100644 --- a/simd/x86_64/jcgryext-sse2.asm +++ b/simd/x86_64/jcgryext-sse2.asm @@ -38,6 +38,7 @@ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2) EXTN(jsimd_rgb_gray_convert_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jchuff-sse2.asm 
b/simd/x86_64/jchuff-sse2.asm index aa78fd5..cb82975 100644 --- a/simd/x86_64/jchuff-sse2.asm +++ b/simd/x86_64/jchuff-sse2.asm @@ -184,6 +184,7 @@ EXTN(jconst_huff_encode_one_block): GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2) EXTN(jsimd_huff_encode_one_block_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jcphuff-sse2.asm b/simd/x86_64/jcphuff-sse2.asm index 8ed4472..f316b27 100644 --- a/simd/x86_64/jcphuff-sse2.asm +++ b/simd/x86_64/jcphuff-sse2.asm @@ -281,6 +281,7 @@ GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2) EXTN(jsimd_encode_mcu_AC_first_prepare_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 @@ -449,6 +450,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2): GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2) EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jcsample-avx2.asm b/simd/x86_64/jcsample-avx2.asm index d9922bb..2c7051e 100644 --- a/simd/x86_64/jcsample-avx2.asm +++ b/simd/x86_64/jcsample-avx2.asm @@ -43,6 +43,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2) EXTN(jsimd_h2v1_downsample_avx2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp @@ -205,6 +206,7 @@ EXTN(jsimd_h2v1_downsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2) EXTN(jsimd_h2v2_downsample_avx2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jcsample-sse2.asm b/simd/x86_64/jcsample-sse2.asm index 0f107e9..c6f3537 100644 --- a/simd/x86_64/jcsample-sse2.asm +++ b/simd/x86_64/jcsample-sse2.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2) EXTN(jsimd_h2v1_downsample_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp @@ -187,6 +188,7 @@ EXTN(jsimd_h2v1_downsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2) EXTN(jsimd_h2v2_downsample_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jdcolext-avx2.asm 
b/simd/x86_64/jdcolext-avx2.asm index 677b8ed..c2f0ed9 100644 --- a/simd/x86_64/jdcolext-avx2.asm +++ b/simd/x86_64/jdcolext-avx2.asm @@ -40,6 +40,7 @@ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2) EXTN(jsimd_ycc_rgb_convert_avx2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jdcolext-sse2.asm b/simd/x86_64/jdcolext-sse2.asm index 071aa62..f870222 100644 --- a/simd/x86_64/jdcolext-sse2.asm +++ b/simd/x86_64/jdcolext-sse2.asm @@ -39,6 +39,7 @@ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2) EXTN(jsimd_ycc_rgb_convert_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jdmrgext-avx2.asm b/simd/x86_64/jdmrgext-avx2.asm index bb733c5..6441f83 100644 --- a/simd/x86_64/jdmrgext-avx2.asm +++ b/simd/x86_64/jdmrgext-avx2.asm @@ -40,6 +40,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2) EXTN(jsimd_h2v1_merged_upsample_avx2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 @@ -504,6 +505,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2) EXTN(jsimd_h2v2_merged_upsample_avx2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jdmrgext-sse2.asm b/simd/x86_64/jdmrgext-sse2.asm index b176a4c..9190b82 100644 --- a/simd/x86_64/jdmrgext-sse2.asm +++ b/simd/x86_64/jdmrgext-sse2.asm @@ -39,6 +39,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2) EXTN(jsimd_h2v1_merged_upsample_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 @@ -446,6 +447,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2) EXTN(jsimd_h2v2_merged_upsample_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jdsample-avx2.asm b/simd/x86_64/jdsample-avx2.asm index fc274a9..0edc993 100644 --- a/simd/x86_64/jdsample-avx2.asm +++ b/simd/x86_64/jdsample-avx2.asm @@ -60,6 +60,7 @@ PW_EIGHT times 16 dw 8 
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2) EXTN(jsimd_h2v1_fancy_upsample_avx2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp @@ -214,6 +215,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2) EXTN(jsimd_h2v2_fancy_upsample_avx2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 @@ -523,6 +525,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2) EXTN(jsimd_h2v1_upsample_avx2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp @@ -612,6 +615,7 @@ EXTN(jsimd_h2v1_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2) EXTN(jsimd_h2v2_upsample_avx2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jdsample-sse2.asm b/simd/x86_64/jdsample-sse2.asm index 20e0767..b6bfb61 100644 --- a/simd/x86_64/jdsample-sse2.asm +++ b/simd/x86_64/jdsample-sse2.asm @@ -59,6 +59,7 @@ PW_EIGHT times 8 dw 8 GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2) EXTN(jsimd_h2v1_fancy_upsample_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp @@ -201,6 +202,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2) EXTN(jsimd_h2v2_fancy_upsample_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 @@ -496,6 +498,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2) EXTN(jsimd_h2v1_upsample_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp @@ -583,6 +586,7 @@ EXTN(jsimd_h2v1_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2) EXTN(jsimd_h2v2_upsample_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jfdctflt-sse.asm b/simd/x86_64/jfdctflt-sse.asm index ef27966..90d06eb 100644 --- a/simd/x86_64/jfdctflt-sse.asm +++ b/simd/x86_64/jfdctflt-sse.asm @@ -65,6 +65,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643 GLOBAL_FUNCTION(jsimd_fdct_float_sse) EXTN(jsimd_fdct_float_sse): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git 
a/simd/x86_64/jfdctfst-sse2.asm b/simd/x86_64/jfdctfst-sse2.asm index 2e1bfe6..5a74aaf 100644 --- a/simd/x86_64/jfdctfst-sse2.asm +++ b/simd/x86_64/jfdctfst-sse2.asm @@ -80,6 +80,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2) EXTN(jsimd_fdct_ifast_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jfdctint-avx2.asm b/simd/x86_64/jfdctint-avx2.asm index 6ad4cf0..df0e720 100644 --- a/simd/x86_64/jfdctint-avx2.asm +++ b/simd/x86_64/jfdctint-avx2.asm @@ -260,6 +260,7 @@ PW_1_NEG1 times 8 dw 1 GLOBAL_FUNCTION(jsimd_fdct_islow_avx2) EXTN(jsimd_fdct_islow_avx2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jfdctint-sse2.asm b/simd/x86_64/jfdctint-sse2.asm index 5d0de3c..19c192d 100644 --- a/simd/x86_64/jfdctint-sse2.asm +++ b/simd/x86_64/jfdctint-sse2.asm @@ -101,6 +101,7 @@ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1) GLOBAL_FUNCTION(jsimd_fdct_islow_sse2) EXTN(jsimd_fdct_islow_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jidctflt-sse2.asm b/simd/x86_64/jidctflt-sse2.asm index ab95e1a..bf7d1b4 100644 --- a/simd/x86_64/jidctflt-sse2.asm +++ b/simd/x86_64/jidctflt-sse2.asm @@ -75,6 +75,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_float_sse2) EXTN(jsimd_idct_float_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jidctfst-sse2.asm b/simd/x86_64/jidctfst-sse2.asm index a66a681..0614f19 100644 --- a/simd/x86_64/jidctfst-sse2.asm +++ b/simd/x86_64/jidctfst-sse2.asm @@ -94,6 +94,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_ifast_sse2) EXTN(jsimd_idct_ifast_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jidctint-avx2.asm b/simd/x86_64/jidctint-avx2.asm index 50270f4..bc2dd4c 100644 --- a/simd/x86_64/jidctint-avx2.asm +++ 
b/simd/x86_64/jidctint-avx2.asm @@ -281,6 +281,7 @@ PW_1_NEG1 times 8 dw 1 GLOBAL_FUNCTION(jsimd_idct_islow_avx2) EXTN(jsimd_idct_islow_avx2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp mov rbp, rsp ; rbp = aligned rbp diff --git a/simd/x86_64/jidctint-sse2.asm b/simd/x86_64/jidctint-sse2.asm index 034530c..6212d1d 100644 --- a/simd/x86_64/jidctint-sse2.asm +++ b/simd/x86_64/jidctint-sse2.asm @@ -107,6 +107,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_islow_sse2) EXTN(jsimd_idct_islow_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 diff --git a/simd/x86_64/jidctred-sse2.asm b/simd/x86_64/jidctred-sse2.asm index 7fbfcc5..231e7c5 100644 --- a/simd/x86_64/jidctred-sse2.asm +++ b/simd/x86_64/jidctred-sse2.asm @@ -115,6 +115,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_4x4_sse2) EXTN(jsimd_idct_4x4_sse2): + _endbr64 push rbp mov rax, rsp ; rax = original rbp sub rsp, byte 4 @@ -413,6 +414,7 @@ EXTN(jsimd_idct_4x4_sse2): GLOBAL_FUNCTION(jsimd_idct_2x2_sse2) EXTN(jsimd_idct_2x2_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jquantf-sse2.asm b/simd/x86_64/jquantf-sse2.asm index 83596a9..0f139ed 100644 --- a/simd/x86_64/jquantf-sse2.asm +++ b/simd/x86_64/jquantf-sse2.asm @@ -36,6 +36,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_float_sse2) EXTN(jsimd_convsamp_float_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp @@ -109,6 +110,7 @@ EXTN(jsimd_convsamp_float_sse2): GLOBAL_FUNCTION(jsimd_quantize_float_sse2) EXTN(jsimd_quantize_float_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jquanti-avx2.asm b/simd/x86_64/jquanti-avx2.asm index 5f04d22..7e042f4 100644 --- a/simd/x86_64/jquanti-avx2.asm +++ b/simd/x86_64/jquanti-avx2.asm @@ -37,6 +37,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_avx2) EXTN(jsimd_convsamp_avx2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp @@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_avx2): 
GLOBAL_FUNCTION(jsimd_quantize_avx2) EXTN(jsimd_quantize_avx2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp diff --git a/simd/x86_64/jquanti-sse2.asm b/simd/x86_64/jquanti-sse2.asm index bb6fa69..0a729c7 100644 --- a/simd/x86_64/jquanti-sse2.asm +++ b/simd/x86_64/jquanti-sse2.asm @@ -36,6 +36,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_sse2) EXTN(jsimd_convsamp_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp @@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_sse2): GLOBAL_FUNCTION(jsimd_quantize_sse2) EXTN(jsimd_quantize_sse2): + _endbr64 push rbp mov rax, rsp mov rbp, rsp -- 2.21.1