Add fix for ppc64le code generation issue found after 5.2.0 was released

This commit is contained in:
Richard W.M. Jones 2024-06-19 08:01:03 +01:00
parent 7f5edb92c0
commit 13ff7d0244
8 changed files with 360 additions and 6 deletions

View File

@ -1,7 +1,7 @@
From 5538fa66e94fad3d2b4f110d23bef3b4d2d6342c Mon Sep 17 00:00:00 2001
From: Florian Angeletti <florian.angeletti@inria.fr>
Date: Mon, 13 May 2024 11:39:37 +0200
Subject: [PATCH 1/5] Changes: synchronisation and consistency with trunk
Subject: [PATCH 1/7] Changes: synchronisation and consistency with trunk
---
Changes | 25 ++++++++++++++-----------

View File

@ -1,7 +1,7 @@
From 7a20c9322f827923baa6a9907998f670463ce447 Mon Sep 17 00:00:00 2001
From: Florian Angeletti <florian.angeletti@inria.fr>
Date: Mon, 13 May 2024 14:28:08 +0200
Subject: [PATCH 2/5] Changes copy-editing
Subject: [PATCH 2/7] Changes copy-editing
---
Changes | 398 ++++++++++++++++++++++++++++----------------------------

View File

@ -1,7 +1,7 @@
From 507a1382cb82160c2a6cfc0ea5bcb3e33ece7307 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 24 Jun 2014 10:00:15 +0100
Subject: [PATCH 3/5] Don't add rpaths to libraries.
Subject: [PATCH 3/7] Don't add rpaths to libraries.
---
configure.ac | 2 --

View File

@ -1,7 +1,7 @@
From edd903fc73b98eb784b307a47110985967cb1d09 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 29 May 2012 20:44:18 +0100
Subject: [PATCH 4/5] configure: Allow user defined C compiler flags.
Subject: [PATCH 4/7] configure: Allow user defined C compiler flags.
---
configure.ac | 8 ++++++--

View File

@ -1,7 +1,7 @@
From acdc441ff1acb5390467e649bc9a9bfddd7df774 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Thu, 9 May 2024 10:03:23 +0200
Subject: [PATCH 5/5] flambda: Improve transitive closure in
Subject: [PATCH 5/7] flambda: Improve transitive closure in
invariant_params_in_recursion (#13150)
The old implementation did not really exploit the sparseness of the

View File

@ -0,0 +1,165 @@
From 8637cac022907501c3c0d941e07e436b70c9d4ac Mon Sep 17 00:00:00 2001
From: Miod Vallat <118974489+dustanddreams@users.noreply.github.com>
Date: Thu, 30 May 2024 09:57:41 +0000
Subject: [PATCH 6/7] Reload exception pointer register in caml_c_call*
The invoked code may end up causing caml_try_realloc_stack() to be invoked,
which in turn may replace the stack TRAP_PTR points to, leading to
either crashes with the debug runtime (thanks to the old stack contents
being overwritten) or all kinds of memory or control flow corruption otherwise.
Added test for stack reallocation in callback followed by exception raising.
(cherry picked from commit 6964d3a90f84402ed6066fb1821679435e063067)
(cherry picked from commit 1e8a91d305f1fa4668444fb1cce97952dbc39810)
---
Changes | 9 +++++++++
runtime/arm64.S | 6 ++++--
runtime/power.S | 6 ++++--
runtime/riscv.S | 6 ++++--
runtime/s390x.S | 5 +++--
testsuite/tests/callback/test1.ml | 5 +++++
6 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/Changes b/Changes
index d26512067d..53bb5369b9 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,12 @@
+OCaml 5.2 maintenance version
+-----------------------------
+
+- #13207: Be sure to reload the register caching the exception handler in
+ caml_c_call and caml_c_call_stack_args, as its value may have been changed
+ if the OCaml stack is expanded during a callback.
+ (Miod Vallat, report by Vesa Karvonen, review by Gabriel Scherer and
+ Xavier Leroy)
+
OCaml 5.2.0 (13 May 2024)
-------------------------
diff --git a/runtime/arm64.S b/runtime/arm64.S
index e71f25ebba..6c6495a0a8 100644
--- a/runtime/arm64.S
+++ b/runtime/arm64.S
@@ -569,8 +569,9 @@ FUNCTION(caml_c_call)
str TRAP_PTR, Caml_state(exn_handler)
/* Call the function */
blr ADDITIONAL_ARG
- /* Reload alloc ptr */
+ /* Reload new allocation pointer & exn handler */
ldr ALLOC_PTR, Caml_state(young_ptr)
+ ldr TRAP_PTR, Caml_state(exn_handler)
/* Load ocaml stack */
SWITCH_C_TO_OCAML
#if defined(WITH_THREAD_SANITIZER)
@@ -625,8 +626,9 @@ FUNCTION(caml_c_call_stack_args)
blr ADDITIONAL_ARG
/* Restore stack */
mov sp, x19
- /* Reload alloc ptr */
+ /* Reload new allocation pointer & exn handler */
ldr ALLOC_PTR, Caml_state(young_ptr)
+ ldr TRAP_PTR, Caml_state(exn_handler)
/* Switch from C to OCaml */
SWITCH_C_TO_OCAML
/* Return */
diff --git a/runtime/power.S b/runtime/power.S
index bfb37fa989..257678100e 100644
--- a/runtime/power.S
+++ b/runtime/power.S
@@ -445,8 +445,9 @@ FUNCTION caml_c_call
mr 2, C_CALL_TOC /* restore current TOC */
/* Restore return address (in register C_CALL_RET_ADDR, preserved by C) */
mtlr C_CALL_RET_ADDR
- /* Reload allocation pointer*/
+ /* Reload new allocation pointer and exception pointer */
ld ALLOC_PTR, Caml_state(young_ptr)
+ ld TRAP_PTR, Caml_state(exn_handler)
#if defined(WITH_THREAD_SANITIZER)
TSAN_SETUP_C_CALL 16
/* Save return value registers. Since the called function could be anything,
@@ -497,8 +498,9 @@ FUNCTION caml_c_call_stack_args
add SP, SP, STACK_ARG_BYTES
/* Restore return address (in register C_CALL_RET_ADDR, preserved by C) */
mtlr C_CALL_RET_ADDR
- /* Reload allocation pointer*/
+ /* Reload new allocation pointer and exception pointer */
ld ALLOC_PTR, Caml_state(young_ptr)
+ ld TRAP_PTR, Caml_state(exn_handler)
/* Switch from C to OCaml */
SWITCH_C_TO_OCAML
/* Return to caller */
diff --git a/runtime/riscv.S b/runtime/riscv.S
index a2eca7a315..8934db0bb3 100644
--- a/runtime/riscv.S
+++ b/runtime/riscv.S
@@ -516,8 +516,9 @@ L(caml_c_call):
sd TRAP_PTR, Caml_state(exn_handler)
/* Call the function */
jalr ADDITIONAL_ARG
- /* Reload alloc ptr */
+ /* Reload new allocation pointer & exn handler */
ld ALLOC_PTR, Caml_state(young_ptr)
+ ld TRAP_PTR, Caml_state(exn_handler)
/* Load ocaml stack */
SWITCH_C_TO_OCAML
#if defined(WITH_THREAD_SANITIZER)
@@ -575,8 +576,9 @@ FUNCTION(caml_c_call_stack_args)
jalr ADDITIONAL_ARG
/* Restore stack */
mv sp, s2
- /* Reload alloc ptr */
+ /* Reload new allocation pointer & exn handler */
ld ALLOC_PTR, Caml_state(young_ptr)
+ ld TRAP_PTR, Caml_state(exn_handler)
/* Switch from C to OCaml */
SWITCH_C_TO_OCAML
/* Return */
diff --git a/runtime/s390x.S b/runtime/s390x.S
index b59822ce57..113831a376 100644
--- a/runtime/s390x.S
+++ b/runtime/s390x.S
@@ -515,7 +515,7 @@ LBL(caml_c_call):
#endif
basr %r14, ADDITIONAL_ARG
CLEANUP_AFTER_C_CALL
- /* Reload alloc ptr */
+ /* Reload new allocation pointer & exn handler */
lg ALLOC_PTR, Caml_state(young_ptr)
lg TRAP_PTR, Caml_state(exn_handler)
/* Load ocaml stack and restore global variables */
@@ -584,8 +584,9 @@ LBL(106):
CLEANUP_AFTER_C_CALL
/* Restore stack */
lgr %r15, %r12
- /* Reload alloc ptr */
+ /* Reload new allocation pointer & exn handler */
lg ALLOC_PTR, Caml_state(young_ptr)
+ lg TRAP_PTR, Caml_state(exn_handler)
/* Switch from C to OCaml */
SWITCH_C_TO_OCAML
/* Return */
diff --git a/testsuite/tests/callback/test1.ml b/testsuite/tests/callback/test1.ml
index c39be0c586..f6ad4356cf 100644
--- a/testsuite/tests/callback/test1.ml
+++ b/testsuite/tests/callback/test1.ml
@@ -11,6 +11,9 @@ external mycallback3 : ('a -> 'b -> 'c -> 'd) -> 'a -> 'b -> 'c -> 'd
external mycallback4 :
('a -> 'b -> 'c -> 'd -> 'e) -> 'a -> 'b -> 'c -> 'd -> 'e = "mycallback4"
+let rec growstack n =
+ if n <= 0 then 0 else 1 + growstack (n - 1)
+
let rec tak (x, y, z as _tuple) =
if x > y then tak(tak (x-1, y, z), tak (y-1, z, x), tak (z-1, x, y))
else z
@@ -46,3 +49,5 @@ let _ =
print_int(trapexit ()); print_newline();
print_string(tripwire mypushroot); print_newline();
print_string(tripwire mycamlparam); print_newline();
+ begin try ignore (mycallback1 growstack 1_000); raise Exit
+ with Exit -> () end
--
2.44.0

View File

@ -0,0 +1,178 @@
From 4eb80b13779125fcd76a445ab0004ca064fab634 Mon Sep 17 00:00:00 2001
From: Miod Vallat <miod@tarides.com>
Date: Fri, 7 Jun 2024 06:19:45 +0000
Subject: [PATCH 7/7] Compute more accurate instruction sizes for branch
relaxation.
(cherry picked from commit 114ddae2d4c85391a4f939dc6623424ae35a07aa)
---
Changes | 4 ++
asmcomp/power/emit.mlp | 87 ++++++++++++++++++++++++------------------
2 files changed, 53 insertions(+), 38 deletions(-)
diff --git a/Changes b/Changes
index 53bb5369b9..1a81509247 100644
--- a/Changes
+++ b/Changes
@@ -7,6 +7,10 @@ OCaml 5.2 maintenance version
(Miod Vallat, report by Vesa Karvonen, review by Gabriel Scherer and
Xavier Leroy)
+- #13221: Compute more accurate instruction sizes for branch relocation on
+ POWER.
+ (Miod Vallat, review by Gabriel Scherer)
+
OCaml 5.2.0 (13 May 2024)
-------------------------
diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp
index 47f5419a92..fdf22996fa 100644
--- a/asmcomp/power/emit.mlp
+++ b/asmcomp/power/emit.mlp
@@ -177,6 +177,28 @@ let emit_tocload emit_dest dest entry =
(* Output a load or store operation *)
+let load_mnemonic = function
+ | Byte_unsigned -> "lbz"
+ | Byte_signed -> "lbz"
+ | Sixteen_unsigned -> "lhz"
+ | Sixteen_signed -> "lha"
+ | Thirtytwo_unsigned -> "lwz"
+ | Thirtytwo_signed -> "lwa"
+ | Word_int | Word_val -> "ld"
+ | Single -> "lfs"
+ | Double -> "lfd"
+
+let store_mnemonic = function
+ | Byte_unsigned | Byte_signed -> "stb"
+ | Sixteen_unsigned | Sixteen_signed -> "sth"
+ | Thirtytwo_unsigned | Thirtytwo_signed -> "stw"
+ | Word_int | Word_val -> "std"
+ | Single -> "stfs"
+ | Double -> "stfd"
+
+let store_needs_lwsync chunk assignment =
+ assignment && (chunk = Word_int || chunk = Word_val)
+
let valid_offset instr ofs =
ofs land 3 = 0 || (instr <> "ld" && instr <> "std" && instr <> "lwa")
@@ -383,11 +405,17 @@ module BR = Branch_relaxation.Make (struct
let tocload_size = 2
- let load_store_size = function
+ let load_store_size instr = function
| Ibased(_s, d) ->
- let (_lo, hi) = low_high_s d in
- tocload_size + (if hi = 0 then 1 else 2)
- | Iindexed ofs -> if is_immediate ofs then 1 else 3
+ let (lo, hi) = low_high_s d in
+ tocload_size +
+ (if hi <> 0 then 1 else 0) +
+ (if valid_offset instr lo then 1 else 2)
+ | Iindexed ofs ->
+ if is_immediate ofs && valid_offset instr ofs then 1 else begin
+ let (lo, _hi) = low_high_u ofs in
+ if lo <> 0 then 3 else 2
+ end
| Iindexed2 -> 1
let instr_size f = function
@@ -415,16 +443,16 @@ module BR = Branch_relaxation.Make (struct
else if alloc then tocload_size + 2
else 5
| Lop(Istackoffset _) -> 1
- | Lop(Iload {memory_chunk; addressing_mode; _ }) ->
- if memory_chunk = Byte_signed
- then load_store_size addressing_mode + 1
- else load_store_size addressing_mode
+ | Lop(Iload {memory_chunk; addressing_mode; is_atomic }) ->
+ let loadinstr = load_mnemonic memory_chunk in
+ (if is_atomic then 4 else 0) +
+ (if memory_chunk = Byte_signed then 1 else 0) +
+ load_store_size loadinstr addressing_mode
| Lop(Istore(chunk, addr, assignment)) ->
- (match chunk with
- | Single -> 1
- | Word_int | Word_val when assignment -> 1
- | _ -> 0)
- + load_store_size addr
+ let storeinstr = store_mnemonic chunk in
+ (if chunk = Single then 1 else 0) +
+ (if store_needs_lwsync chunk assignment then 1 else 0) +
+ load_store_size storeinstr addr
| Lop(Ialloc _) -> 5
| Lop(Ispecific(Ialloc_far _)) -> 6
| Lop(Ipoll { return_label = Some(_) }) -> 5
@@ -442,12 +470,12 @@ module BR = Branch_relaxation.Make (struct
| Lop(Ispecific(Icheckbound_imm_far _)) -> 3
| Lop(Iintop_imm _) -> 1
| Lop(Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf) -> 1
- | Lop(Ifloatofint) -> 9
- | Lop(Iintoffloat) -> 4
+ | Lop(Ifloatofint) -> 3
+ | Lop(Iintoffloat) -> 3
| Lop(Iopaque) -> 0
| Lop(Ispecific _) -> 1
- | Lop (Idls_get) -> 1
- | Lop (Ireturn_addr) -> 1
+ | Lop(Idls_get) -> 1
+ | Lop(Ireturn_addr) -> 1
| Lreloadretaddr -> 2
| Lreturn -> 2
| Llabel _ -> 0
@@ -457,7 +485,7 @@ module BR = Branch_relaxation.Make (struct
1 + (if lbl0 = None then 0 else 1)
+ (if lbl1 = None then 0 else 1)
+ (if lbl2 = None then 0 else 1)
- | Lswitch _ -> 5 + tocload_size
+ | Lswitch _ -> 7 + tocload_size
| Lentertrap -> 1
| Ladjust_trap_depth _ -> 0
| Lpushtrap _ -> 4 + tocload_size
@@ -705,17 +733,7 @@ let emit_instr env i =
` addi 1, 1, {emit_int (-n)}\n`;
adjust_stack_offset env n
| Lop(Iload { memory_chunk; addressing_mode; is_atomic }) ->
- let loadinstr =
- match memory_chunk with
- | Byte_unsigned -> "lbz"
- | Byte_signed -> "lbz"
- | Sixteen_unsigned -> "lhz"
- | Sixteen_signed -> "lha"
- | Thirtytwo_unsigned -> "lwz"
- | Thirtytwo_signed -> "lwa"
- | Word_int | Word_val -> "ld"
- | Single -> "lfs"
- | Double -> "lfd" in
+ let loadinstr = load_mnemonic memory_chunk in
if is_atomic then
` sync\n`;
emit_load_store loadinstr addressing_mode i.arg 0 i.res.(0);
@@ -731,19 +749,12 @@ let emit_instr env i =
` frsp {emit_reg tmp}, {emit_reg i.arg.(0)}\n`;
emit_load_store "stfs" addr i.arg 1 tmp
| Lop(Istore(chunk, addr, assignment)) ->
- let storeinstr =
- match chunk with
- | Byte_unsigned | Byte_signed -> "stb"
- | Sixteen_unsigned | Sixteen_signed -> "sth"
- | Thirtytwo_unsigned | Thirtytwo_signed -> "stw"
- | Word_int | Word_val -> "std"
- | Single -> assert false
- | Double -> "stfd" in
+ let storeinstr = store_mnemonic chunk in
(* Non-initializing stores need a memory barrier to follow the
Multicore OCaml memory model. Stores of size other than
Word_int and Word_val do not follow the memory model and therefore
do not need a barrier *)
- if assignment && (chunk = Word_int || chunk = Word_val) then
+ if store_needs_lwsync chunk assignment then
` lwsync\n`;
emit_load_store storeinstr addr i.arg 1 i.arg.(0)
| Lop(Ialloc { bytes; dbginfo }) ->
--
2.44.0

View File

@ -44,7 +44,7 @@ ExcludeArch: %{ix86}
Name: ocaml
Version: 5.2.0
Release: 1%{?dist}
Release: 2%{?dist}
Summary: OCaml compiler and programming environment
@ -86,6 +86,14 @@ Patch: 0004-configure-Allow-user-defined-C-compiler-flags.patch
# https://github.com/ocaml/ocaml/pull/13150
Patch: 0005-flambda-Improve-transitive-closure-in-invariant_para.patch
# Upstream after 5.2.0:
Patch: 0006-Reload-exception-pointer-register-in-caml_c_call.patch
# Fix for ppc64le code generation issue found after 5.2.0 was released.
# https://github.com/ocaml/ocaml/issues/13220
# https://github.com/ocaml/ocaml/commit/114ddae2d4c85391a4f939dc6623424ae35a07aa
Patch: 0007-Compute-more-accurate-instruction-sizes-for-branch-r.patch
BuildRequires: make
BuildRequires: git
BuildRequires: gcc
@ -472,6 +480,9 @@ hardlink -t $RPM_BUILD_ROOT%{_libdir}/ocaml/stublibs
%changelog
* Wed Jun 19 2024 Richard W.M. Jones <rjones@redhat.com> - 5.2.0-2
- Add fix for ppc64le code generation issue found after 5.2.0 was released
* Thu May 23 2024 Jerry James <loganjerry@gmail.com> - 5.2.0-1
- New upstream version 5.2.0 (RHBZ#2269805)
- Drop upstreamed frame pointer and s390x patches