ocaml/0007-Compute-more-accurate-instruction-sizes-for-branch-r.patch

179 lines
6.3 KiB
Diff

From 4eb80b13779125fcd76a445ab0004ca064fab634 Mon Sep 17 00:00:00 2001
From: Miod Vallat <miod@tarides.com>
Date: Fri, 7 Jun 2024 06:19:45 +0000
Subject: [PATCH 7/7] Compute more accurate instruction sizes for branch
relaxation.
(cherry picked from commit 114ddae2d4c85391a4f939dc6623424ae35a07aa)
---
Changes | 4 ++
asmcomp/power/emit.mlp | 87 ++++++++++++++++++++++++------------------
2 files changed, 53 insertions(+), 38 deletions(-)
diff --git a/Changes b/Changes
index 53bb5369b9..1a81509247 100644
--- a/Changes
+++ b/Changes
@@ -7,6 +7,10 @@ OCaml 5.2 maintenance version
(Miod Vallat, report by Vesa Karvonen, review by Gabriel Scherer and
Xavier Leroy)
+- #13221: Compute more accurate instruction sizes for branch relocation on
+ POWER.
+ (Miod Vallat, review by Gabriel Scherer)
+
OCaml 5.2.0 (13 May 2024)
-------------------------
diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp
index 47f5419a92..fdf22996fa 100644
--- a/asmcomp/power/emit.mlp
+++ b/asmcomp/power/emit.mlp
@@ -177,6 +177,28 @@ let emit_tocload emit_dest dest entry =
(* Output a load or store operation *)
+let load_mnemonic = function
+ | Byte_unsigned -> "lbz"
+ | Byte_signed -> "lbz"
+ | Sixteen_unsigned -> "lhz"
+ | Sixteen_signed -> "lha"
+ | Thirtytwo_unsigned -> "lwz"
+ | Thirtytwo_signed -> "lwa"
+ | Word_int | Word_val -> "ld"
+ | Single -> "lfs"
+ | Double -> "lfd"
+
+let store_mnemonic = function
+ | Byte_unsigned | Byte_signed -> "stb"
+ | Sixteen_unsigned | Sixteen_signed -> "sth"
+ | Thirtytwo_unsigned | Thirtytwo_signed -> "stw"
+ | Word_int | Word_val -> "std"
+ | Single -> "stfs"
+ | Double -> "stfd"
+
+let store_needs_lwsync chunk assignment =
+ assignment && (chunk = Word_int || chunk = Word_val)
+
let valid_offset instr ofs =
ofs land 3 = 0 || (instr <> "ld" && instr <> "std" && instr <> "lwa")
@@ -383,11 +405,17 @@ module BR = Branch_relaxation.Make (struct
let tocload_size = 2
- let load_store_size = function
+ let load_store_size instr = function
| Ibased(_s, d) ->
- let (_lo, hi) = low_high_s d in
- tocload_size + (if hi = 0 then 1 else 2)
- | Iindexed ofs -> if is_immediate ofs then 1 else 3
+ let (lo, hi) = low_high_s d in
+ tocload_size +
+ (if hi <> 0 then 1 else 0) +
+ (if valid_offset instr lo then 1 else 2)
+ | Iindexed ofs ->
+ if is_immediate ofs && valid_offset instr ofs then 1 else begin
+ let (lo, _hi) = low_high_u ofs in
+ if lo <> 0 then 3 else 2
+ end
| Iindexed2 -> 1
let instr_size f = function
@@ -415,16 +443,16 @@ module BR = Branch_relaxation.Make (struct
else if alloc then tocload_size + 2
else 5
| Lop(Istackoffset _) -> 1
- | Lop(Iload {memory_chunk; addressing_mode; _ }) ->
- if memory_chunk = Byte_signed
- then load_store_size addressing_mode + 1
- else load_store_size addressing_mode
+ | Lop(Iload {memory_chunk; addressing_mode; is_atomic }) ->
+ let loadinstr = load_mnemonic memory_chunk in
+ (if is_atomic then 4 else 0) +
+ (if memory_chunk = Byte_signed then 1 else 0) +
+ load_store_size loadinstr addressing_mode
| Lop(Istore(chunk, addr, assignment)) ->
- (match chunk with
- | Single -> 1
- | Word_int | Word_val when assignment -> 1
- | _ -> 0)
- + load_store_size addr
+ let storeinstr = store_mnemonic chunk in
+ (if chunk = Single then 1 else 0) +
+ (if store_needs_lwsync chunk assignment then 1 else 0) +
+ load_store_size storeinstr addr
| Lop(Ialloc _) -> 5
| Lop(Ispecific(Ialloc_far _)) -> 6
| Lop(Ipoll { return_label = Some(_) }) -> 5
@@ -442,12 +470,12 @@ module BR = Branch_relaxation.Make (struct
| Lop(Ispecific(Icheckbound_imm_far _)) -> 3
| Lop(Iintop_imm _) -> 1
| Lop(Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf) -> 1
- | Lop(Ifloatofint) -> 9
- | Lop(Iintoffloat) -> 4
+ | Lop(Ifloatofint) -> 3
+ | Lop(Iintoffloat) -> 3
| Lop(Iopaque) -> 0
| Lop(Ispecific _) -> 1
- | Lop (Idls_get) -> 1
- | Lop (Ireturn_addr) -> 1
+ | Lop(Idls_get) -> 1
+ | Lop(Ireturn_addr) -> 1
| Lreloadretaddr -> 2
| Lreturn -> 2
| Llabel _ -> 0
@@ -457,7 +485,7 @@ module BR = Branch_relaxation.Make (struct
1 + (if lbl0 = None then 0 else 1)
+ (if lbl1 = None then 0 else 1)
+ (if lbl2 = None then 0 else 1)
- | Lswitch _ -> 5 + tocload_size
+ | Lswitch _ -> 7 + tocload_size
| Lentertrap -> 1
| Ladjust_trap_depth _ -> 0
| Lpushtrap _ -> 4 + tocload_size
@@ -705,17 +733,7 @@ let emit_instr env i =
` addi 1, 1, {emit_int (-n)}\n`;
adjust_stack_offset env n
| Lop(Iload { memory_chunk; addressing_mode; is_atomic }) ->
- let loadinstr =
- match memory_chunk with
- | Byte_unsigned -> "lbz"
- | Byte_signed -> "lbz"
- | Sixteen_unsigned -> "lhz"
- | Sixteen_signed -> "lha"
- | Thirtytwo_unsigned -> "lwz"
- | Thirtytwo_signed -> "lwa"
- | Word_int | Word_val -> "ld"
- | Single -> "lfs"
- | Double -> "lfd" in
+ let loadinstr = load_mnemonic memory_chunk in
if is_atomic then
` sync\n`;
emit_load_store loadinstr addressing_mode i.arg 0 i.res.(0);
@@ -731,19 +749,12 @@ let emit_instr env i =
` frsp {emit_reg tmp}, {emit_reg i.arg.(0)}\n`;
emit_load_store "stfs" addr i.arg 1 tmp
| Lop(Istore(chunk, addr, assignment)) ->
- let storeinstr =
- match chunk with
- | Byte_unsigned | Byte_signed -> "stb"
- | Sixteen_unsigned | Sixteen_signed -> "sth"
- | Thirtytwo_unsigned | Thirtytwo_signed -> "stw"
- | Word_int | Word_val -> "std"
- | Single -> assert false
- | Double -> "stfd" in
+ let storeinstr = store_mnemonic chunk in
(* Non-initializing stores need a memory barrier to follow the
Multicore OCaml memory model. Stores of size other than
Word_int and Word_val do not follow the memory model and therefore
do not need a barrier *)
- if assignment && (chunk = Word_int || chunk = Word_val) then
+ if store_needs_lwsync chunk assignment then
` lwsync\n`;
emit_load_store storeinstr addr i.arg 1 i.arg.(0)
| Lop(Ialloc { bytes; dbginfo }) ->
--
2.44.0