ocaml/0007-New-ARM-backend-written-by-Benedikt-Meurer-PR-5433.patch

3108 lines
116 KiB
Diff
Raw Normal View History

From 22fadc3ed91cb380f7303e8a83ff5806d4576cb5 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 29 May 2012 20:50:42 +0100
Subject: [PATCH] New ARM backend, written by Benedikt Meurer (PR#5433).
Backported from upstream sources to 3.12.1 by RWMJ.
Includes svn rev 12548 to fix invalid generation of Thumb-2 branch
instruction TBH (upstream PR#5623, RHBZ#821153).
---
asmcomp/amd64/selection.ml | 14 +-
asmcomp/arm/arch.ml | 152 +++++++-
asmcomp/arm/emit.mlp | 857 ++++++++++++++++++++++++++++--------------
asmcomp/arm/proc.ml | 185 ++++++---
asmcomp/arm/reload.ml | 4 +-
asmcomp/arm/scheduling.ml | 80 ++--
asmcomp/arm/selection.ml | 343 ++++++++++-------
asmcomp/i386/selection.ml | 14 +-
asmcomp/power/selection.ml | 2 +-
asmcomp/power64/selection.ml | 2 +-
asmcomp/selectgen.ml | 13 +-
asmcomp/selectgen.mli | 2 +-
asmcomp/sparc/selection.ml | 2 +-
asmrun/arm.S | 544 ++++++++++++++++-----------
asmrun/signals_osdep.h | 2 +-
configure | 11 +-
16 files changed, 1485 insertions(+), 742 deletions(-)
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index f0546cf..5d9f6fa 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -121,7 +121,7 @@ method is_immediate n = n <= 0x7FFFFFFF && n >= -0x80000000
method is_immediate_natint n = n <= 0x7FFFFFFFn && n >= -0x80000000n
-method select_addressing exp =
+method select_addressing chunk exp =
let (a, d) = select_addr exp in
(* PR#4625: displacement must be a signed 32-bit immediate *)
if d < -0x8000_0000 || d > 0x7FFF_FFFF
@@ -157,7 +157,7 @@ method! select_operation op args =
match op with
(* Recognize the LEA instruction *)
Caddi | Cadda | Csubi | Csuba ->
- begin match self#select_addressing (Cop(op, args)) with
+ begin match self#select_addressing Word (Cop(op, args)) with
(Iindexed d, _) -> super#select_operation op args
| (Iindexed2 0, _) -> super#select_operation op args
| (addr, arg) -> (Ispecific(Ilea addr), [arg])
@@ -191,7 +191,7 @@ method! select_operation op args =
begin match args with
[loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])]
when loc = loc' && self#is_immediate n ->
- let (addr, arg) = self#select_addressing loc in
+ let (addr, arg) = self#select_addressing Word loc in
(Ispecific(Ioffset_loc(n, addr)), [arg])
| _ ->
super#select_operation op args
@@ -202,12 +202,12 @@ method! select_operation op args =
method select_floatarith commutative regular_op mem_op args =
match args with
- [arg1; Cop(Cload (Double|Double_u), [loc2])] ->
- let (addr, arg2) = self#select_addressing loc2 in
+ [arg1; Cop(Cload (Double|Double_u as chunk), [loc2])] ->
+ let (addr, arg2) = self#select_addressing chunk loc2 in
(Ispecific(Ifloatarithmem(mem_op, addr)),
[arg1; arg2])
- | [Cop(Cload (Double|Double_u), [loc1]); arg2] when commutative ->
- let (addr, arg1) = self#select_addressing loc1 in
+ | [Cop(Cload (Double|Double_u as chunk), [loc1]); arg2] when commutative ->
+ let (addr, arg1) = self#select_addressing chunk loc1 in
(Ispecific(Ifloatarithmem(mem_op, addr)),
[arg2; arg1])
| [arg1; arg2] ->
diff --git a/asmcomp/arm/arch.ml b/asmcomp/arm/arch.ml
index 998fa4b..c4aca8d 100644
--- a/asmcomp/arm/arch.ml
+++ b/asmcomp/arm/arch.ml
@@ -1,25 +1,98 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: arch.ml 5304 2002-11-29 15:03:37Z xleroy $ *)
+(* $Id$ *)
(* Specific operations for the ARM processor *)
open Misc
open Format
+type abi = EABI | EABI_VFP
+type arch = ARMv4 | ARMv5 | ARMv5TE | ARMv6 | ARMv6T2 | ARMv7
+type fpu = Soft | VFPv3_D16 | VFPv3
+
+let abi =
+ match Config.system with
+ "linux_eabi" -> EABI
+ | "linux_eabihf" -> EABI_VFP
+ | _ -> assert false
+
+let string_of_arch = function
+ ARMv4 -> "armv4"
+ | ARMv5 -> "armv5"
+ | ARMv5TE -> "armv5te"
+ | ARMv6 -> "armv6"
+ | ARMv6T2 -> "armv6t2"
+ | ARMv7 -> "armv7"
+
+let string_of_fpu = function
+ Soft -> "soft"
+ | VFPv3_D16 -> "vfpv3-d16"
+ | VFPv3 -> "vfpv3"
+
(* Machine-specific command-line options *)
-let command_line_options = []
+let (arch, fpu, thumb) =
+ let (def_arch, def_fpu, def_thumb) =
+ begin match abi, Config.model with
+ (* Defaults for architecture, FPU and Thumb *)
+ EABI, "armv5" -> ARMv5, Soft, false
+ | EABI, "armv5te" -> ARMv5TE, Soft, false
+ | EABI, "armv6" -> ARMv6, Soft, false
+ | EABI, "armv6t2" -> ARMv6T2, Soft, false
+ | EABI, "armv7" -> ARMv7, Soft, false
+ | EABI, _ -> ARMv4, Soft, false
+ | EABI_VFP, _ -> ARMv7, VFPv3_D16, true
+ end in
+ (ref def_arch, ref def_fpu, ref def_thumb)
+
+let pic_code = ref false
+
+let farch spec =
+ arch := (match spec with
+ "armv4" when abi <> EABI_VFP -> ARMv4
+ | "armv5" when abi <> EABI_VFP -> ARMv5
+ | "armv5te" when abi <> EABI_VFP -> ARMv5TE
+ | "armv6" when abi <> EABI_VFP -> ARMv6
+ | "armv6t2" when abi <> EABI_VFP -> ARMv6T2
+ | "armv7" -> ARMv7
+ | spec -> raise (Arg.Bad spec))
+
+let ffpu spec =
+ fpu := (match spec with
+ "soft" when abi <> EABI_VFP -> Soft
+ | "vfpv3-d16" when abi = EABI_VFP -> VFPv3_D16
+ | "vfpv3" when abi = EABI_VFP -> VFPv3
+ | spec -> raise (Arg.Bad spec))
+
+let command_line_options =
+ [ "-farch", Arg.String farch,
+ "<arch> Select the ARM target architecture"
+ ^ " (default: " ^ (string_of_arch !arch) ^ ")";
+ "-ffpu", Arg.String ffpu,
+ "<fpu> Select the floating-point hardware"
+ ^ " (default: " ^ (string_of_fpu !fpu) ^ ")";
+ "-fPIC", Arg.Set pic_code,
+ " Generate position-independent machine code";
+ "-fno-PIC", Arg.Clear pic_code,
+ " Generate position-dependent machine code";
+ "-fthumb", Arg.Set thumb,
+ " Enable Thumb/Thumb-2 code generation"
+ ^ (if !thumb then " (default)" else "");
+ "-fno-thumb", Arg.Clear thumb,
+ " Disable Thumb/Thumb-2 code generation"
+ ^ (if not !thumb then " (default" else "")]
(* Addressing modes *)
@@ -37,6 +110,14 @@ type specific_operation =
Ishiftarith of arith_operation * int
| Ishiftcheckbound of int
| Irevsubimm of int
+ | Imuladd (* multiply and add *)
+ | Imulsub (* multiply and subtract *)
+ | Inegmulf (* floating-point negate and multiply *)
+ | Imuladdf (* floating-point multiply and add *)
+ | Inegmuladdf (* floating-point negate, multiply and add *)
+ | Imulsubf (* floating-point multiply and subtract *)
+ | Inegmulsubf (* floating-point negate, multiply and subtract *)
+ | Isqrtf (* floating-point square root *)
and arith_operation =
Ishiftadd
@@ -51,6 +132,10 @@ let size_addr = 4
let size_int = 4
let size_float = 8
+(* Behavior of division *)
+
+let division_crashes_on_overflow = false
+
(* Operations on addressing modes *)
let identity_addressing = Iindexed 0
@@ -84,3 +169,56 @@ let print_specific_operation printreg op ppf arg =
fprintf ppf "check %a >> %i > %a" printreg arg.(0) n printreg arg.(1)
| Irevsubimm n ->
fprintf ppf "%i %s %a" n "-" printreg arg.(0)
+ | Imuladd ->
+ fprintf ppf "(%a * %a) + %a"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Imulsub ->
+ fprintf ppf "-(%a * %a) + %a"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Inegmulf ->
+ fprintf ppf "-f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ | Imuladdf ->
+ fprintf ppf "%a +f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Inegmuladdf ->
+ fprintf ppf "%a -f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Imulsubf ->
+ fprintf ppf "(-f %a) +f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Inegmulsubf ->
+ fprintf ppf "(-f %a) -f (%a *f %a)"
+ printreg arg.(0)
+ printreg arg.(1)
+ printreg arg.(2)
+ | Isqrtf ->
+ fprintf ppf "sqrtf %a"
+ printreg arg.(0)
+
+(* Recognize immediate operands *)
+
+(* Immediate operands are 8-bit immediate values, zero-extended,
+ and rotated right by 0 ... 30 bits.
+ In Thumb/Thumb-2 mode we utilize 26 ... 30. *)
+
+let is_immediate n =
+ let n = ref n in
+ let s = ref 0 in
+ let m = if !thumb then 24 else 30 in
+ while (!s <= m && Int32.logand !n 0xffl <> !n) do
+ n := Int32.logor (Int32.shift_right_logical !n 2) (Int32.shift_left !n 30);
+ s := !s + 2
+ done;
+ !s <= m
diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp
index a4b2241..f8db396 100644
--- a/asmcomp/arm/emit.mlp
+++ b/asmcomp/arm/emit.mlp
@@ -1,16 +1,17 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: emit.mlp 10293 2010-04-22 09:33:18Z xleroy $ *)
+(* $Id$ *)
(* Emission of ARM assembly code *)
@@ -33,16 +34,28 @@ let fastcode_flag = ref true
let emit_label lbl =
emit_string ".L"; emit_int lbl
-(* Output a symbol *)
+let emit_data_label lbl =
+ emit_string ".Ld"; emit_int lbl
+
+(* Symbols *)
let emit_symbol s =
Emitaux.emit_symbol '$' s
+let emit_call s =
+ if !Clflags.dlcode || !pic_code
+ then `bl {emit_symbol s}(PLT)`
+ else `bl {emit_symbol s}`
+
+let emit_jump s =
+ if !Clflags.dlcode || !pic_code
+ then `b {emit_symbol s}(PLT)`
+ else `b {emit_symbol s}`
+
(* Output a pseudo-register *)
-let emit_reg r =
- match r.loc with
- | Reg r -> emit_string (register_name r)
+let emit_reg = function
+ {loc = Reg r} -> emit_string (register_name r)
| _ -> fatal_error "Emit_arm.emit_reg"
(* Layout of the stack frame *)
@@ -53,14 +66,23 @@ let frame_size () =
let sz =
!stack_offset +
4 * num_stack_slots.(0) +
+ 8 * num_stack_slots.(1) +
+ 8 * num_stack_slots.(2) +
(if !contains_calls then 4 else 0)
in Misc.align sz 8
let slot_offset loc cl =
match loc with
- Incoming n -> frame_size() + n
- | Local n -> !stack_offset + n * 4
- | Outgoing n -> n
+ Incoming n ->
+ assert (n >= 0);
+ frame_size() + n
+ | Local n ->
+ if cl = 0
+ then !stack_offset + n * 4
+ else !stack_offset + num_stack_slots.(0) * 4 + n * 8
+ | Outgoing n ->
+ assert (n >= 0);
+ n
(* Output a stack reference *)
@@ -79,20 +101,13 @@ let emit_addressing addr r n =
(* Record live pointers at call points *)
-type frame_descr =
- { fd_lbl: int; (* Return address *)
- fd_frame_size: int; (* Size of stack frame *)
- fd_live_offset: int list } (* Offsets/regs of live addresses *)
-
-let frame_descriptors = ref([] : frame_descr list)
-
-let record_frame live =
+let record_frame_label live dbg =
let lbl = new_label() in
let live_offset = ref [] in
Reg.Set.iter
(function
{typ = Addr; loc = Reg r} ->
- live_offset := (r lsl 1) + 1 :: !live_offset
+ live_offset := ((r lsl 1) + 1) :: !live_offset
| {typ = Addr; loc = Stack s} as reg ->
live_offset := slot_offset s (register_class reg) :: !live_offset
| _ -> ())
@@ -100,18 +115,57 @@ let record_frame live =
frame_descriptors :=
{ fd_lbl = lbl;
fd_frame_size = frame_size();
- fd_live_offset = !live_offset } :: !frame_descriptors;
- `{emit_label lbl}:`
-
-let emit_frame fd =
- ` .word {emit_label fd.fd_lbl} + 4\n`;
- ` .short {emit_int fd.fd_frame_size}\n`;
- ` .short {emit_int (List.length fd.fd_live_offset)}\n`;
- List.iter
- (fun n ->
- ` .short {emit_int n}\n`)
- fd.fd_live_offset;
- ` .align 2\n`
+ fd_live_offset = !live_offset;
+ fd_debuginfo = dbg } :: !frame_descriptors;
+ lbl
+
+let record_frame live dbg =
+ let lbl = record_frame_label live dbg in `{emit_label lbl}:`
+
+(* Record calls to the GC -- we've moved them out of the way *)
+
+type gc_call =
+ { gc_lbl: label; (* Entry label *)
+ gc_return_lbl: label; (* Where to branch after GC *)
+ gc_frame_lbl: label } (* Label of frame descriptor *)
+
+let call_gc_sites = ref ([] : gc_call list)
+
+let emit_call_gc gc =
+ `{emit_label gc.gc_lbl}: {emit_call "caml_call_gc"}\n`;
+ `{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n`
+
+(* Record calls to caml_ml_array_bound_error.
+ In debug mode, we maintain one call to caml_ml_array_bound_error
+ per bound check site. Otherwise, we can share a single call. *)
+
+type bound_error_call =
+ { bd_lbl: label; (* Entry label *)
+ bd_frame_lbl: label } (* Label of frame descriptor *)
+
+let bound_error_sites = ref ([] : bound_error_call list)
+
+let bound_error_label dbg =
+ if !Clflags.debug || !bound_error_sites = [] then begin
+ let lbl_bound_error = new_label() in
+ let lbl_frame = record_frame_label Reg.Set.empty dbg in
+ bound_error_sites :=
+ { bd_lbl = lbl_bound_error;
+ bd_frame_lbl = lbl_frame } :: !bound_error_sites;
+ lbl_bound_error
+ end else begin
+ let bd = List.hd !bound_error_sites in bd.bd_lbl
+ end
+
+let emit_call_bound_error bd =
+ `{emit_label bd.bd_lbl}: {emit_call "caml_ml_array_bound_error"}\n`;
+ `{emit_label bd.bd_frame_lbl}:\n`
+
+(* Negate a comparison *)
+
+let negate_integer_comparison = function
+ Isigned cmp -> Isigned(negate_comparison cmp)
+ | Iunsigned cmp -> Iunsigned(negate_comparison cmp)
(* Names of various instructions *)
@@ -121,22 +175,13 @@ let name_for_comparison = function
| Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
| Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
-let name_for_float_comparison cmp neg =
- match cmp with
- Ceq -> if neg then "ne" else "eq"
- | Cne -> if neg then "eq" else "ne"
- | Cle -> if neg then "hi" else "ls"
- | Cge -> if neg then "lt" else "ge"
- | Clt -> if neg then "pl" else "mi"
- | Cgt -> if neg then "le" else "gt"
-
let name_for_int_operation = function
Iadd -> "add"
| Isub -> "sub"
| Imul -> "mul"
- | Iand -> "and"
- | Ior -> "orr"
- | Ixor -> "eor"
+ | Iand -> "and"
+ | Ior -> "orr"
+ | Ixor -> "eor"
| _ -> assert false
let name_for_shift_operation = function
@@ -145,60 +190,54 @@ let name_for_shift_operation = function
| Iasr -> "asr"
| _ -> assert false
-let name_for_shift_int_operation = function
- Ishiftadd -> "add"
- | Ishiftsub -> "sub"
- | Ishiftsubrev -> "rsb"
-
-(* Recognize immediate operands *)
-
-(* Immediate operands are 8-bit immediate values, zero-extended, and rotated
- right by 0, 2, 4, ... 30 bits.
- We check only with 8-bit values shifted left 0 to 24 bits. *)
-
-let rec is_immed n shift =
- shift <= 24 &&
- (Nativeint.logand n (Nativeint.shift_left (Nativeint.of_int 0xFF) shift) = n
- || is_immed n (shift + 2))
-
-let is_immediate n = is_immed n 0
-
(* General functional to decompose a non-immediate integer constant
- into 8-bit chunks shifted left 0 ... 24 bits *)
+ into 8-bit chunks shifted left 0 ... 30 bits. *)
let decompose_intconst n fn =
let i = ref n in
let shift = ref 0 in
let ninstr = ref 0 in
- while !i <> 0n do
- if Nativeint.to_int (Nativeint.shift_right !i !shift) land 3 = 0 then
+ while !i <> 0l do
+ if Int32.logand (Int32.shift_right !i !shift) 3l = 0l then
shift := !shift + 2
else begin
- let mask = Nativeint.shift_left 0xFFn !shift in
- let bits = Nativeint.logand !i mask in
- fn bits;
+ let bits = Int32.logand !i (Int32.shift_left 0xffl !shift) in
+ i := Int32.sub !i bits;
shift := !shift + 8;
- i := Nativeint.sub !i bits;
- incr ninstr
+ incr ninstr;
+ fn bits
end
done;
!ninstr
(* Load an integer constant into a register *)
-let emit_intconst r n =
- let nr = Nativeint.lognot n in
+let emit_intconst dst n =
+ let nr = Int32.lognot n in
if is_immediate n then begin
- ` mov {emit_reg r}, #{emit_nativeint n}\n`; 1
+ (* Use movs here to enable 16-bit T1 encoding *)
+ ` movs {emit_reg dst}, #{emit_int32 n}\n`; 1
end else if is_immediate nr then begin
- ` mvn {emit_reg r}, #{emit_nativeint nr}\n`; 1
+ ` mvn {emit_reg dst}, #{emit_int32 nr}\n`; 1
+ end else if !arch > ARMv6 then begin
+ let nl = Int32.logand 0xffffl n in
+ let nh = Int32.logand 0xffffl (Int32.shift_right_logical n 16) in
+ if nh = 0l then begin
+ ` movw {emit_reg dst}, #{emit_int32 nl}\n`; 1
+ end else if Int32.logand nl 0xffl = nl then begin
+ ` movs {emit_reg dst}, #{emit_int32 nl}\n`;
+ ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
+ end else begin
+ ` movw {emit_reg dst}, #{emit_int32 nl}\n`;
+ ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
+ end
end else begin
let first = ref true in
decompose_intconst n
(fun bits ->
if !first
- then ` mov {emit_reg r}, #{emit_nativeint bits} @ {emit_nativeint n}\n`
- else ` add {emit_reg r}, {emit_reg r}, #{emit_nativeint bits}\n`;
+ then ` mov {emit_reg dst}, #{emit_int32 bits} @ {emit_int32 n}\n`
+ else ` add {emit_reg dst}, {emit_reg dst}, #{emit_int32 bits}\n`;
first := false)
end
@@ -206,46 +245,105 @@ let emit_intconst r n =
let emit_stack_adjustment instr n =
if n <= 0 then 0 else
- decompose_intconst (Nativeint.of_int n)
+ decompose_intconst (Int32.of_int n)
(fun bits ->
- ` {emit_string instr} sp, sp, #{emit_nativeint bits}\n`)
+ ` {emit_string instr} sp, sp, #{emit_int32 bits}\n`)
(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
-(* Table of symbols referenced *)
-let symbol_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
-(* Table of floating-point literals *)
-let float_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
-(* Total space (in word) occupied by pending literals *)
+(* Pending floating-point literals *)
+let float_literals = ref ([] : (string * label) list)
+(* Pending relative references to the global offset table *)
+let gotrel_literals = ref ([] : (label * label) list)
+(* Pending symbol literals *)
+let symbol_literals = ref ([] : (string * label) list)
+(* Total space (in words) occupied by pending literals *)
let num_literals = ref 0
-(* Label a symbol or float constant *)
-let label_constant tbl s size =
+(* Label a floating-point literal *)
+let float_literal f =
try
- Hashtbl.find tbl s
+ List.assoc f !float_literals
with Not_found ->
let lbl = new_label() in
- Hashtbl.add tbl s lbl;
- num_literals := !num_literals + size;
+ num_literals := !num_literals + 2;
+ float_literals := (f, lbl) :: !float_literals;
lbl
-(* Emit all pending constants *)
-
-let emit_constants () =
- Hashtbl.iter
- (fun s lbl ->
- `{emit_label lbl}: .word {emit_symbol s}\n`)
- symbol_constants;
- Hashtbl.iter
- (fun s lbl ->
- `{emit_label lbl}: .double {emit_string s}\n`)
- float_constants;
- Hashtbl.clear symbol_constants;
- Hashtbl.clear float_constants;
+(* Label a GOTREL literal *)
+let gotrel_literal l =
+ let lbl = new_label() in
+ num_literals := !num_literals + 1;
+ gotrel_literals := (l, lbl) :: !gotrel_literals;
+ lbl
+
+(* Label a symbol literal *)
+let symbol_literal s =
+ try
+ List.assoc s !symbol_literals
+ with Not_found ->
+ let lbl = new_label() in
+ num_literals := !num_literals + 1;
+ symbol_literals := (s, lbl) :: !symbol_literals;
+ lbl
+
+(* Emit all pending literals *)
+let emit_literals() =
+ if !float_literals <> [] then begin
+ ` .align 3\n`;
+ List.iter
+ (fun (f, lbl) ->
+ `{emit_label lbl}: .double {emit_string f}\n`)
+ !float_literals;
+ float_literals := []
+ end;
+ if !symbol_literals <> [] then begin
+ let offset = if !thumb then 4 else 8 in
+ let suffix = if !pic_code then "(GOT)" else "" in
+ ` .align 2\n`;
+ List.iter
+ (fun (l, lbl) ->
+ `{emit_label lbl}: .word _GLOBAL_OFFSET_TABLE_-({emit_label l}+{emit_int offset})\n`)
+ !gotrel_literals;
+ List.iter
+ (fun (s, lbl) ->
+ `{emit_label lbl}: .word {emit_symbol s}{emit_string suffix}\n`)
+ !symbol_literals;
+ gotrel_literals := [];
+ symbol_literals := []
+ end;
num_literals := 0
+(* Emit code to load the address of a symbol *)
+
+let emit_load_symbol_addr dst s =
+ if !pic_code then begin
+ let lbl_pic = new_label() in
+ let lbl_got = gotrel_literal lbl_pic in
+ let lbl_sym = symbol_literal s in
+ (* Both r3 and r12 are marked as clobbered in PIC mode (cf. proc.ml),
+ so use r12 as temporary scratch register unless the destination is
+ r12, then we use r3 instead. *)
+ let tmp = if dst.loc = Reg 8 (*r12*)
+ then phys_reg 3 (*r3*)
+ else phys_reg 8 (*r12*) in
+ ` ldr {emit_reg tmp}, {emit_label lbl_got}\n`;
+ ` ldr {emit_reg dst}, {emit_label lbl_sym}\n`;
+ `{emit_label lbl_pic}: add {emit_reg tmp}, pc, {emit_reg tmp}\n`;
+ ` ldr {emit_reg dst}, [{emit_reg tmp}, {emit_reg dst}] @ {emit_symbol s}\n`;
+ 4
+ end else if !arch > ARMv6 && not !Clflags.dlcode && !fastcode_flag then begin
+ ` movw {emit_reg dst}, #:lower16:{emit_symbol s}\n`;
+ ` movt {emit_reg dst}, #:upper16:{emit_symbol s}\n`;
+ 2
+ end else begin
+ let lbl = symbol_literal s in
+ ` ldr {emit_reg dst}, {emit_label lbl} @ {emit_symbol s}\n`;
+ 1
+ end
+
(* Output the assembly code for an instruction *)
let emit_instr i =
@@ -254,40 +352,76 @@ let emit_instr i =
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc = dst.loc then 0 else begin
- match (src, dst) with
- {loc = Reg rs; typ = Int|Addr}, {loc = Reg rd; typ = Int|Addr} ->
- ` mov {emit_reg dst}, {emit_reg src}\n`; 1
- | {loc = Reg rs; typ = Int|Addr}, {loc = Stack sd} ->
- ` str {emit_reg src}, {emit_stack dst}\n`; 1
- | {loc = Stack ss; typ = Int|Addr}, {loc = Reg rd} ->
- ` ldr {emit_reg dst}, {emit_stack src}\n`; 1
+ begin match (src, dst) with
+ {loc = Reg _; typ = Float}, {loc = Reg _} ->
+ ` fcpyd {emit_reg dst}, {emit_reg src}\n`
+ | {loc = Reg _}, {loc = Reg _} ->
+ ` mov {emit_reg dst}, {emit_reg src}\n`
+ | {loc = Reg _; typ = Float}, _ ->
+ ` fstd {emit_reg src}, {emit_stack dst}\n`
+ | {loc = Reg _}, _ ->
+ ` str {emit_reg src}, {emit_stack dst}\n`
+ | {typ = Float}, _ ->
+ ` fldd {emit_reg dst}, {emit_stack src}\n`
| _ ->
- assert false
+ ` ldr {emit_reg dst}, {emit_stack src}\n`
+ end; 1
end
| Lop(Iconst_int n) ->
- emit_intconst i.res.(0) n
- | Lop(Iconst_float s) ->
- let bits = Int64.bits_of_float (float_of_string s) in
- let high_bits = Int64.to_nativeint (Int64.shift_right_logical bits 32)
- and low_bits = Int64.to_nativeint bits in
- if is_immediate low_bits && is_immediate high_bits then begin
- ` mov {emit_reg i.res.(0)}, #{emit_nativeint low_bits} @ {emit_string s}\n`;
- ` mov {emit_reg i.res.(1)}, #{emit_nativeint high_bits}\n`;
- 2
+ emit_intconst i.res.(0) (Nativeint.to_int32 n)
+ | Lop(Iconst_float f) when !fpu = Soft ->
+ ` @ {emit_string f}\n`;
+ let bits = Int64.bits_of_float (float_of_string f) in
+ let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32)
+ and low_bits = Int64.to_int32 bits in
+ if is_immediate low_bits || is_immediate high_bits then begin
+ let ninstr_low = emit_intconst i.res.(0) low_bits
+ and ninstr_high = emit_intconst i.res.(1) high_bits in
+ ninstr_low + ninstr_high
end else begin
- let lbl = label_constant float_constants s 2 in
- ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string s}\n`;
+ let lbl = float_literal f in
+ ` ldr {emit_reg i.res.(0)}, {emit_label lbl}\n`;
` ldr {emit_reg i.res.(1)}, {emit_label lbl} + 4\n`;
2
end
+ | Lop(Iconst_float f) ->
+ let encode imm =
+ let sg = Int64.to_int (Int64.shift_right_logical imm 63) in
+ let ex = Int64.to_int (Int64.shift_right_logical imm 52) in
+ let ex = (ex land 0x7ff) - 1023 in
+ let mn = Int64.logand imm 0xfffffffffffffL in
+ if Int64.logand mn 0xffffffffffffL <> 0L || ex < -3 || ex > 4
+ then
+ None
+ else begin
+ let mn = Int64.to_int (Int64.shift_right_logical mn 48) in
+ if mn land 0x0f <> mn then
+ None
+ else
+ let ex = ((ex + 3) land 0x07) lxor 0x04 in
+ Some((sg lsl 7) lor (ex lsl 4) lor mn)
+ end in
+ begin match encode (Int64.bits_of_float (float_of_string f)) with
+ None ->
+ let lbl = float_literal f in
+ ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`
+ | Some imm8 ->
+ ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n`
+ end; 1
| Lop(Iconst_symbol s) ->
- let lbl = label_constant symbol_constants s 1 in
- ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_symbol s}\n`; 1
+ emit_load_symbol_addr i.res.(0) s
| Lop(Icall_ind) ->
- ` mov lr, pc\n`;
- `{record_frame i.live} bx {emit_reg i.arg.(0)}\n`; 2
+ if !arch >= ARMv5 then begin
+ ` blx {emit_reg i.arg.(0)}\n`;
+ `{record_frame i.live i.dbg}\n`; 1
+ end else begin
+ ` mov lr, pc\n`;
+ ` bx {emit_reg i.arg.(0)}\n`;
+ `{record_frame i.live i.dbg}\n`; 2
+ end
| Lop(Icall_imm s) ->
- `{record_frame i.live} bl {emit_symbol s}\n`; 1
+ ` {emit_call s}\n`;
+ `{record_frame i.live i.dbg}\n`; 1
| Lop(Itailcall_ind) ->
let n = frame_size() in
if !contains_calls then
@@ -303,17 +437,16 @@ let emit_instr i =
if !contains_calls then
` ldr lr, [sp, #{emit_int (n-4)}]\n`;
let ninstr = emit_stack_adjustment "add" n in
- ` b {emit_symbol s}\n`;
+ ` {emit_jump s}\n`;
2 + ninstr
end
- | Lop(Iextcall(s, alloc)) ->
- if alloc then begin
- let lbl = label_constant symbol_constants s 1 in
- ` ldr r12, {emit_label lbl} @ {emit_symbol s}\n`;
- `{record_frame i.live} bl caml_c_call\n`; 2
- end else begin
- ` bl {emit_symbol s}\n`; 1
- end
+ | Lop(Iextcall(s, false)) ->
+ ` {emit_call s}\n`; 1
+ | Lop(Iextcall(s, true)) ->
+ let ninstr = emit_load_symbol_addr (phys_reg 7 (* r7 *)) s in
+ ` {emit_call "caml_c_call"}\n`;
+ `{record_frame i.live i.dbg}\n`;
+ 1 + ninstr
| Lop(Istackoffset n) ->
assert (n mod 8 = 0);
let ninstr =
@@ -322,16 +455,28 @@ let emit_instr i =
else emit_stack_adjustment "add" (-n) in
stack_offset := !stack_offset + n;
ninstr
- | Lop(Iload((Double | Double_u), addr)) ->
- let addr' = offset_addressing addr 4 in
- if i.res.(0).loc <> i.arg.(0).loc then begin
- ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`;
- ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`
- end else begin
- ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`;
- ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
- end;
- 2
+ | Lop(Iload(Single, addr)) when !fpu >= VFPv3_D16 ->
+ ` flds s14, {emit_addressing addr i.arg 0}\n`;
+ ` fcvtds {emit_reg i.res.(0)}, s14\n`; 2
+ | Lop(Iload((Double | Double_u), addr)) when !fpu = Soft ->
+ (* Use LDM or LDRD if possible *)
+ begin match i.res.(0), i.res.(1), addr with
+ {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
+ when rt < rt2 ->
+ ` ldm {emit_reg i.arg.(0)}, \{{emit_reg i.res.(0)}, {emit_reg i.res.(1)}}\n`; 1
+ | {loc = Reg rt}, {loc = Reg rt2}, addr
+ when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
+ ` ldrd {emit_reg i.res.(0)}, {emit_reg i.res.(1)}, {emit_addressing addr i.arg 0}\n`; 1
+ | _ ->
+ let addr' = offset_addressing addr 4 in
+ if i.res.(0).loc <> i.arg.(0).loc then begin
+ ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`;
+ ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`
+ end else begin
+ ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`;
+ ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
+ end; 2
+ end
| Lop(Iload(size, addr)) ->
let r = i.res.(0) in
let instr =
@@ -340,65 +485,114 @@ let emit_instr i =
| Byte_signed -> "ldrsb"
| Sixteen_unsigned -> "ldrh"
| Sixteen_signed -> "ldrsh"
+ | Double
+ | Double_u -> "fldd"
| _ (* 32-bit quantities *) -> "ldr" in
- ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`;
- 1
- | Lop(Istore((Double | Double_u), addr)) ->
- let addr' = offset_addressing addr 4 in
- ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
- ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`;
- 2
+ ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
+ | Lop(Istore(Single, addr)) when !fpu >= VFPv3_D16 ->
+ ` fcvtsd s14, {emit_reg i.arg.(0)}\n`;
+ ` fsts s14, {emit_addressing addr i.arg 1}\n`; 2
+ | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft ->
+ (* Use STM or STRD if possible *)
+ begin match i.arg.(0), i.arg.(1), addr with
+ {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
+ when rt < rt2 ->
+ ` stm {emit_reg i.arg.(2)}, \{{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}}\n`; 1
+ | {loc = Reg rt}, {loc = Reg rt2}, addr
+ when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
+ ` strd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_addressing addr i.arg 2}\n`; 1
+ | _ ->
+ let addr' = offset_addressing addr 4 in
+ ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
+ ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2
+ end
| Lop(Istore(size, addr)) ->
let r = i.arg.(0) in
let instr =
match size with
- Byte_unsigned | Byte_signed -> "strb"
- | Sixteen_unsigned | Sixteen_signed -> "strh"
+ Byte_unsigned
+ | Byte_signed -> "strb"
+ | Sixteen_unsigned
+ | Sixteen_signed -> "strh"
+ | Double
+ | Double_u -> "fstd"
| _ (* 32-bit quantities *) -> "str" in
- ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`;
- 1
+ ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`; 1
| Lop(Ialloc n) ->
+ let lbl_frame = record_frame_label i.live i.dbg in
if !fastcode_flag then begin
- let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in
- ` sub alloc_ptr, alloc_ptr, r12\n`;
+ let lbl_redo = new_label() in
+ `{emit_label lbl_redo}:`;
+ let ninstr = decompose_intconst
+ (Int32.of_int n)
+ (fun i ->
+ ` sub alloc_ptr, alloc_ptr, #{emit_int32 i}\n`) in
` cmp alloc_ptr, alloc_limit\n`;
- `{record_frame i.live} blcc caml_call_gc\n`;
` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
- 4 + ni
- end else if n = 8 || n = 12 || n = 16 then begin
- `{record_frame i.live} bl caml_alloc{emit_int ((n-4)/4)}\n`;
- ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; 2
+ let lbl_call_gc = new_label() in
+ ` bcc {emit_label lbl_call_gc}\n`;
+ call_gc_sites :=
+ { gc_lbl = lbl_call_gc;
+ gc_return_lbl = lbl_redo;
+ gc_frame_lbl = lbl_frame } :: !call_gc_sites;
+ 3 + ninstr
end else begin
- let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in
- `{record_frame i.live} bl caml_allocN\n`;
- ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
- 2 + ni
+ let ninstr =
+ begin match n with
+ 8 -> ` {emit_call "caml_alloc1"}\n`; 1
+ | 12 -> ` {emit_call "caml_alloc2"}\n`; 1
+ | 16 -> ` {emit_call "caml_alloc3"}\n`; 1
+ | _ -> let ninstr = emit_intconst (phys_reg 7) (Int32.of_int n) in
+ ` {emit_call "caml_allocN"}\n`; 1 + ninstr
+ end in
+ `{emit_label lbl_frame}: add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
+ 1 + ninstr
end
| Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
let shift = name_for_shift_operation op in
` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} {emit_reg i.arg.(1)}\n`; 1
| Lop(Iintop(Icomp cmp)) ->
- let comp = name_for_comparison cmp in
+ let compthen = name_for_comparison cmp in
+ let compelse = name_for_comparison (negate_integer_comparison cmp) in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
- ` mov {emit_reg i.res.(0)}, #0\n`;
- ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
- | Lop(Iintop(Icheckbound)) ->
+ ` ite {emit_string compthen}\n`;
+ ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
+ ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
+ | Lop(Iintop_imm(Icomp cmp, n)) ->
+ let compthen = name_for_comparison cmp in
+ let compelse = name_for_comparison (negate_integer_comparison cmp) in
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
+ ` ite {emit_string compthen}\n`;
+ ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
+ ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
+ | Lop(Iintop Icheckbound) ->
+ let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
- ` blls caml_ml_array_bound_error\n`; 2
+ ` bls {emit_label lbl}\n`; 2
+ | Lop(Iintop_imm(Icheckbound, n)) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
+ ` bls {emit_label lbl}\n`; 2
+ | Lop(Ispecific(Ishiftcheckbound shift)) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
+ ` bcs {emit_label lbl}\n`; 2
| Lop(Iintop op) ->
let instr = name_for_int_operation op in
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
| Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *)
let l = Misc.log2 n in
let r = i.res.(0) in
` movs {emit_reg r}, {emit_reg i.arg.(0)}\n`;
- if n <= 256 then
+ if n <= 256 then begin
+ ` it lt\n`;
` addlt {emit_reg r}, {emit_reg r}, #{emit_int (n-1)}\n`
- else begin
+ end else begin
+ ` itt lt\n`;
` addlt {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
` sublt {emit_reg r}, {emit_reg r}, #1\n`
end;
- ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 4
+ ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 5
| Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *)
let l = Misc.log2 n in
let a = i.arg.(0) in
@@ -409,40 +603,71 @@ let emit_instr i =
` mov {emit_reg r}, {emit_reg r}, lsr #{emit_int (32-l)}\n`;
` bpl {emit_label lbl}\n`;
` cmp {emit_reg r}, #0\n`;
+ ` it ne\n`;
` subne {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
- `{emit_label lbl}:\n`; 6
+ `{emit_label lbl}:\n`; 7
| Lop(Iintop_imm((Ilsl | Ilsr | Iasr as op), n)) ->
let shift = name_for_shift_operation op in
` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} #{emit_int n}\n`; 1
- | Lop(Iintop_imm(Icomp cmp, n)) ->
- let comp = name_for_comparison cmp in
- ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
- ` mov {emit_reg i.res.(0)}, #0\n`;
- ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
- | Lop(Iintop_imm(Icheckbound, n)) ->
- ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
- ` blls caml_ml_array_bound_error\n`; 2
| Lop(Iintop_imm(op, n)) ->
let instr = name_for_int_operation op in
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
- | Lop(Inegf) -> (* argument and result in (r0, r1) *)
- ` eor r1, r1, #0x80000000\n`; 1
- | Lop(Iabsf) -> (* argument and result in (r0, r1) *)
- ` bic r1, r1, #0x80000000\n`; 1
- | Lop(Ifloatofint | Iintoffloat | Iaddf | Isubf | Imulf | Idivf) ->
- assert false
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
+ | Lop(Iabsf | Inegf as op) when !fpu = Soft ->
+ let instr = (match op with
+ Iabsf -> "bic"
+ | Inegf -> "eor"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(1)}, {emit_reg i.arg.(1)}, #0x80000000\n`; 1
+ | Lop(Iabsf | Inegf | Ispecific Isqrtf as op) ->
+ let instr = (match op with
+ Iabsf -> "fabsd"
+ | Inegf -> "fnegd"
+ | Ispecific Isqrtf -> "fsqrtd"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1
+ | Lop(Ifloatofint) ->
+ ` fmsr s14, {emit_reg i.arg.(0)}\n`;
+ ` fsitod {emit_reg i.res.(0)}, s14\n`; 2
+ | Lop(Iintoffloat) ->
+ ` ftosizd s14, {emit_reg i.arg.(0)}\n`;
+ ` fmrs {emit_reg i.res.(0)}, s14\n`; 2
+ | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) ->
+ let instr = (match op with
+ Iaddf -> "faddd"
+ | Isubf -> "fsubd"
+ | Imulf -> "fmuld"
+ | Idivf -> "fdivd"
+ | Ispecific Inegmulf -> "fnmuld"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+ 1
+ | Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) ->
+ let instr = (match op with
+ Imuladdf -> "fmacd"
+ | Inegmuladdf -> "fnmacd"
+ | Imulsubf -> "fmscd"
+ | Inegmulsubf -> "fnmscd"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`;
+ 1
| Lop(Ispecific(Ishiftarith(op, shift))) ->
- let instr = name_for_shift_int_operation op in
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
+ let instr = (match op with
+ Ishiftadd -> "add"
+ | Ishiftsub -> "sub"
+ | Ishiftsubrev -> "rsb") in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
if shift >= 0
then `, lsl #{emit_int shift}\n`
else `, asr #{emit_int (-shift)}\n`;
1
- | Lop(Ispecific(Ishiftcheckbound shift)) ->
- ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
- ` blcs caml_ml_array_bound_error\n`; 2
| Lop(Ispecific(Irevsubimm n)) ->
` rsb {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
+ | Lop(Ispecific(Imuladd | Imulsub as op)) ->
+ let instr = (match op with
+ Imuladd -> "mla"
+ | Imulsub -> "mls"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1
| Lreloadretaddr ->
let n = frame_size() in
` ldr lr, [sp, #{emit_int(n-4)}]\n`; 1
@@ -458,29 +683,41 @@ let emit_instr i =
begin match tst with
Itruetest ->
` cmp {emit_reg i.arg.(0)}, #0\n`;
- ` bne {emit_label lbl}\n`
+ ` bne {emit_label lbl}\n`; 2
| Ifalsetest ->
` cmp {emit_reg i.arg.(0)}, #0\n`;
- ` beq {emit_label lbl}\n`
+ ` beq {emit_label lbl}\n`; 2
| Iinttest cmp ->
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
let comp = name_for_comparison cmp in
- ` b{emit_string comp} {emit_label lbl}\n`
+ ` b{emit_string comp} {emit_label lbl}\n`; 2
| Iinttest_imm(cmp, n) ->
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
let comp = name_for_comparison cmp in
- ` b{emit_string comp} {emit_label lbl}\n`
+ ` b{emit_string comp} {emit_label lbl}\n`; 2
| Ifloattest(cmp, neg) ->
- assert false
+ let comp = (match (cmp, neg) with
+ (Ceq, false) | (Cne, true) -> "eq"
+ | (Cne, false) | (Ceq, true) -> "ne"
+ | (Clt, false) -> "cc"
+ | (Clt, true) -> "cs"
+ | (Cle, false) -> "ls"
+ | (Cle, true) -> "hi"
+ | (Cgt, false) -> "gt"
+ | (Cgt, true) -> "le"
+ | (Cge, false) -> "ge"
+ | (Cge, true) -> "lt") in
+ ` fcmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+ ` fmstat\n`;
+ ` b{emit_string comp} {emit_label lbl}\n`; 3
| Ioddtest ->
` tst {emit_reg i.arg.(0)}, #1\n`;
- ` bne {emit_label lbl}\n`
+ ` bne {emit_label lbl}\n`; 2
| Ieventest ->
` tst {emit_reg i.arg.(0)}, #1\n`;
- ` beq {emit_label lbl}\n`
- end;
- 2
- | Lcondbranch3(lbl0, lbl1, lbl2) ->
+ ` beq {emit_label lbl}\n`; 2
+ end
+ | Lcondbranch3(lbl0, lbl1, lbl2) ->
` cmp {emit_reg i.arg.(0)}, #1\n`;
begin match lbl0 with
None -> ()
@@ -495,108 +732,144 @@ let emit_instr i =
| Some lbl -> ` bgt {emit_label lbl}\n`
end;
4
- | Lswitch jumptbl ->
- ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
- ` mov r0, r0\n`; (* nop *)
- for i = 0 to Array.length jumptbl - 1 do
- ` .word {emit_label jumptbl.(i)}\n`
- done;
+ | Lswitch jumptbl ->
+ if !arch > ARMv6 && !thumb then begin
+ (* The Thumb-2 TBH instruction supports only forward branches,
+ so we need to generate appropriate trampolines for all labels
+ that appear before this switch instruction (PR#5623) *)
+ let tramtbl = Array.copy jumptbl in
+ ` tbh [pc, {emit_reg i.arg.(0)}, lsl #1]\n`;
+ for j = 0 to Array.length tramtbl - 1 do
+ let rec label i =
+ match i.desc with
+ Lend -> new_label()
+ | Llabel lbl when lbl = tramtbl.(j) -> lbl
+ | _ -> label i.next in
+ tramtbl.(j) <- label i.next;
+ ` .short ({emit_label tramtbl.(j)}-.)/2+{emit_int j}\n`
+ done;
+ (* Generate the necessary trampolines *)
+ for j = 0 to Array.length tramtbl - 1 do
+ if tramtbl.(j) <> jumptbl.(j) then
+ `{emit_label tramtbl.(j)}: b {emit_label jumptbl.(j)}\n`
+ done
+ end else if not !pic_code then begin
+ ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
+ ` nop\n`;
+ for j = 0 to Array.length jumptbl - 1 do
+ ` .word {emit_label jumptbl.(j)}\n`
+ done
+ end else begin
+ (* Slightly slower, but position-independent *)
+ ` add pc, pc, {emit_reg i.arg.(0)}, lsl #2\n`;
+ ` nop\n`;
+ for j = 0 to Array.length jumptbl - 1 do
+ ` b {emit_label jumptbl.(j)}\n`
+ done
+ end;
2 + Array.length jumptbl
| Lsetuptrap lbl ->
` bl {emit_label lbl}\n`; 1
| Lpushtrap ->
stack_offset := !stack_offset + 8;
- ` stmfd sp!, \{trap_ptr, lr}\n`;
+ ` push \{trap_ptr, lr}\n`;
` mov trap_ptr, sp\n`; 2
| Lpoptrap ->
- ` ldmfd sp!, \{trap_ptr, lr}\n`;
+ ` pop \{trap_ptr, lr}\n`;
stack_offset := !stack_offset - 8; 1
| Lraise ->
- ` mov sp, trap_ptr\n`;
- ` ldmfd sp!, \{trap_ptr, pc}\n`; 2
+ if !Clflags.debug then begin
+ ` {emit_call "caml_raise_exn"}\n`;
+ `{record_frame Reg.Set.empty i.dbg}\n`; 1
+ end else begin
+ ` mov sp, trap_ptr\n`;
+ ` pop \{trap_ptr, pc}\n`; 2
+ end
(* Emission of an instruction sequence *)
-let no_fallthrough = function
- Lop(Itailcall_ind | Itailcall_imm _) -> true
- | Lreturn -> true
- | Lbranch _ -> true
- | Lswitch _ -> true
- | Lraise -> true
- | _ -> false
-
let rec emit_all ninstr i =
if i.desc = Lend then () else begin
let n = emit_instr i in
let ninstr' = ninstr + n in
- let limit = 511 - !num_literals in
- if ninstr' >= limit - 64 && no_fallthrough i.desc then begin
- emit_constants();
+ (* fldd can address up to +/-1KB, ldr can address up to +/-4KB *)
+ let limit = (if !fpu >= VFPv3_D16 && !float_literals <> []
+ then 127
+ else 511) in
+ let limit = limit - !num_literals in
+ if ninstr' >= limit - 64 && not(has_fallthrough i.desc) then begin
+ emit_literals();
emit_all 0 i.next
- end else
- if ninstr' >= limit then begin
+ end else if !num_literals != 0 && ninstr' >= limit then begin
let lbl = new_label() in
` b {emit_label lbl}\n`;
- emit_constants();
+ emit_literals();
`{emit_label lbl}:\n`;
emit_all 0 i.next
end else
emit_all ninstr' i.next
end
+(* Emission of the profiling prelude *)
+
+let emit_profile() =
+ match Config.system with
+ "linux_eabi" | "linux_eabihf" ->
+ ` push \{lr}\n`;
+ ` {emit_call "__gnu_mcount_nc"}\n`
+ | _ -> ()
+
(* Emission of a function declaration *)
let fundecl fundecl =
function_name := fundecl.fun_name;
fastcode_flag := fundecl.fun_fast;
tailrec_entry_point := new_label();
+ float_literals := [];
+ gotrel_literals := [];
+ symbol_literals := [];
stack_offset := 0;
- Hashtbl.clear symbol_constants;
- Hashtbl.clear float_constants;
+ call_gc_sites := [];
+ bound_error_sites := [];
` .text\n`;
` .align 2\n`;
- ` .global {emit_symbol fundecl.fun_name}\n`;
+ ` .globl {emit_symbol fundecl.fun_name}\n`;
+ if !arch > ARMv6 && !thumb then
+ ` .thumb\n`
+ else
+ ` .arm\n`;
` .type {emit_symbol fundecl.fun_name}, %function\n`;
`{emit_symbol fundecl.fun_name}:\n`;
+ if !Clflags.gprofile then emit_profile();
let n = frame_size() in
ignore(emit_stack_adjustment "sub" n);
if !contains_calls then
` str lr, [sp, #{emit_int(n - 4)}]\n`;
`{emit_label !tailrec_entry_point}:\n`;
emit_all 0 fundecl.fun_body;
- emit_constants()
+ emit_literals();
+ List.iter emit_call_gc !call_gc_sites;
+ List.iter emit_call_bound_error !bound_error_sites;
+ ` .type {emit_symbol fundecl.fun_name}, %function\n`;
+ ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`
(* Emission of data *)
let emit_item = function
- Cglobal_symbol s ->
- ` .global {emit_symbol s}\n`;
- | Cdefine_symbol s ->
- `{emit_symbol s}:\n`
- | Cdefine_label lbl ->
- `{emit_label (100000 + lbl)}:\n`
- | Cint8 n ->
- ` .byte {emit_int n}\n`
- | Cint16 n ->
- ` .short {emit_int n}\n`
- | Cint32 n ->
- ` .word {emit_nativeint n}\n`
- | Cint n ->
- ` .word {emit_nativeint n}\n`
- | Csingle f ->
- emit_float32_directive ".long" f
- | Cdouble f ->
- emit_float64_split_directive ".long" f
- | Csymbol_address s ->
- ` .word {emit_symbol s}\n`
- | Clabel_address lbl ->
- ` .word {emit_label (100000 + lbl)}\n`
- | Cstring s ->
- emit_string_directive " .ascii " s
- | Cskip n ->
- if n > 0 then ` .space {emit_int n}\n`
- | Calign n ->
- ` .align {emit_int(Misc.log2 n)}\n`
+ Cglobal_symbol s -> ` .globl {emit_symbol s}\n`;
+ | Cdefine_symbol s -> `{emit_symbol s}:\n`
+ | Cdefine_label lbl -> `{emit_data_label lbl}:\n`
+ | Cint8 n -> ` .byte {emit_int n}\n`
+ | Cint16 n -> ` .short {emit_int n}\n`
+ | Cint32 n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
+ | Cint n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
+ | Csingle f -> ` .single {emit_string f}\n`
+ | Cdouble f -> ` .double {emit_string f}\n`
+ | Csymbol_address s -> ` .word {emit_symbol s}\n`
+ | Clabel_address lbl -> ` .word {emit_data_label lbl}\n`
+ | Cstring s -> emit_string_directive " .ascii " s
+ | Cskip n -> if n > 0 then ` .space {emit_int n}\n`
+ | Calign n -> ` .align {emit_int(Misc.log2 n)}\n`
let data l =
` .data\n`;
@@ -605,32 +878,62 @@ let data l =
(* Beginning / end of an assembly file *)
let begin_assembly() =
- `trap_ptr .req r11\n`;
- `alloc_ptr .req r8\n`;
- `alloc_limit .req r10\n`;
+ ` .syntax unified\n`;
+ begin match !arch with
+ | ARMv4 -> ` .arch armv4t\n`
+ | ARMv5 -> ` .arch armv5t\n`
+ | ARMv5TE -> ` .arch armv5te\n`
+ | ARMv6 -> ` .arch armv6\n`
+ | ARMv6T2 -> ` .arch armv6t2\n`
+ | ARMv7 -> ` .arch armv7-a\n`
+ end;
+ begin match !fpu with
+ Soft -> ` .fpu softvfp\n`
+ | VFPv3_D16 -> ` .fpu vfpv3-d16\n`
+ | VFPv3 -> ` .fpu vfpv3\n`
+ end;
+ `trap_ptr .req r8\n`;
+ `alloc_ptr .req r10\n`;
+ `alloc_limit .req r11\n`;
let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
` .data\n`;
- ` .global {emit_symbol lbl_begin}\n`;
+ ` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`;
let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
` .text\n`;
- ` .global {emit_symbol lbl_begin}\n`;
+ ` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`
let end_assembly () =
let lbl_end = Compilenv.make_symbol (Some "code_end") in
` .text\n`;
- ` .global {emit_symbol lbl_end}\n`;
+ ` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
let lbl_end = Compilenv.make_symbol (Some "data_end") in
` .data\n`;
- ` .global {emit_symbol lbl_end}\n`;
+ ` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
- ` .word 0\n`;
+ ` .long 0\n`;
let lbl = Compilenv.make_symbol (Some "frametable") in
- ` .data\n`;
- ` .global {emit_symbol lbl}\n`;
+ ` .globl {emit_symbol lbl}\n`;
`{emit_symbol lbl}:\n`;
- ` .word {emit_int (List.length !frame_descriptors)}\n`;
- List.iter emit_frame !frame_descriptors;
- frame_descriptors := []
+ emit_frames
+ { efa_label = (fun lbl ->
+ ` .type {emit_label lbl}, %function\n`;
+ ` .word {emit_label lbl}\n`);
+ efa_16 = (fun n -> ` .short {emit_int n}\n`);
+ efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
+ efa_word = (fun n -> ` .word {emit_int n}\n`);
+ efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`);
+ efa_label_rel = (fun lbl ofs ->
+ ` .word {emit_label lbl} - . + {emit_int32 ofs}\n`);
+ efa_def_label = (fun lbl -> `{emit_label lbl}:\n`);
+ efa_string = (fun s -> emit_string_directive " .asciz " s) };
+ ` .type {emit_symbol lbl}, %object\n`;
+ ` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`;
+ begin match Config.system with
+ "linux_eabihf" | "linux_eabi" ->
+ (* Mark stack as non-executable *)
+ ` .section .note.GNU-stack,\"\",%progbits\n`
+ | _ -> ()
+ end
diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml
index e56ac6e..aed2b01 100644
--- a/asmcomp/arm/proc.ml
+++ b/asmcomp/arm/proc.ml
@@ -1,16 +1,17 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: proc.ml 9252 2009-05-04 13:46:46Z xleroy $ *)
+(* $Id$ *)
(* Description of the ARM processor *)
@@ -26,32 +27,56 @@ let word_addressed = false
(* Registers available for register allocation *)
-(* Register map:
- r0 - r3 general purpose (not preserved by C)
- r4 - r7 general purpose (preserved)
- r8 allocation pointer (preserved)
- r9 platform register, usually reserved
- r10 allocation limit (preserved)
- r11 trap pointer (preserved)
- r12 general purpose (not preserved by C)
- r13 stack pointer
- r14 return address
- r15 program counter
+(* Integer register map:
+ r0 - r3 general purpose (not preserved)
+ r4 - r7 general purpose (preserved)
+ r8 trap pointer (preserved)
+ r9 platform register, usually reserved
+ r10 allocation pointer (preserved)
+ r11 allocation limit (preserved)
+ r12 intra-procedural scratch register (not preserved)
+ r13 stack pointer
+ r14 return address
+ r15 program counter
+ Floatinng-point register map (VFPv3):
+ d0 - d7 general purpose (not preserved)
+ d8 - d15 general purpose (preserved)
+ d16 - d31 generat purpose (not preserved), VFPv3 only
*)
-let int_reg_name = [|
- "r0"; "r1"; "r2"; "r3"; "r4"; "r5"; "r6"; "r7"; "r12"
-|]
+let int_reg_name =
+ [| "r0"; "r1"; "r2"; "r3"; "r4"; "r5"; "r6"; "r7"; "r12" |]
+
+let float_reg_name =
+ [| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7";
+ "d8"; "d9"; "d10"; "d11"; "d12"; "d13"; "d14"; "d15";
+ "d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23";
+ "d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |]
+
+(* We have three register classes:
+ 0 for integer registers
+ 1 for VFPv3-D16
+ 2 for VFPv3
+ This way we can choose between VFPv3-D16 and VFPv3
+ at (ocamlopt) runtime using command line switches.
+*)
-let num_register_classes = 1
+let num_register_classes = 3
-let register_class r = assert (r.typ <> Float); 0
+let register_class r =
+ match (r.typ, !fpu) with
+ (Int | Addr), _ -> 0
+ | Float, VFPv3_D16 -> 1
+ | Float, _ -> 2
-let num_available_registers = [| 9 |]
+let num_available_registers =
+ [| 9; 16; 32 |]
-let first_available_register = [| 0 |]
+let first_available_register =
+ [| 0; 100; 100 |]
-let register_name r = int_reg_name.(r)
+let register_name r =
+ if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
let rotate_registers = true
@@ -59,25 +84,34 @@ let rotate_registers = true
let hard_int_reg =
let v = Array.create 9 Reg.dummy in
- for i = 0 to 8 do v.(i) <- Reg.at_location Int (Reg i) done;
+ for i = 0 to 8 do
+ v.(i) <- Reg.at_location Int (Reg i)
+ done;
+ v
+
+let hard_float_reg =
+ let v = Array.create 32 Reg.dummy in
+ for i = 0 to 31 do
+ v.(i) <- Reg.at_location Float (Reg(100 + i))
+ done;
v
-let all_phys_regs = hard_int_reg
+let all_phys_regs =
+ Array.append hard_int_reg hard_float_reg
-let phys_reg n = all_phys_regs.(n)
+let phys_reg n =
+ if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
let stack_slot slot ty =
- assert (ty <> Float);
Reg.at_location ty (Stack slot)
(* Calling conventions *)
-(* XXX float types have already been expanded into pairs of integers.
- So we cannot align these floats. See if that causes a problem. *)
-
-let calling_conventions first_int last_int make_stack arg =
+let calling_conventions
+ first_int last_int first_float last_float make_stack arg =
let loc = Array.create (Array.length arg) Reg.dummy in
let int = ref first_int in
+ let float = ref first_float in
let ofs = ref 0 in
for i = 0 to Array.length arg - 1 do
match arg.(i).typ with
@@ -90,37 +124,86 @@ let calling_conventions first_int last_int make_stack arg =
ofs := !ofs + size_int
end
| Float ->
- assert false
+ assert (abi = EABI_VFP);
+ assert (!fpu >= VFPv3_D16);
+ if !float <= last_float then begin
+ loc.(i) <- phys_reg !float;
+ incr float
+ end else begin
+ ofs := Misc.align !ofs size_float;
+ loc.(i) <- stack_slot (make_stack !ofs) Float;
+ ofs := !ofs + size_float
+ end
done;
- (loc, Misc.align !ofs 8)
+ (loc, Misc.align !ofs 8) (* keep stack 8-aligned *)
let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
+(* OCaml calling convention:
+ first integer args in r0...r7
+ first float args in d0...d15 (EABI+VFP)
+ remaining args on stack.
+ Return values in r0...r7 or d0...d15. *)
+
let loc_arguments arg =
- calling_conventions 0 7 outgoing arg
+ calling_conventions 0 7 100 115 outgoing arg
let loc_parameters arg =
- let (loc, ofs) = calling_conventions 0 7 incoming arg in loc
+ let (loc, _) = calling_conventions 0 7 100 115 incoming arg in loc
let loc_results res =
- let (loc, ofs) = calling_conventions 0 7 not_supported res in loc
+ let (loc, _) = calling_conventions 0 7 100 115 not_supported res in loc
+
+(* C calling convention:
+ first integer args in r0...r3
+ first float args in d0...d7 (EABI+VFP)
+ remaining args on stack.
+ Return values in r0...r1 or d0. *)
let loc_external_arguments arg =
- calling_conventions 0 3 outgoing arg
+ calling_conventions 0 3 100 107 outgoing arg
let loc_external_results res =
- let (loc, ofs) = calling_conventions 0 1 not_supported res in loc
+ let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
let loc_exn_bucket = phys_reg 0
(* Registers destroyed by operations *)
-let destroyed_at_c_call = (* r4-r7 preserved *)
- Array.of_list(List.map phys_reg [0;1;2;3;8])
+let destroyed_at_alloc = (* r0-r6, d0-d15 preserved *)
+ Array.of_list (List.map
+ phys_reg
+ [7;8;
+ 116;116;118;119;120;121;122;123;
+ 124;125;126;127;128;129;130;131])
+
+let destroyed_at_c_call =
+ Array.of_list (List.map
+ phys_reg
+ (match abi with
+ EABI -> (* r4-r7 preserved *)
+ [0;1;2;3;8;
+ 100;101;102;103;104;105;106;107;
+ 108;109;110;111;112;113;114;115;
+ 116;116;118;119;120;121;122;123;
+ 124;125;126;127;128;129;130;131]
+ | EABI_VFP -> (* r4-r7, d8-d15 preserved *)
+ [0;1;2;3;8;
+ 100;101;102;103;104;105;106;107;
+ 116;116;118;119;120;121;122;123;
+ 124;125;126;127;128;129;130;131]))
let destroyed_at_oper = function
- Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
- | Iop(Iextcall(_, false)) -> destroyed_at_c_call
- | Iop(Ialloc(_)) -> [|phys_reg 8|] (* r12 destroyed *)
+ Iop(Icall_ind | Icall_imm _ )
+ | Iop(Iextcall(_, true)) ->
+ all_phys_regs
+ | Iop(Iextcall(_, false)) ->
+ destroyed_at_c_call
+ | Iop(Ialloc n) ->
+ destroyed_at_alloc
+ | Iop(Iconst_symbol _) when !pic_code ->
+ [|phys_reg 3; phys_reg 8|] (* r3 and r12 destroyed *)
+ | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
+ [|phys_reg 107|] (* d7 (s14-s15) destroyed *)
| _ -> [||]
let destroyed_at_raise = all_phys_regs
@@ -128,15 +211,16 @@ let destroyed_at_raise = all_phys_regs
(* Maximal register pressure *)
let safe_register_pressure = function
- Iextcall(_, _) -> 4
+ Iextcall(_, _) -> 5
| _ -> 9
+
let max_register_pressure = function
- Iextcall(_, _) -> [| 4 |]
- | _ -> [| 9 |]
+ Iextcall(_, _) -> [| 5; 9; 9 |]
+ | _ -> [| 9; 16; 32 |]
(* Layout of the stack *)
-let num_stack_slots = [| 0 |]
+let num_stack_slots = [| 0; 0; 0 |]
let contains_calls = ref false
(* Calling the assembler *)
@@ -144,6 +228,3 @@ let contains_calls = ref false
let assemble_file infile outfile =
Ccomp.command (Config.asm ^ " -o " ^
Filename.quote outfile ^ " " ^ Filename.quote infile)
-
-open Clflags;;
-open Config;;
diff --git a/asmcomp/arm/reload.ml b/asmcomp/arm/reload.ml
index 0917438..c5b137a 100644
--- a/asmcomp/arm/reload.ml
+++ b/asmcomp/arm/reload.ml
@@ -1,6 +1,6 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
@@ -10,7 +10,7 @@
(* *)
(***********************************************************************)
-(* $Id: reload.ml 2553 1999-11-17 18:59:06Z xleroy $ *)
+(* $Id$ *)
(* Reloading for the ARM *)
diff --git a/asmcomp/arm/scheduling.ml b/asmcomp/arm/scheduling.ml
index 930e1bc..4b47733 100644
--- a/asmcomp/arm/scheduling.ml
+++ b/asmcomp/arm/scheduling.ml
@@ -1,51 +1,79 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1996 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: scheduling.ml 2553 1999-11-17 18:59:06Z xleroy $ *)
+(* $Id$ *)
+open Arch
open Mach
-(* Instruction scheduling for the Sparc *)
+(* Instruction scheduling for the ARM *)
-class scheduler = object
+class scheduler = object(self)
-inherit Schedgen.scheduler_generic
+inherit Schedgen.scheduler_generic as super
-(* Scheduling -- based roughly on the Strong ARM *)
+(* Scheduling -- based roughly on the ARM11 (ARMv6) *)
method oper_latency = function
- Ireload -> 2
- | Iload(_, _) -> 2
- | Iconst_symbol _ -> 2 (* turned into a load *)
- | Iconst_float _ -> 2 (* turned into a load *)
- | Iintop(Imul) -> 3
- | Iintop_imm(Imul, _) -> 3
- (* No data available for floatops, let's make educated guesses *)
- | Iaddf -> 3
- | Isubf -> 3
- | Imulf -> 5
- | Idivf -> 15
+ (* Loads have a latency of two cycles in general *)
+ Iconst_symbol _
+ | Iconst_float _
+ | Iload(_, _)
+ | Ireload
+ | Ifloatofint (* mcr/mrc count as memory access *)
+ | Iintoffloat -> 2
+ (* Multiplys have a latency of two cycles *)
+ | Iintop Imul
+ | Ispecific(Imuladd | Imulsub) -> 2
+ (* VFP instructions *)
+ | Iaddf
+ | Isubf
+ | Idivf
+ | Imulf | Ispecific Inegmulf
+ | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf)
+ | Ispecific Isqrtf
+ | Inegf | Iabsf when !fpu >= VFPv3_D16 -> 2
+ (* Everything else *)
| _ -> 1
-(* Issue cycles. Rough approximations *)
+method! is_checkbound = function
+ Ispecific(Ishiftcheckbound _) -> true
+ | op -> super#is_checkbound op
+
+(* Issue cycles. Rough approximations *)
method oper_issue_cycles = function
Ialloc _ -> 4
- | Iintop(Icomp _) -> 3
- | Iintop(Icheckbound) -> 2
- | Iintop_imm(Idiv, _) -> 4
- | Iintop_imm(Imod, _) -> 6
+ | Iintop(Ilsl | Ilsr | Iasr) -> 2
+ | Iintop(Icomp _)
| Iintop_imm(Icomp _, _) -> 3
+ | Iintop(Icheckbound)
| Iintop_imm(Icheckbound, _) -> 2
+ | Ispecific(Ishiftcheckbound _) -> 3
+ | Iintop_imm(Idiv, _) -> 4
+ | Iintop_imm(Imod, _) -> 6
+ | Iintop Imul
+ | Ispecific(Imuladd | Imulsub) -> 2
+ (* VFP instructions *)
+ | Iaddf
+ | Isubf -> 7
+ | Imulf
+ | Ispecific Inegmulf -> 9
+ | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) -> 17
+ | Idivf
+ | Ispecific Isqrtf -> 27
+ | Inegf | Iabsf | Iconst_float _ when !fpu >= VFPv3_D16 -> 4
+ (* Everything else *)
| _ -> 1
end
diff --git a/asmcomp/arm/selection.ml b/asmcomp/arm/selection.ml
index f09d146..94d0367 100644
--- a/asmcomp/arm/selection.ml
+++ b/asmcomp/arm/selection.ml
@@ -1,54 +1,77 @@
(***********************************************************************)
(* *)
-(* Objective Caml *)
+(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
-(* $Id: selection.ml 10295 2010-04-22 12:39:40Z xleroy $ *)
+(* $Id$ *)
(* Instruction selection for the ARM processor *)
-open Misc
-open Cmm
-open Reg
open Arch
-open Proc
+open Cmm
open Mach
+open Misc
+open Proc
+open Reg
-(* Immediate operands are 8-bit immediate values, zero-extended, and rotated
- right by 0, 2, 4, ... 30 bits.
- To avoid problems with Caml's 31-bit arithmetic,
- we check only with 8-bit values shifted left 0 to 22 bits. *)
-
-let rec is_immed n shift =
- if shift > 22 then false
- else if n land (0xFF lsl shift) = n then true
- else is_immed n (shift + 2)
+let is_offset chunk n =
+ match chunk with
+ (* VFPv3 load/store have -1020 to 1020 *)
+ Single | Double | Double_u
+ when !fpu >= VFPv3_D16 ->
+ n >= -1020 && n <= 1020
+ (* ARM load/store byte/word have -4095 to 4095 *)
+ | Byte_unsigned | Byte_signed
+ | Thirtytwo_unsigned | Thirtytwo_signed
+ | Word | Single
+ when not !thumb ->
+ n >= -4095 && n <= 4095
+ (* Thumb-2 load/store have -255 to 4095 *)
+ | _ when !arch > ARMv6 && !thumb ->
+ n >= -255 && n <= 4095
+ (* Everything else has -255 to 255 *)
+ | _ ->
+ n >= -255 && n <= 255
-(* We have 12-bit + sign byte offsets for word accesses,
- 8-bit + sign word offsets for float accesses,
- and 8-bit + sign byte offsets for bytes and shorts.
- Use lowest common denominator. *)
+let is_intconst = function
+ Cconst_int _ -> true
+ | _ -> false
-let is_offset n = n < 256 && n > -256
+(* Special constraints on operand and result registers *)
-let is_intconst = function Cconst_int n -> true | _ -> false
+exception Use_default
-(* Soft emulation of float comparisons *)
+let r1 = phys_reg 1
-let float_comparison_function = function
- | Ceq -> "__eqdf2"
- | Cne -> "__nedf2"
- | Clt -> "__ltdf2"
- | Cle -> "__ledf2"
- | Cgt -> "__gtdf2"
- | Cge -> "__gedf2"
+let pseudoregs_for_operation op arg res =
+ match op with
+ (* For mul rd,rm,rs and mla rd,rm,rs,ra (pre-ARMv6) the registers rm
+ and rd must be different. We deal with this by pretending that rm
+ is also a result of the mul / mla operation. *)
+ Iintop Imul | Ispecific Imuladd when !arch < ARMv6 ->
+ (arg, [| res.(0); arg.(0) |])
+ (* Soft-float Iabsf and Inegf: arg.(0) and res.(0) must be the same *)
+ | Iabsf | Inegf when !fpu = Soft ->
+ ([|res.(0); arg.(1)|], res)
+ (* VFPv3 Imuladdf...Inegmulsubf: arg.(0) and res.(0) must be the same *)
+ | Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf) ->
+ let arg' = Array.copy arg in
+ arg'.(0) <- res.(0);
+ (arg', res)
+ (* We use __aeabi_idivmod for Cmodi only, and hence we care only
+ for the remainder in r1, so fix up the destination register. *)
+ | Iextcall("__aeabi_idivmod", false) ->
+ (arg, [|r1|])
+ (* Other instructions are regular *)
+ | _ -> raise Use_default
(* Instruction selection *)
class selector = object(self)
@@ -56,23 +79,32 @@ class selector = object(self)
inherit Selectgen.selector_generic as super
method! regs_for tyv =
- (* Expand floats into pairs of integer registers *)
- let nty = Array.length tyv in
- let rec expand i =
- if i >= nty then [] else begin
- match tyv.(i) with
- | Float -> Int :: Int :: expand (i+1)
- | ty -> ty :: expand (i+1)
- end in
- Reg.createv (Array.of_list (expand 0))
+ Reg.createv (if !fpu = Soft then begin
+ (* Expand floats into pairs of integer registers *)
+ let rec expand = function
+ [] -> []
+ | Float :: tyl -> Int :: Int :: expand tyl
+ | ty :: tyl -> ty :: expand tyl in
+ Array.of_list (expand (Array.to_list tyv))
+ end else begin
+ tyv
+ end)
method is_immediate n =
- n land 0xFF = n || is_immed n 2
+ is_immediate (Int32.of_int n)
+
+method! is_simple_expr = function
+ (* inlined floating-point ops are simple if their arguments are *)
+ | Cop(Cextcall("sqrt", _, _, _), args) when !fpu >= VFPv3_D16 ->
+ List.for_all self#is_simple_expr args
+ | e -> super#is_simple_expr e
-method select_addressing = function
- Cop(Cadda, [arg; Cconst_int n]) when is_offset n ->
+method select_addressing chunk = function
+ | Cop(Cadda, [arg; Cconst_int n])
+ when is_offset chunk n ->
(Iindexed n, arg)
- | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])]) when is_offset n ->
+ | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])])
+ when is_offset chunk n ->
(Iindexed n, Cop(Cadda, [arg1; arg2]))
| arg ->
(Iindexed 0, arg)
@@ -91,109 +123,146 @@ method select_shift_arith op shiftop shiftrevop args =
| [Cop(Casr, [arg1; Cconst_int n]); arg2]
when n > 0 && n < 32 && not(is_intconst arg1) ->
(Ispecific(Ishiftarith(shiftrevop, -n)), [arg2; arg1])
- | _ ->
- super#select_operation op args
+ | args ->
+ begin match super#select_operation op args with
+ (* Recognize multiply and add *)
+ (Iintop Iadd, [Cop(Cmuli, args); arg3])
+ | (Iintop Iadd, [arg3; Cop(Cmuli, args)]) as op_args ->
+ begin match self#select_operation Cmuli args with
+ (Iintop Imul, [arg1; arg2]) ->
+ (Ispecific Imuladd, [arg1; arg2; arg3])
+ | _ -> op_args
+ end
+ (* Recognize multiply and subtract *)
+ | (Iintop Isub, [arg3; Cop(Cmuli, args)]) as op_args
+ when !arch > ARMv6 ->
+ begin match self#select_operation Cmuli args with
+ (Iintop Imul, [arg1; arg2]) ->
+ (Ispecific Imulsub, [arg1; arg2; arg3])
+ | _ -> op_args
+ end
+ | op_args -> op_args
+ end
method! select_operation op args =
- match op with
- Cadda | Caddi ->
- begin match args with
- [arg1; Cconst_int n] when n < 0 && self#is_immediate (-n) ->
- (Iintop_imm(Isub, -n), [arg1])
- | _ ->
- self#select_shift_arith op Ishiftadd Ishiftadd args
- end
- | Csuba | Csubi ->
- begin match args with
- [arg1; Cconst_int n] when n < 0 && self#is_immediate (-n) ->
- (Iintop_imm(Iadd, -n), [arg1])
- | [Cconst_int n; arg2] when self#is_immediate n ->
- (Ispecific(Irevsubimm n), [arg2])
- | _ ->
- self#select_shift_arith op Ishiftsub Ishiftsubrev args
- end
- | Cmuli -> (* no multiply immediate *)
+ match (op, args) with
+ (* Recognize special shift arithmetic *)
+ ((Cadda | Caddi), [arg; Cconst_int n])
+ when n < 0 && self#is_immediate (-n) ->
+ (Iintop_imm(Isub, -n), [arg])
+ | ((Cadda | Caddi as op), args) ->
+ self#select_shift_arith op Ishiftadd Ishiftadd args
+ | ((Csuba | Csubi), [arg; Cconst_int n])
+ when n < 0 && self#is_immediate (-n) ->
+ (Iintop_imm(Iadd, -n), [arg])
+ | ((Csuba | Csubi), [Cconst_int n; arg])
+ when self#is_immediate n ->
+ (Ispecific(Irevsubimm n), [arg])
+ | ((Csuba | Csubi as op), args) ->
+ self#select_shift_arith op Ishiftsub Ishiftsubrev args
+ | (Ccheckbound _, [Cop(Clsr, [arg1; Cconst_int n]); arg2])
+ when n > 0 && n < 32 && not(is_intconst arg2) ->
+ (Ispecific(Ishiftcheckbound n), [arg1; arg2])
+ (* ARM does not support immediate operands for multiplication *)
+ | (Cmuli, args) ->
(Iintop Imul, args)
- | Cdivi ->
- begin match args with
- [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
- (Iintop_imm(Idiv, n), [arg1])
- | _ ->
- (Iextcall("__divsi3", false), args)
- end
- | Cmodi ->
- begin match args with
- [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
- (Iintop_imm(Imod, n), [arg1])
- | _ ->
- (Iextcall("__modsi3", false), args)
- end
- | Ccheckbound _ ->
- begin match args with
- [Cop(Clsr, [arg1; Cconst_int n]); arg2]
- when n > 0 && n < 32 && not(is_intconst arg2) ->
- (Ispecific(Ishiftcheckbound n), [arg1; arg2])
- | _ ->
- super#select_operation op args
- end
- (* Turn floating-point operations into library function calls *)
- | Caddf -> (Iextcall("__adddf3", false), args)
- | Csubf -> (Iextcall("__subdf3", false), args)
- | Cmulf -> (Iextcall("__muldf3", false), args)
- | Cdivf -> (Iextcall("__divdf3", false), args)
- | Cfloatofint -> (Iextcall("__floatsidf", false), args)
- | Cintoffloat -> (Iextcall("__fixdfsi", false), args)
- | Ccmpf comp ->
- (Iintop_imm(Icomp(Isigned comp), 0),
- [Cop(Cextcall(float_comparison_function comp,
- typ_int, false, Debuginfo.none),
- args)])
+ (* Turn integer division/modulus into runtime ABI calls *)
+ | (Cdivi, [arg; Cconst_int n])
+ when n = 1 lsl Misc.log2 n ->
+ (Iintop_imm(Idiv, n), [arg])
+ | (Cdivi, args) ->
+ (Iextcall("__aeabi_idiv", false), args)
+ | (Cmodi, [arg; Cconst_int n])
+ when n = 1 lsl Misc.log2 n ->
+ (Iintop_imm(Imod, n), [arg])
+ | (Cmodi, args) ->
+ (* See above for fix up of return register *)
+ (Iextcall("__aeabi_idivmod", false), args)
+ (* Turn floating-point operations into runtime ABI calls for softfp *)
+ | (op, args) when !fpu = Soft -> self#select_operation_softfp op args
+ (* Select operations for VFPv3 *)
+ | (op, args) -> self#select_operation_vfpv3 op args
+
+method private select_operation_softfp op args =
+ match (op, args) with
+ (* Turn floating-point operations into runtime ABI calls *)
+ | (Caddf, args) -> (Iextcall("__aeabi_dadd", false), args)
+ | (Csubf, args) -> (Iextcall("__aeabi_dsub", false), args)
+ | (Cmulf, args) -> (Iextcall("__aeabi_dmul", false), args)
+ | (Cdivf, args) -> (Iextcall("__aeabi_ddiv", false), args)
+ | (Cfloatofint, args) -> (Iextcall("__aeabi_i2d", false), args)
+ | (Cintoffloat, args) -> (Iextcall("__aeabi_d2iz", false), args)
+ | (Ccmpf comp, args) ->
+ let func = (match comp with
+ Cne (* there's no __aeabi_dcmpne *)
+ | Ceq -> "__aeabi_dcmpeq"
+ | Clt -> "__aeabi_dcmplt"
+ | Cle -> "__aeabi_dcmple"
+ | Cgt -> "__aeabi_dcmpgt"
+ | Cge -> "__aeabi_dcmpge") in
+ let comp = (match comp with
+ Cne -> Ceq (* eq 0 => false *)
+ | _ -> Cne (* ne 0 => true *)) in
+ (Iintop_imm(Icomp(Iunsigned comp), 0),
+ [Cop(Cextcall(func, typ_int, false, Debuginfo.none), args)])
(* Add coercions around loads and stores of 32-bit floats *)
- | Cload Single ->
- (Iextcall("__extendsfdf2", false), [Cop(Cload Word, args)])
- | Cstore Single ->
- begin match args with
- | [arg1; arg2] ->
- let arg2' =
- Cop(Cextcall("__truncdfsf2", typ_int, false, Debuginfo.none),
- [arg2]) in
- self#select_operation (Cstore Word) [arg1; arg2']
- | _ -> assert false
- end
+ | (Cload Single, args) ->
+ (Iextcall("__aeabi_f2d", false), [Cop(Cload Word, args)])
+ | (Cstore Single, [arg1; arg2]) ->
+ let arg2' =
+ Cop(Cextcall("__aeabi_d2f", typ_int, false, Debuginfo.none),
+ [arg2]) in
+ self#select_operation (Cstore Word) [arg1; arg2']
(* Other operations are regular *)
- | _ -> super#select_operation op args
+ | (op, args) -> super#select_operation op args
+
+method private select_operation_vfpv3 op args =
+ match (op, args) with
+ (* Recognize floating-point negate and multiply *)
+ (Cnegf, [Cop(Cmulf, args)]) ->
+ (Ispecific Inegmulf, args)
+ (* Recognize floating-point multiply and add *)
+ | (Caddf, [arg; Cop(Cmulf, args)])
+ | (Caddf, [Cop(Cmulf, args); arg]) ->
+ (Ispecific Imuladdf, arg :: args)
+ (* Recognize floating-point negate, multiply and subtract *)
+ | (Csubf, [Cop(Cnegf, [arg]); Cop(Cmulf, args)])
+ | (Csubf, [Cop(Cnegf, [Cop(Cmulf, args)]); arg]) ->
+ (Ispecific Inegmulsubf, arg :: args)
+ (* Recognize floating-point negate, multiply and add *)
+ | (Csubf, [arg; Cop(Cmulf, args)]) ->
+ (Ispecific Inegmuladdf, arg :: args)
+ (* Recognize multiply and subtract *)
+ | (Csubf, [Cop(Cmulf, args); arg]) ->
+ (Ispecific Imulsubf, arg :: args)
+ (* Recognize floating-point square root *)
+ | (Cextcall("sqrt", _, false, _), args) ->
+ (Ispecific Isqrtf, args)
+ (* Other operations are regular *)
+ | (op, args) -> super#select_operation op args
method! select_condition = function
- | Cop(Ccmpf cmp, args) ->
- (Iinttest_imm(Isigned cmp, 0),
- Cop(Cextcall(float_comparison_function cmp,
- typ_int, false, Debuginfo.none),
- args))
+ (* Turn floating-point comparisons into runtime ABI calls *)
+ Cop(Ccmpf _ as op, args) when !fpu = Soft ->
+ begin match self#select_operation_softfp op args with
+ (Iintop_imm(Icomp(Iunsigned Ceq), 0), [arg]) -> (Ifalsetest, arg)
+ | (Iintop_imm(Icomp(Iunsigned Cne), 0), [arg]) -> (Itruetest, arg)
+ | _ -> assert false
+ end
| expr ->
super#select_condition expr
-(* Deal with some register irregularities:
-
-1- In mul rd, rm, rs, the registers rm and rd must be different.
- We deal with this by pretending that rm is also a result of the mul
- operation.
-
-2- For Inegf and Iabsf, force arguments and results in (r0, r1);
- this simplifies code generation later.
-*)
+(* Deal with some register constraints *)
method! insert_op_debug op dbg rs rd =
- match op with
- | Iintop(Imul) ->
- self#insert_debug (Iop op) dbg rs [| rd.(0); rs.(0) |]; rd
- | Iabsf | Inegf ->
- let r = [| phys_reg 0; phys_reg 1 |] in
- self#insert_moves rs r;
- self#insert_debug (Iop op) dbg r r;
- self#insert_moves r rd;
- rd
- | _ ->
- super#insert_op_debug op dbg rs rd
+ try
+ let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
+ self#insert_moves rs rsrc;
+ self#insert_debug (Iop op) dbg rsrc rdst;
+ self#insert_moves rdst rd;
+ rd
+ with Use_default ->
+ super#insert_op_debug op dbg rs rd
end
diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml
index 1700bf3..827a63d 100644
--- a/asmcomp/i386/selection.ml
+++ b/asmcomp/i386/selection.ml
@@ -168,7 +168,7 @@ method! is_simple_expr e =
| _ ->
super#is_simple_expr e
-method select_addressing exp =
+method select_addressing chunk exp =
match select_addr exp with
(Asymbol s, d) ->
(Ibased(s, d), Ctuple [])
@@ -200,7 +200,7 @@ method! select_operation op args =
match op with
(* Recognize the LEA instruction *)
Caddi | Cadda | Csubi | Csuba ->
- begin match self#select_addressing (Cop(op, args)) with
+ begin match self#select_addressing Word (Cop(op, args)) with
(Iindexed d, _) -> super#select_operation op args
| (Iindexed2 0, _) -> super#select_operation op args
| (addr, arg) -> (Ispecific(Ilea addr), [arg])
@@ -233,7 +233,7 @@ method! select_operation op args =
begin match args with
[loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])]
when loc = loc' ->
- let (addr, arg) = self#select_addressing loc in
+ let (addr, arg) = self#select_addressing Word loc in
(Ispecific(Ioffset_loc(n, addr)), [arg])
| _ ->
super#select_operation op args
@@ -250,11 +250,11 @@ method! select_operation op args =
method select_floatarith regular_op reversed_op mem_op mem_rev_op args =
match args with
[arg1; Cop(Cload chunk, [loc2])] ->
- let (addr, arg2) = self#select_addressing loc2 in
+ let (addr, arg2) = self#select_addressing chunk loc2 in
(Ispecific(Ifloatarithmem(chunk_double chunk, mem_op, addr)),
[arg1; arg2])
| [Cop(Cload chunk, [loc1]); arg2] ->
- let (addr, arg1) = self#select_addressing loc1 in
+ let (addr, arg1) = self#select_addressing chunk loc1 in
(Ispecific(Ifloatarithmem(chunk_double chunk, mem_rev_op, addr)),
[arg2; arg1])
| [arg1; arg2] ->
@@ -295,10 +295,10 @@ method select_push exp =
| Cconst_natpointer n -> (Ispecific(Ipush_int n), Ctuple [])
| Cconst_symbol s -> (Ispecific(Ipush_symbol s), Ctuple [])
| Cop(Cload Word, [loc]) ->
- let (addr, arg) = self#select_addressing loc in
+ let (addr, arg) = self#select_addressing Word loc in
(Ispecific(Ipush_load addr), arg)
| Cop(Cload Double_u, [loc]) ->
- let (addr, arg) = self#select_addressing loc in
+ let (addr, arg) = self#select_addressing Double_u loc in
(Ispecific(Ipush_load_float addr), arg)
| _ -> (Ispecific(Ipush), exp)
diff --git a/asmcomp/power/selection.ml b/asmcomp/power/selection.ml
index ed15efb..0532d6b 100644
--- a/asmcomp/power/selection.ml
+++ b/asmcomp/power/selection.ml
@@ -52,7 +52,7 @@ inherit Selectgen.selector_generic as super
method is_immediate n = (n <= 32767) && (n >= -32768)
-method select_addressing exp =
+method select_addressing chunk exp =
match select_addr exp with
(Asymbol s, d) ->
(Ibased(s, d), Ctuple [])
diff --git a/asmcomp/power64/selection.ml b/asmcomp/power64/selection.ml
index 7b8e2a4..d2325e1 100644
--- a/asmcomp/power64/selection.ml
+++ b/asmcomp/power64/selection.ml
@@ -52,7 +52,7 @@ inherit Selectgen.selector_generic as super
method is_immediate n = (n <= 32767) && (n >= -32768)
-method select_addressing exp =
+method select_addressing chunk exp =
match select_addr exp with
(Asymbol s, d) ->
(Ibased(s, d), Ctuple [])
diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml
index 2fc40f7..0bc9efb 100644
--- a/asmcomp/selectgen.ml
+++ b/asmcomp/selectgen.ml
@@ -204,7 +204,7 @@ method virtual is_immediate : int -> bool
(* Selection of addressing modes *)
method virtual select_addressing :
- Cmm.expression -> Arch.addressing_mode * Cmm.expression
+ Cmm.memory_chunk -> Cmm.expression -> Arch.addressing_mode * Cmm.expression
(* Default instruction selection for stores (of words) *)
@@ -219,10 +219,10 @@ method select_operation op args =
| (Capply(ty, dbg), _) -> (Icall_ind, args)
| (Cextcall(s, ty, alloc, dbg), _) -> (Iextcall(s, alloc), args)
| (Cload chunk, [arg]) ->
- let (addr, eloc) = self#select_addressing arg in
+ let (addr, eloc) = self#select_addressing chunk arg in
(Iload(chunk, addr), [eloc])
| (Cstore chunk, [arg1; arg2]) ->
- let (addr, eloc) = self#select_addressing arg1 in
+ let (addr, eloc) = self#select_addressing chunk arg1 in
if chunk = Word then begin
let (op, newarg2) = self#select_store addr arg2 in
(op, [newarg2; eloc])
@@ -366,7 +366,7 @@ method insert_move src dst =
self#insert (Iop Imove) [|src|] [|dst|]
method insert_moves src dst =
- for i = 0 to Array.length src - 1 do
+ for i = 0 to min (Array.length src) (Array.length dst) - 1 do
self#insert_move src.(i) dst.(i)
done
@@ -490,9 +490,8 @@ method emit_expr env exp =
let (loc_arg, stack_ofs) =
self#emit_extcall_args env new_args in
let rd = self#regs_for ty in
- let loc_res = Proc.loc_external_results rd in
- self#insert_debug (Iop(Iextcall(lbl, alloc))) dbg
- loc_arg loc_res;
+ let loc_res = self#insert_op_debug (Iextcall(lbl, alloc)) dbg
+ loc_arg (Proc.loc_external_results rd) in
self#insert_move_results loc_res rd stack_ofs;
Some rd
| Ialloc _ ->
diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli
index ae53cda..69dae6d 100644
--- a/asmcomp/selectgen.mli
+++ b/asmcomp/selectgen.mli
@@ -26,7 +26,7 @@ class virtual selector_generic : object
(* Must be defined to indicate whether a constant is a suitable
immediate operand to arithmetic instructions *)
method virtual select_addressing :
- Cmm.expression -> Arch.addressing_mode * Cmm.expression
+ Cmm.memory_chunk -> Cmm.expression -> Arch.addressing_mode * Cmm.expression
(* Must be defined to select addressing modes *)
method is_simple_expr: Cmm.expression -> bool
(* Can be overridden to reflect special extcalls known to be pure *)
diff --git a/asmcomp/sparc/selection.ml b/asmcomp/sparc/selection.ml
index 82758dc..c1f30fd 100644
--- a/asmcomp/sparc/selection.ml
+++ b/asmcomp/sparc/selection.ml
@@ -26,7 +26,7 @@ inherit Selectgen.selector_generic as super
method is_immediate n = (n <= 4095) && (n >= -4096)
-method select_addressing = function
+method select_addressing chunk = function
Cconst_symbol s ->
(Ibased(s, 0), Ctuple [])
| Cop(Cadda, [Cconst_symbol s; Cconst_int n]) ->
diff --git a/asmrun/arm.S b/asmrun/arm.S
index 1313e9c..6482956 100644
--- a/asmrun/arm.S
+++ b/asmrun/arm.S
@@ -1,286 +1,411 @@
/***********************************************************************/
/* */
-/* Objective Caml */
+/* OCaml */
/* */
-/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
+/* Benedikt Meurer, University of Siegen */
/* */
-/* Copyright 1998 Institut National de Recherche en Informatique et */
-/* en Automatique. All rights reserved. This file is distributed */
-/* under the terms of the GNU Library General Public License, with */
-/* the special exception on linking described in file ../LICENSE. */
+/* Copyright 1998 Institut National de Recherche en Informatique */
+/* et en Automatique. Copyright 2012 Benedikt Meurer. All rights */
+/* reserved. This file is distributed under the terms of the GNU */
+/* Library General Public License, with the special exception on */
+/* linking described in file ../LICENSE. */
/* */
/***********************************************************************/
-/* $Id: arm.S 9252 2009-05-04 13:46:46Z xleroy $ */
+/* $Id$ */
/* Asm part of the runtime system, ARM processor */
+/* Must be preprocessed by cpp */
-trap_ptr .req r11
-alloc_ptr .req r8
-alloc_limit .req r10
-
+ .syntax unified
.text
+#if defined(SYS_linux_eabihf)
+ .arch armv7-a
+ .fpu vfpv3-d16
+ .thumb
+#elif defined(SYS_linux_eabi)
+ .arch armv4t
+ .arm
+
+ /* Compatibility macros */
+ .macro blx reg
+ mov lr, pc
+ bx \reg
+ .endm
+ .macro cbz reg, lbl
+ cmp \reg, #0
+ beq \lbl
+ .endm
+ .macro vpop regs
+ .endm
+ .macro vpush regs
+ .endm
+#endif
+
+trap_ptr .req r8
+alloc_ptr .req r10
+alloc_limit .req r11
+
+/* Support for profiling with gprof */
+
+#if defined(PROFILING) && (defined(SYS_linux_eabihf) || defined(SYS_linux_eabi))
+#define PROFILE \
+ push {lr}; \
+ bl __gnu_mcount_nc
+#else
+#define PROFILE
+#endif
/* Allocation functions and GC interface */
- .globl caml_call_gc
+ .globl caml_system__code_begin
+caml_system__code_begin:
+
+ .align 2
+ .globl caml_call_gc
.type caml_call_gc, %function
caml_call_gc:
- /* Record return address and desired size */
- /* Can use alloc_limit as a temporary since it will be reloaded by
- invoke_gc */
- ldr alloc_limit, .Lcaml_last_return_address
- str lr, [alloc_limit, #0]
- ldr alloc_limit, .Lcaml_requested_size
- str r12, [alloc_limit, #0]
- /* Branch to shared GC code */
- bl .Linvoke_gc
- /* Finish allocation */
- ldr r12, .Lcaml_requested_size
- ldr r12, [r12, #0]
- sub alloc_ptr, alloc_ptr, r12
+ PROFILE
+ /* Record return address */
+ ldr r12, =caml_last_return_address
+ str lr, [r12]
+.Lcaml_call_gc:
+ /* Record lowest stack address */
+ ldr r12, =caml_bottom_of_stack
+ str sp, [r12]
+ /* Save caller floating-point registers on the stack */
+ vpush {d0-d7}
+ /* Save integer registers and return address on the stack */
+ push {r0-r7,r12,lr}
+ /* Store pointer to saved integer registers in caml_gc_regs */
+ ldr r12, =caml_gc_regs
+ str sp, [r12]
+ /* Save current allocation pointer for debugging purposes */
+ ldr alloc_limit, =caml_young_ptr
+ str alloc_ptr, [alloc_limit]
+ /* Save trap pointer in case an exception is raised during GC */
+ ldr r12, =caml_exception_pointer
+ str trap_ptr, [r12]
+ /* Call the garbage collector */
+ bl caml_garbage_collection
+ /* Restore integer registers and return address from the stack */
+ pop {r0-r7,r12,lr}
+ /* Restore floating-point registers from the stack */
+ vpop {d0-d7}
+ /* Reload new allocation pointer and limit */
+ /* alloc_limit still points to caml_young_ptr */
+ ldr r12, =caml_young_limit
+ ldr alloc_ptr, [alloc_limit]
+ ldr alloc_limit, [r12]
+ /* Return to caller */
bx lr
+ .type caml_call_gc, %function
+ .size caml_call_gc, .-caml_call_gc
- .globl caml_alloc1
+ .align 2
+ .globl caml_alloc1
.type caml_alloc1, %function
caml_alloc1:
- sub alloc_ptr, alloc_ptr, #8
+ PROFILE
+.Lcaml_alloc1:
+ sub alloc_ptr, alloc_ptr, 8
cmp alloc_ptr, alloc_limit
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
- /* Record return address */
- ldr r12, .Lcaml_last_return_address
- str lr, [r12, #0]
- /* Invoke GC */
- bl .Linvoke_gc
+ bcc 1f
+ bx lr
+1: /* Record return address */
+ ldr r7, =caml_last_return_address
+ str lr, [r7]
+ /* Call GC (preserves r7) */
+ bl .Lcaml_call_gc
+ /* Restore return address */
+ ldr lr, [r7]
/* Try again */
- b caml_alloc1
+ b .Lcaml_alloc1
+ .type caml_alloc1, %function
+ .size caml_alloc1, .-caml_alloc1
- .globl caml_alloc2
+ .align 2
+ .globl caml_alloc2
.type caml_alloc2, %function
caml_alloc2:
- sub alloc_ptr, alloc_ptr, #12
+ PROFILE
+.Lcaml_alloc2:
+ sub alloc_ptr, alloc_ptr, 12
cmp alloc_ptr, alloc_limit
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
- /* Record return address */
- ldr r12, .Lcaml_last_return_address
- str lr, [r12, #0]
- /* Invoke GC */
- bl .Linvoke_gc
+ bcc 1f
+ bx lr
+1: /* Record return address */
+ ldr r7, =caml_last_return_address
+ str lr, [r7]
+ /* Call GC (preserves r7) */
+ bl .Lcaml_call_gc
+ /* Restore return address */
+ ldr lr, [r7]
/* Try again */
- b caml_alloc2
+ b .Lcaml_alloc2
+ .type caml_alloc2, %function
+ .size caml_alloc2, .-caml_alloc2
- .globl caml_alloc3
+ .align 2
+ .globl caml_alloc3
.type caml_alloc3, %function
caml_alloc3:
- sub alloc_ptr, alloc_ptr, #16
+ PROFILE
+.Lcaml_alloc3:
+ sub alloc_ptr, alloc_ptr, 16
cmp alloc_ptr, alloc_limit
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
- /* Record return address */
- ldr r12, .Lcaml_last_return_address
- str lr, [r12, #0]
- /* Invoke GC */
- bl .Linvoke_gc
+ bcc 1f
+ bx lr
+1: /* Record return address */
+ ldr r7, =caml_last_return_address
+ str lr, [r7]
+ /* Call GC (preserves r7) */
+ bl .Lcaml_call_gc
+ /* Restore return address */
+ ldr lr, [r7]
/* Try again */
- b caml_alloc3
+ b .Lcaml_alloc3
+ .type caml_alloc3, %function
+ .size caml_alloc3, .-caml_alloc3
- .globl caml_allocN
+ .align 2
+ .globl caml_allocN
.type caml_allocN, %function
caml_allocN:
- sub alloc_ptr, alloc_ptr, r12
+ PROFILE
+.Lcaml_allocN:
+ sub alloc_ptr, alloc_ptr, r7
cmp alloc_ptr, alloc_limit
- movcs pc, lr /* Return if alloc_ptr >= alloc_limit */
- /* Record return address and desired size */
- /* Can use alloc_limit as a temporary since it will be reloaded by
- invoke_gc */
- ldr alloc_limit, .Lcaml_last_return_address
- str lr, [alloc_limit, #0]
- ldr alloc_limit, .Lcaml_requested_size
- str r12, [alloc_limit, #0]
- /* Invoke GC */
- bl .Linvoke_gc
+ bcc 1f
+ bx lr
+1: /* Record return address */
+ ldr r12, =caml_last_return_address
+ str lr, [r12]
+ /* Call GC (preserves r7) */
+ bl .Lcaml_call_gc
+ /* Restore return address */
+ ldr r12, =caml_last_return_address
+ ldr lr, [r12]
/* Try again */
- ldr r12, .Lcaml_requested_size
- ldr r12, [r12, #0]
- b caml_allocN
-
-/* Shared code to invoke the GC */
-.Linvoke_gc:
- /* Record lowest stack address */
- ldr r12, .Lcaml_bottom_of_stack
- str sp, [r12, #0]
- /* Save integer registers and return address on stack */
- stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r12,lr}
- /* Store pointer to saved integer registers in caml_gc_regs */
- ldr r12, .Lcaml_gc_regs
- str sp, [r12, #0]
- /* Save current allocation pointer for debugging purposes */
- ldr r12, .Lcaml_young_ptr
- str alloc_ptr, [r12, #0]
- /* Save trap pointer in case an exception is raised during GC */
- ldr r12, .Lcaml_exception_pointer
- str trap_ptr, [r12, #0]
- /* Call the garbage collector */
- bl caml_garbage_collection
- /* Restore the registers from the stack */
- ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r12}
- /* Reload return address */
- ldr r12, .Lcaml_last_return_address
- ldr lr, [r12, #0]
- /* Reload new allocation pointer and allocation limit */
- ldr r12, .Lcaml_young_ptr
- ldr alloc_ptr, [r12, #0]
- ldr r12, .Lcaml_young_limit
- ldr alloc_limit, [r12, #0]
- /* Return to caller */
- ldr r12, [sp], #4
- bx r12
+ b .Lcaml_allocN
+ .type caml_allocN, %function
+ .size caml_allocN, .-caml_allocN
-/* Call a C function from Caml */
-/* Function to call is in r12 */
+/* Call a C function from OCaml */
+/* Function to call is in r7 */
- .globl caml_c_call
+ .align 2
+ .globl caml_c_call
.type caml_c_call, %function
caml_c_call:
+ PROFILE
+ /* Record lowest stack address and return address */
+ ldr r5, =caml_last_return_address
+ ldr r6, =caml_bottom_of_stack
+ str lr, [r5]
+ str sp, [r6]
/* Preserve return address in callee-save register r4 */
mov r4, lr
- /* Record lowest stack address and return address */
- ldr r5, .Lcaml_last_return_address
- ldr r6, .Lcaml_bottom_of_stack
- str lr, [r5, #0]
- str sp, [r6, #0]
- /* Make the exception handler and alloc ptr available to the C code */
- ldr r6, .Lcaml_young_ptr
- ldr r7, .Lcaml_exception_pointer
- str alloc_ptr, [r6, #0]
- str trap_ptr, [r7, #0]
+ /* Make the exception handler alloc ptr available to the C code */
+ ldr r5, =caml_young_ptr
+ ldr r6, =caml_exception_pointer
+ str alloc_ptr, [r5]
+ str trap_ptr, [r6]
/* Call the function */
- mov lr, pc
- bx r12
+ blx r7
/* Reload alloc ptr and alloc limit */
- ldr r5, .Lcaml_young_limit
- ldr alloc_ptr, [r6, #0] /* r6 still points to caml_young_ptr */
- ldr alloc_limit, [r5, #0]
+ ldr r6, =caml_young_limit
+ ldr alloc_ptr, [r5] /* r5 still points to caml_young_ptr */
+ ldr alloc_limit, [r6]
/* Return */
bx r4
+ .type caml_c_call, %function
+ .size caml_c_call, .-caml_c_call
-/* Start the Caml program */
+/* Start the OCaml program */
- .globl caml_start_program
+ .align 2
+ .globl caml_start_program
.type caml_start_program, %function
caml_start_program:
- ldr r12, .Lcaml_program
+ PROFILE
+ ldr r12, =caml_program
/* Code shared with caml_callback* */
-/* Address of Caml code to call is in r12 */
-/* Arguments to the Caml code are in r0...r3 */
+/* Address of OCaml code to call is in r12 */
+/* Arguments to the OCaml code are in r0...r3 */
.Ljump_to_caml:
/* Save return address and callee-save registers */
- stmfd sp!, {r4,r5,r6,r7,r8,r10,r11,lr} /* 8-alignment */
+ vpush {d8-d15}
+ push {r4-r8,r10,r11,lr} /* 8-byte alignment */
/* Setup a callback link on the stack */
- sub sp, sp, #4*4 /* 8-alignment */
- ldr r4, .Lcaml_bottom_of_stack
- ldr r4, [r4, #0]
- str r4, [sp, #0]
- ldr r4, .Lcaml_last_return_address
- ldr r4, [r4, #0]
- str r4, [sp, #4]
- ldr r4, .Lcaml_gc_regs
- ldr r4, [r4, #0]
- str r4, [sp, #8]
- /* Setup a trap frame to catch exceptions escaping the Caml code */
- sub sp, sp, #4*2
- ldr r4, .Lcaml_exception_pointer
- ldr r4, [r4, #0]
- str r4, [sp, #0]
- ldr r4, .LLtrap_handler
- str r4, [sp, #4]
+ sub sp, sp, 4*4 /* 8-byte alignment */
+ ldr r4, =caml_bottom_of_stack
+ ldr r5, =caml_last_return_address
+ ldr r6, =caml_gc_regs
+ ldr r4, [r4]
+ ldr r5, [r5]
+ ldr r6, [r6]
+ str r4, [sp, 0]
+ str r5, [sp, 4]
+ str r6, [sp, 8]
+ /* Setup a trap frame to catch exceptions escaping the OCaml code */
+ sub sp, sp, 2*4
+ ldr r6, =caml_exception_pointer
+ ldr r5, =.Ltrap_handler
+ ldr r4, [r6]
+ str r4, [sp, 0]
+ str r5, [sp, 4]
mov trap_ptr, sp
/* Reload allocation pointers */
- ldr r4, .Lcaml_young_ptr
- ldr alloc_ptr, [r4, #0]
- ldr r4, .Lcaml_young_limit
- ldr alloc_limit, [r4, #0]
- /* Call the Caml code */
- mov lr, pc
- bx r12
+ ldr r4, =caml_young_ptr
+ ldr alloc_ptr, [r4]
+ ldr r4, =caml_young_limit
+ ldr alloc_limit, [r4]
+ /* Call the OCaml code */
+ blx r12
.Lcaml_retaddr:
/* Pop the trap frame, restoring caml_exception_pointer */
- ldr r4, .Lcaml_exception_pointer
- ldr r5, [sp, #0]
- str r5, [r4, #0]
- add sp, sp, #2 * 4
+ ldr r4, =caml_exception_pointer
+ ldr r5, [sp, 0]
+ str r5, [r4]
+ add sp, sp, 2*4
/* Pop the callback link, restoring the global variables */
.Lreturn_result:
- ldr r4, .Lcaml_bottom_of_stack
- ldr r5, [sp, #0]
- str r5, [r4, #0]
- ldr r4, .Lcaml_last_return_address
- ldr r5, [sp, #4]
- str r5, [r4, #0]
- ldr r4, .Lcaml_gc_regs
- ldr r5, [sp, #8]
- str r5, [r4, #0]
- add sp, sp, #4*4
+ ldr r4, =caml_bottom_of_stack
+ ldr r5, [sp, 0]
+ str r5, [r4]
+ ldr r4, =caml_last_return_address
+ ldr r5, [sp, 4]
+ str r5, [r4]
+ ldr r4, =caml_gc_regs
+ ldr r5, [sp, 8]
+ str r5, [r4]
+ add sp, sp, 4*4
/* Update allocation pointer */
- ldr r4, .Lcaml_young_ptr
- str alloc_ptr, [r4, #0]
+ ldr r4, =caml_young_ptr
+ str alloc_ptr, [r4]
/* Reload callee-save registers and return */
- ldmfd sp!, {r4,r5,r6,r7,r8,r10,r11,lr}
- bx lr
+ pop {r4-r8,r10,r11,lr}
+ vpop {d8-d15}
+ bx lr
+ .type .Lcaml_retaddr, %function
+ .size .Lcaml_retaddr, .-.Lcaml_retaddr
+ .type caml_start_program, %function
+ .size caml_start_program, .-caml_start_program
+
+/* The trap handler */
- /* The trap handler */
+ .align 2
.Ltrap_handler:
/* Save exception pointer */
- ldr r4, .Lcaml_exception_pointer
- str trap_ptr, [r4, #0]
+ ldr r12, =caml_exception_pointer
+ str trap_ptr, [r12]
/* Encode exception bucket as an exception result */
- orr r0, r0, #2
+ orr r0, r0, 2
/* Return it */
b .Lreturn_result
+ .type .Ltrap_handler, %function
+ .size .Ltrap_handler, .-.Ltrap_handler
+
+/* Raise an exception from OCaml */
+
+ .align 2
+ .globl caml_raise_exn
+caml_raise_exn:
+ PROFILE
+ /* Test if backtrace is active */
+ ldr r1, =caml_backtrace_active
+ ldr r1, [r1]
+ cbz r1, 1f
+ /* Preserve exception bucket in callee-save register r4 */
+ mov r4, r0
+ /* Stash the backtrace */
+ mov r1, lr /* arg2: pc of raise */
+ mov r2, sp /* arg3: sp of raise */
+ mov r3, trap_ptr /* arg4: sp of handler */
+ bl caml_stash_backtrace
+ /* Restore exception bucket */
+ mov r0, r4
+1: /* Cut stack at current trap handler */
+ mov sp, trap_ptr
+ /* Pop previous handler and addr of trap, and jump to it */
+ pop {trap_ptr, pc}
+ .type caml_raise_exn, %function
+ .size caml_raise_exn, .-caml_raise_exn
/* Raise an exception from C */
- .globl caml_raise_exception
+ .align 2
+ .globl caml_raise_exception
.type caml_raise_exception, %function
caml_raise_exception:
- /* Reload Caml allocation pointers */
- ldr r12, .Lcaml_young_ptr
- ldr alloc_ptr, [r12, #0]
- ldr r12, .Lcaml_young_limit
- ldr alloc_limit, [r12, #0]
- /* Cut stack at current trap handler */
- ldr r12, .Lcaml_exception_pointer
- ldr sp, [r12, #0]
+ PROFILE
+ /* Reload trap ptr, alloc ptr and alloc limit */
+ ldr trap_ptr, =caml_exception_pointer
+ ldr alloc_ptr, =caml_young_ptr
+ ldr alloc_limit, =caml_young_limit
+ ldr trap_ptr, [trap_ptr]
+ ldr alloc_ptr, [alloc_ptr]
+ ldr alloc_limit, [alloc_limit]
+ /* Test if backtrace is active */
+ ldr r1, =caml_backtrace_active
+ ldr r1, [r1]
+ cbz r1, 1f
+ /* Preserve exception bucket in callee-save register r4 */
+ mov r4, r0
+ ldr r1, =caml_last_return_address /* arg2: pc of raise */
+ ldr r1, [r1]
+ ldr r2, =caml_bottom_of_stack /* arg3: sp of raise */
+ ldr r2, [r2]
+ mov r3, trap_ptr /* arg4: sp of handler */
+ bl caml_stash_backtrace
+ /* Restore exception bucket */
+ mov r0, r4
+1: /* Cut stack at current trap handler */
+ mov sp, trap_ptr
/* Pop previous handler and addr of trap, and jump to it */
- ldmfd sp!, {trap_ptr, pc}
+ pop {trap_ptr, pc}
+ .type caml_raise_exception, %function
+ .size caml_raise_exception, .-caml_raise_exception
-/* Callback from C to Caml */
+/* Callback from C to OCaml */
- .globl caml_callback_exn
+ .align 2
+ .globl caml_callback_exn
.type caml_callback_exn, %function
caml_callback_exn:
+ PROFILE
/* Initial shuffling of arguments (r0 = closure, r1 = first arg) */
mov r12, r0
- mov r0, r1 /* r0 = first arg */
- mov r1, r12 /* r1 = closure environment */
- ldr r12, [r12, #0] /* code pointer */
+ mov r0, r1 /* r0 = first arg */
+ mov r1, r12 /* r1 = closure environment */
+ ldr r12, [r12] /* code pointer */
b .Ljump_to_caml
+ .type caml_callback_exn, %function
+ .size caml_callback_exn, .-caml_callback_exn
- .globl caml_callback2_exn
+ .align 2
+ .globl caml_callback2_exn
.type caml_callback2_exn, %function
caml_callback2_exn:
+ PROFILE
/* Initial shuffling of arguments (r0 = closure, r1 = arg1, r2 = arg2) */
mov r12, r0
- mov r0, r1 /* r0 = first arg */
- mov r1, r2 /* r1 = second arg */
- mov r2, r12 /* r2 = closure environment */
- ldr r12, .Lcaml_apply2
+ mov r0, r1 /* r0 = first arg */
+ mov r1, r2 /* r1 = second arg */
+ mov r2, r12 /* r2 = closure environment */
+ ldr r12, =caml_apply2
b .Ljump_to_caml
+ .type caml_callback2_exn, %function
+ .size caml_callback2_exn, .-caml_callback2_exn
- .globl caml_callback3_exn
+ .align 2
+ .globl caml_callback3_exn
.type caml_callback3_exn, %function
caml_callback3_exn:
+ PROFILE
/* Initial shuffling of arguments */
/* (r0 = closure, r1 = arg1, r2 = arg2, r3 = arg3) */
mov r12, r0
@@ -288,43 +413,36 @@ caml_callback3_exn:
mov r1, r2 /* r1 = second arg */
mov r2, r3 /* r2 = third arg */
mov r3, r12 /* r3 = closure environment */
- ldr r12, .Lcaml_apply3
+ ldr r12, =caml_apply3
b .Ljump_to_caml
+ .type caml_callback3_exn, %function
+ .size caml_callback3_exn, .-caml_callback3_exn
- .globl caml_ml_array_bound_error
+ .align 2
+ .globl caml_ml_array_bound_error
.type caml_ml_array_bound_error, %function
caml_ml_array_bound_error:
- /* Load address of [caml_array_bound_error] in r12 */
- ldr r12, .Lcaml_array_bound_error
+ PROFILE
+ /* Load address of [caml_array_bound_error] in r7 */
+ ldr r7, =caml_array_bound_error
/* Call that function */
b caml_c_call
+ .type caml_ml_array_bound_error, %function
+ .size caml_ml_array_bound_error, .-caml_ml_array_bound_error
-/* Global references */
-
-.Lcaml_last_return_address: .word caml_last_return_address
-.Lcaml_bottom_of_stack: .word caml_bottom_of_stack
-.Lcaml_gc_regs: .word caml_gc_regs
-.Lcaml_young_ptr: .word caml_young_ptr
-.Lcaml_young_limit: .word caml_young_limit
-.Lcaml_exception_pointer: .word caml_exception_pointer
-.Lcaml_program: .word caml_program
-.LLtrap_handler: .word .Ltrap_handler
-.Lcaml_apply2: .word caml_apply2
-.Lcaml_apply3: .word caml_apply3
-.Lcaml_array_bound_error: .word caml_array_bound_error
-.Lcaml_requested_size: .word caml_requested_size
-
- .data
-caml_requested_size:
- .word 0
+ .globl caml_system__code_end
+caml_system__code_end:
/* GC roots for callback */
.data
- .globl caml_system__frametable
+ .align 2
+ .globl caml_system__frametable
caml_system__frametable:
.word 1 /* one descriptor */
.word .Lcaml_retaddr /* return address into callback */
.short -1 /* negative frame size => use callback link */
.short 0 /* no roots */
.align 2
+ .type caml_system__frametable, %object
+ .size caml_system__frametable, .-caml_system__frametable
diff --git a/asmrun/signals_osdep.h b/asmrun/signals_osdep.h
index 1e91327..732f3a0 100644
--- a/asmrun/signals_osdep.h
+++ b/asmrun/signals_osdep.h
@@ -78,7 +78,7 @@
/****************** ARM, Linux */
-#elif defined(TARGET_arm) && defined (SYS_linux)
+#elif defined(TARGET_arm) && (defined (SYS_linux_eabi) || defined(SYS_linux_eabihf))
#include <sys/ucontext.h>
diff --git a/configure b/configure
index 6ed0a9c..4e07c92 100755
--- a/configure
+++ b/configure
@@ -636,6 +636,7 @@ if test $withsharedlibs = "yes"; then
i[345]86-*-netbsd*) natdynlink=true;;
x86_64-*-netbsd*) natdynlink=true;;
i386-*-gnu0.3) natdynlink=true;;
+ arm*-*-linux*) natdynlink=true;;
esac
fi
@@ -691,8 +692,13 @@ case "$host" in
powerpc-*-rhapsody*) arch=power; model=ppc; system=rhapsody;;
powerpc-*-darwin*) arch=power; system=rhapsody
if $arch64; then model=ppc64; else model=ppc; fi;;
- arm*-*-linux*) arch=arm; system=linux;;
- arm*-*-gnu*) arch=arm; system=gnu;;
+ arm*-*-linux-gnueabihf) arch=arm; system=linux_eabihf;;
+ armv7*-*-linux-gnueabi) arch=arm; model=armv7; system=linux_eabi;;
+ armv6t2*-*-linux-gnueabi) arch=arm; model=armv6t2; system=linux_eabi;;
+ armv6*-*-linux-gnueabi) arch=arm; model=armv6; system=linux_eabi;;
+ armv5te*-*-linux-gnueabi) arch=arm; model=armv5te; system=linux_eabi;;
+ armv5*-*-linux-gnueabi) arch=arm; model=armv5; system=linux_eabi;;
+ arm*-*-linux-gnueabi) arch=arm; system=linux_eabi;;
ia64-*-linux*) arch=ia64; system=linux;;
ia64-*-gnu*) arch=ia64; system=gnu;;
ia64-*-freebsd*) arch=ia64; system=freebsd;;
@@ -804,6 +810,7 @@ case "$arch,$model,$system" in
case "$nativecc" in gcc*) ;; *) cc_profile='-xpg';; esac;;
amd64,*,linux) profiling='prof';;
amd64,*,gnu) profiling='prof';;
+ arm,*,linux*) profiling='prof';;
*) profiling='noprof';;
esac
--
1.7.10