2281 lines
80 KiB
Diff
2281 lines
80 KiB
Diff
From 10d852d542f4ecdc5efc5afbae2d42167df4539c Mon Sep 17 00:00:00 2001
|
|
From: Xavier Leroy <xavier.leroy@inria.fr>
|
|
Date: Thu, 18 Jul 2013 16:09:20 +0000
|
|
Subject: [PATCH 09/11] Port to the ARM 64-bits (AArch64) architecture
|
|
(experimental). Merge of branch branches/arm64.
|
|
|
|
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@13909 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
|
|
(cherry picked from commit 055d5c0379e42b4f561cb1fc5159659d8e9a7b6f)
|
|
---
|
|
asmcomp/arm64/arch.ml | 146 ++++++++
|
|
asmcomp/arm64/emit.mlp | 742 +++++++++++++++++++++++++++++++++++++++
|
|
asmcomp/arm64/proc.ml | 212 +++++++++++
|
|
asmcomp/arm64/reload.ml | 16 +
|
|
asmcomp/arm64/scheduling.ml | 18 +
|
|
asmcomp/arm64/selection.ml | 265 ++++++++++++++
|
|
asmcomp/compilenv.ml | 9 +
|
|
asmcomp/compilenv.mli | 4 +
|
|
asmrun/arm64.S | 535 ++++++++++++++++++++++++++++
|
|
asmrun/signals_osdep.h | 19 +
|
|
asmrun/stack.h | 5 +
|
|
byterun/interp.c | 6 +
|
|
configure | 5 +-
|
|
otherlibs/num/bng.c | 6 +-
|
|
otherlibs/num/bng_arm64.c | 20 ++
|
|
testsuite/tests/asmcomp/Makefile | 2 +-
|
|
testsuite/tests/asmcomp/arm64.S | 52 +++
|
|
testsuite/tests/asmcomp/main.ml | 1 +
|
|
18 files changed, 2057 insertions(+), 6 deletions(-)
|
|
create mode 100644 asmcomp/arm64/arch.ml
|
|
create mode 100644 asmcomp/arm64/emit.mlp
|
|
create mode 100644 asmcomp/arm64/proc.ml
|
|
create mode 100644 asmcomp/arm64/reload.ml
|
|
create mode 100644 asmcomp/arm64/scheduling.ml
|
|
create mode 100644 asmcomp/arm64/selection.ml
|
|
create mode 100644 asmrun/arm64.S
|
|
create mode 100644 otherlibs/num/bng_arm64.c
|
|
create mode 100644 testsuite/tests/asmcomp/arm64.S
|
|
|
|
diff --git a/asmcomp/arm64/arch.ml b/asmcomp/arm64/arch.ml
|
|
new file mode 100644
|
|
index 0000000..a53251f
|
|
--- /dev/null
|
|
+++ b/asmcomp/arm64/arch.ml
|
|
@@ -0,0 +1,146 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* OCaml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
|
+(* Benedikt Meurer, University of Siegen *)
|
|
+(* *)
|
|
+(* Copyright 2013 Institut National de Recherche en Informatique *)
|
|
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
|
|
+(* reserved. This file is distributed under the terms of the Q *)
|
|
+(* Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+let command_line_options = []
|
|
+
|
|
+(* Specific operations for the ARM processor, 64-bit mode *)
|
|
+
|
|
+open Format
|
|
+
|
|
+let command_line_options = []
|
|
+
|
|
+(* Addressing modes *)
|
|
+
|
|
+type addressing_mode =
|
|
+ | Iindexed of int (* reg + displ *)
|
|
+ | Ibased of string * int (* global var + displ *)
|
|
+
|
|
+(* We do not support the reg + shifted reg addressing mode, because
|
|
+ what we really need is reg + shifted reg + displ,
|
|
+ and this is decomposed in two instructions (reg + shifted reg -> tmp,
|
|
+ then addressing tmp + displ). *)
|
|
+
|
|
+(* Specific operations *)
|
|
+
|
|
+type specific_operation =
|
|
+ | Ishiftarith of arith_operation * int
|
|
+ | Ishiftcheckbound of int
|
|
+ | Imuladd (* multiply and add *)
|
|
+ | Imulsub (* multiply and subtract *)
|
|
+ | Inegmulf (* floating-point negate and multiply *)
|
|
+ | Imuladdf (* floating-point multiply and add *)
|
|
+ | Inegmuladdf (* floating-point negate, multiply and add *)
|
|
+ | Imulsubf (* floating-point multiply and subtract *)
|
|
+ | Inegmulsubf (* floating-point negate, multiply and subtract *)
|
|
+ | Isqrtf (* floating-point square root *)
|
|
+ | Ibswap of int (* endianess conversion *)
|
|
+
|
|
+and arith_operation =
|
|
+ Ishiftadd
|
|
+ | Ishiftsub
|
|
+
|
|
+(* Sizes, endianness *)
|
|
+
|
|
+let big_endian = false
|
|
+
|
|
+let size_addr = 8
|
|
+let size_int = 8
|
|
+let size_float = 8
|
|
+
|
|
+let allow_unaligned_access = false
|
|
+
|
|
+(* Behavior of division *)
|
|
+
|
|
+let division_crashes_on_overflow = false
|
|
+
|
|
+(* Operations on addressing modes *)
|
|
+
|
|
+let identity_addressing = Iindexed 0
|
|
+
|
|
+let offset_addressing addr delta =
|
|
+ match addr with
|
|
+ | Iindexed n -> Iindexed(n + delta)
|
|
+ | Ibased(s, n) -> Ibased(s, n + delta)
|
|
+
|
|
+let num_args_addressing = function
|
|
+ | Iindexed n -> 1
|
|
+ | Ibased(s, n) -> 0
|
|
+
|
|
+(* Printing operations and addressing modes *)
|
|
+
|
|
+let print_addressing printreg addr ppf arg =
|
|
+ match addr with
|
|
+ | Iindexed n ->
|
|
+ printreg ppf arg.(0);
|
|
+ if n <> 0 then fprintf ppf " + %i" n
|
|
+ | Ibased(s, 0) ->
|
|
+ fprintf ppf "\"%s\"" s
|
|
+ | Ibased(s, n) ->
|
|
+ fprintf ppf "\"%s\" + %i" s n
|
|
+
|
|
+let print_specific_operation printreg op ppf arg =
|
|
+ match op with
|
|
+ | Ishiftarith(op, shift) ->
|
|
+ let op_name = function
|
|
+ | Ishiftadd -> "+"
|
|
+ | Ishiftsub -> "-" in
|
|
+ let shift_mark =
|
|
+ if shift >= 0
|
|
+ then sprintf "<< %i" shift
|
|
+ else sprintf ">> %i" (-shift) in
|
|
+ fprintf ppf "%a %s %a %s"
|
|
+ printreg arg.(0) (op_name op) printreg arg.(1) shift_mark
|
|
+ | Ishiftcheckbound n ->
|
|
+ fprintf ppf "check %a >> %i > %a" printreg arg.(0) n printreg arg.(1)
|
|
+ | Imuladd ->
|
|
+ fprintf ppf "(%a * %a) + %a"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Imulsub ->
|
|
+ fprintf ppf "-(%a * %a) + %a"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Inegmulf ->
|
|
+ fprintf ppf "-f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ | Imuladdf ->
|
|
+ fprintf ppf "%a +f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Inegmuladdf ->
|
|
+ fprintf ppf "(-f %a) -f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Imulsubf ->
|
|
+ fprintf ppf "%a -f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Inegmulsubf ->
|
|
+ fprintf ppf "(-f %a) +f (%a *f %a)"
|
|
+ printreg arg.(0)
|
|
+ printreg arg.(1)
|
|
+ printreg arg.(2)
|
|
+ | Isqrtf ->
|
|
+ fprintf ppf "sqrtf %a"
|
|
+ printreg arg.(0)
|
|
+ | Ibswap n ->
|
|
+ fprintf ppf "bswap%i %a" n
|
|
+ printreg arg.(0)
|
|
+
|
|
diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp
|
|
new file mode 100644
|
|
index 0000000..fc9649c
|
|
--- /dev/null
|
|
+++ b/asmcomp/arm64/emit.mlp
|
|
@@ -0,0 +1,742 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* OCaml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
|
+(* Benedikt Meurer, University of Siegen *)
|
|
+(* *)
|
|
+(* Copyright 2013 Institut National de Recherche en Informatique *)
|
|
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
|
|
+(* reserved. This file is distributed under the terms of the Q *)
|
|
+(* Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* Emission of ARM assembly code, 64-bit mode *)
|
|
+
|
|
+open Misc
|
|
+open Cmm
|
|
+open Arch
|
|
+open Proc
|
|
+open Reg
|
|
+open Mach
|
|
+open Linearize
|
|
+open Emitaux
|
|
+
|
|
+(* Tradeoff between code size and code speed *)
|
|
+
|
|
+let fastcode_flag = ref true
|
|
+
|
|
+(* Names for special regs *)
|
|
+
|
|
+let reg_trap_ptr = phys_reg 23
|
|
+let reg_alloc_ptr = phys_reg 24
|
|
+let reg_alloc_limit = phys_reg 25
|
|
+let reg_tmp1 = phys_reg 26
|
|
+let reg_tmp2 = phys_reg 27
|
|
+let reg_x15 = phys_reg 15
|
|
+
|
|
+(* Output a label *)
|
|
+
|
|
+let emit_label lbl =
|
|
+ emit_string ".L"; emit_int lbl
|
|
+
|
|
+let emit_data_label lbl =
|
|
+ emit_string ".Ld"; emit_int lbl
|
|
+
|
|
+(* Symbols *)
|
|
+
|
|
+let emit_symbol s =
|
|
+ Emitaux.emit_symbol '$' s
|
|
+
|
|
+(* Output a pseudo-register *)
|
|
+
|
|
+let emit_reg = function
|
|
+ {loc = Reg r} -> emit_string (register_name r)
|
|
+ | _ -> fatal_error "Emit.emit_reg"
|
|
+
|
|
+(* Likewise, but with the 32-bit name of the register *)
|
|
+
|
|
+let int_reg_name_w =
|
|
+ [| "w0"; "w1"; "w2"; "w3"; "w4"; "w5"; "w6"; "w7";
|
|
+ "w8"; "w9"; "w10"; "w11"; "w12"; "w13"; "w14"; "w15";
|
|
+ "w19"; "w20"; "w21"; "w22"; "w23"; "w24"; "w25";
|
|
+ "w26"; "w27"; "w28"; "w16"; "w17" |]
|
|
+
|
|
+let emit_wreg = function
|
|
+ {loc = Reg r} -> emit_string int_reg_name_w.(r)
|
|
+ | _ -> fatal_error "Emit.emit_wreg"
|
|
+
|
|
+(* Layout of the stack frame *)
|
|
+
|
|
+let stack_offset = ref 0
|
|
+
|
|
+let frame_size () =
|
|
+ let sz =
|
|
+ !stack_offset +
|
|
+ 8 * num_stack_slots.(0) +
|
|
+ 8 * num_stack_slots.(1) +
|
|
+ (if !contains_calls then 8 else 0)
|
|
+ in Misc.align sz 16
|
|
+
|
|
+let slot_offset loc cl =
|
|
+ match loc with
|
|
+ Incoming n ->
|
|
+ assert (n >= 0);
|
|
+ frame_size() + n
|
|
+ | Local n ->
|
|
+ !stack_offset +
|
|
+ (if cl = 0
|
|
+ then n * 8
|
|
+ else num_stack_slots.(0) * 8 + n * 8)
|
|
+ | Outgoing n ->
|
|
+ assert (n >= 0);
|
|
+ n
|
|
+
|
|
+(* Output a stack reference *)
|
|
+
|
|
+let emit_stack r =
|
|
+ match r.loc with
|
|
+ | Stack s ->
|
|
+ let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
|
|
+ | _ -> fatal_error "Emit.emit_stack"
|
|
+
|
|
+(* Output an addressing mode *)
|
|
+
|
|
+let emit_symbol_offset s ofs =
|
|
+ emit_symbol s;
|
|
+ if ofs > 0 then `+{emit_int ofs}`
|
|
+ else if ofs < 0 then `-{emit_int (-ofs)}`
|
|
+ else ()
|
|
+
|
|
+let emit_addressing addr r =
|
|
+ match addr with
|
|
+ | Iindexed ofs ->
|
|
+ `[{emit_reg r}, #{emit_int ofs}]`
|
|
+ | Ibased(s, ofs) ->
|
|
+ `[{emit_reg r}, #:lo12:{emit_symbol_offset s ofs}]`
|
|
+
|
|
+(* Record live pointers at call points *)
|
|
+
|
|
+let record_frame_label live dbg =
|
|
+ let lbl = new_label() in
|
|
+ let live_offset = ref [] in
|
|
+ Reg.Set.iter
|
|
+ (function
|
|
+ {typ = Addr; loc = Reg r} ->
|
|
+ live_offset := ((r lsl 1) + 1) :: !live_offset
|
|
+ | {typ = Addr; loc = Stack s} as reg ->
|
|
+ live_offset := slot_offset s (register_class reg) :: !live_offset
|
|
+ | _ -> ())
|
|
+ live;
|
|
+ frame_descriptors :=
|
|
+ { fd_lbl = lbl;
|
|
+ fd_frame_size = frame_size();
|
|
+ fd_live_offset = !live_offset;
|
|
+ fd_debuginfo = dbg } :: !frame_descriptors;
|
|
+ lbl
|
|
+
|
|
+let record_frame live dbg =
|
|
+ let lbl = record_frame_label live dbg in `{emit_label lbl}:`
|
|
+
|
|
+(* Record calls to the GC -- we've moved them out of the way *)
|
|
+
|
|
+type gc_call =
|
|
+ { gc_lbl: label; (* Entry label *)
|
|
+ gc_return_lbl: label; (* Where to branch after GC *)
|
|
+ gc_frame_lbl: label } (* Label of frame descriptor *)
|
|
+
|
|
+let call_gc_sites = ref ([] : gc_call list)
|
|
+
|
|
+let emit_call_gc gc =
|
|
+ `{emit_label gc.gc_lbl}: bl {emit_symbol "caml_call_gc"}\n`;
|
|
+ `{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n`
|
|
+
|
|
+(* Record calls to caml_ml_array_bound_error.
|
|
+ In debug mode, we maintain one call to caml_ml_array_bound_error
|
|
+ per bound check site. Otherwise, we can share a single call. *)
|
|
+
|
|
+type bound_error_call =
|
|
+ { bd_lbl: label; (* Entry label *)
|
|
+ bd_frame_lbl: label } (* Label of frame descriptor *)
|
|
+
|
|
+let bound_error_sites = ref ([] : bound_error_call list)
|
|
+
|
|
+let bound_error_label dbg =
|
|
+ if !Clflags.debug || !bound_error_sites = [] then begin
|
|
+ let lbl_bound_error = new_label() in
|
|
+ let lbl_frame = record_frame_label Reg.Set.empty dbg in
|
|
+ bound_error_sites :=
|
|
+ { bd_lbl = lbl_bound_error;
|
|
+ bd_frame_lbl = lbl_frame } :: !bound_error_sites;
|
|
+ lbl_bound_error
|
|
+ end else begin
|
|
+ let bd = List.hd !bound_error_sites in bd.bd_lbl
|
|
+ end
|
|
+
|
|
+let emit_call_bound_error bd =
|
|
+ `{emit_label bd.bd_lbl}: bl {emit_symbol "caml_ml_array_bound_error"}\n`;
|
|
+ `{emit_label bd.bd_frame_lbl}:\n`
|
|
+
|
|
+(* Names of various instructions *)
|
|
+
|
|
+let name_for_comparison = function
|
|
+ | Isigned Ceq -> "eq" | Isigned Cne -> "ne" | Isigned Cle -> "le"
|
|
+ | Isigned Cge -> "ge" | Isigned Clt -> "lt" | Isigned Cgt -> "gt"
|
|
+ | Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
|
|
+ | Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
|
|
+
|
|
+let name_for_int_operation = function
|
|
+ | Iadd -> "add"
|
|
+ | Isub -> "sub"
|
|
+ | Imul -> "mul"
|
|
+ | Idiv -> "sdiv"
|
|
+ | Iand -> "and"
|
|
+ | Ior -> "orr"
|
|
+ | Ixor -> "eor"
|
|
+ | Ilsl -> "lsl"
|
|
+ | Ilsr -> "lsr"
|
|
+ | Iasr -> "asr"
|
|
+ | _ -> assert false
|
|
+
|
|
+(* Load an integer constant into a register *)
|
|
+
|
|
+let emit_intconst dst n =
|
|
+ let rec emit_pos first shift =
|
|
+ if shift < 0 then begin
|
|
+ if first then ` mov {emit_reg dst}, xzr\n`
|
|
+ end else begin
|
|
+ let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
|
|
+ if s = 0n then emit_pos first (shift - 16) else begin
|
|
+ if first then
|
|
+ ` movz {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`
|
|
+ else
|
|
+ ` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
|
|
+ emit_pos false (shift - 16)
|
|
+ end
|
|
+ end
|
|
+ and emit_neg first shift =
|
|
+ if shift < 0 then begin
|
|
+ if first then ` movn {emit_reg dst}, #0\n`
|
|
+ end else begin
|
|
+ let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
|
|
+ if s = 0xFFFFn then emit_neg first (shift - 16) else begin
|
|
+ if first then
|
|
+ ` movn {emit_reg dst}, #{emit_nativeint (Nativeint.logxor s 0xFFFFn)}, lsl #{emit_int shift}\n`
|
|
+ else
|
|
+ ` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
|
|
+ emit_neg false (shift - 16)
|
|
+ end
|
|
+ end
|
|
+ in
|
|
+ if n < 0n then emit_neg true 48 else emit_pos true 48
|
|
+
|
|
+(* Recognize float constants appropriate for FMOV dst, #fpimm instruction:
|
|
+ "a normalized binary floating point encoding with 1 sign bit, 4
|
|
+ bits of fraction and a 3-bit exponent" *)
|
|
+
|
|
+let is_immediate_float bits =
|
|
+ let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in
|
|
+ let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in
|
|
+ exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant
|
|
+
|
|
+(* Adjust sp (up or down) by the given byte amount *)
|
|
+
|
|
+let emit_stack_adjustment n =
|
|
+ let instr = if n < 0 then "sub" else "add" in
|
|
+ let m = abs n in
|
|
+ assert (m < 0x1_000_000);
|
|
+ let ml = m land 0xFFF and mh = m land 0xFFF_000 in
|
|
+ if mh <> 0 then ` {emit_string instr} sp, sp, #{emit_int mh}\n`;
|
|
+ if ml <> 0 then ` {emit_string instr} sp, sp, #{emit_int ml}\n`;
|
|
+ if n <> 0 then cfi_adjust_cfa_offset (-n)
|
|
+
|
|
+(* Deallocate the stack frame and reload the return address
|
|
+ before a return or tail call *)
|
|
+
|
|
+let output_epilogue f =
|
|
+ let n = frame_size() in
|
|
+ if !contains_calls then
|
|
+ ` ldr x30, [sp, #{emit_int (n-8)}]\n`;
|
|
+ if n > 0 then
|
|
+ emit_stack_adjustment n;
|
|
+ f();
|
|
+ (* reset CFA back because function body may continue *)
|
|
+ if n > 0 then cfi_adjust_cfa_offset n
|
|
+
|
|
+(* Name of current function *)
|
|
+let function_name = ref ""
|
|
+(* Entry point for tail recursive calls *)
|
|
+let tailrec_entry_point = ref 0
|
|
+(* Pending floating-point literals *)
|
|
+let float_literals = ref ([] : (int64 * label) list)
|
|
+
|
|
+(* Label a floating-point literal *)
|
|
+let float_literal f =
|
|
+ try
|
|
+ List.assoc f !float_literals
|
|
+ with Not_found ->
|
|
+ let lbl = new_label() in
|
|
+ float_literals := (f, lbl) :: !float_literals;
|
|
+ lbl
|
|
+
|
|
+(* Emit all pending literals *)
|
|
+let emit_literals() =
|
|
+ if !float_literals <> [] then begin
|
|
+ ` .align 3\n`;
|
|
+ List.iter
|
|
+ (fun (f, lbl) ->
|
|
+ `{emit_label lbl}: .quad `; emit_printf "0x%Lx\n" f)
|
|
+ !float_literals;
|
|
+ float_literals := []
|
|
+ end
|
|
+
|
|
+(* Emit code to load the address of a symbol *)
|
|
+
|
|
+let emit_load_symbol_addr dst s =
|
|
+ if (not !Clflags.dlcode) || Compilenv.symbol_in_current_unit s then begin
|
|
+ ` adrp {emit_reg dst}, {emit_symbol s}\n`;
|
|
+ ` add {emit_reg dst}, {emit_reg dst}, #:lo12:{emit_symbol s}\n`
|
|
+ end else begin
|
|
+ ` adrp {emit_reg dst}, :got:{emit_symbol s}\n`;
|
|
+ ` ldr {emit_reg dst}, [{emit_reg dst}, #:got_lo12:{emit_symbol s}]\n`
|
|
+ end
|
|
+
|
|
+(* Output the assembly code for an instruction *)
|
|
+
|
|
+let emit_instr i =
|
|
+ emit_debug_info i.dbg;
|
|
+ match i.desc with
|
|
+ | Lend -> ()
|
|
+ | Lop(Imove | Ispill | Ireload) ->
|
|
+ let src = i.arg.(0) and dst = i.res.(0) in
|
|
+ if src.loc <> dst.loc then begin
|
|
+ match (src, dst) with
|
|
+ | {loc = Reg _; typ = Float}, {loc = Reg _} ->
|
|
+ ` fmov {emit_reg dst}, {emit_reg src}\n`
|
|
+ | {loc = Reg _}, {loc = Reg _} ->
|
|
+ ` mov {emit_reg dst}, {emit_reg src}\n`
|
|
+ | {loc = Reg _}, {loc = Stack _} ->
|
|
+ ` str {emit_reg src}, {emit_stack dst}\n`
|
|
+ | {loc = Stack _}, {loc = Reg _} ->
|
|
+ ` ldr {emit_reg dst}, {emit_stack src}\n`
|
|
+ | _ ->
|
|
+ assert false
|
|
+ end
|
|
+ | Lop(Iconst_int n) ->
|
|
+ emit_intconst i.res.(0) n
|
|
+ | Lop(Iconst_float f) ->
|
|
+ let b = Int64.bits_of_float(float_of_string f) in
|
|
+ if b = 0L then
|
|
+ ` fmov {emit_reg i.res.(0)}, xzr /* {emit_string f} */\n`
|
|
+ else if is_immediate_float b then
|
|
+ ` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b} /* {emit_string f} */\n`
|
|
+ else begin
|
|
+ let lbl = float_literal b in
|
|
+ ` adrp {emit_reg reg_tmp1}, {emit_label lbl}\n`;
|
|
+ ` ldr {emit_reg i.res.(0)}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}] /* {emit_string f} */\n`
|
|
+ end
|
|
+ | Lop(Iconst_symbol s) ->
|
|
+ emit_load_symbol_addr i.res.(0) s
|
|
+ | Lop(Icall_ind) ->
|
|
+ ` blr {emit_reg i.arg.(0)}\n`;
|
|
+ `{record_frame i.live i.dbg}\n`
|
|
+ | Lop(Icall_imm s) ->
|
|
+ ` bl {emit_symbol s}\n`;
|
|
+ `{record_frame i.live i.dbg}\n`
|
|
+ | Lop(Itailcall_ind) ->
|
|
+ output_epilogue (fun () -> ` br {emit_reg i.arg.(0)}\n`)
|
|
+ | Lop(Itailcall_imm s) ->
|
|
+ if s = !function_name then
|
|
+ ` b {emit_label !tailrec_entry_point}\n`
|
|
+ else
|
|
+ output_epilogue (fun () -> ` b {emit_symbol s}\n`)
|
|
+ | Lop(Iextcall(s, false)) ->
|
|
+ ` bl {emit_symbol s}\n`
|
|
+ | Lop(Iextcall(s, true)) ->
|
|
+ emit_load_symbol_addr reg_x15 s;
|
|
+ ` bl {emit_symbol "caml_c_call"}\n`;
|
|
+ `{record_frame i.live i.dbg}\n`
|
|
+ | Lop(Istackoffset n) ->
|
|
+ assert (n mod 16 = 0);
|
|
+ emit_stack_adjustment (-n);
|
|
+ stack_offset := !stack_offset + n
|
|
+ | Lop(Iload(size, addr)) ->
|
|
+ let dst = i.res.(0) in
|
|
+ let base =
|
|
+ match addr with
|
|
+ | Iindexed ofs -> i.arg.(0)
|
|
+ | Ibased(s, ofs) ->
|
|
+ ` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`;
|
|
+ reg_tmp1 in
|
|
+ begin match size with
|
|
+ | Byte_unsigned ->
|
|
+ ` ldrb {emit_wreg dst}, {emit_addressing addr base}\n`
|
|
+ | Byte_signed ->
|
|
+ ` ldrsb {emit_reg dst}, {emit_addressing addr base}\n`
|
|
+ | Sixteen_unsigned ->
|
|
+ ` ldrh {emit_wreg dst}, {emit_addressing addr base}\n`
|
|
+ | Sixteen_signed ->
|
|
+ ` ldrsh {emit_reg dst}, {emit_addressing addr base}\n`
|
|
+ | Thirtytwo_unsigned ->
|
|
+ ` ldr {emit_wreg dst}, {emit_addressing addr base}\n`
|
|
+ | Thirtytwo_signed ->
|
|
+ ` ldrsw {emit_reg dst}, {emit_addressing addr base}\n`
|
|
+ | Single ->
|
|
+ ` ldr s7, {emit_addressing addr base}\n`;
|
|
+ ` fcvt {emit_reg dst}, s7\n`
|
|
+ | Word | Double | Double_u ->
|
|
+ ` ldr {emit_reg dst}, {emit_addressing addr base}\n`
|
|
+ end
|
|
+ | Lop(Istore(size, addr)) ->
|
|
+ let src = i.arg.(0) in
|
|
+ let base =
|
|
+ match addr with
|
|
+ | Iindexed ofs -> i.arg.(1)
|
|
+ | Ibased(s, ofs) ->
|
|
+ ` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`;
|
|
+ reg_tmp1 in
|
|
+ begin match size with
|
|
+ | Byte_unsigned | Byte_signed ->
|
|
+ ` strb {emit_wreg src}, {emit_addressing addr base}\n`
|
|
+ | Sixteen_unsigned | Sixteen_signed ->
|
|
+ ` strh {emit_wreg src}, {emit_addressing addr base}\n`
|
|
+ | Thirtytwo_unsigned | Thirtytwo_signed ->
|
|
+ ` str {emit_wreg src}, {emit_addressing addr base}\n`
|
|
+ | Single ->
|
|
+ ` fcvt s7, {emit_reg src}\n`;
|
|
+ ` str s7, {emit_addressing addr base}\n`;
|
|
+ | Word | Double | Double_u ->
|
|
+ ` str {emit_reg src}, {emit_addressing addr base}\n`
|
|
+ end
|
|
+ | Lop(Ialloc n) ->
|
|
+ let lbl_frame = record_frame_label i.live i.dbg in
|
|
+ if !fastcode_flag then begin
|
|
+ let lbl_redo = new_label() in
|
|
+ let lbl_call_gc = new_label() in
|
|
+ `{emit_label lbl_redo}:`;
|
|
+ ` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`;
|
|
+ ` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`;
|
|
+ ` add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`;
|
|
+ ` b.lo {emit_label lbl_call_gc}\n`;
|
|
+ call_gc_sites :=
|
|
+ { gc_lbl = lbl_call_gc;
|
|
+ gc_return_lbl = lbl_redo;
|
|
+ gc_frame_lbl = lbl_frame } :: !call_gc_sites
|
|
+ end else begin
|
|
+ begin match n with
|
|
+ | 16 -> ` bl {emit_symbol "caml_alloc1"}\n`
|
|
+ | 24 -> ` bl {emit_symbol "caml_alloc2"}\n`
|
|
+ | 32 -> ` bl {emit_symbol "caml_alloc3"}\n`
|
|
+ | _ -> emit_intconst reg_x15 (Nativeint.of_int n);
|
|
+ ` bl {emit_symbol "caml_allocN"}\n`
|
|
+ end;
|
|
+ `{emit_label lbl_frame}: add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`
|
|
+ end
|
|
+ | Lop(Iintop(Icomp cmp)) ->
|
|
+ ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ ` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
|
|
+ | Lop(Iintop_imm(Icomp cmp, n)) ->
|
|
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
|
|
+ ` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
|
|
+ | Lop(Iintop Icheckbound) ->
|
|
+ let lbl = bound_error_label i.dbg in
|
|
+ ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ ` b.ls {emit_label lbl}\n`
|
|
+ | Lop(Iintop_imm(Icheckbound, n)) ->
|
|
+ let lbl = bound_error_label i.dbg in
|
|
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
|
|
+ ` b.ls {emit_label lbl}\n`
|
|
+ | Lop(Ispecific(Ishiftcheckbound shift)) ->
|
|
+ let lbl = bound_error_label i.dbg in
|
|
+ ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
|
|
+ ` b.cs {emit_label lbl}\n`
|
|
+ | Lop(Iintop Imod) ->
|
|
+ ` sdiv {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ ` msub {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
|
|
+ | Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *)
|
|
+ let l = Misc.log2 n in
|
|
+ ` asr {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`;
|
|
+ ` add {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`;
|
|
+ ` asr {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_int l}\n`
|
|
+ | Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *)
|
|
+ let l = Misc.log2 n in
|
|
+ ` asr {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`;
|
|
+ ` add {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`;
|
|
+ ` asr {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_int l}\n`;
|
|
+ ` sub {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsl {emit_int l}\n`
|
|
+ | Lop(Iintop op) ->
|
|
+ let instr = name_for_int_operation op in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
|
|
+ | Lop(Iintop_imm(op, n)) ->
|
|
+ let instr = name_for_int_operation op in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`
|
|
+ | Lop(Ifloatofint | Iintoffloat | Iabsf | Inegf | Ispecific Isqrtf as op) ->
|
|
+ let instr = (match op with
|
|
+ | Ifloatofint -> "scvtf"
|
|
+ | Iintoffloat -> "fcvtzs"
|
|
+ | Iabsf -> "fabs"
|
|
+ | Inegf -> "fneg"
|
|
+ | Ispecific Isqrtf -> "fsqrt"
|
|
+ | _ -> assert false) in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
|
|
+ | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) ->
|
|
+ let instr = (match op with
|
|
+ | Iaddf -> "fadd"
|
|
+ | Isubf -> "fsub"
|
|
+ | Imulf -> "fmul"
|
|
+ | Idivf -> "fdiv"
|
|
+ | Ispecific Inegmulf -> "fnmul"
|
|
+ | _ -> assert false) in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
|
|
+ | Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) ->
|
|
+ let instr = (match op with
|
|
+ | Imuladdf -> "fmadd"
|
|
+ | Inegmuladdf -> "fnmadd"
|
|
+ | Imulsubf -> "fmsub"
|
|
+ | Inegmulsubf -> "fnmsub"
|
|
+ | _ -> assert false) in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n`
|
|
+ | Lop(Ispecific(Ishiftarith(op, shift))) ->
|
|
+ let instr = (match op with
|
|
+ Ishiftadd -> "add"
|
|
+ | Ishiftsub -> "sub") in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
|
|
+ if shift >= 0
|
|
+ then `, lsl #{emit_int shift}\n`
|
|
+ else `, asr #{emit_int (-shift)}\n`
|
|
+ | Lop(Ispecific(Imuladd | Imulsub as op)) ->
|
|
+ let instr = (match op with
|
|
+ Imuladd -> "madd"
|
|
+ | Imulsub -> "msub"
|
|
+ | _ -> assert false) in
|
|
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`
|
|
+ | Lop(Ispecific(Ibswap size)) ->
|
|
+ begin match size with
|
|
+ | 16 ->
|
|
+ ` rev16 {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`;
|
|
+ ` ubfm {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, #0, #16\n`
|
|
+ | 32 ->
|
|
+ ` rev {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`
|
|
+ | 64 ->
|
|
+ ` rev {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
|
|
+ | _ ->
|
|
+ assert false
|
|
+ end
|
|
+ | Lreloadretaddr ->
|
|
+ ()
|
|
+ | Lreturn ->
|
|
+ output_epilogue (fun () -> ` ret\n`)
|
|
+ | Llabel lbl ->
|
|
+ `{emit_label lbl}:\n`
|
|
+ | Lbranch lbl ->
|
|
+ ` b {emit_label lbl}\n`
|
|
+ | Lcondbranch(tst, lbl) ->
|
|
+ begin match tst with
|
|
+ | Itruetest ->
|
|
+ ` cbnz {emit_reg i.arg.(0)}, {emit_label lbl}\n`
|
|
+ | Ifalsetest ->
|
|
+ ` cbz {emit_reg i.arg.(0)}, {emit_label lbl}\n`
|
|
+ | Iinttest cmp ->
|
|
+ ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ let comp = name_for_comparison cmp in
|
|
+ ` b.{emit_string comp} {emit_label lbl}\n`
|
|
+ | Iinttest_imm(cmp, n) ->
|
|
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
|
|
+ let comp = name_for_comparison cmp in
|
|
+ ` b.{emit_string comp} {emit_label lbl}\n`
|
|
+ | Ifloattest(cmp, neg) ->
|
|
+ let comp = (match (cmp, neg) with
|
|
+ | (Ceq, false) | (Cne, true) -> "eq"
|
|
+ | (Cne, false) | (Ceq, true) -> "ne"
|
|
+ | (Clt, false) -> "cc"
|
|
+ | (Clt, true) -> "cs"
|
|
+ | (Cle, false) -> "ls"
|
|
+ | (Cle, true) -> "hi"
|
|
+ | (Cgt, false) -> "gt"
|
|
+ | (Cgt, true) -> "le"
|
|
+ | (Cge, false) -> "ge"
|
|
+ | (Cge, true) -> "lt") in
|
|
+ ` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
|
|
+ ` b.{emit_string comp} {emit_label lbl}\n`
|
|
+ | Ioddtest ->
|
|
+ ` tbnz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n`
|
|
+ | Ieventest ->
|
|
+ ` tbz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n`
|
|
+ end
|
|
+ | Lcondbranch3(lbl0, lbl1, lbl2) ->
|
|
+ ` cmp {emit_reg i.arg.(0)}, #1\n`;
|
|
+ begin match lbl0 with
|
|
+ None -> ()
|
|
+ | Some lbl -> ` b.lt {emit_label lbl}\n`
|
|
+ end;
|
|
+ begin match lbl1 with
|
|
+ None -> ()
|
|
+ | Some lbl -> ` b.eq {emit_label lbl}\n`
|
|
+ end;
|
|
+ begin match lbl2 with
|
|
+ None -> ()
|
|
+ | Some lbl -> ` b.gt {emit_label lbl}\n`
|
|
+ end
|
|
+ | Lswitch jumptbl ->
|
|
+ let lbltbl = new_label() in
|
|
+ ` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`;
|
|
+ ` add {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2\n`;
|
|
+ ` br {emit_reg reg_tmp1}\n`;
|
|
+ `{emit_label lbltbl}:`;
|
|
+ for j = 0 to Array.length jumptbl - 1 do
|
|
+ ` b {emit_label jumptbl.(j)}\n`
|
|
+ done
|
|
+(* Alternative:
|
|
+ let lbltbl = new_label() in
|
|
+ ` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`;
|
|
+ ` ldr {emit_wreg reg_tmp2}, [{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2]\n`;
|
|
+ ` add {emit_reg reg_tmp1}, {emit_wreg reg_tmp2}, sxtb\n`;
|
|
+ ` br {emit_reg reg_tmp1}\n`;
|
|
+ `{emit_label lbltbl}:\n`;
|
|
+ for j = 0 to Array.length jumptbl - 1 do
|
|
+ ` .word {emit_label jumptbl.(j)} - {emit_label lbltbl}\n`
|
|
+ done
|
|
+*)
|
|
+ | Lsetuptrap lbl ->
|
|
+ let lblnext = new_label() in
|
|
+ ` adr {emit_reg reg_tmp1}, {emit_label lblnext}\n`;
|
|
+ ` b {emit_label lbl}\n`;
|
|
+ `{emit_label lblnext}:\n`
|
|
+ | Lpushtrap ->
|
|
+ stack_offset := !stack_offset + 16;
|
|
+ ` str {emit_reg reg_trap_ptr}, [sp, -16]!\n`;
|
|
+ ` str {emit_reg reg_tmp1}, [sp, #8]\n`;
|
|
+ cfi_adjust_cfa_offset 16;
|
|
+ ` mov {emit_reg reg_trap_ptr}, sp\n`
|
|
+ | Lpoptrap ->
|
|
+ ` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`;
|
|
+ cfi_adjust_cfa_offset (-16);
|
|
+ stack_offset := !stack_offset - 16
|
|
+ | Lraise ->
|
|
+ if !Clflags.debug then begin
|
|
+ ` bl {emit_symbol "caml_raise_exn"}\n`;
|
|
+ `{record_frame Reg.Set.empty i.dbg}\n`
|
|
+ end else begin
|
|
+ ` mov sp, {emit_reg reg_trap_ptr}\n`;
|
|
+ ` ldr {emit_reg reg_tmp1}, [sp, #8]\n`;
|
|
+ ` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`;
|
|
+ ` br {emit_reg reg_tmp1}\n`
|
|
+ end
|
|
+
|
|
+(* Emission of an instruction sequence *)
|
|
+
|
|
+let rec emit_all i =
|
|
+ if i.desc = Lend then () else (emit_instr i; emit_all i.next)
|
|
+
|
|
+(* Emission of the profiling prelude *)
|
|
+
|
|
+let emit_profile() = () (* TODO *)
|
|
+(*
|
|
+ match Config.system with
|
|
+ "linux_eabi" | "linux_eabihf" ->
|
|
+ ` push \{lr}\n`;
|
|
+ ` {emit_call "__gnu_mcount_nc"}\n`
|
|
+ | _ -> ()
|
|
+*)
|
|
+
|
|
+(* Emission of a function declaration *)
|
|
+
|
|
+let fundecl fundecl =
|
|
+ function_name := fundecl.fun_name;
|
|
+ fastcode_flag := fundecl.fun_fast;
|
|
+ tailrec_entry_point := new_label();
|
|
+ float_literals := [];
|
|
+ stack_offset := 0;
|
|
+ call_gc_sites := [];
|
|
+ bound_error_sites := [];
|
|
+ ` .text\n`;
|
|
+ ` .align 2\n`;
|
|
+ ` .globl {emit_symbol fundecl.fun_name}\n`;
|
|
+ ` .type {emit_symbol fundecl.fun_name}, %function\n`;
|
|
+ `{emit_symbol fundecl.fun_name}:\n`;
|
|
+ emit_debug_info fundecl.fun_dbg;
|
|
+ cfi_startproc();
|
|
+ if !Clflags.gprofile then emit_profile();
|
|
+ let n = frame_size() in
|
|
+ if n > 0 then
|
|
+ emit_stack_adjustment (-n);
|
|
+ if !contains_calls then
|
|
+ ` str x30, [sp, #{emit_int (n-8)}]\n`;
|
|
+ `{emit_label !tailrec_entry_point}:\n`;
|
|
+ emit_all fundecl.fun_body;
|
|
+ List.iter emit_call_gc !call_gc_sites;
|
|
+ List.iter emit_call_bound_error !bound_error_sites;
|
|
+ cfi_endproc();
|
|
+ ` .type {emit_symbol fundecl.fun_name}, %function\n`;
|
|
+ ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`;
|
|
+ emit_literals()
|
|
+
|
|
+(* Emission of data *)
|
|
+
|
|
+let emit_item = function
|
|
+ | Cglobal_symbol s -> ` .globl {emit_symbol s}\n`;
|
|
+ | Cdefine_symbol s -> `{emit_symbol s}:\n`
|
|
+ | Cdefine_label lbl -> `{emit_data_label lbl}:\n`
|
|
+ | Cint8 n -> ` .byte {emit_int n}\n`
|
|
+ | Cint16 n -> ` .short {emit_int n}\n`
|
|
+ | Cint32 n -> ` .long {emit_nativeint n}\n`
|
|
+ | Cint n -> ` .quad {emit_nativeint n}\n`
|
|
+ | Csingle f -> emit_float32_directive ".long" f
|
|
+ | Cdouble f -> emit_float64_directive ".quad" f
|
|
+ | Csymbol_address s -> ` .quad {emit_symbol s}\n`
|
|
+ | Clabel_address lbl -> ` .quad {emit_data_label lbl}\n`
|
|
+ | Cstring s -> emit_string_directive " .ascii " s
|
|
+ | Cskip n -> if n > 0 then ` .space {emit_int n}\n`
|
|
+ | Calign n -> ` .align {emit_int(Misc.log2 n)}\n`
|
|
+
|
|
+let data l =
|
|
+ ` .data\n`;
|
|
+ List.iter emit_item l
|
|
+
|
|
+(* Beginning / end of an assembly file *)
|
|
+
|
|
+let begin_assembly() =
|
|
+ reset_debug_info();
|
|
+ let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
|
|
+ ` .data\n`;
|
|
+ ` .globl {emit_symbol lbl_begin}\n`;
|
|
+ `{emit_symbol lbl_begin}:\n`;
|
|
+ let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
|
|
+ ` .text\n`;
|
|
+ ` .globl {emit_symbol lbl_begin}\n`;
|
|
+ `{emit_symbol lbl_begin}:\n`
|
|
+
|
|
+let end_assembly () =
|
|
+ let lbl_end = Compilenv.make_symbol (Some "code_end") in
|
|
+ ` .text\n`;
|
|
+ ` .globl {emit_symbol lbl_end}\n`;
|
|
+ `{emit_symbol lbl_end}:\n`;
|
|
+ let lbl_end = Compilenv.make_symbol (Some "data_end") in
|
|
+ ` .data\n`;
|
|
+ ` .globl {emit_symbol lbl_end}\n`;
|
|
+ `{emit_symbol lbl_end}:\n`;
|
|
+ ` .long 0\n`;
|
|
+ let lbl = Compilenv.make_symbol (Some "frametable") in
|
|
+ ` .globl {emit_symbol lbl}\n`;
|
|
+ `{emit_symbol lbl}:\n`;
|
|
+ emit_frames
|
|
+ { efa_label = (fun lbl ->
|
|
+ ` .type {emit_label lbl}, %function\n`;
|
|
+ ` .quad {emit_label lbl}\n`);
|
|
+ efa_16 = (fun n -> ` .short {emit_int n}\n`);
|
|
+ efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
|
|
+ efa_word = (fun n -> ` .quad {emit_int n}\n`);
|
|
+ efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`);
|
|
+ efa_label_rel = (fun lbl ofs ->
|
|
+ ` .long {emit_label lbl} - . + {emit_int32 ofs}\n`);
|
|
+ efa_def_label = (fun lbl -> `{emit_label lbl}:\n`);
|
|
+ efa_string = (fun s -> emit_string_directive " .asciz " s) };
|
|
+ ` .type {emit_symbol lbl}, %object\n`;
|
|
+ ` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`;
|
|
+ begin match Config.system with
|
|
+ | "linux" ->
|
|
+ (* Mark stack as non-executable *)
|
|
+ ` .section .note.GNU-stack,\"\",%progbits\n`
|
|
+ | _ -> ()
|
|
+ end
|
|
diff --git a/asmcomp/arm64/proc.ml b/asmcomp/arm64/proc.ml
|
|
new file mode 100644
|
|
index 0000000..b52c2fd
|
|
--- /dev/null
|
|
+++ b/asmcomp/arm64/proc.ml
|
|
@@ -0,0 +1,212 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* OCaml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
|
+(* Benedikt Meurer, University of Siegen *)
|
|
+(* *)
|
|
+(* Copyright 2013 Institut National de Recherche en Informatique *)
|
|
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
|
|
+(* reserved. This file is distributed under the terms of the Q *)
|
|
+(* Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* Description of the ARM processor in 64-bit mode *)
|
|
+
|
|
+open Misc
|
|
+open Cmm
|
|
+open Reg
|
|
+open Arch
|
|
+open Mach
|
|
+
|
|
+(* Instruction selection *)
|
|
+
|
|
+let word_addressed = false
|
|
+
|
|
+(* Registers available for register allocation *)
|
|
+
|
|
+(* Integer register map:
|
|
+ x0 - x15 general purpose (caller-save)
|
|
+ x16, x17 temporaries (used by call veeners)
|
|
+ x18 platform register (reserved)
|
|
+ x19 - x25 general purpose (callee-save)
|
|
+ x26 trap pointer
|
|
+ x27 alloc pointer
|
|
+ x28 alloc limit
|
|
+ x29 frame pointer
|
|
+ x30 return address
|
|
+ sp / xzr stack pointer / zero register
|
|
+ Floating-point register map:
|
|
+ d0 - d7 general purpose (caller-save)
|
|
+ d8 - d15 general purpose (callee-save)
|
|
+ d16 - d31 generat purpose (caller-save)
|
|
+*)
|
|
+
|
|
+let int_reg_name =
|
|
+ [| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7";
|
|
+ "x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15";
|
|
+ "x19"; "x20"; "x21"; "x22"; "x23"; "x24"; "x25";
|
|
+ "x26"; "x27"; "x28"; "x16"; "x17" |]
|
|
+
|
|
+let float_reg_name =
|
|
+ [| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7";
|
|
+ "d8"; "d9"; "d10"; "d11"; "d12"; "d13"; "d14"; "d15";
|
|
+ "d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23";
|
|
+ "d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |]
|
|
+
|
|
+let num_register_classes = 2
|
|
+
|
|
+let register_class r =
|
|
+ match r.typ with
|
|
+ | (Int | Addr) -> 0
|
|
+ | Float -> 1
|
|
+
|
|
+let num_available_registers =
|
|
+ [| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *)
|
|
+
|
|
+let first_available_register =
|
|
+ [| 0; 100 |]
|
|
+
|
|
+let register_name r =
|
|
+ if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
|
|
+
|
|
+let rotate_registers = true
|
|
+
|
|
+(* Representation of hard registers by pseudo-registers *)
|
|
+
|
|
+let hard_int_reg =
|
|
+ let v = Array.create 28 Reg.dummy in
|
|
+ for i = 0 to 27 do
|
|
+ v.(i) <- Reg.at_location Int (Reg i)
|
|
+ done;
|
|
+ v
|
|
+
|
|
+let hard_float_reg =
|
|
+ let v = Array.create 32 Reg.dummy in
|
|
+ for i = 0 to 31 do
|
|
+ v.(i) <- Reg.at_location Float (Reg(100 + i))
|
|
+ done;
|
|
+ v
|
|
+
|
|
+let all_phys_regs =
|
|
+ Array.append hard_int_reg hard_float_reg
|
|
+
|
|
+let phys_reg n =
|
|
+ if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
|
|
+
|
|
+let reg_x15 = phys_reg 15
|
|
+let reg_d7 = phys_reg 107
|
|
+
|
|
+let stack_slot slot ty =
|
|
+ Reg.at_location ty (Stack slot)
|
|
+
|
|
+(* Calling conventions *)
|
|
+
|
|
+let calling_conventions
|
|
+ first_int last_int first_float last_float make_stack arg =
|
|
+ let loc = Array.create (Array.length arg) Reg.dummy in
|
|
+ let int = ref first_int in
|
|
+ let float = ref first_float in
|
|
+ let ofs = ref 0 in
|
|
+ for i = 0 to Array.length arg - 1 do
|
|
+ match arg.(i).typ with
|
|
+ Int | Addr as ty ->
|
|
+ if !int <= last_int then begin
|
|
+ loc.(i) <- phys_reg !int;
|
|
+ incr int
|
|
+ end else begin
|
|
+ loc.(i) <- stack_slot (make_stack !ofs) ty;
|
|
+ ofs := !ofs + size_int
|
|
+ end
|
|
+ | Float ->
|
|
+ if !float <= last_float then begin
|
|
+ loc.(i) <- phys_reg !float;
|
|
+ incr float
|
|
+ end else begin
|
|
+ loc.(i) <- stack_slot (make_stack !ofs) Float;
|
|
+ ofs := !ofs + size_float
|
|
+ end
|
|
+ done;
|
|
+ (loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
|
|
+
|
|
+let incoming ofs = Incoming ofs
|
|
+let outgoing ofs = Outgoing ofs
|
|
+let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
|
|
+
|
|
+(* OCaml calling convention:
|
|
+ first integer args in r0...r15
|
|
+ first float args in d0...d15
|
|
+ remaining args on stack.
|
|
+ Return values in r0...r15 or d0...d15. *)
|
|
+
|
|
+let loc_arguments arg =
|
|
+ calling_conventions 0 15 100 115 outgoing arg
|
|
+let loc_parameters arg =
|
|
+ let (loc, _) = calling_conventions 0 15 100 115 incoming arg in loc
|
|
+let loc_results res =
|
|
+ let (loc, _) = calling_conventions 0 15 100 115 not_supported res in loc
|
|
+
|
|
+(* C calling convention:
|
|
+ first integer args in r0...r7
|
|
+ first float args in d0...d7
|
|
+ remaining args on stack.
|
|
+ Return values in r0...r1 or d0. *)
|
|
+
|
|
+let loc_external_arguments arg =
|
|
+ calling_conventions 0 7 100 107 outgoing arg
|
|
+let loc_external_results res =
|
|
+ let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
|
|
+
|
|
+let loc_exn_bucket = phys_reg 0
|
|
+
|
|
+(* Registers destroyed by operations *)
|
|
+
|
|
+let destroyed_at_c_call =
|
|
+ (* x19-x28, d8-d15 preserved *)
|
|
+ Array.of_list (List.map phys_reg
|
|
+ [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;
|
|
+ 100;101;102;103;104;105;106;107;
|
|
+ 116;117;118;119;120;121;122;123;
|
|
+ 124;125;126;127;128;129;130;131])
|
|
+
|
|
+let destroyed_at_oper = function
|
|
+ | Iop(Icall_ind | Icall_imm _) | Iop(Iextcall(_, true)) ->
|
|
+ all_phys_regs
|
|
+ | Iop(Iextcall(_, false)) ->
|
|
+ destroyed_at_c_call
|
|
+ | Iop(Ialloc _) ->
|
|
+ [| reg_x15 |]
|
|
+ | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
|
|
+ [| reg_d7 |] (* d7 / s7 destroyed *)
|
|
+ | _ -> [||]
|
|
+
|
|
+let destroyed_at_raise = all_phys_regs
|
|
+
|
|
+(* Maximal register pressure *)
|
|
+
|
|
+let safe_register_pressure = function
|
|
+ | Iextcall(_, _) -> 8
|
|
+ | Ialloc _ -> 25
|
|
+ | _ -> 26
|
|
+
|
|
+let max_register_pressure = function
|
|
+ | Iextcall(_, _) -> [| 10; 8 |]
|
|
+ | Ialloc _ -> [| 25; 32 |]
|
|
+ | Iintoffloat | Ifloatofint
|
|
+ | Iload(Single, _) | Istore(Single, _) -> [| 26; 31 |]
|
|
+ | _ -> [| 26; 32 |]
|
|
+
|
|
+(* Layout of the stack *)
|
|
+
|
|
+let num_stack_slots = [| 0; 0 |]
|
|
+let contains_calls = ref false
|
|
+
|
|
+(* Calling the assembler *)
|
|
+
|
|
+let assemble_file infile outfile =
|
|
+ Ccomp.command (Config.asm ^ " -o " ^
|
|
+ Filename.quote outfile ^ " " ^ Filename.quote infile)
|
|
+
|
|
+
|
|
+let init () = ()
|
|
diff --git a/asmcomp/arm64/reload.ml b/asmcomp/arm64/reload.ml
|
|
new file mode 100644
|
|
index 0000000..ff9214e
|
|
--- /dev/null
|
|
+++ b/asmcomp/arm64/reload.ml
|
|
@@ -0,0 +1,16 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* OCaml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
|
+(* *)
|
|
+(* Copyright 2013 Institut National de Recherche en Informatique et *)
|
|
+(* en Automatique. All rights reserved. This file is distributed *)
|
|
+(* under the terms of the Q Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* Reloading for the ARM 64 bits *)
|
|
+
|
|
+let fundecl f =
|
|
+ (new Reloadgen.reload_generic)#fundecl f
|
|
diff --git a/asmcomp/arm64/scheduling.ml b/asmcomp/arm64/scheduling.ml
|
|
new file mode 100644
|
|
index 0000000..cc244be
|
|
--- /dev/null
|
|
+++ b/asmcomp/arm64/scheduling.ml
|
|
@@ -0,0 +1,18 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* OCaml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
|
+(* *)
|
|
+(* Copyright 2013 Institut National de Recherche en Informatique et *)
|
|
+(* en Automatique. All rights reserved. This file is distributed *)
|
|
+(* under the terms of the Q Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+let _ = let module M = Schedgen in () (* to create a dependency *)
|
|
+
|
|
+(* Scheduling is turned off because the processor schedules dynamically
|
|
+ much better than what we could do. *)
|
|
+
|
|
+let fundecl f = f
|
|
diff --git a/asmcomp/arm64/selection.ml b/asmcomp/arm64/selection.ml
|
|
new file mode 100644
|
|
index 0000000..c74b282
|
|
--- /dev/null
|
|
+++ b/asmcomp/arm64/selection.ml
|
|
@@ -0,0 +1,265 @@
|
|
+(***********************************************************************)
|
|
+(* *)
|
|
+(* OCaml *)
|
|
+(* *)
|
|
+(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
|
+(* Benedikt Meurer, University of Siegen *)
|
|
+(* *)
|
|
+(* Copyright 2013 Institut National de Recherche en Informatique *)
|
|
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
|
|
+(* reserved. This file is distributed under the terms of the Q *)
|
|
+(* Public License version 1.0. *)
|
|
+(* *)
|
|
+(***********************************************************************)
|
|
+
|
|
+(* Instruction selection for the ARM processor *)
|
|
+
|
|
+open Arch
|
|
+open Cmm
|
|
+open Mach
|
|
+
|
|
+let is_offset chunk n =
|
|
+ (n >= -256 && n <= 255) (* 9 bits signed unscaled *)
|
|
+|| (n >= 0 &&
|
|
+ match chunk with (* 12 bits unsigned, scaled by chunk size *)
|
|
+ | Byte_unsigned | Byte_signed ->
|
|
+ n < 0x1000
|
|
+ | Sixteen_unsigned | Sixteen_signed ->
|
|
+ n land 1 = 0 && n lsr 1 < 0x1000
|
|
+ | Thirtytwo_unsigned | Thirtytwo_signed | Single ->
|
|
+ n land 3 = 0 && n lsr 2 < 0x1000
|
|
+ | Word | Double | Double_u ->
|
|
+ n land 7 = 0 && n lsr 3 < 0x1000)
|
|
+
|
|
+(* An automaton to recognize ( 0+1+0* | 1+0+1* )
|
|
+
|
|
+ 0 1 0
|
|
+ / \ / \ / \
|
|
+ \ / \ / \ /
|
|
+ -0--> [1] --1--> [2] --0--> [3]
|
|
+ /
|
|
+ [0]
|
|
+ \
|
|
+ -1--> [4] --0--> [5] --1--> [6]
|
|
+ / \ / \ / \
|
|
+ \ / \ / \ /
|
|
+ 1 0 1
|
|
+
|
|
+The accepting states are 2, 3, 5 and 6. *)
|
|
+
|
|
+let auto_table = [| (* accepting?, next on 0, next on 1 *)
|
|
+ (* state 0 *) (false, 1, 4);
|
|
+ (* state 1 *) (false, 1, 2);
|
|
+ (* state 2 *) (true, 3, 2);
|
|
+ (* state 3 *) (true, 3, 7);
|
|
+ (* state 4 *) (false, 5, 4);
|
|
+ (* state 5 *) (true, 5, 6);
|
|
+ (* state 6 *) (true, 7, 6);
|
|
+ (* state 7 *) (false, 7, 7) (* error state *)
|
|
+|]
|
|
+
|
|
+let rec run_automata nbits state input =
|
|
+ let (acc, next0, next1) = auto_table.(state) in
|
|
+ if nbits <= 0
|
|
+ then acc
|
|
+ else run_automata (nbits - 1)
|
|
+ (if input land 1 = 0 then next0 else next1)
|
|
+ (input asr 1)
|
|
+
|
|
+(* We are very conservative wrt what ARM64 supports: we don't support
|
|
+ repetitions of a 000111000 or 1110000111 pattern, just a single
|
|
+ pattern of this kind. *)
|
|
+
|
|
+let is_logical_immediate n =
|
|
+ n <> 0 && n <> -1 && run_automata 64 0 n
|
|
+
|
|
+let is_intconst = function
|
|
+ Cconst_int _ -> true
|
|
+ | _ -> false
|
|
+
|
|
+let inline_ops =
|
|
+ [ "sqrt"; "caml_bswap16_direct"; "caml_int32_direct_bswap";
|
|
+ "caml_int64_direct_bswap"; "caml_nativeint_direct_bswap" ]
|
|
+
|
|
+let use_direct_addressing symb =
|
|
+ (not !Clflags.dlcode) || Compilenv.symbol_in_current_unit symb
|
|
+
|
|
+(* Instruction selection *)
|
|
+
|
|
+class selector = object(self)
|
|
+
|
|
+inherit Selectgen.selector_generic as super
|
|
+
|
|
+method is_immediate n =
|
|
+ let mn = -n in
|
|
+ n land 0xFFF = n || n land 0xFFF_000 = n
|
|
+ || mn land 0xFFF = mn || mn land 0xFFF_000 = mn
|
|
+
|
|
+method! is_simple_expr = function
|
|
+ (* inlined floating-point ops are simple if their arguments are *)
|
|
+ | Cop(Cextcall(fn, _, _, _), args) when List.mem fn inline_ops ->
|
|
+ List.for_all self#is_simple_expr args
|
|
+ | e -> super#is_simple_expr e
|
|
+
|
|
+method select_addressing chunk = function
|
|
+ | Cop(Cadda, [Cconst_symbol s; Cconst_int n])
|
|
+ when use_direct_addressing s ->
|
|
+ (Ibased(s, n), Ctuple [])
|
|
+ | Cop(Cadda, [arg; Cconst_int n])
|
|
+ when is_offset chunk n ->
|
|
+ (Iindexed n, arg)
|
|
+ | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])])
|
|
+ when is_offset chunk n ->
|
|
+ (Iindexed n, Cop(Cadda, [arg1; arg2]))
|
|
+ | Cconst_symbol s
|
|
+ when use_direct_addressing s ->
|
|
+ (Ibased(s, 0), Ctuple [])
|
|
+ | arg ->
|
|
+ (Iindexed 0, arg)
|
|
+
|
|
+method! select_operation op args =
|
|
+ match op with
|
|
+ (* Integer addition *)
|
|
+ | Caddi | Cadda ->
|
|
+ begin match args with
|
|
+ (* Add immediate *)
|
|
+ | [arg; Cconst_int n] | [Cconst_int n; arg] when self#is_immediate n ->
|
|
+ ((if n >= 0 then Iintop_imm(Iadd, n) else Iintop_imm(Isub, -n)),
|
|
+ [arg])
|
|
+ (* Shift-add *)
|
|
+ | [arg1; Cop(Clsl, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
|
|
+ (Ispecific(Ishiftarith(Ishiftadd, n)), [arg1; arg2])
|
|
+ | [arg1; Cop(Casr, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
|
|
+ (Ispecific(Ishiftarith(Ishiftadd, -n)), [arg1; arg2])
|
|
+ | [Cop(Clsl, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 ->
|
|
+ (Ispecific(Ishiftarith(Ishiftadd, n)), [arg2; arg1])
|
|
+ | [Cop(Casr, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 ->
|
|
+ (Ispecific(Ishiftarith(Ishiftadd, -n)), [arg2; arg1])
|
|
+ (* Multiply-add *)
|
|
+ | [arg1; Cop(Cmuli, args2)] | [Cop(Cmuli, args2); arg1] ->
|
|
+ begin match self#select_operation Cmuli args2 with
|
|
+ | (Iintop_imm(Ilsl, l), [arg3]) ->
|
|
+ (Ispecific(Ishiftarith(Ishiftadd, l)), [arg1; arg3])
|
|
+ | (Iintop Imul, [arg3; arg4]) ->
|
|
+ (Ispecific Imuladd, [arg3; arg4; arg1])
|
|
+ | _ ->
|
|
+ super#select_operation op args
|
|
+ end
|
|
+ | _ ->
|
|
+ super#select_operation op args
|
|
+ end
|
|
+ (* Integer subtraction *)
|
|
+ | Csubi | Csuba ->
|
|
+ begin match args with
|
|
+ (* Sub immediate *)
|
|
+ | [arg; Cconst_int n] when self#is_immediate n ->
|
|
+ ((if n >= 0 then Iintop_imm(Isub, n) else Iintop_imm(Iadd, -n)),
|
|
+ [arg])
|
|
+ (* Shift-sub *)
|
|
+ | [arg1; Cop(Clsl, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
|
|
+ (Ispecific(Ishiftarith(Ishiftsub, n)), [arg1; arg2])
|
|
+ | [arg1; Cop(Casr, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
|
|
+ (Ispecific(Ishiftarith(Ishiftsub, -n)), [arg1; arg2])
|
|
+ (* Multiply-sub *)
|
|
+ | [arg1; Cop(Cmuli, args2)] ->
|
|
+ begin match self#select_operation Cmuli args2 with
|
|
+ | (Iintop_imm(Ilsl, l), [arg3]) ->
|
|
+ (Ispecific(Ishiftarith(Ishiftsub, l)), [arg1; arg3])
|
|
+ | (Iintop Imul, [arg3; arg4]) ->
|
|
+ (Ispecific Imulsub, [arg3; arg4; arg1])
|
|
+ | _ ->
|
|
+ super#select_operation op args
|
|
+ end
|
|
+ | _ ->
|
|
+ super#select_operation op args
|
|
+ end
|
|
+ (* Checkbounds *)
|
|
+ | Ccheckbound _ ->
|
|
+ begin match args with
|
|
+ | [Cop(Clsr, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 ->
|
|
+ (Ispecific(Ishiftcheckbound n), [arg1; arg2])
|
|
+ | _ ->
|
|
+ super#select_operation op args
|
|
+ end
|
|
+ (* Integer multiplication *)
|
|
+ (* ARM does not support immediate operands for multiplication *)
|
|
+ | Cmuli ->
|
|
+ begin match args with
|
|
+ | [arg; Cconst_int n] | [Cconst_int n; arg] ->
|
|
+ let l = Misc.log2 n in
|
|
+ if n = 1 lsl l
|
|
+ then (Iintop_imm(Ilsl, l), [arg])
|
|
+ else (Iintop Imul, args)
|
|
+ | _ ->
|
|
+ (Iintop Imul, args)
|
|
+ end
|
|
+ (* Division and modulus *)
|
|
+ (* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *)
|
|
+ | Cdivi ->
|
|
+ begin match args with
|
|
+ | [arg; Cconst_int n] when n = 1 lsl Misc.log2 n ->
|
|
+ ((if n = 1 then Imove else Iintop_imm(Idiv, n)), [arg])
|
|
+ | _ ->
|
|
+ (Iintop Idiv, args)
|
|
+ end
|
|
+ | Cmodi ->
|
|
+ begin match args with
|
|
+ | [arg; Cconst_int n] when n = 1 lsl Misc.log2 n ->
|
|
+ ((if n = 1 then Iconst_int 0n else Iintop_imm(Imod, n)), [arg])
|
|
+ | _ ->
|
|
+ (Iintop Imod, args)
|
|
+ end
|
|
+ (* Bitwise logical operations have a different range of immediate
|
|
+ operands than the other instructions *)
|
|
+ | Cand -> self#select_logical Iand args
|
|
+ | Cor -> self#select_logical Ior args
|
|
+ | Cxor -> self#select_logical Ixor args
|
|
+ (* Recognize floating-point negate and multiply *)
|
|
+ | Cnegf ->
|
|
+ begin match args with
|
|
+ | [Cop(Cmulf, args)] -> (Ispecific Inegmulf, args)
|
|
+ | _ -> super#select_operation op args
|
|
+ end
|
|
+ (* Recognize floating-point multiply and add/sub *)
|
|
+ | Caddf ->
|
|
+ begin match args with
|
|
+ | [arg; Cop(Cmulf, args)] | [Cop(Cmulf, args); arg] ->
|
|
+ (Ispecific Imuladdf, arg :: args)
|
|
+ | _ ->
|
|
+ super#select_operation op args
|
|
+ end
|
|
+ | Csubf ->
|
|
+ begin match args with
|
|
+ | [arg; Cop(Cmulf, args)] ->
|
|
+ (Ispecific Imulsubf, arg :: args)
|
|
+ | [Cop(Cmulf, args); arg] ->
|
|
+ (Ispecific Inegmulsubf, arg :: args)
|
|
+ | _ ->
|
|
+ super#select_operation op args
|
|
+ end
|
|
+ (* Recognize floating-point square root *)
|
|
+ | Cextcall("sqrt", _, _, _) ->
|
|
+ (Ispecific Isqrtf, args)
|
|
+ (* Recognize bswap instructions *)
|
|
+ | Cextcall("caml_bswap16_direct", _, _, _) ->
|
|
+ (Ispecific(Ibswap 16), args)
|
|
+ | Cextcall("caml_int32_direct_bswap", _, _, _) ->
|
|
+ (Ispecific(Ibswap 32), args)
|
|
+ | Cextcall(("caml_int64_direct_bswap"|"caml_nativeint_direct_bswap"),
|
|
+ _, _, _) ->
|
|
+ (Ispecific (Ibswap 64), args)
|
|
+ (* Other operations are regular *)
|
|
+ | _ ->
|
|
+ super#select_operation op args
|
|
+
|
|
+method select_logical op = function
|
|
+ | [arg; Cconst_int n] when is_logical_immediate n ->
|
|
+ (Iintop_imm(op, n), [arg])
|
|
+ | [Cconst_int n; arg] when is_logical_immediate n ->
|
|
+ (Iintop_imm(op, n), [arg])
|
|
+ | args ->
|
|
+ (Iintop op, args)
|
|
+
|
|
+end
|
|
+
|
|
+let fundecl f = (new selector)#emit_fundecl f
|
|
diff --git a/asmcomp/compilenv.ml b/asmcomp/compilenv.ml
|
|
index 17870c9..280b131 100644
|
|
--- a/asmcomp/compilenv.ml
|
|
+++ b/asmcomp/compilenv.ml
|
|
@@ -83,6 +83,15 @@ let make_symbol ?(unitname = current_unit.ui_symbol) idopt =
|
|
| None -> prefix
|
|
| Some id -> prefix ^ "__" ^ id
|
|
|
|
+let symbol_in_current_unit name =
|
|
+ let prefix = "caml" ^ current_unit.ui_symbol in
|
|
+ name = prefix ||
|
|
+ (let lp = String.length prefix in
|
|
+ String.length name >= 2 + lp
|
|
+ && String.sub name 0 lp = prefix
|
|
+ && name.[lp] = '_'
|
|
+ && name.[lp + 1] = '_')
|
|
+
|
|
let read_unit_info filename =
|
|
let ic = open_in_bin filename in
|
|
try
|
|
diff --git a/asmcomp/compilenv.mli b/asmcomp/compilenv.mli
|
|
index 51cb8c6..9ffb145 100644
|
|
--- a/asmcomp/compilenv.mli
|
|
+++ b/asmcomp/compilenv.mli
|
|
@@ -31,6 +31,10 @@ val make_symbol: ?unitname:string -> string option -> string
|
|
corresponds to symbol [id] in the compilation unit [u]
|
|
(or the current unit). *)
|
|
|
|
+val symbol_in_current_unit: string -> bool
|
|
+ (* Return true if the given asm symbol belongs to the
|
|
+ current compilation unit, false otherwise. *)
|
|
+
|
|
val symbol_for_global: Ident.t -> string
|
|
(* Return the asm symbol that refers to the given global identifier *)
|
|
|
|
diff --git a/asmrun/arm64.S b/asmrun/arm64.S
|
|
new file mode 100644
|
|
index 0000000..de670e6
|
|
--- /dev/null
|
|
+++ b/asmrun/arm64.S
|
|
@@ -0,0 +1,535 @@
|
|
+/***********************************************************************/
|
|
+/* */
|
|
+/* OCaml */
|
|
+/* */
|
|
+/* Xavier Leroy, projet Gallium, INRIA Rocquencourt */
|
|
+/* */
|
|
+/* Copyright 2013 Institut National de Recherche en Informatique et */
|
|
+/* en Automatique. All rights reserved. This file is distributed */
|
|
+/* under the terms of the GNU Library General Public License, with */
|
|
+/* the special exception on linking described in file ../LICENSE. */
|
|
+/* */
|
|
+/***********************************************************************/
|
|
+
|
|
+/* Asm part of the runtime system, ARM processor, 64-bit mode */
|
|
+/* Must be preprocessed by cpp */
|
|
+
|
|
+/* Special registers */
|
|
+
|
|
+#define TRAP_PTR x26
|
|
+#define ALLOC_PTR x27
|
|
+#define ALLOC_LIMIT x28
|
|
+#define ARG x15
|
|
+#define TMP x16
|
|
+#define TMP2 x17
|
|
+
|
|
+/* Support for CFI directives */
|
|
+
|
|
+#if defined(ASM_CFI_SUPPORTED)
|
|
+#define CFI_STARTPROC .cfi_startproc
|
|
+#define CFI_ENDPROC .cfi_endproc
|
|
+#define CFI_ADJUST(n) .cfi_adjust_cfa_offset n
|
|
+#else
|
|
+#define CFI_STARTPROC
|
|
+#define CFI_ENDPROC
|
|
+#define CFI_ADJUST(n)
|
|
+#endif
|
|
+
|
|
+/* Support for profiling with gprof */
|
|
+
|
|
+#define PROFILE
|
|
+
|
|
+/* Macros to load and store global variables. Destroy TMP2 */
|
|
+
|
|
+#if defined(__PIC__)
|
|
+
|
|
+#define ADDRGLOBAL(reg,symb) \
|
|
+ adrp TMP2, :got:symb; \
|
|
+ ldr reg, [TMP2, #:got_lo12:symb]
|
|
+
|
|
+#define LOADGLOBAL(reg,symb) \
|
|
+ ADDRGLOBAL(TMP2,symb); \
|
|
+ ldr reg, [TMP2]
|
|
+
|
|
+#define STOREGLOBAL(reg,symb) \
|
|
+ ADDRGLOBAL(TMP2,symb); \
|
|
+ str reg, [TMP2]
|
|
+
|
|
+#else
|
|
+
|
|
+#define ADDRGLOBAL(reg,symb) \
|
|
+ adrp reg, symb; \
|
|
+ add reg, reg, #:lo12:symb
|
|
+
|
|
+#define LOADGLOBAL(reg,symb) \
|
|
+ adrp TMP2, symb; \
|
|
+ ldr reg, [TMP2, #:lo12:symb]
|
|
+
|
|
+#define STOREGLOBAL(reg,symb) \
|
|
+ adrp TMP2, symb; \
|
|
+ str reg, [TMP2, #:lo12:symb]
|
|
+
|
|
+#endif
|
|
+
|
|
+/* Allocation functions and GC interface */
|
|
+
|
|
+ .globl caml_system__code_begin
|
|
+caml_system__code_begin:
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_call_gc
|
|
+caml_call_gc:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+ /* Record return address */
|
|
+ STOREGLOBAL(x30, caml_last_return_address)
|
|
+.Lcaml_call_gc:
|
|
+ /* Record lowest stack address */
|
|
+ mov TMP, sp
|
|
+ STOREGLOBAL(TMP, caml_bottom_of_stack)
|
|
+ /* Set up stack space, saving return address and frame pointer */
|
|
+ /* (2 regs RA/GP, 24 allocatable int regs, 24 caller-save float regs) * 8 */
|
|
+ stp x29, x30, [sp, -400]!
|
|
+ CFI_ADJUST(400)
|
|
+ add x29, sp, #0
|
|
+ /* Save allocatable integer registers on the stack, in the order
|
|
+ given in proc.ml */
|
|
+ stp x0, x1, [sp, 16]
|
|
+ stp x2, x3, [sp, 32]
|
|
+ stp x4, x5, [sp, 48]
|
|
+ stp x6, x7, [sp, 64]
|
|
+ stp x8, x9, [sp, 80]
|
|
+ stp x10, x11, [sp, 96]
|
|
+ stp x12, x13, [sp, 112]
|
|
+ stp x14, x15, [sp, 128]
|
|
+ stp x19, x20, [sp, 144]
|
|
+ stp x21, x22, [sp, 160]
|
|
+ stp x23, x24, [sp, 176]
|
|
+ str x25, [sp, 192]
|
|
+ /* Save caller-save floating-point registers on the stack
|
|
+ (callee-saves are preserved by caml_garbage_collection) */
|
|
+ stp d0, d1, [sp, 208]
|
|
+ stp d2, d3, [sp, 224]
|
|
+ stp d4, d5, [sp, 240]
|
|
+ stp d6, d7, [sp, 256]
|
|
+ stp d16, d17, [sp, 272]
|
|
+ stp d18, d19, [sp, 288]
|
|
+ stp d20, d21, [sp, 304]
|
|
+ stp d22, d23, [sp, 320]
|
|
+ stp d24, d25, [sp, 336]
|
|
+ stp d26, d27, [sp, 352]
|
|
+ stp d28, d29, [sp, 368]
|
|
+ stp d30, d31, [sp, 384]
|
|
+ /* Store pointer to saved integer registers in caml_gc_regs */
|
|
+ add TMP, sp, #16
|
|
+ STOREGLOBAL(TMP, caml_gc_regs)
|
|
+ /* Save current allocation pointer for debugging purposes */
|
|
+ STOREGLOBAL(ALLOC_PTR, caml_young_ptr)
|
|
+ /* Save trap pointer in case an exception is raised during GC */
|
|
+ STOREGLOBAL(TRAP_PTR, caml_exception_pointer)
|
|
+ /* Call the garbage collector */
|
|
+ bl caml_garbage_collection
|
|
+ /* Restore registers */
|
|
+ ldp x0, x1, [sp, 16]
|
|
+ ldp x2, x3, [sp, 32]
|
|
+ ldp x4, x5, [sp, 48]
|
|
+ ldp x6, x7, [sp, 64]
|
|
+ ldp x8, x9, [sp, 80]
|
|
+ ldp x10, x11, [sp, 96]
|
|
+ ldp x12, x13, [sp, 112]
|
|
+ ldp x14, x15, [sp, 128]
|
|
+ ldp x19, x20, [sp, 144]
|
|
+ ldp x21, x22, [sp, 160]
|
|
+ ldp x23, x24, [sp, 176]
|
|
+ ldr x25, [sp, 192]
|
|
+ ldp d0, d1, [sp, 208]
|
|
+ ldp d2, d3, [sp, 224]
|
|
+ ldp d4, d5, [sp, 240]
|
|
+ ldp d6, d7, [sp, 256]
|
|
+ ldp d16, d17, [sp, 272]
|
|
+ ldp d18, d19, [sp, 288]
|
|
+ ldp d20, d21, [sp, 304]
|
|
+ ldp d22, d23, [sp, 320]
|
|
+ ldp d24, d25, [sp, 336]
|
|
+ ldp d26, d27, [sp, 352]
|
|
+ ldp d28, d29, [sp, 368]
|
|
+ ldp d30, d31, [sp, 384]
|
|
+ /* Reload new allocation pointer and allocation limit */
|
|
+ LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
|
|
+ LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
|
|
+ /* Free stack space and return to caller */
|
|
+ ldp x29, x30, [sp], 400
|
|
+ ret
|
|
+ CFI_ENDPROC
|
|
+ .type caml_call_gc, %function
|
|
+ .size caml_call_gc, .-caml_call_gc
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_alloc1
|
|
+caml_alloc1:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+1: sub ALLOC_PTR, ALLOC_PTR, #16
|
|
+ cmp ALLOC_PTR, ALLOC_LIMIT
|
|
+ b.lo 2f
|
|
+ ret
|
|
+2: stp x29, x30, [sp, -16]!
|
|
+ CFI_ADJUST(16)
|
|
+ add x29, sp, #0
|
|
+ /* Record return address */
|
|
+ STOREGLOBAL(x30, caml_last_return_address)
|
|
+ /* Call GC */
|
|
+ bl .Lcaml_call_gc
|
|
+ /* Restore return address */
|
|
+ ldp x29, x30, [sp], 16
|
|
+ CFI_ADJUST(-16)
|
|
+ /* Try again */
|
|
+ b 1b
|
|
+ CFI_ENDPROC
|
|
+ .type caml_alloc1, %function
|
|
+ .size caml_alloc1, .-caml_alloc1
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_alloc2
|
|
+caml_alloc2:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+1: sub ALLOC_PTR, ALLOC_PTR, #24
|
|
+ cmp ALLOC_PTR, ALLOC_LIMIT
|
|
+ b.lo 2f
|
|
+ ret
|
|
+2: stp x29, x30, [sp, -16]!
|
|
+ CFI_ADJUST(16)
|
|
+ add x29, sp, #0
|
|
+ /* Record return address */
|
|
+ STOREGLOBAL(x30, caml_last_return_address)
|
|
+ /* Call GC */
|
|
+ bl .Lcaml_call_gc
|
|
+ /* Restore return address */
|
|
+ ldp x29, x30, [sp], 16
|
|
+ CFI_ADJUST(-16)
|
|
+ /* Try again */
|
|
+ b 1b
|
|
+ CFI_ENDPROC
|
|
+ .type caml_alloc2, %function
|
|
+ .size caml_alloc2, .-caml_alloc2
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_alloc3
|
|
+caml_alloc3:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+1: sub ALLOC_PTR, ALLOC_PTR, #32
|
|
+ cmp ALLOC_PTR, ALLOC_LIMIT
|
|
+ b.lo 2f
|
|
+ ret
|
|
+2: stp x29, x30, [sp, -16]!
|
|
+ CFI_ADJUST(16)
|
|
+ add x29, sp, #0
|
|
+ /* Record return address */
|
|
+ STOREGLOBAL(x30, caml_last_return_address)
|
|
+ /* Call GC */
|
|
+ bl .Lcaml_call_gc
|
|
+ /* Restore return address */
|
|
+ ldp x29, x30, [sp], 16
|
|
+ CFI_ADJUST(-16)
|
|
+ /* Try again */
|
|
+ b 1b
|
|
+ CFI_ENDPROC
|
|
+ .type caml_alloc2, %function
|
|
+ .size caml_alloc2, .-caml_alloc2
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_allocN
|
|
+caml_allocN:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+1: sub ALLOC_PTR, ALLOC_PTR, ARG
|
|
+ cmp ALLOC_PTR, ALLOC_LIMIT
|
|
+ b.lo 2f
|
|
+ ret
|
|
+2: stp x29, x30, [sp, -16]!
|
|
+ CFI_ADJUST(16)
|
|
+ add x29, sp, #0
|
|
+ /* Record return address */
|
|
+ STOREGLOBAL(x30, caml_last_return_address)
|
|
+ /* Call GC. This preserves ARG */
|
|
+ bl .Lcaml_call_gc
|
|
+ /* Restore return address */
|
|
+ ldp x29, x30, [sp], 16
|
|
+ CFI_ADJUST(-16)
|
|
+ /* Try again */
|
|
+ b 1b
|
|
+ CFI_ENDPROC
|
|
+ .type caml_allocN, %function
|
|
+ .size caml_allocN, .-caml_allocN
|
|
+
|
|
+/* Call a C function from OCaml */
|
|
+/* Function to call is in ARG */
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_c_call
|
|
+caml_c_call:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+ /* Preserve return address in callee-save register x19 */
|
|
+ mov x19, x30
|
|
+ /* Record lowest stack address and return address */
|
|
+ STOREGLOBAL(x30, caml_last_return_address)
|
|
+ add TMP, sp, #0
|
|
+ STOREGLOBAL(TMP, caml_bottom_of_stack)
|
|
+ /* Make the exception handler alloc ptr available to the C code */
|
|
+ STOREGLOBAL(ALLOC_PTR, caml_young_ptr)
|
|
+ STOREGLOBAL(TRAP_PTR, caml_exception_pointer)
|
|
+ /* Call the function */
|
|
+ blr ARG
|
|
+ /* Reload alloc ptr and alloc limit */
|
|
+ LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
|
|
+ LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
|
|
+ /* Return */
|
|
+ ret x19
|
|
+ CFI_ENDPROC
|
|
+ .type caml_c_call, %function
|
|
+ .size caml_c_call, .-caml_c_call
|
|
+
|
|
+/* Start the OCaml program */
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_start_program
|
|
+caml_start_program:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+ ADDRGLOBAL(ARG, caml_program)
|
|
+
|
|
+/* Code shared with caml_callback* */
|
|
+/* Address of OCaml code to call is in ARG */
|
|
+/* Arguments to the OCaml code are in x0...x7 */
|
|
+
|
|
+.Ljump_to_caml:
|
|
+ /* Set up stack frame and save callee-save registers */
|
|
+ stp x29, x30, [sp, -160]!
|
|
+ CFI_ADJUST(160)
|
|
+ add x29, sp, #0
|
|
+ stp x19, x20, [sp, 16]
|
|
+ stp x21, x22, [sp, 32]
|
|
+ stp x23, x24, [sp, 48]
|
|
+ stp x25, x26, [sp, 64]
|
|
+ stp x27, x28, [sp, 80]
|
|
+ stp d8, d9, [sp, 96]
|
|
+ stp d10, d11, [sp, 112]
|
|
+ stp d12, d13, [sp, 128]
|
|
+ stp d14, d15, [sp, 144]
|
|
+ /* Setup a callback link on the stack */
|
|
+ LOADGLOBAL(x8, caml_bottom_of_stack)
|
|
+ LOADGLOBAL(x9, caml_last_return_address)
|
|
+ LOADGLOBAL(x10, caml_gc_regs)
|
|
+ stp x8, x9, [sp, -32]! /* 16-byte alignment */
|
|
+ CFI_ADJUST(32)
|
|
+ str x10, [sp, 16]
|
|
+ /* Setup a trap frame to catch exceptions escaping the OCaml code */
|
|
+ LOADGLOBAL(x8, caml_exception_pointer)
|
|
+ adr x9, .Ltrap_handler
|
|
+ stp x8, x9, [sp, -16]!
|
|
+ CFI_ADJUST(16)
|
|
+ add TRAP_PTR, sp, #0
|
|
+ /* Reload allocation pointers */
|
|
+ LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
|
|
+ LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
|
|
+ /* Call the OCaml code */
|
|
+ blr ARG
|
|
+.Lcaml_retaddr:
|
|
+ /* Pop the trap frame, restoring caml_exception_pointer */
|
|
+ ldr x8, [sp], 16
|
|
+ CFI_ADJUST(-16)
|
|
+ STOREGLOBAL(x8, caml_exception_pointer)
|
|
+ /* Pop the callback link, restoring the global variables */
|
|
+.Lreturn_result:
|
|
+ ldr x10, [sp, 16]
|
|
+ ldp x8, x9, [sp], 32
|
|
+ CFI_ADJUST(-32)
|
|
+ STOREGLOBAL(x8, caml_bottom_of_stack)
|
|
+ STOREGLOBAL(x9, caml_last_return_address)
|
|
+ STOREGLOBAL(x10, caml_gc_regs)
|
|
+ /* Update allocation pointer */
|
|
+ STOREGLOBAL(ALLOC_PTR, caml_young_ptr)
|
|
+ /* Reload callee-save registers and return address */
|
|
+ ldp x19, x20, [sp, 16]
|
|
+ ldp x21, x22, [sp, 32]
|
|
+ ldp x23, x24, [sp, 48]
|
|
+ ldp x25, x26, [sp, 64]
|
|
+ ldp x27, x28, [sp, 80]
|
|
+ ldp d8, d9, [sp, 96]
|
|
+ ldp d10, d11, [sp, 112]
|
|
+ ldp d12, d13, [sp, 128]
|
|
+ ldp d14, d15, [sp, 144]
|
|
+ ldp x29, x30, [sp], 160
|
|
+ CFI_ADJUST(-160)
|
|
+ /* Return to C caller */
|
|
+ ret
|
|
+ CFI_ENDPROC
|
|
+ .type .Lcaml_retaddr, %function
|
|
+ .size .Lcaml_retaddr, .-.Lcaml_retaddr
|
|
+ .type caml_start_program, %function
|
|
+ .size caml_start_program, .-caml_start_program
|
|
+
|
|
+/* The trap handler */
|
|
+
|
|
+ .align 2
|
|
+.Ltrap_handler:
|
|
+ CFI_STARTPROC
|
|
+ /* Save exception pointer */
|
|
+ STOREGLOBAL(TRAP_PTR, caml_exception_pointer)
|
|
+ /* Encode exception bucket as an exception result */
|
|
+ orr x0, x0, #2
|
|
+ /* Return it */
|
|
+ b .Lreturn_result
|
|
+ CFI_ENDPROC
|
|
+ .type .Ltrap_handler, %function
|
|
+ .size .Ltrap_handler, .-.Ltrap_handler
|
|
+
|
|
+/* Raise an exception from OCaml */
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_raise_exn
|
|
+caml_raise_exn:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+ /* Test if backtrace is active */
|
|
+ LOADGLOBAL(TMP, caml_backtrace_active)
|
|
+ cbnz TMP, 2f
|
|
+1: /* Cut stack at current trap handler */
|
|
+ mov sp, TRAP_PTR
|
|
+ /* Pop previous handler and jump to it */
|
|
+ ldr TMP, [sp, 8]
|
|
+ ldr TRAP_PTR, [sp], 16
|
|
+ br TMP
|
|
+2: /* Preserve exception bucket in callee-save register x19 */
|
|
+ mov x19, x0
|
|
+ /* Stash the backtrace */
|
|
+ /* arg1: exn bucket, already in x0 */
|
|
+ mov x1, x30 /* arg2: pc of raise */
|
|
+ add x2, sp, #0 /* arg3: sp of raise */
|
|
+ mov x3, TRAP_PTR /* arg4: sp of handler */
|
|
+ bl caml_stash_backtrace
|
|
+ /* Restore exception bucket and raise */
|
|
+ mov x0, x19
|
|
+ b 1b
|
|
+ CFI_ENDPROC
|
|
+ .type caml_raise_exn, %function
|
|
+ .size caml_raise_exn, .-caml_raise_exn
|
|
+
|
|
+/* Raise an exception from C */
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_raise_exception
|
|
+caml_raise_exception:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+ /* Reload trap ptr, alloc ptr and alloc limit */
|
|
+ LOADGLOBAL(TRAP_PTR, caml_exception_pointer)
|
|
+ LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
|
|
+ LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
|
|
+ /* Test if backtrace is active */
|
|
+ LOADGLOBAL(TMP, caml_backtrace_active)
|
|
+ cbnz TMP, 2f
|
|
+1: /* Cut stack at current trap handler */
|
|
+ mov sp, TRAP_PTR
|
|
+ /* Pop previous handler and jump to it */
|
|
+ ldr TMP, [sp, 8]
|
|
+ ldr TRAP_PTR, [sp], 16
|
|
+ br TMP
|
|
+2: /* Preserve exception bucket in callee-save register x19 */
|
|
+ mov x19, x0
|
|
+ /* Stash the backtrace */
|
|
+ /* arg1: exn bucket, already in x0 */
|
|
+ LOADGLOBAL(x1, caml_last_return_address) /* arg2: pc of raise */
|
|
+ LOADGLOBAL(x2, caml_bottom_of_stack) /* arg3: sp of raise */
|
|
+ mov x3, TRAP_PTR /* arg4: sp of handler */
|
|
+ bl caml_stash_backtrace
|
|
+ /* Restore exception bucket and raise */
|
|
+ mov x0, x19
|
|
+ b 1b
|
|
+ CFI_ENDPROC
|
|
+ .type caml_raise_exception, %function
|
|
+ .size caml_raise_exception, .-caml_raise_exception
|
|
+
|
|
+/* Callback from C to OCaml */
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_callback_exn
|
|
+caml_callback_exn:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+ /* Initial shuffling of arguments (x0 = closure, x1 = first arg) */
|
|
+ mov TMP, x0
|
|
+ mov x0, x1 /* x0 = first arg */
|
|
+ mov x1, TMP /* x1 = closure environment */
|
|
+ ldr ARG, [TMP] /* code pointer */
|
|
+ b .Ljump_to_caml
|
|
+ CFI_ENDPROC
|
|
+ .type caml_callback_exn, %function
|
|
+ .size caml_callback_exn, .-caml_callback_exn
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_callback2_exn
|
|
+caml_callback2_exn:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+ /* Initial shuffling of arguments (x0 = closure, x1 = arg1, x2 = arg2) */
|
|
+ mov TMP, x0
|
|
+ mov x0, x1 /* x0 = first arg */
|
|
+ mov x1, x2 /* x1 = second arg
|
|
+ mov x2, TMP /* x2 = closure environment */
|
|
+ ADDRGLOBAL(ARG, caml_apply2)
|
|
+ b .Ljump_to_caml
|
|
+ CFI_ENDPROC
|
|
+ .type caml_callback2_exn, %function
|
|
+ .size caml_callback2_exn, .-caml_callback2_exn
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_callback3_exn
|
|
+caml_callback3_exn:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+ /* Initial shuffling of arguments */
|
|
+ /* (x0 = closure, x1 = arg1, x2 = arg2, x3 = arg3) */
|
|
+ mov TMP, x0
|
|
+ mov x0, x1 /* x0 = first arg */
|
|
+ mov x1, x2 /* x1 = second arg */
|
|
+ mov x2, x3 /* x2 = third arg */
|
|
+ mov x3, TMP /* x3 = closure environment */
|
|
+ ADDRGLOBAL(ARG, caml_apply3)
|
|
+ b .Ljump_to_caml
|
|
+ CFI_ENDPROC
|
|
+ .type caml_callback3_exn, %function
|
|
+ .size caml_callback3_exn, .-caml_callback3_exn
|
|
+
|
|
+ .align 2
|
|
+ .globl caml_ml_array_bound_error
|
|
+caml_ml_array_bound_error:
|
|
+ CFI_STARTPROC
|
|
+ PROFILE
|
|
+ /* Load address of [caml_array_bound_error] in ARG */
|
|
+ ADDRGLOBAL(ARG, caml_array_bound_error)
|
|
+ /* Call that function */
|
|
+ b caml_c_call
|
|
+ CFI_ENDPROC
|
|
+ .type caml_ml_array_bound_error, %function
|
|
+ .size caml_ml_array_bound_error, .-caml_ml_array_bound_error
|
|
+
|
|
+ .globl caml_system__code_end
|
|
+caml_system__code_end:
|
|
+
|
|
+/* GC roots for callback */
|
|
+
|
|
+ .data
|
|
+ .align 3
|
|
+ .globl caml_system__frametable
|
|
+caml_system__frametable:
|
|
+ .quad 1 /* one descriptor */
|
|
+ .quad .Lcaml_retaddr /* return address into callback */
|
|
+ .short -1 /* negative frame size => use callback link */
|
|
+ .short 0 /* no roots */
|
|
+ .align 3
|
|
+ .type caml_system__frametable, %object
|
|
+ .size caml_system__frametable, .-caml_system__frametable
|
|
diff --git a/asmrun/signals_osdep.h b/asmrun/signals_osdep.h
|
|
index ff19847..68ec837 100644
|
|
--- a/asmrun/signals_osdep.h
|
|
+++ b/asmrun/signals_osdep.h
|
|
@@ -92,6 +92,25 @@
|
|
#define CONTEXT_YOUNG_PTR (context->uc_mcontext.arm_r8)
|
|
#define CONTEXT_FAULTING_ADDRESS ((char *) context->uc_mcontext.fault_address)
|
|
|
|
+/****************** ARM64, Linux */
|
|
+
|
|
+#elif defined(TARGET_arm64) && defined(SYS_linux)
|
|
+
|
|
+ #include <sys/ucontext.h>
|
|
+
|
|
+ #define DECLARE_SIGNAL_HANDLER(name) \
|
|
+ static void name(int sig, siginfo_t * info, ucontext_t * context)
|
|
+
|
|
+ #define SET_SIGACT(sigact,name) \
|
|
+ sigact.sa_sigaction = (void (*)(int,siginfo_t *,void *)) (name); \
|
|
+ sigact.sa_flags = SA_SIGINFO
|
|
+
|
|
+ typedef unsigned long context_reg;
|
|
+ #define CONTEXT_PC (context->uc_mcontext.pc)
|
|
+ #define CONTEXT_EXCEPTION_POINTER (context->uc_mcontext.regs[26])
|
|
+ #define CONTEXT_YOUNG_PTR (context->uc_mcontext.regs[27])
|
|
+ #define CONTEXT_FAULTING_ADDRESS ((char *) context->uc_mcontext.fault_address)
|
|
+
|
|
/****************** AMD64, Solaris x86 */
|
|
|
|
#elif defined(TARGET_amd64) && defined (SYS_solaris)
|
|
diff --git a/asmrun/stack.h b/asmrun/stack.h
|
|
index 756db95..031e408 100644
|
|
--- a/asmrun/stack.h
|
|
+++ b/asmrun/stack.h
|
|
@@ -65,6 +65,11 @@
|
|
#define Callback_link(sp) ((struct caml_context *)((sp) + 16))
|
|
#endif
|
|
|
|
+#ifdef TARGET_arm64
|
|
+#define Saved_return_address(sp) *((intnat *)((sp) - 8))
|
|
+#define Callback_link(sp) ((struct caml_context *)((sp) + 16))
|
|
+#endif
|
|
+
|
|
/* Structure of OCaml callback contexts */
|
|
|
|
struct caml_context {
|
|
diff --git a/byterun/interp.c b/byterun/interp.c
|
|
index b99ed2f..af9fa0f 100644
|
|
--- a/byterun/interp.c
|
|
+++ b/byterun/interp.c
|
|
@@ -173,6 +173,12 @@ sp is a local copy of the global variable caml_extern_sp. */
|
|
#define SP_REG asm("%r14")
|
|
#define ACCU_REG asm("%r13")
|
|
#endif
|
|
+#ifdef __aarch64__
|
|
+#define PC_REG asm("%x19")
|
|
+#define SP_REG asm("%x20")
|
|
+#define ACCU_REG asm("%x21")
|
|
+#define JUMPTBL_BASE_REG asm("%x22")
|
|
+#endif
|
|
#endif
|
|
|
|
/* Division and modulus madness */
|
|
diff --git a/configure b/configure
|
|
index 9b02664..36edfab 100755
|
|
--- a/configure
|
|
+++ b/configure
|
|
@@ -657,6 +657,7 @@ if test $withsharedlibs = "yes"; then
|
|
x86_64-*-netbsd*) natdynlink=true;;
|
|
i386-*-gnu0.3) natdynlink=true;;
|
|
arm*-*-linux*) natdynlink=true;;
|
|
+ aarch64-*-linux*) natdynlink=true;;
|
|
esac
|
|
fi
|
|
|
|
@@ -715,6 +716,7 @@ case "$host" in
|
|
x86_64-*-netbsd*) arch=amd64; system=netbsd;;
|
|
x86_64-*-openbsd*) arch=amd64; system=openbsd;;
|
|
x86_64-*-darwin*) arch=amd64; system=macosx;;
|
|
+ aarch64-*-linux*) arch=arm64; system=linux;;
|
|
esac
|
|
|
|
# Some platforms exist both in 32-bit and 64-bit variants, not distinguished
|
|
@@ -767,7 +769,7 @@ case "$arch,$model,$system" in
|
|
aspp='gcc -m64 -c';;
|
|
amd64,*,*) as='as'
|
|
aspp='gcc -c';;
|
|
- arm,*,*) as='as';
|
|
+ arm,*,*|arm64,*,*)as='as';
|
|
aspp='gcc -c';;
|
|
i386,*,solaris) as='as'
|
|
aspp='/usr/ccs/bin/as -P';;
|
|
@@ -1193,6 +1195,7 @@ case "$arch" in
|
|
fi;;
|
|
power) bng_arch=ppc; bng_asm_level=1;;
|
|
amd64) bng_arch=amd64; bng_asm_level=1;;
|
|
+ arm64) bng_arch=arm64; bng_asm_level=1;;
|
|
*) bng_arch=generic; bng_asm_level=0;;
|
|
esac
|
|
|
|
diff --git a/otherlibs/num/bng.c b/otherlibs/num/bng.c
|
|
index 5bbedb0..0483ef5 100644
|
|
--- a/otherlibs/num/bng.c
|
|
+++ b/otherlibs/num/bng.c
|
|
@@ -23,12 +23,10 @@
|
|
#include "bng_amd64.c"
|
|
#elif defined(BNG_ARCH_ppc)
|
|
#include "bng_ppc.c"
|
|
-#elif defined (BNG_ARCH_alpha)
|
|
-#include "bng_alpha.c"
|
|
#elif defined (BNG_ARCH_sparc)
|
|
#include "bng_sparc.c"
|
|
-#elif defined (BNG_ARCH_mips)
|
|
-#include "bng_mips.c"
|
|
+#elif defined (BNG_ARCH_arm64)
|
|
+#include "bng_arm64.c"
|
|
#endif
|
|
#endif
|
|
|
|
diff --git a/otherlibs/num/bng_arm64.c b/otherlibs/num/bng_arm64.c
|
|
new file mode 100644
|
|
index 0000000..50843a0
|
|
--- /dev/null
|
|
+++ b/otherlibs/num/bng_arm64.c
|
|
@@ -0,0 +1,20 @@
|
|
+/***********************************************************************/
|
|
+/* */
|
|
+/* OCaml */
|
|
+/* */
|
|
+/* Xavier Leroy, projet Gallium, INRIA Rocquencourt */
|
|
+/* */
|
|
+/* Copyright 2013 Institut National de Recherche en Informatique et */
|
|
+/* en Automatique. All rights reserved. This file is distributed */
|
|
+/* under the terms of the GNU Library General Public License, with */
|
|
+/* the special exception on linking described in file ../../LICENSE. */
|
|
+/* */
|
|
+/***********************************************************************/
|
|
+
|
|
+/* Code specific for the ARM 64 (AArch64) architecture */
|
|
+
|
|
+#define BngMult(resh,resl,arg1,arg2) \
|
|
+ asm("mul %0, %2, %3 \n\t" \
|
|
+ "umulh %1, %2, %3" \
|
|
+ : "=&r" (resl), "=&r" (resh) \
|
|
+ : "r" (arg1), "r" (arg2))
|
|
diff --git a/testsuite/tests/asmcomp/Makefile b/testsuite/tests/asmcomp/Makefile
|
|
index fd01d33..9dca023 100644
|
|
--- a/testsuite/tests/asmcomp/Makefile
|
|
+++ b/testsuite/tests/asmcomp/Makefile
|
|
@@ -126,7 +126,7 @@ parsecmm.mli parsecmm.ml: parsecmm.mly
|
|
lexcmm.ml: lexcmm.mll
|
|
@$(OCAMLLEX) -q lexcmm.mll
|
|
|
|
-CASES=fib tak quicksort quicksort2 soli \
|
|
+CASES=fib tak quicksort quicksort2 soli integr \
|
|
arith checkbound tagged-fib tagged-integr tagged-quicksort tagged-tak
|
|
ARGS_fib=-DINT_INT -DFUN=fib main.c
|
|
ARGS_tak=-DUNIT_INT -DFUN=takmain main.c
|
|
diff --git a/testsuite/tests/asmcomp/arm64.S b/testsuite/tests/asmcomp/arm64.S
|
|
new file mode 100644
|
|
index 0000000..3bb4110
|
|
--- /dev/null
|
|
+++ b/testsuite/tests/asmcomp/arm64.S
|
|
@@ -0,0 +1,52 @@
|
|
+/***********************************************************************/
|
|
+/* */
|
|
+/* OCaml */
|
|
+/* */
|
|
+/* Xavier Leroy, projet Gallium, INRIA Rocquencourt */
|
|
+/* */
|
|
+/* Copyright 2013 Institut National de Recherche en Informatique et */
|
|
+/* en Automatique. All rights reserved. This file is distributed */
|
|
+/* under the terms of the Q Public License version 1.0. */
|
|
+/* */
|
|
+/***********************************************************************/
|
|
+
|
|
+ .globl call_gen_code
|
|
+ .align 2
|
|
+call_gen_code:
|
|
+ /* Set up stack frame and save callee-save registers */
|
|
+ stp x29, x30, [sp, -160]!
|
|
+ add x29, sp, #0
|
|
+ stp x19, x20, [sp, 16]
|
|
+ stp x21, x22, [sp, 32]
|
|
+ stp x23, x24, [sp, 48]
|
|
+ stp x25, x26, [sp, 64]
|
|
+ stp x27, x28, [sp, 80]
|
|
+ stp d8, d9, [sp, 96]
|
|
+ stp d10, d11, [sp, 112]
|
|
+ stp d12, d13, [sp, 128]
|
|
+ stp d14, d15, [sp, 144]
|
|
+ /* Shuffle arguments */
|
|
+ mov x8, x0
|
|
+ mov x0, x1
|
|
+ mov x1, x2
|
|
+ mov x2, x3
|
|
+ mov x3, x4
|
|
+ /* Call generated asm */
|
|
+ blr x8
|
|
+ /* Reload callee-save registers and return address */
|
|
+ ldp x19, x20, [sp, 16]
|
|
+ ldp x21, x22, [sp, 32]
|
|
+ ldp x23, x24, [sp, 48]
|
|
+ ldp x25, x26, [sp, 64]
|
|
+ ldp x27, x28, [sp, 80]
|
|
+ ldp d8, d9, [sp, 96]
|
|
+ ldp d10, d11, [sp, 112]
|
|
+ ldp d12, d13, [sp, 128]
|
|
+ ldp d14, d15, [sp, 144]
|
|
+ ldp x29, x30, [sp], 160
|
|
+ ret
|
|
+
|
|
+ .globl caml_c_call
|
|
+ .align 2
|
|
+caml_c_call:
|
|
+ br x15
|
|
diff --git a/testsuite/tests/asmcomp/main.ml b/testsuite/tests/asmcomp/main.ml
|
|
index d67a643..82b699e 100644
|
|
--- a/testsuite/tests/asmcomp/main.ml
|
|
+++ b/testsuite/tests/asmcomp/main.ml
|
|
@@ -13,6 +13,7 @@
|
|
open Clflags
|
|
|
|
let compile_file filename =
|
|
+ Clflags.dlcode := false;
|
|
Compilenv.reset "test";
|
|
Emit.begin_assembly();
|
|
let ic = open_in filename in
|
|
--
|
|
1.8.5.3
|
|
|