From c5d136f4163a0b815f936269151faf2ae44791b2 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Tue, 29 May 2012 20:47:07 +0100 Subject: [PATCH 7/7] Add support for ppc64. Note (1): This patch was rejected upstream because they don't have appropriate hardware for testing. Note (2): Upstream powerpc directory has some support for ppc64, but only for Macs, and I couldn't get it to work at all with IBM hardware. This patch was collaborated on by several people, most notably David Woodhouse. Includes fix for position of stack arguments to external C functions when there are more than 8 parameters (RHBZ#829187). Includes fix for minor heap corruption because of unaligned minor heap register (RHBZ#826649). Includes updates for OCaml 4.00.0. --- asmcomp/power64/arch.ml | 87 ++++ asmcomp/power64/emit.mlp | 989 +++++++++++++++++++++++++++++++++++++++++ asmcomp/power64/proc.ml | 241 ++++++++++ asmcomp/power64/reload.ml | 18 + asmcomp/power64/scheduling.ml | 65 +++ asmcomp/power64/selection.ml | 103 +++++ asmrun/Makefile | 6 + asmrun/power64-elf.S | 486 ++++++++++++++++++++ asmrun/stack.h | 9 + configure | 3 + 10 files changed, 2007 insertions(+) create mode 100644 asmcomp/power64/arch.ml create mode 100644 asmcomp/power64/emit.mlp create mode 100644 asmcomp/power64/proc.ml create mode 100644 asmcomp/power64/reload.ml create mode 100644 asmcomp/power64/scheduling.ml create mode 100644 asmcomp/power64/selection.ml create mode 100644 asmrun/power64-elf.S diff --git a/asmcomp/power64/arch.ml b/asmcomp/power64/arch.ml new file mode 100644 index 0000000..6a14864 --- /dev/null +++ b/asmcomp/power64/arch.ml @@ -0,0 +1,87 @@ +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* $Id: arch.ml 9547 2010-01-22 12:48:24Z doligez $ *) + +(* Specific operations for the PowerPC processor *) + +open Misc +open Format + +(* Machine-specific command-line options *) + +let command_line_options = [] + +(* Specific operations *) + +type specific_operation = + Imultaddf (* multiply and add *) + | Imultsubf (* multiply and subtract *) + | Ialloc_far of int (* allocation in large functions *) + +(* Addressing modes *) + +type addressing_mode = + Ibased of string * int (* symbol + displ *) + | Iindexed of int (* reg + displ *) + | Iindexed2 (* reg + reg *) + +(* Sizes, endianness *) + +let big_endian = true + +let size_addr = 8 +let size_int = size_addr +let size_float = 8 + +(* Behavior of division *) + +let division_crashes_on_overflow = false + +(* Operations on addressing modes *) + +let identity_addressing = Iindexed 0 + +let offset_addressing addr delta = + match addr with + Ibased(s, n) -> Ibased(s, n + delta) + | Iindexed n -> Iindexed(n + delta) + | Iindexed2 -> assert false + +let num_args_addressing = function + Ibased(s, n) -> 0 + | Iindexed n -> 1 + | Iindexed2 -> 2 + +(* Printing operations and addressing modes *) + +let print_addressing printreg addr ppf arg = + match addr with + | Ibased(s, n) -> + let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in + fprintf ppf "\"%s\"%s" s idx + | Iindexed n -> + let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in + fprintf ppf "%a%s" printreg arg.(0) idx + | Iindexed2 -> + fprintf ppf "%a + %a" printreg arg.(0) printreg arg.(1) + +let print_specific_operation printreg op ppf arg = + match op with + | Imultaddf -> + fprintf ppf "%a *f %a +f %a" + printreg arg.(0) printreg arg.(1) printreg arg.(2) + | Imultsubf -> + fprintf ppf "%a *f %a -f %a" + printreg arg.(0) printreg arg.(1) printreg arg.(2) + | Ialloc_far n -> + fprintf ppf "alloc_far %d" n diff --git a/asmcomp/power64/emit.mlp b/asmcomp/power64/emit.mlp new file mode 100644 index 0000000..42f585d --- /dev/null +++ b/asmcomp/power64/emit.mlp @@ -0,0 +1,989 @@ +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* $Id: emit.mlp 9547 2010-01-22 12:48:24Z doligez $ *) + +(* Emission of PowerPC assembly code *) + +module StringSet = Set.Make(struct type t = string let compare = compare end) + +open Location +open Misc +open Cmm +open Arch +open Proc +open Reg +open Mach +open Linearize +open Emitaux + +(* Layout of the stack. The stack is kept 16-aligned. *) + +let stack_size_lbl = ref 0 +let stack_slot_lbl = ref 0 +let stack_args_size = ref 0 +let stack_traps_size = ref 0 + +(* We have a stack frame of our own if we call other functions (including + use of exceptions, or if we need more than the red zone *) +let has_stack_frame () = + if !contains_calls or (num_stack_slots.(0) + num_stack_slots.(1)) > (288-16)/8 then + true + else + false + +let frame_size_sans_args () = + let size = 8 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + 48 in + Misc.align size 16 + +let slot_offset loc cls = + match loc with + Local n -> + if cls = 0 + then (!stack_slot_lbl, num_stack_slots.(1) * 8 + n * 8) + else (!stack_slot_lbl, n * 8) + | Incoming n -> ((if has_stack_frame() then !stack_size_lbl else 0), 48 + n) + | Outgoing n -> (0, n) + +(* Output a symbol *) + +let emit_symbol = + match Config.system with + | "elf" | "bsd" -> (fun s -> Emitaux.emit_symbol '.' s) + | "rhapsody" -> (fun s -> emit_char '_'; Emitaux.emit_symbol '$' s) + | _ -> assert false + +(* Output a label *) + +let label_prefix = + match Config.system with + | "elf" | "bsd" -> ".L" + | "rhapsody" -> "L" + | _ -> assert false + +let emit_label lbl = + emit_string label_prefix; emit_int lbl + +(* Section switching *) + +let toc_space = + match Config.system with + | "elf" | "bsd" -> " .section \".toc\",\"aw\"\n" + | "rhapsody" -> " .toc\n" + | _ -> assert false + +let data_space = + match Config.system with + | "elf" | "bsd" -> " .section \".data\"\n" + | "rhapsody" -> " .data\n" + | _ -> assert false + +let code_space = + match Config.system with + | "elf" | "bsd" -> " .section \".text\"\n" + | "rhapsody" -> " .text\n" + | _ -> assert false + +let rodata_space = + match Config.system with + | "elf" | "bsd" -> " .section \".rodata\"\n" + | "rhapsody" -> " .const\n" + | _ -> assert false + +(* Output a pseudo-register *) + +let emit_reg r = + match r.loc with + Reg r -> emit_string (register_name r) + | _ -> fatal_error "Emit.emit_reg" + +let use_full_regnames = + Config.system = "rhapsody" + +let emit_gpr r = + if use_full_regnames then emit_char 'r'; + emit_int r + +let emit_fpr r = + if use_full_regnames then emit_char 'f'; + emit_int r + +let emit_ccr r = + if use_full_regnames then emit_string "cr"; + emit_int r + +(* Output a stack reference *) + +let emit_stack r = + match r.loc with + Stack s -> + let lbl, ofs = slot_offset s (register_class r) in + if lbl > 0 then + `{emit_label lbl}+`; + `{emit_int ofs}({emit_gpr 1})` + | _ -> fatal_error "Emit.emit_stack" + +(* Split a 32-bit integer constants in two 16-bit halves *) + +let low n = n land 0xFFFF +let high n = n asr 16 + +let nativelow n = Nativeint.to_int n land 0xFFFF +let nativehigh n = Nativeint.to_int (Nativeint.shift_right n 16) + +let is_immediate n = + n <= 32767 && n >= -32768 + +let is_native_immediate n = + n <= 32767n && n >= -32768n + + +type tocentry = + TocSymOfs of (string * int) + | TocLabel of int + | TocInt of nativeint + | TocFloat of string + +(* List of all labels in tocref (reverse order) *) +let tocref_entries = ref [] + +(* Output a TOC reference *) + +let emit_symbol_offset (s, d) = + emit_symbol s; + if d > 0 then `+`; + if d <> 0 then emit_int d + +let emit_tocentry entry = + match entry with + TocSymOfs(s,d) -> emit_symbol_offset(s,d) + | TocInt i -> emit_nativeint i + | TocFloat f -> emit_string f + | TocLabel lbl -> emit_label lbl + + let rec tocref_label = function + ( [] , content ) -> + let lbl = new_label() in + tocref_entries := (lbl, content) :: !tocref_entries; + lbl + | ( (lbl, o_content) :: lst, content) -> + if content = o_content then + lbl + else + tocref_label (lst, content) + +let emit_tocref entry = + let lbl = tocref_label (!tocref_entries,entry) in + emit_label lbl; emit_string "@toc(2) #"; emit_tocentry entry + + +(* Output a load or store operation *) + +let valid_offset instr ofs = + ofs land 3 = 0 || (instr <> "ld" && instr <> "std") + +let emit_load_store instr addressing_mode addr n arg = + match addressing_mode with + Ibased(s, d) -> + let dd = (d + 0x8000) in (* We can only offset by -0x8000 .. +0x7fff *) + let a = (dd land -0x10000) in + let b = (dd land 0xffff) - 0x8000 in + ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,a))}\n`; + ` {emit_string instr} {emit_reg arg}, {emit_int b}({emit_gpr 11})\n` + | Iindexed ofs -> + if is_immediate ofs && valid_offset instr ofs then + ` {emit_string instr} {emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n` + else begin + ` lis {emit_gpr 0}, {emit_int(high ofs)}\n`; + if low ofs <> 0 then + ` ori {emit_gpr 0}, {emit_gpr 0}, {emit_int(low ofs)}\n`; + ` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_gpr 0}\n` + end + | Iindexed2 -> + ` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_reg addr.(n+1)}\n` + +(* After a comparison, extract the result as 0 or 1 *) + +let emit_set_comp cmp res = + ` mfcr {emit_gpr 0}\n`; + let bitnum = + match cmp with + Ceq | Cne -> 2 + | Cgt | Cle -> 1 + | Clt | Cge -> 0 in +` rlwinm {emit_reg res}, {emit_gpr 0}, {emit_int(bitnum+1)}, 31, 31\n`; + begin match cmp with + Cne | Cle | Cge -> ` xori {emit_reg res}, {emit_reg res}, 1\n` + | _ -> () + end + +(* Record live pointers at call points *) + +type frame_descr = + { fd_lbl: int; (* Return address *) + fd_frame_size_lbl: int; (* Size of stack frame *) + fd_live_offset: (int * int) list } (* Offsets/regs of live addresses *) + +let frame_descriptors = ref([] : frame_descr list) + +let record_frame live = + let lbl = new_label() in + let live_offset = ref [] in + Reg.Set.iter + (function + {typ = Addr; loc = Reg r} -> + live_offset := (0, (r lsl 1) + 1) :: !live_offset + | {typ = Addr; loc = Stack s} as reg -> + live_offset := slot_offset s (register_class reg) :: !live_offset + | _ -> ()) + live; + frame_descriptors := + { fd_lbl = lbl; + fd_frame_size_lbl = !stack_size_lbl; (* frame_size *) + fd_live_offset = !live_offset } :: !frame_descriptors; + `{emit_label lbl}:\n` + +let emit_frame fd = + ` .quad {emit_label fd.fd_lbl} + 4\n`; + ` .short {emit_label fd.fd_frame_size_lbl}\n`; + ` .short {emit_int (List.length fd.fd_live_offset)}\n`; + List.iter + (fun (lbl,n) -> + ` .short `; + if lbl > 0 then `{emit_label lbl}+`; + `{emit_int n}\n`) + fd.fd_live_offset; + ` .align 3\n` + +(* Record external C functions to be called in a position-independent way + (for MacOSX) *) + +let pic_externals = (Config.system = "rhapsody") + +let external_functions = ref StringSet.empty + +let emit_external s = + ` .non_lazy_symbol_pointer\n`; + `L{emit_symbol s}$non_lazy_ptr:\n`; + ` .indirect_symbol {emit_symbol s}\n`; + ` .quad 0\n` + +(* Names for conditional branches after comparisons *) + +let branch_for_comparison = function + Ceq -> "beq" | Cne -> "bne" + | Cle -> "ble" | Cgt -> "bgt" + | Cge -> "bge" | Clt -> "blt" + +let name_for_int_comparison = function + Isigned cmp -> ("cmpd", branch_for_comparison cmp) + | Iunsigned cmp -> ("cmpld", branch_for_comparison cmp) + +(* Names for various instructions *) + +let name_for_intop = function + Iadd -> "add" + | Imul -> "mulld" + | Idiv -> "divd" + | Iand -> "and" + | Ior -> "or" + | Ixor -> "xor" + | Ilsl -> "sld" + | Ilsr -> "srd" + | Iasr -> "srad" + | _ -> Misc.fatal_error "Emit.Intop" + +let name_for_intop_imm = function + Iadd -> "addi" + | Imul -> "mulli" + | Iand -> "andi." + | Ior -> "ori" + | Ixor -> "xori" + | Ilsl -> "sldi" + | Ilsr -> "srdi" + | Iasr -> "sradi" + | _ -> Misc.fatal_error "Emit.Intop_imm" + +let name_for_floatop1 = function + Inegf -> "fneg" + | Iabsf -> "fabs" + | _ -> Misc.fatal_error "Emit.Iopf1" + +let name_for_floatop2 = function + Iaddf -> "fadd" + | Isubf -> "fsub" + | Imulf -> "fmul" + | Idivf -> "fdiv" + | _ -> Misc.fatal_error "Emit.Iopf2" + +let name_for_specific = function + Imultaddf -> "fmadd" + | Imultsubf -> "fmsub" + | _ -> Misc.fatal_error "Emit.Ispecific" + +(* Name of current function *) +let function_name = ref "" +(* Entry point for tail recursive calls *) +let tailrec_entry_point = ref 0 +(* Names of functions defined in the current file *) +let defined_functions = ref StringSet.empty +(* Label of glue code for calling the GC *) +let call_gc_label = ref 0 +(* Label of jump table *) +let lbl_jumptbl = ref 0 +(* List of all labels in jumptable (reverse order) *) +let jumptbl_entries = ref [] +(* Number of jumptable entries *) +let num_jumptbl_entries = ref 0 + +(* Fixup conditional branches that exceed hardware allowed range *) + +let load_store_size = function + Ibased(s, d) -> 2 + | Iindexed ofs -> if is_immediate ofs then 1 else 3 + | Iindexed2 -> 1 + +let instr_size = function + Lend -> 0 + | Lop(Imove | Ispill | Ireload) -> 1 + | Lop(Iconst_int n) -> if is_native_immediate n then 1 else 2 + | Lop(Iconst_float s) -> 2 + | Lop(Iconst_symbol s) -> 2 + | Lop(Icall_ind) -> 6 + | Lop(Icall_imm s) -> 7 + | Lop(Itailcall_ind) -> if !contains_calls then 7 else if has_stack_frame() then 5 else 4 + | Lop(Itailcall_imm s) -> if s = !function_name then 1 else + if !contains_calls then 8 else + if has_stack_frame() then 6 else 5 + | Lop(Iextcall(s, true)) -> 8 + | Lop(Iextcall(s, false)) -> 7 + | Lop(Istackoffset n) -> 0 + | Lop(Iload(chunk, addr)) -> + if chunk = Byte_signed + then load_store_size addr + 1 + else load_store_size addr + | Lop(Istore(chunk, addr)) -> load_store_size addr + | Lop(Ialloc n) -> 4 + | Lop(Ispecific(Ialloc_far n)) -> 5 + | Lop(Iintop Imod) -> 3 + | Lop(Iintop(Icomp cmp)) -> 4 + | Lop(Iintop op) -> 1 + | Lop(Iintop_imm(Idiv, n)) -> 2 + | Lop(Iintop_imm(Imod, n)) -> 4 + | Lop(Iintop_imm(Icomp cmp, n)) -> 4 + | Lop(Iintop_imm(op, n)) -> 1 + | Lop(Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf) -> 1 + | Lop(Ifloatofint) -> 3 + | Lop(Iintoffloat) -> 3 + | Lop(Ispecific sop) -> 1 + | Lreloadretaddr -> 2 + | Lreturn -> if has_stack_frame() then 2 else 1 + | Llabel lbl -> 0 + | Lbranch lbl -> 1 + | Lcondbranch(tst, lbl) -> 2 + | Lcondbranch3(lbl0, lbl1, lbl2) -> + 1 + (if lbl0 = None then 0 else 1) + + (if lbl1 = None then 0 else 1) + + (if lbl2 = None then 0 else 1) + | Lswitch jumptbl -> 7 + | Lsetuptrap lbl -> 1 + | Lpushtrap -> 7 + | Lpoptrap -> 1 + | Lraise -> 6 + +let label_map code = + let map = Hashtbl.create 37 in + let rec fill_map pc instr = + match instr.desc with + Lend -> (pc, map) + | Llabel lbl -> Hashtbl.add map lbl pc; fill_map pc instr.next + | op -> fill_map (pc + instr_size op) instr.next + in fill_map 0 code + +let max_branch_offset = 8180 +(* 14-bit signed offset in words. Remember to cut some slack + for multi-word instructions where the branch can be anywhere in + the middle. 12 words of slack is plenty. *) + +let branch_overflows map pc_branch lbl_dest = + let pc_dest = Hashtbl.find map lbl_dest in + let delta = pc_dest - (pc_branch + 1) in + delta <= -max_branch_offset || delta >= max_branch_offset + +let opt_branch_overflows map pc_branch opt_lbl_dest = + match opt_lbl_dest with + None -> false + | Some lbl_dest -> branch_overflows map pc_branch lbl_dest + +let fixup_branches codesize map code = + let expand_optbranch lbl n arg next = + match lbl with + None -> next + | Some l -> + instr_cons (Lcondbranch(Iinttest_imm(Isigned Ceq, n), l)) + arg [||] next in + let rec fixup did_fix pc instr = + match instr.desc with + Lend -> did_fix + | Lcondbranch(test, lbl) when branch_overflows map pc lbl -> + let lbl2 = new_label() in + let cont = + instr_cons (Lbranch lbl) [||] [||] + (instr_cons (Llabel lbl2) [||] [||] instr.next) in + instr.desc <- Lcondbranch(invert_test test, lbl2); + instr.next <- cont; + fixup true (pc + 2) instr.next + | Lcondbranch3(lbl0, lbl1, lbl2) + when opt_branch_overflows map pc lbl0 + || opt_branch_overflows map pc lbl1 + || opt_branch_overflows map pc lbl2 -> + let cont = + expand_optbranch lbl0 0 instr.arg + (expand_optbranch lbl1 1 instr.arg + (expand_optbranch lbl2 2 instr.arg instr.next)) in + instr.desc <- cont.desc; + instr.next <- cont.next; + fixup true pc instr + | Lop(Ialloc n) when codesize - pc >= max_branch_offset -> + instr.desc <- Lop(Ispecific(Ialloc_far n)); + fixup true (pc + 4) instr.next + | op -> + fixup did_fix (pc + instr_size op) instr.next + in fixup false 0 code + +(* Iterate branch expansion till all conditional branches are OK *) + +let rec branch_normalization code = + let (codesize, map) = label_map code in + if codesize >= max_branch_offset && fixup_branches codesize map code + then branch_normalization code + else () + + +(* Output the assembly code for an instruction *) + +let rec emit_instr i dslot = + match i.desc with + Lend -> () + | Lop(Imove | Ispill | Ireload) -> + let src = i.arg.(0) and dst = i.res.(0) in + if src.loc <> dst.loc then begin + match (src, dst) with + {loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} -> + ` mr {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} -> + ` fmr {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} -> + ` std {emit_reg src}, {emit_stack dst}\n` + | {loc = Reg rs; typ = Float}, {loc = Stack sd} -> + ` stfd {emit_reg src}, {emit_stack dst}\n` + | {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} -> + ` ld {emit_reg dst}, {emit_stack src}\n` + | {loc = Stack ss; typ = Float}, {loc = Reg rd} -> + ` lfd {emit_reg dst}, {emit_stack src}\n` + | (_, _) -> + fatal_error "Emit: Imove" + end + | Lop(Iconst_int n) -> + if is_native_immediate n then + ` li {emit_reg i.res.(0)}, {emit_nativeint n}\n` + else if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin + ` lis {emit_reg i.res.(0)}, {emit_int(nativehigh n)}\n`; + if nativelow n <> 0 then + ` ori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_int(nativelow n)}\n` + end else begin + ` ld {emit_reg i.res.(0)}, {emit_tocref (TocInt n)}\n` + end + | Lop(Iconst_float s) -> + ` lfd {emit_reg i.res.(0)}, {emit_tocref (TocFloat s)}\n` + | Lop(Iconst_symbol s) -> + ` ld {emit_reg i.res.(0)}, {emit_tocref (TocSymOfs (s,0))}\n` + | Lop(Icall_ind) -> + ` std {emit_gpr 2},40({emit_gpr 1})\n`; + ` ld {emit_gpr 2}, 8({emit_reg i.arg.(0)})\n`; + ` ld {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(0)})\n`; + ` mtctr {emit_reg i.arg.(0)}\n`; + record_frame i.live; + ` bctrl\n`; + ` ld {emit_gpr 2},40({emit_gpr 1})\n` + | Lop(Icall_imm s) -> + ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,0))}\n`; + ` std {emit_gpr 2},40({emit_gpr 1})\n`; + ` ld {emit_gpr 2}, 8({emit_gpr 11})\n`; + ` ld {emit_gpr 11}, 0({emit_gpr 11})\n`; + ` mtctr {emit_gpr 11}\n`; + record_frame i.live; + ` bctrl\n`; + ` ld {emit_gpr 2},40({emit_gpr 1})\n` + | Lop(Itailcall_ind) -> + ` ld {emit_gpr 2}, 8({emit_reg i.arg.(0)})\n`; + ` ld {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(0)})\n`; + ` mtctr {emit_reg i.arg.(0)}\n`; + if has_stack_frame() then + ` ld {emit_gpr 1}, 0({emit_gpr 1})\n`; + if !contains_calls then begin + ` ld {emit_gpr 11}, 16({emit_gpr 1})\n`; + ` mtlr {emit_gpr 11}\n` + end; + ` bctr\n` + | Lop(Itailcall_imm s) -> + if s = !function_name then + ` b {emit_label !tailrec_entry_point}\n` + else begin + if has_stack_frame() then + ` ld {emit_gpr 1}, 0({emit_gpr 1})\n`; + if !contains_calls then begin + ` ld {emit_gpr 11}, 16({emit_gpr 1})\n`; + ` mtlr {emit_gpr 11}\n` + end; + ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,0))}\n`; + ` ld {emit_gpr 2}, 8({emit_gpr 11})\n`; + ` ld {emit_gpr 11}, 0({emit_gpr 11})\n`; + ` mtctr {emit_gpr 11}\n`; + ` bctr\n` + end + | Lop(Iextcall(s, alloc)) -> + if alloc then begin + ` ld {emit_gpr 11}, {emit_tocref (TocSymOfs (s,0))}\n`; + ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs ("caml_c_call",0))}\n`; + end else + ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs (s,0))}\n`; + ` std {emit_gpr 2}, 40({emit_gpr 1})\n`; + ` ld {emit_gpr 2}, 8({emit_gpr 12})\n`; + ` ld {emit_gpr 12}, 0({emit_gpr 12})\n`; + ` mtctr {emit_gpr 12}\n`; + if alloc then record_frame i.live; + ` bctrl\n`; + ` ld {emit_gpr 2}, 40({emit_gpr 1})\n` + | Lop(Istackoffset n) -> + if n > !stack_args_size then + stack_args_size := n + | Lop(Iload(chunk, addr)) -> + let loadinstr = + match chunk with + Byte_unsigned -> "lbz" + | Byte_signed -> "lbz" + | Sixteen_unsigned -> "lhz" + | Sixteen_signed -> "lha" + | Thirtytwo_unsigned -> "lwz" + | Thirtytwo_signed -> "lwa" + | Word -> "ld" + | Single -> "lfs" + | Double | Double_u -> "lfd" in + emit_load_store loadinstr addr i.arg 0 i.res.(0); + if chunk = Byte_signed then + ` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + | Lop(Istore(chunk, addr)) -> + let storeinstr = + match chunk with + Byte_unsigned | Byte_signed -> "stb" + | Sixteen_unsigned | Sixteen_signed -> "sth" + | Thirtytwo_unsigned | Thirtytwo_signed -> "stw" + | Word -> "std" + | Single -> "stfs" + | Double | Double_u -> "stfd" in + emit_load_store storeinstr addr i.arg 1 i.arg.(0) + | Lop(Ialloc n) -> + if !call_gc_label = 0 then call_gc_label := new_label(); + ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`; + ` cmpld {emit_gpr 31}, {emit_gpr 30}\n`; + ` addi {emit_reg i.res.(0)}, {emit_gpr 31}, 8\n`; + record_frame i.live; + ` bltl {emit_label !call_gc_label}\n` (* Must be 4 insns to restart *) + | Lop(Ispecific(Ialloc_far n)) -> + if !call_gc_label = 0 then call_gc_label := new_label(); + let lbl = new_label() in + ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`; + ` cmpld {emit_gpr 31}, {emit_gpr 30}\n`; + ` bge {emit_label lbl}\n`; + record_frame i.live; + ` bl {emit_label !call_gc_label}\n`; (* Must be 4 insns to restart *) + `{emit_label lbl}: addi {emit_reg i.res.(0)}, {emit_gpr 31}, {emit_int size_addr}\n` + | Lop(Iintop Isub) -> (* subfc has swapped arguments *) + ` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` + | Lop(Iintop Imod) -> + ` divd {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` mulld {emit_gpr 0}, {emit_gpr 0}, {emit_reg i.arg.(1)}\n`; + ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n` + | Lop(Iintop(Icomp cmp)) -> + begin match cmp with + Isigned c -> + ` cmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + emit_set_comp c i.res.(0) + | Iunsigned c -> + ` cmpld {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + emit_set_comp c i.res.(0) + end + | Lop(Iintop Icheckbound) -> + ` tdlle {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Iintop op) -> + let instr = name_for_intop op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Iintop_imm(Isub, n)) -> + ` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` + | Lop(Iintop_imm(Idiv, n)) -> (* n is guaranteed to be a power of 2 *) + let l = Misc.log2 n in + ` sradi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int l}\n`; + ` addze {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + | Lop(Iintop_imm(Imod, n)) -> (* n is guaranteed to be a power of 2 *) + let l = Misc.log2 n in + ` sradi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int l}\n`; + ` addze {emit_gpr 0}, {emit_gpr 0}\n`; + ` sldi {emit_gpr 0}, {emit_gpr 0}, {emit_int l}\n`; + ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n` + | Lop(Iintop_imm(Icomp cmp, n)) -> + begin match cmp with + Isigned c -> + ` cmpdi {emit_reg i.arg.(0)}, {emit_int n}\n`; + emit_set_comp c i.res.(0) + | Iunsigned c -> + ` cmpldi {emit_reg i.arg.(0)}, {emit_int n}\n`; + emit_set_comp c i.res.(0) + end + | Lop(Iintop_imm(Icheckbound, n)) -> + ` tdllei {emit_reg i.arg.(0)}, {emit_int n}\n` + | Lop(Iintop_imm(op, n)) -> + let instr = name_for_intop_imm op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n` + | Lop(Inegf | Iabsf as op) -> + let instr = name_for_floatop1 op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` + | Lop(Iaddf | Isubf | Imulf | Idivf as op) -> + let instr = name_for_floatop2 op in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + | Lop(Ifloatofint) -> + let ofs = if has_stack_frame() then 16 else 8 * (2 + num_stack_slots.(0) + num_stack_slots.(1)) in + ` std {emit_reg i.arg.(0)}, -{emit_int ofs}({emit_gpr 1})\n`; + ` lfd {emit_reg i.res.(0)}, -{emit_int ofs}({emit_gpr 1})\n`; + ` fcfid {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + | Lop(Iintoffloat) -> + let ofs = if has_stack_frame() then 16 else 8 * (2 + num_stack_slots.(0) + num_stack_slots.(1)) in + ` fctidz {emit_fpr 0}, {emit_reg i.arg.(0)}\n`; + ` stfd {emit_fpr 0}, -{emit_int ofs}({emit_gpr 1})\n`; + ` ld {emit_reg i.res.(0)}, -{emit_int ofs}({emit_gpr 1})\n` + | Lop(Ispecific sop) -> + let instr = name_for_specific sop in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` + | Lreloadretaddr -> + if has_stack_frame() then begin + ` ld {emit_gpr 11}, {emit_label !stack_size_lbl}+16({emit_gpr 1})\n`; + ` mtlr {emit_gpr 11}\n` + end + | Lreturn -> + if has_stack_frame() then + ` ld {emit_gpr 1}, 0({emit_gpr 1})\n`; + ` blr\n` + | Llabel lbl -> + `{emit_label lbl}:\n` + | Lbranch lbl -> + ` b {emit_label lbl}\n` + | Lcondbranch(tst, lbl) -> + begin match tst with + Itruetest -> + ` cmpdi {emit_reg i.arg.(0)}, 0\n`; + emit_delay dslot; + ` bne {emit_label lbl}\n` + | Ifalsetest -> + ` cmpdi {emit_reg i.arg.(0)}, 0\n`; + emit_delay dslot; + ` beq {emit_label lbl}\n` + | Iinttest cmp -> + let (comp, branch) = name_for_int_comparison cmp in + ` {emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + emit_delay dslot; + ` {emit_string branch} {emit_label lbl}\n` + | Iinttest_imm(cmp, n) -> + let (comp, branch) = name_for_int_comparison cmp in + ` {emit_string comp}i {emit_reg i.arg.(0)}, {emit_int n}\n`; + emit_delay dslot; + ` {emit_string branch} {emit_label lbl}\n` + | Ifloattest(cmp, neg) -> + ` fcmpu {emit_ccr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + (* bit 0 = lt, bit 1 = gt, bit 2 = eq *) + let (bitnum, negtst) = + match cmp with + Ceq -> (2, neg) + | Cne -> (2, not neg) + | Cle -> ` cror 3, 0, 2\n`; (* lt or eq *) + (3, neg) + | Cgt -> (1, neg) + | Cge -> ` cror 3, 1, 2\n`; (* gt or eq *) + (3, neg) + | Clt -> (0, neg) in + emit_delay dslot; + if negtst + then ` bf {emit_int bitnum}, {emit_label lbl}\n` + else ` bt {emit_int bitnum}, {emit_label lbl}\n` + | Ioddtest -> + ` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`; + emit_delay dslot; + ` bne {emit_label lbl}\n` + | Ieventest -> + ` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`; + emit_delay dslot; + ` beq {emit_label lbl}\n` + end + | Lcondbranch3(lbl0, lbl1, lbl2) -> + ` cmpdi {emit_reg i.arg.(0)}, 1\n`; + emit_delay dslot; + begin match lbl0 with + None -> () + | Some lbl -> ` blt {emit_label lbl}\n` + end; + begin match lbl1 with + None -> () + | Some lbl -> ` beq {emit_label lbl}\n` + end; + begin match lbl2 with + None -> () + | Some lbl -> ` bgt {emit_label lbl}\n` + end + | Lswitch jumptbl -> + if !lbl_jumptbl = 0 then lbl_jumptbl := new_label(); + ` ld {emit_gpr 11}, {emit_tocref (TocLabel !lbl_jumptbl)}\n`; + ` addi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int !num_jumptbl_entries}\n`; + ` sldi {emit_gpr 0}, {emit_gpr 0}, 2\n`; + ` lwax {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`; + ` add {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`; + ` mtctr {emit_gpr 0}\n`; + ` bctr\n`; + for i = 0 to Array.length jumptbl - 1 do + jumptbl_entries := jumptbl.(i) :: !jumptbl_entries; + incr num_jumptbl_entries + done + | Lsetuptrap lbl -> + ` bl {emit_label lbl}\n`; + | Lpushtrap -> + stack_traps_size := !stack_traps_size + 32; + ` addi {emit_gpr 11}, {emit_gpr 1}, {emit_label !stack_size_lbl}-{emit_int !stack_traps_size}\n`; + ` mflr {emit_gpr 0}\n`; + ` std {emit_gpr 29}, 0({emit_gpr 11})\n`; + ` std {emit_gpr 0}, 8({emit_gpr 11})\n`; + ` std {emit_gpr 1}, 16({emit_gpr 11})\n`; + ` std {emit_gpr 2}, 24({emit_gpr 11})\n`; + ` mr {emit_gpr 29}, {emit_gpr 11}\n` + | Lpoptrap -> + ` ld {emit_gpr 29}, 0({emit_gpr 29})\n` + | Lraise -> + ` ld {emit_gpr 0}, 8({emit_gpr 29})\n`; + ` ld {emit_gpr 1}, 16({emit_gpr 29})\n`; + ` ld {emit_gpr 2}, 24({emit_gpr 29})\n`; + ` mtlr {emit_gpr 0}\n`; + ` ld {emit_gpr 29}, 0({emit_gpr 29})\n`; + ` blr\n` + +and emit_delay = function + None -> () + | Some i -> emit_instr i None + +(* Checks if a pseudo-instruction expands to instructions + that do not branch and do not affect CR0 nor R12. *) + +let is_simple_instr i = + match i.desc with + Lop op -> + begin match op with + Icall_imm _ | Icall_ind | Itailcall_imm _ | Itailcall_ind | + Iextcall(_, _) -> false + | Ialloc(_) -> false + | Iintop(Icomp _) -> false + | Iintop_imm(Iand, _) -> false + | Iintop_imm(Icomp _, _) -> false + | _ -> true + end + | Lreloadretaddr -> true + | _ -> false + +let no_interference res arg = + try + for i = 0 to Array.length arg - 1 do + for j = 0 to Array.length res - 1 do + if arg.(i).loc = res.(j).loc then raise Exit + done + done; + true + with Exit -> + false + +(* Emit a sequence of instructions, trying to fill delay slots for branches *) + +let rec emit_all i = + match i with + {desc = Lend} -> () + | {next = {desc = (Lcondbranch(_, _) | Lcondbranch3(_, _, _))}} + when is_simple_instr i & no_interference i.res i.next.arg -> + emit_instr i.next (Some i); + emit_all i.next.next + | _ -> + emit_instr i None; + emit_all i.next + +(* Emission of a function declaration *) + +let fundecl fundecl = + function_name := fundecl.fun_name; + defined_functions := StringSet.add fundecl.fun_name !defined_functions; + tailrec_entry_point := new_label(); + if has_stack_frame() then + stack_size_lbl := new_label(); + stack_slot_lbl := new_label(); + stack_args_size := 0; + stack_traps_size := 0; + call_gc_label := 0; + ` .globl {emit_symbol fundecl.fun_name}\n`; + begin match Config.system with + | "elf" | "bsd" -> + ` .section \".opd\",\"aw\"\n`; + ` .align 3\n`; + ` .type {emit_symbol fundecl.fun_name}, @function\n`; + `{emit_symbol fundecl.fun_name}:\n`; + ` .quad .L.{emit_symbol fundecl.fun_name},.TOC.@tocbase\n`; + ` .previous\n`; + ` .align 2\n`; + emit_string code_space; + `.L.{emit_symbol fundecl.fun_name}:\n` + | _ -> + ` .align 2\n`; + emit_string code_space; + `{emit_symbol fundecl.fun_name}:\n` + end; + if !contains_calls then begin + ` mflr {emit_gpr 0}\n`; + ` std {emit_gpr 0}, 16({emit_gpr 1})\n` + end; + if has_stack_frame() then + ` stdu {emit_gpr 1}, -{emit_label !stack_size_lbl}({emit_gpr 1})\n`; + `{emit_label !tailrec_entry_point}:\n`; + branch_normalization fundecl.fun_body; + emit_all fundecl.fun_body; + ` .size .L.{emit_symbol fundecl.fun_name}, . - .L.{emit_symbol fundecl.fun_name}\n`; + if has_stack_frame() then begin + ` .set {emit_label !stack_size_lbl},{emit_int (frame_size_sans_args() + !stack_args_size + !stack_traps_size)} # stack size including traps\n`; + ` .set {emit_label !stack_slot_lbl},{emit_int (48 + !stack_args_size)} # stack slot offset\n` + end else (* leave 8 bytes for float <-> conversions *) + ` .set {emit_label !stack_slot_lbl},{emit_int (40-frame_size_sans_args())} # stack slot offset (negative)\n`; + + (* Emit the glue code to call the GC *) + if !call_gc_label > 0 then begin + `{emit_label !call_gc_label}:\n`; + ` ld {emit_gpr 12}, {emit_tocref (TocSymOfs ("caml_call_gc",0))}\n`; + ` ld {emit_gpr 12}, 0({emit_gpr 12})\n`; + ` mtctr {emit_gpr 12}\n`; + ` bctr\n`; + end + +(* Emission of data *) + +let declare_global_data s = + ` .globl {emit_symbol s}\n`; + if Config.system = "elf" || Config.system = "bsd" then + ` .type {emit_symbol s}, @object\n` + +let emit_item = function + Cglobal_symbol s -> + declare_global_data s + | Cdefine_symbol s -> + `{emit_symbol s}:\n`; + | Cdefine_label lbl -> + `{emit_label (lbl + 100000)}:\n` + | Cint8 n -> + ` .byte {emit_int n}\n` + | Cint16 n -> + ` .short {emit_int n}\n` + | Cint32 n -> + ` .long {emit_nativeint n}\n` + | Cint n -> + ` .quad {emit_nativeint n}\n` + | Csingle f -> + ` .float 0d{emit_string f}\n` + | Cdouble f -> + ` .double 0d{emit_string f}\n` + | Csymbol_address s -> + ` .quad {emit_symbol s}\n` + | Clabel_address lbl -> + ` .quad {emit_label (lbl + 100000)}\n` + | Cstring s -> + emit_bytes_directive " .byte " s + | Cskip n -> + if n > 0 then ` .space {emit_int n}\n` + | Calign n -> + ` .align {emit_int (Misc.log2 n)}\n` + +let data l = + emit_string data_space; + List.iter emit_item l + +(* Beginning / end of an assembly file *) + +let begin_assembly() = + defined_functions := StringSet.empty; + external_functions := StringSet.empty; + tocref_entries := []; + num_jumptbl_entries := 0; + jumptbl_entries := []; + lbl_jumptbl := 0; + (* Emit the beginning of the segments *) + let lbl_begin = Compilenv.make_symbol (Some "data_begin") in + emit_string data_space; + declare_global_data lbl_begin; + `{emit_symbol lbl_begin}:\n`; + let lbl_begin = Compilenv.make_symbol (Some "code_begin") in + emit_string code_space; + declare_global_data lbl_begin; + `{emit_symbol lbl_begin}:\n` + +let end_assembly() = + (* Emit the jump table *) + if !num_jumptbl_entries > 0 then begin + emit_string code_space; + `{emit_label !lbl_jumptbl}:\n`; + List.iter + (fun lbl -> ` .long {emit_label lbl} - {emit_label !lbl_jumptbl}\n`) + (List.rev !jumptbl_entries); + jumptbl_entries := [] + end; + if !tocref_entries <> [] then begin + emit_string toc_space; + List.iter + (fun (lbl, entry) -> + `{emit_label lbl}:\n`; + match entry with + TocFloat f -> + ` .double {emit_tocentry entry}\n` + | _ -> + ` .tc {emit_label lbl}[TC],{emit_tocentry entry}\n` + ) + !tocref_entries; + tocref_entries := [] + end; + if pic_externals then + (* Emit the pointers to external functions *) + StringSet.iter emit_external !external_functions; + (* Emit the end of the segments *) + emit_string code_space; + let lbl_end = Compilenv.make_symbol (Some "code_end") in + declare_global_data lbl_end; + `{emit_symbol lbl_end}:\n`; + ` .long 0\n`; + emit_string data_space; + let lbl_end = Compilenv.make_symbol (Some "data_end") in + declare_global_data lbl_end; + `{emit_symbol lbl_end}:\n`; + ` .quad 0\n`; + (* Emit the frame descriptors *) + emit_string rodata_space; + let lbl = Compilenv.make_symbol (Some "frametable") in + declare_global_data lbl; + `{emit_symbol lbl}:\n`; + ` .quad {emit_int (List.length !frame_descriptors)}\n`; + List.iter emit_frame !frame_descriptors; + frame_descriptors := [] diff --git a/asmcomp/power64/proc.ml b/asmcomp/power64/proc.ml new file mode 100644 index 0000000..119ad93 --- /dev/null +++ b/asmcomp/power64/proc.ml @@ -0,0 +1,241 @@ +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* $Id: proc.ml 9547 2010-01-22 12:48:24Z doligez $ *) + +(* Description of the Power PC *) + +open Misc +open Cmm +open Reg +open Arch +open Mach + +(* Instruction selection *) + +let word_addressed = false + +(* Registers available for register allocation *) + +(* Integer register map: + 0 temporary, null register for some operations + 1 stack pointer + 2 pointer to table of contents + 3 - 10 function arguments and results + 11 - 12 temporaries + 13 pointer to small data area + 14 - 28 general purpose, preserved by C + 29 trap pointer + 30 allocation limit + 31 allocation pointer + Floating-point register map: + 0 temporary + 1 - 13 function arguments and results + 14 - 31 general purpose, preserved by C +*) + +let int_reg_name = + if Config.system = "rhapsody" then + [| "r3"; "r4"; "r5"; "r6"; "r7"; "r8"; "r9"; "r10"; + "r14"; "r15"; "r16"; "r17"; "r18"; "r19"; "r20"; "r21"; + "r22"; "r23"; "r24"; "r25"; "r26"; "r27"; "r28" |] + else + [| "3"; "4"; "5"; "6"; "7"; "8"; "9"; "10"; + "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; + "22"; "23"; "24"; "25"; "26"; "27"; "28" |] + +let float_reg_name = + if Config.system = "rhapsody" then + [| "f1"; "f2"; "f3"; "f4"; "f5"; "f6"; "f7"; "f8"; + "f9"; "f10"; "f11"; "f12"; "f13"; "f14"; "f15"; "f16"; + "f17"; "f18"; "f19"; "f20"; "f21"; "f22"; "f23"; "f24"; + "f25"; "f26"; "f27"; "f28"; "f29"; "f30"; "f31" |] + else + [| "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; + "9"; "10"; "11"; "12"; "13"; "14"; "15"; "16"; + "17"; "18"; "19"; "20"; "21"; "22"; "23"; "24"; + "25"; "26"; "27"; "28"; "29"; "30"; "31" |] + +let num_register_classes = 2 + +let register_class r = + match r.typ with + Int -> 0 + | Addr -> 0 + | Float -> 1 + +let num_available_registers = [| 23; 31 |] + +let first_available_register = [| 0; 100 |] + +let register_name r = + if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) + +let rotate_registers = true + +(* Representation of hard registers by pseudo-registers *) + +let hard_int_reg = + let v = Array.create 23 Reg.dummy in + for i = 0 to 22 do v.(i) <- Reg.at_location Int (Reg i) done; v + +let hard_float_reg = + let v = Array.create 31 Reg.dummy in + for i = 0 to 30 do v.(i) <- Reg.at_location Float (Reg(100 + i)) done; v + +let all_phys_regs = + Array.append hard_int_reg hard_float_reg + +let phys_reg n = + if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) + +let stack_slot slot ty = + Reg.at_location ty (Stack slot) + +(* Calling conventions *) + +let calling_conventions + first_int last_int first_float last_float make_stack stack_ofs arg = + let loc = Array.create (Array.length arg) Reg.dummy in + let int = ref first_int in + let float = ref first_float in + let ofs = ref stack_ofs in + for i = 0 to Array.length arg - 1 do + match arg.(i).typ with + Int | Addr as ty -> + if !int <= last_int then begin + loc.(i) <- phys_reg !int; + incr int + end else begin + loc.(i) <- stack_slot (make_stack !ofs) ty; + end; + ofs := !ofs + size_int + | Float -> + if !float <= last_float then begin + loc.(i) <- phys_reg !float; + incr float + end else begin + loc.(i) <- stack_slot (make_stack !ofs) Float; + end; + ofs := !ofs + size_float + done; + (loc, Misc.align !ofs 16) + (* Keep stack 16-aligned. *) + +let incoming ofs = Incoming ofs +let outgoing ofs = Outgoing ofs +let not_supported ofs = fatal_error "Proc.loc_results: cannot call" + +let loc_arguments arg = + calling_conventions 0 7 100 112 outgoing 48 arg +let loc_parameters arg = + let (loc, ofs) = calling_conventions 0 7 100 112 incoming 0 arg in loc +let loc_results res = + let (loc, ofs) = calling_conventions 0 7 100 112 not_supported 0 res in loc + +(* C calling conventions under PowerOpen: + use GPR 3-10 and FPR 1-13 just like ML calling + conventions, but always reserve stack space for all arguments. + Also, using a float register automatically reserves two int registers + (in 32-bit mode) or one int register (in 64-bit mode). + (If we were to call a non-prototyped C function, each float argument + would have to go both in a float reg and in the matching pair + of integer regs.) + + C calling conventions under SVR4: + use GPR 3-10 and FPR 1-8 just like ML calling conventions. + Using a float register does not affect the int registers. + Always reserve 8 bytes at bottom of stack, plus whatever is needed + to hold the overflow arguments. *) + +let poweropen_external_conventions first_int last_int + first_float last_float arg = + let loc = Array.create (Array.length arg) Reg.dummy in + let int = ref first_int in + let float = ref first_float in + let ofs = ref (14 * size_addr) in + for i = 0 to Array.length arg - 1 do + match arg.(i).typ with + Int | Addr as ty -> + if !int <= last_int then begin + loc.(i) <- phys_reg !int; + incr int + end else begin + loc.(i) <- stack_slot (Outgoing !ofs) ty; + ofs := !ofs + size_int + end + | Float -> + if !float <= last_float then begin + loc.(i) <- phys_reg !float; + incr float + end else begin + loc.(i) <- stack_slot (Outgoing !ofs) Float; + ofs := !ofs + size_float + end; + int := !int + 1 + done; + (loc, Misc.align !ofs 16) (* Keep stack 16-aligned *) + +let loc_external_arguments = + match Config.system with + | "rhapsody" -> poweropen_external_conventions 0 7 100 112 + | "elf" | "bsd" -> calling_conventions 0 7 100 107 outgoing 48 + | _ -> assert false + +let extcall_use_push = false + +(* Results are in GPR 3 and FPR 1 *) + +let loc_external_results res = + let (loc, ofs) = calling_conventions 0 0 100 100 not_supported 0 res in loc + +(* Exceptions are in GPR 3 *) + +let loc_exn_bucket = phys_reg 0 + +(* Registers destroyed by operations *) + +let destroyed_at_c_call = + Array.of_list(List.map phys_reg + [0; 1; 2; 3; 4; 5; 6; 7; + 100; 101; 102; 103; 104; 105; 106; 107; 108; 109; 110; 111; 112]) + +let destroyed_at_oper = function + Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs + | Iop(Iextcall(_, false)) -> destroyed_at_c_call + | _ -> [||] + +let destroyed_at_raise = all_phys_regs + +(* Maximal register pressure *) + +let safe_register_pressure = function + Iextcall(_, _) -> 15 + | _ -> 23 + +let max_register_pressure = function + Iextcall(_, _) -> [| 15; 18 |] + | _ -> [| 23; 30 |] + +(* Layout of the stack *) + +let num_stack_slots = [| 0; 0 |] +let contains_calls = ref false + +(* Calling the assembler *) + +let assemble_file infile outfile = + Ccomp.command (Config.asm ^ " -o " ^ + Filename.quote outfile ^ " " ^ Filename.quote infile) + +open Clflags;; +open Config;; diff --git a/asmcomp/power64/reload.ml b/asmcomp/power64/reload.ml new file mode 100644 index 0000000..abcac6c --- /dev/null +++ b/asmcomp/power64/reload.ml @@ -0,0 +1,18 @@ +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* $Id: reload.ml 2553 1999-11-17 18:59:06Z xleroy $ *) + +(* Reloading for the PowerPC *) + +let fundecl f = + (new Reloadgen.reload_generic)#fundecl f diff --git a/asmcomp/power64/scheduling.ml b/asmcomp/power64/scheduling.ml new file mode 100644 index 0000000..b7bba9b --- /dev/null +++ b/asmcomp/power64/scheduling.ml @@ -0,0 +1,65 @@ +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* $Id: scheduling.ml 9547 2010-01-22 12:48:24Z doligez $ *) + +(* Instruction scheduling for the Power PC *) + +open Arch +open Mach + +class scheduler = object + +inherit Schedgen.scheduler_generic + +(* Latencies (in cycles). Based roughly on the "common model". *) + +method oper_latency = function + Ireload -> 2 + | Iload(_, _) -> 2 + | Iconst_float _ -> 2 (* turned into a load *) + | Iconst_symbol _ -> 1 + | Iintop Imul -> 9 + | Iintop_imm(Imul, _) -> 5 + | Iintop(Idiv | Imod) -> 36 + | Iaddf | Isubf -> 4 + | Imulf -> 5 + | Idivf -> 33 + | Ispecific(Imultaddf | Imultsubf) -> 5 + | _ -> 1 + +method reload_retaddr_latency = 12 + (* If we can have that many cycles between the reloadretaddr and the + return, we can expect that the blr branch will be completely folded. *) + +(* Issue cycles. Rough approximations. *) + +method oper_issue_cycles = function + Iconst_float _ | Iconst_symbol _ -> 2 + | Iload(_, Ibased(_, _)) -> 2 + | Istore(_, Ibased(_, _)) -> 2 + | Ialloc _ -> 4 + | Iintop(Imod) -> 40 (* assuming full stall *) + | Iintop(Icomp _) -> 4 + | Iintop_imm(Idiv, _) -> 2 + | Iintop_imm(Imod, _) -> 4 + | Iintop_imm(Icomp _, _) -> 4 + | Ifloatofint -> 9 + | Iintoffloat -> 4 + | _ -> 1 + +method reload_retaddr_issue_cycles = 3 + (* load then stalling mtlr *) + +end + +let fundecl f = (new scheduler)#schedule_fundecl f diff --git a/asmcomp/power64/selection.ml b/asmcomp/power64/selection.ml new file mode 100644 index 0000000..d2325e1 --- /dev/null +++ b/asmcomp/power64/selection.ml @@ -0,0 +1,103 @@ +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1997 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +(* $Id: selection.ml 10296 2010-04-22 12:51:06Z xleroy $ *) + +(* Instruction selection for the Power PC processor *) + +open Misc +open Cmm +open Reg +open Arch +open Mach + +(* Recognition of addressing modes *) + +type addressing_expr = + Asymbol of string + | Alinear of expression + | Aadd of expression * expression + +let rec select_addr = function + Cconst_symbol s -> + (Asymbol s, 0) + | Cop((Caddi | Cadda), [arg; Cconst_int m]) -> + let (a, n) = select_addr arg in (a, n + m) + | Cop((Caddi | Cadda), [Cconst_int m; arg]) -> + let (a, n) = select_addr arg in (a, n + m) + | Cop((Caddi | Cadda), [arg1; arg2]) -> + begin match (select_addr arg1, select_addr arg2) with + ((Alinear e1, n1), (Alinear e2, n2)) -> + (Aadd(e1, e2), n1 + n2) + | _ -> + (Aadd(arg1, arg2), 0) + end + | exp -> + (Alinear exp, 0) + +(* Instruction selection *) + +class selector = object (self) + +inherit Selectgen.selector_generic as super + +method is_immediate n = (n <= 32767) && (n >= -32768) + +method select_addressing chunk exp = + match select_addr exp with + (Asymbol s, d) -> + (Ibased(s, d), Ctuple []) + | (Alinear e, d) -> + (Iindexed d, e) + | (Aadd(e1, e2), d) -> + if d = 0 + then (Iindexed2, Ctuple[e1; e2]) + else (Iindexed d, Cop(Cadda, [e1; e2])) + +method! select_operation op args = + match (op, args) with + (* Prevent the recognition of (x / cst) and (x % cst) when cst is not + a power of 2, which do not correspond to an instruction. *) + (Cdivi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) -> + (Iintop_imm(Idiv, n), [arg]) + | (Cdivi, _) -> + (Iintop Idiv, args) + | (Cmodi, [arg; Cconst_int n]) when n = 1 lsl (Misc.log2 n) -> + (Iintop_imm(Imod, n), [arg]) + | (Cmodi, _) -> + (Iintop Imod, args) + (* The and, or and xor instructions have a different range of immediate + operands than the other instructions *) + | (Cand, _) -> self#select_logical Iand args + | (Cor, _) -> self#select_logical Ior args + | (Cxor, _) -> self#select_logical Ixor args + (* Recognize mult-add and mult-sub instructions *) + | (Caddf, [Cop(Cmulf, [arg1; arg2]); arg3]) -> + (Ispecific Imultaddf, [arg1; arg2; arg3]) + | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2])]) -> + (Ispecific Imultaddf, [arg1; arg2; arg3]) + | (Csubf, [Cop(Cmulf, [arg1; arg2]); arg3]) -> + (Ispecific Imultsubf, [arg1; arg2; arg3]) + | _ -> + super#select_operation op args + +method select_logical op = function + [arg; Cconst_int n] when n >= 0 && n <= 0xFFFF -> + (Iintop_imm(op, n), [arg]) + | [Cconst_int n; arg] when n >= 0 && n <= 0xFFFF -> + (Iintop_imm(op, n), [arg]) + | args -> + (Iintop op, args) + +end + +let fundecl f = (new selector)#emit_fundecl f diff --git a/asmrun/Makefile b/asmrun/Makefile index d4f0c56..d58d451 100644 --- a/asmrun/Makefile +++ b/asmrun/Makefile @@ -89,6 +89,12 @@ power.o: power-$(SYSTEM).o power.p.o: power-$(SYSTEM).o cp power-$(SYSTEM).o power.p.o +power64.o: power64-$(SYSTEM).o + cp power64-$(SYSTEM).o power64.o + +power64.p.o: power64-$(SYSTEM).o + cp power64-$(SYSTEM).o power64.p.o + main.c: ../byterun/main.c ln -s ../byterun/main.c main.c misc.c: ../byterun/misc.c diff --git a/asmrun/power64-elf.S b/asmrun/power64-elf.S new file mode 100644 index 0000000..b2c24d6 --- /dev/null +++ b/asmrun/power64-elf.S @@ -0,0 +1,486 @@ +/*********************************************************************/ +/* */ +/* Objective Caml */ +/* */ +/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ +/* */ +/* Copyright 1996 Institut National de Recherche en Informatique et */ +/* en Automatique. All rights reserved. This file is distributed */ +/* under the terms of the GNU Library General Public License, with */ +/* the special exception on linking described in file ../LICENSE. */ +/* */ +/*********************************************************************/ + +/* $Id: ocaml-3.11.0-ppc64.patch,v 1.1 2008/11/20 15:30:55 rjones Exp $ */ + +#define Addrglobal(reg,glob) \ + addis reg, 0, glob@ha; \ + addi reg, reg, glob@l +#define Loadglobal(reg,glob,tmp) \ + addis tmp, 0, glob@ha; \ + ld reg, glob@l(tmp) +#define Storeglobal(reg,glob,tmp) \ + addis tmp, 0, glob@ha; \ + std reg, glob@l(tmp) + + .section ".text" + +/* Invoke the garbage collector. */ + + .globl caml_call_gc + .type caml_call_gc, @function + .section ".opd","aw" + .align 3 +caml_call_gc: + .quad .L.caml_call_gc,.TOC.@tocbase + .previous + .align 2 +.L.caml_call_gc: + /* Set up stack frame */ + mflr 0 + std 0, 16(1) + /* Record return address into Caml code */ + Storeglobal(0, caml_last_return_address, 11) + /* Record lowest stack address */ + Storeglobal(1, caml_bottom_of_stack, 11) + /* 0x220 = 8*32 (int regs) + 8*32 (float regs) + 48 (stack frame) */ + stdu 1, -0x230(1) + /* Record pointer to register array */ + addi 0, 1, 8*32 + 48 + Storeglobal(0, caml_gc_regs, 11) + /* Save current allocation pointer for debugging purposes */ + Storeglobal(31, caml_young_ptr, 11) + /* Save exception pointer (if e.g. a sighandler raises) */ + Storeglobal(29, caml_exception_pointer, 11) + /* Save all registers used by the code generator */ + addi 11, 1, 8*32 + 48 - 8 + stdu 3, 8(11) + stdu 4, 8(11) + stdu 5, 8(11) + stdu 6, 8(11) + stdu 7, 8(11) + stdu 8, 8(11) + stdu 9, 8(11) + stdu 10, 8(11) + stdu 14, 8(11) + stdu 15, 8(11) + stdu 16, 8(11) + stdu 17, 8(11) + stdu 18, 8(11) + stdu 19, 8(11) + stdu 20, 8(11) + stdu 21, 8(11) + stdu 22, 8(11) + stdu 23, 8(11) + stdu 24, 8(11) + stdu 25, 8(11) + stdu 26, 8(11) + stdu 27, 8(11) + stdu 28, 8(11) + addi 11, 1, 48 - 8 + stfdu 1, 8(11) + stfdu 2, 8(11) + stfdu 3, 8(11) + stfdu 4, 8(11) + stfdu 5, 8(11) + stfdu 6, 8(11) + stfdu 7, 8(11) + stfdu 8, 8(11) + stfdu 9, 8(11) + stfdu 10, 8(11) + stfdu 11, 8(11) + stfdu 12, 8(11) + stfdu 13, 8(11) + stfdu 14, 8(11) + stfdu 15, 8(11) + stfdu 16, 8(11) + stfdu 17, 8(11) + stfdu 18, 8(11) + stfdu 19, 8(11) + stfdu 20, 8(11) + stfdu 21, 8(11) + stfdu 22, 8(11) + stfdu 23, 8(11) + stfdu 24, 8(11) + stfdu 25, 8(11) + stfdu 26, 8(11) + stfdu 27, 8(11) + stfdu 28, 8(11) + stfdu 29, 8(11) + stfdu 30, 8(11) + stfdu 31, 8(11) + /* Call the GC */ + std 2,40(1) + Addrglobal(11, caml_garbage_collection) + ld 2,8(11) + ld 11,0(11) + mtlr 11 + blrl + ld 2,40(1) + /* Reload new allocation pointer and allocation limit */ + Loadglobal(31, caml_young_ptr, 11) + Loadglobal(30, caml_young_limit, 11) + /* Restore all regs used by the code generator */ + addi 11, 1, 8*32 + 48 - 8 + ldu 3, 8(11) + ldu 4, 8(11) + ldu 5, 8(11) + ldu 6, 8(11) + ldu 7, 8(11) + ldu 8, 8(11) + ldu 9, 8(11) + ldu 10, 8(11) + ldu 14, 8(11) + ldu 15, 8(11) + ldu 16, 8(11) + ldu 17, 8(11) + ldu 18, 8(11) + ldu 19, 8(11) + ldu 20, 8(11) + ldu 21, 8(11) + ldu 22, 8(11) + ldu 23, 8(11) + ldu 24, 8(11) + ldu 25, 8(11) + ldu 26, 8(11) + ldu 27, 8(11) + ldu 28, 8(11) + addi 11, 1, 48 - 8 + lfdu 1, 8(11) + lfdu 2, 8(11) + lfdu 3, 8(11) + lfdu 4, 8(11) + lfdu 5, 8(11) + lfdu 6, 8(11) + lfdu 7, 8(11) + lfdu 8, 8(11) + lfdu 9, 8(11) + lfdu 10, 8(11) + lfdu 11, 8(11) + lfdu 12, 8(11) + lfdu 13, 8(11) + lfdu 14, 8(11) + lfdu 15, 8(11) + lfdu 16, 8(11) + lfdu 17, 8(11) + lfdu 18, 8(11) + lfdu 19, 8(11) + lfdu 20, 8(11) + lfdu 21, 8(11) + lfdu 22, 8(11) + lfdu 23, 8(11) + lfdu 24, 8(11) + lfdu 25, 8(11) + lfdu 26, 8(11) + lfdu 27, 8(11) + lfdu 28, 8(11) + lfdu 29, 8(11) + lfdu 30, 8(11) + lfdu 31, 8(11) + /* Return to caller, restarting the allocation */ + Loadglobal(0, caml_last_return_address, 11) + addic 0, 0, -16 /* Restart the allocation (4 instructions) */ + mtlr 0 + /* Say we are back into Caml code */ + li 12, 0 + Storeglobal(12, caml_last_return_address, 11) + /* Deallocate stack frame */ + ld 1, 0(1) + /* Return */ + blr + .size .L.caml_call_gc,.-.L.caml_call_gc + +/* Call a C function from Caml */ + + .globl caml_c_call + .type caml_c_call, @function + .section ".opd","aw" + .align 3 +caml_c_call: + .quad .L.caml_c_call,.TOC.@tocbase + .previous + .align 2 +.L.caml_c_call: + .cfi_startproc + /* Save return address */ + mflr 25 + .cfi_register lr,25 + /* Get ready to call C function (address in 11) */ + ld 2, 8(11) + ld 11,0(11) + mtlr 11 + /* Record lowest stack address and return address */ + Storeglobal(1, caml_bottom_of_stack, 12) + Storeglobal(25, caml_last_return_address, 12) + /* Make the exception handler and alloc ptr available to the C code */ + Storeglobal(31, caml_young_ptr, 11) + Storeglobal(29, caml_exception_pointer, 11) + /* Call the function (address in link register) */ + blrl + /* Restore return address (in 25, preserved by the C function) */ + mtlr 25 + /* Reload allocation pointer and allocation limit*/ + Loadglobal(31, caml_young_ptr, 11) + Loadglobal(30, caml_young_limit, 11) + /* Say we are back into Caml code */ + li 12, 0 + Storeglobal(12, caml_last_return_address, 11) + /* Return to caller */ + blr + .cfi_endproc + .size .L.caml_c_call,.-.L.caml_c_call + +/* Raise an exception from C */ + + .globl caml_raise_exception + .type caml_raise_exception, @function + .section ".opd","aw" + .align 3 +caml_raise_exception: + .quad .L.caml_raise_exception,.TOC.@tocbase + .previous + .align 2 +.L.caml_raise_exception: + /* Reload Caml global registers */ + Loadglobal(29, caml_exception_pointer, 11) + Loadglobal(31, caml_young_ptr, 11) + Loadglobal(30, caml_young_limit, 11) + /* Say we are back into Caml code */ + li 0, 0 + Storeglobal(0, caml_last_return_address, 11) + /* Pop trap frame */ + ld 0, 8(29) + ld 1, 16(29) + mtlr 0 + ld 2, 24(29) + ld 29, 0(29) + /* Branch to handler */ + blr + .size .L.caml_raise_exception,.-.L.caml_raise_exception + +/* Start the Caml program */ + + .globl caml_start_program + .type caml_start_program, @function + .section ".opd","aw" + .align 3 +caml_start_program: + .quad .L.caml_start_program,.TOC.@tocbase + .previous + .align 2 +.L.caml_start_program: + Addrglobal(12, caml_program) + +/* Code shared between caml_start_program and caml_callback */ +.L102: + /* Allocate and link stack frame */ + mflr 0 + std 0, 16(1) + stdu 1, -0x190(1) /* 48 + 8*36(regs) + 32(callback) + 32(exc) */ + /* Save return address */ + /* Save all callee-save registers */ + /* GPR 14 ... GPR 31 then FPR 14 ... FPR 31 starting at sp+16 */ + addi 11, 1, 48-8 + stdu 14, 8(11) + stdu 15, 8(11) + stdu 16, 8(11) + stdu 17, 8(11) + stdu 18, 8(11) + stdu 19, 8(11) + stdu 20, 8(11) + stdu 21, 8(11) + stdu 22, 8(11) + stdu 23, 8(11) + stdu 24, 8(11) + stdu 25, 8(11) + stdu 26, 8(11) + stdu 27, 8(11) + stdu 28, 8(11) + stdu 29, 8(11) + stdu 30, 8(11) + stdu 31, 8(11) + stfdu 14, 8(11) + stfdu 15, 8(11) + stfdu 16, 8(11) + stfdu 17, 8(11) + stfdu 18, 8(11) + stfdu 19, 8(11) + stfdu 20, 8(11) + stfdu 21, 8(11) + stfdu 22, 8(11) + stfdu 23, 8(11) + stfdu 24, 8(11) + stfdu 25, 8(11) + stfdu 26, 8(11) + stfdu 27, 8(11) + stfdu 28, 8(11) + stfdu 29, 8(11) + stfdu 30, 8(11) + stfdu 31, 8(11) + /* Set up a callback link */ + Loadglobal(9, caml_bottom_of_stack, 11) + Loadglobal(10, caml_last_return_address, 11) + Loadglobal(11, caml_gc_regs, 11) + std 9, 0x150(1) + std 10, 0x158(1) + std 11, 0x160(1) + /* Build an exception handler to catch exceptions escaping out of Caml */ + bl .L103 + b .L104 +.L103: + mflr 0 + addi 29, 1, 0x170 /* Alignment */ + std 0, 8(29) + std 1, 16(29) + std 2, 24(29) + Loadglobal(11, caml_exception_pointer, 11) + std 11, 0(29) + /* Reload allocation pointers */ + Loadglobal(31, caml_young_ptr, 11) + Loadglobal(30, caml_young_limit, 11) + /* Say we are back into Caml code */ + li 0, 0 + Storeglobal(0, caml_last_return_address, 11) + /* Call the Caml code */ + std 2,40(1) + ld 2,8(12) + ld 12,0(12) + mtlr 12 +.L105: + blrl + ld 2,40(1) + /* Pop the trap frame, restoring caml_exception_pointer */ + ld 9, 0x170(1) + Storeglobal(9, caml_exception_pointer, 11) + /* Pop the callback link, restoring the global variables */ +.L106: + ld 9, 0x150(1) + ld 10, 0x158(1) + ld 11, 0x160(1) + Storeglobal(9, caml_bottom_of_stack, 12) + Storeglobal(10, caml_last_return_address, 12) + Storeglobal(11, caml_gc_regs, 12) + /* Update allocation pointer */ + Storeglobal(31, caml_young_ptr, 11) + /* Restore callee-save registers */ + addi 11, 1, 48-8 + ldu 14, 8(11) + ldu 15, 8(11) + ldu 16, 8(11) + ldu 17, 8(11) + ldu 18, 8(11) + ldu 19, 8(11) + ldu 20, 8(11) + ldu 21, 8(11) + ldu 22, 8(11) + ldu 23, 8(11) + ldu 24, 8(11) + ldu 25, 8(11) + ldu 26, 8(11) + ldu 27, 8(11) + ldu 28, 8(11) + ldu 29, 8(11) + ldu 30, 8(11) + ldu 31, 8(11) + lfdu 14, 8(11) + lfdu 15, 8(11) + lfdu 16, 8(11) + lfdu 17, 8(11) + lfdu 18, 8(11) + lfdu 19, 8(11) + lfdu 20, 8(11) + lfdu 21, 8(11) + lfdu 22, 8(11) + lfdu 23, 8(11) + lfdu 24, 8(11) + lfdu 25, 8(11) + lfdu 26, 8(11) + lfdu 27, 8(11) + lfdu 28, 8(11) + lfdu 29, 8(11) + lfdu 30, 8(11) + lfdu 31, 8(11) + /* Return */ + ld 1,0(1) + /* Reload return address */ + ld 0, 16(1) + mtlr 0 + blr + + /* The trap handler: */ +.L104: + /* Update caml_exception_pointer */ + Storeglobal(29, caml_exception_pointer, 11) + /* Encode exception bucket as an exception result and return it */ + ori 3, 3, 2 + b .L106 + .size .L.caml_start_program,.-.L.caml_start_program + +/* Callback from C to Caml */ + + .globl caml_callback_exn + .type caml_callback_exn, @function + .section ".opd","aw" + .align 3 +caml_callback_exn: + .quad .L.caml_callback_exn,.TOC.@tocbase + .previous + .align 2 +.L.caml_callback_exn: + /* Initial shuffling of arguments */ + mr 0, 3 /* Closure */ + mr 3, 4 /* Argument */ + mr 4, 0 + ld 12, 0(4) /* Code pointer */ + b .L102 + .size .L.caml_callback_exn,.-.L.caml_callback_exn + + + .globl caml_callback2_exn + .type caml_callback2_exn, @function + .section ".opd","aw" + .align 3 +caml_callback2_exn: + .quad .L.caml_callback2_exn,.TOC.@tocbase + .previous + .align 2 +.L.caml_callback2_exn: + mr 0, 3 /* Closure */ + mr 3, 4 /* First argument */ + mr 4, 5 /* Second argument */ + mr 5, 0 + Addrglobal(12, caml_apply2) + b .L102 + .size .L.caml_callback2_exn,.-.L.caml_callback2_exn + + + .globl caml_callback3_exn + .type caml_callback3_exn, @function + .section ".opd","aw" + .align 3 +caml_callback3_exn: + .quad .L.caml_callback3_exn,.TOC.@tocbase + .previous + .align 2 +.L.caml_callback3_exn: + mr 0, 3 /* Closure */ + mr 3, 4 /* First argument */ + mr 4, 5 /* Second argument */ + mr 5, 6 /* Third argument */ + mr 6, 0 + Addrglobal(12, caml_apply3) + b .L102 + .size .L.caml_callback3_exn,.-.L.caml_callback3_exn + +/* Frame table */ + + .section ".data" + .globl caml_system__frametable + .type caml_system__frametable, @object +caml_system__frametable: + .quad 1 /* one descriptor */ + .quad .L105 + 4 /* return address into callback */ + .short -1 /* negative size count => use callback link */ + .short 0 /* no roots here */ + .align 3 + diff --git a/asmrun/stack.h b/asmrun/stack.h index a801405..59a7bf2 100644 --- a/asmrun/stack.h +++ b/asmrun/stack.h @@ -47,6 +47,15 @@ #define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size)) #endif +#ifdef TARGET_power64 +#define Saved_return_address(sp) *((intnat *)((sp) +16)) +#define Already_scanned(sp, retaddr) ((retaddr) & 1) +#define Mark_scanned(sp, retaddr) (Saved_return_address(sp) = (retaddr) | 1) +#define Mask_already_scanned(retaddr) ((retaddr) & ~1) +#define Trap_frame_size 0x150 +#define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size)) +#endif + #ifdef TARGET_arm #define Saved_return_address(sp) *((intnat *)((sp) - 4)) #define Callback_link(sp) ((struct caml_context *)((sp) + 8)) diff --git a/configure b/configure index eafd6d9..2f48daf 100755 --- a/configure +++ b/configure @@ -685,6 +685,7 @@ case "$host" in arch=i386; system=macosx fi;; i[3456]86-*-gnu*) arch=i386; system=gnu;; + powerpc64-*-linux*) arch=power64; model=ppc64; system=elf;; powerpc*-*-linux*) arch=power; model=ppc; system=elf;; powerpc-*-netbsd*) arch=power; model=ppc; system=elf;; powerpc-*-rhapsody*) arch=power; model=ppc; system=rhapsody;; @@ -756,6 +757,8 @@ case "$arch,$model,$system" in aspp='gcc -c';; power,*,elf) as='as -u -m ppc' aspp='gcc -c';; + power64,*,elf) as='as -u -m ppc64' + aspp='gcc -c';; power,*,bsd) as='as' aspp='gcc -c';; power,*,rhapsody) as="as -arch $model" -- 1.7.10.1