boost/SOURCES/boost-1.66-optimize-s390x-i...

352 lines
9.7 KiB
Diff

From 519786ec38bb5123547071021e4bd7678d7673ad Mon Sep 17 00:00:00 2001
From: Andreas Krebbel <krebbel@linux.ibm.com>
Date: Mon, 23 Mar 2020 09:08:27 +0100
Subject: [PATCH 3/4] Optimize s390x instructions
---
src/asm/jump_s390x_sysv_elf_gas.S | 84 ++++++++++--------------------
src/asm/make_s390x_sysv_elf_gas.S | 27 ++++++----
src/asm/ontop_s390x_sysv_elf_gas.S | 81 +++++++++-------------------
3 files changed, 70 insertions(+), 122 deletions(-)
diff --git a/src/asm/jump_s390x_sysv_elf_gas.S b/src/asm/jump_s390x_sysv_elf_gas.S
index c011d53..b2163cc 100644
--- a/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
+++ b/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
@@ -36,47 +36,34 @@
.global jump_fcontext
.type jump_fcontext, @function
+#define GR_OFFSET 0 /* r6-r15 save area, 10 x 8 bytes */
+#define LR_OFFSET 64 /* slot of r14 inside the GR save area */
+#define SP_OFFSET 72 /* slot of r15 inside the GR save area */
+#define FP_OFFSET 80 /* four 8-byte FPR slots: f1, f3, f5, f7 */
+#define PC_OFFSET 112 /* address to resume at */
+#define L_CTX 120 /* total size of the context-data */
+#define L_STACKFRAME 120 /* stack space reserved on entry, released before the jump */
+
jump_fcontext:
# Reserved the space for stack to store the data of current context
# before we jump to the new context.
- lay 15,-120(15)
+ aghi %r15,-L_STACKFRAME
# save the registers to the stack
- stg 6, 0(15) # save R6
- stg 7, 8(15) # save R7
- stg 8, 16(15) # save R8
- stg 9, 24(15) # save R9
- stg 10, 32(15) # save R10
- stg 11, 40(15) # save R11
- stg 12, 48(15) # save R12
- stg 13, 56(15) # save R13
- stg 14, 64(15) # save R14
- stg 15, 72(15) # save R15
+ stmg %r6, %r15, GR_OFFSET(%r15) # r6-r15 with a single store-multiple
# save the floating point registers
- # Load the FPR into R0 then save it to the stack
- # Load F1 into R0
- lgdr 0,1
- stg 0,80(15) # save F1
-
- # Load F3 into R0
- lgdr 0,3
- stg 0,88(15) # save F3
-
- # Load F5 into R0
- lgdr 0,5
- stg 0,96(15) # save F5
-
- # Load F7 into R0
- lgdr 0,7
- stg 0,104(15) # save F7
+ std %f1,FP_OFFSET(%r15) # F1 (not F0): this slot is reloaded into f1 on restore
+ std %f3,FP_OFFSET+8(%r15)
+ std %f5,FP_OFFSET+16(%r15)
+ std %f7,FP_OFFSET+24(%r15) # NOTE(review): s390x ELF ABI lists f8-f15 as call-saved; confirm this set
# Save LR as PC
- stg 14,112(15)
+ stg %r14,PC_OFFSET(%r15)
# Store the SP pointing to the old context-data into R0
- lgr 0,15
+ lgr %r0,%r15
# Get the SP pointing to the new context-data
# Note: Since the return type of the jump_fcontext is struct whose
@@ -88,46 +75,31 @@ jump_fcontext:
# R2 --> Address of the return transfer_t struct
# R3 --> Context we want to switch to
# R4 --> Data
- lgr 15,3
+ lgr %r15,%r3
# Load the registers with the data present in context-data of the
# context we are going to switch to
- lg 6, 0(15) # restore R6
- lg 7, 8(15) # restore R7
- lg 8, 16(15) # restore R8
- lg 9, 24(15) # restore R9
- lg 10, 32(15) # restore R10
- lg 11, 40(15) # restore R11
- lg 12, 48(15) # restore R12
- lg 13, 56(15) # restore R13
- lg 14, 64(15) # restore R14
+ lmg %r6, %r14, GR_OFFSET(%r15) # r6-r14 only; r15 already points at the new context-data
# Restore Floating point registers
- lg 1,80(15)
- ldgr 1,1 # restore F1
-
- lg 1,88(15)
- ldgr 1,3 # restore F3
-
- lg 1,96(15)
- ldgr 1,5 # restore F5
-
- lg 1,104(15)
- ldgr 1,7 # restore F7
+ ld %f1,FP_OFFSET(%r15)
+ ld %f3,FP_OFFSET+8(%r15)
+ ld %f5,FP_OFFSET+16(%r15)
+ ld %f7,FP_OFFSET+24(%r15)
# Load PC
- lg 1,112(15)
+ lg %r1,PC_OFFSET(%r15)
- # Adjust the stack
- lay 15, 120(15)
+ # Adjust the stack
+ aghi %r15,L_STACKFRAME
# R2 --> Address where the return transfer_t is stored
# R0 --> FCTX
# R4 --> DATA
# Store the elements to return transfer_t
- stg 15, 0(2)
- stg 4, 8(2)
+ stg %r15, 0(%r2) # NOTE(review): comments say R0 holds FCTX, yet r15 is stored (as before this patch) - confirm
+ stg %r4, 8(%r2)
# Note: The address in R2 points to the place where the return
# transfer_t is stored. Since context_function take transfer_t
@@ -135,7 +107,7 @@ jump_fcontext:
# first parameter value.
#jump to context
- br 1
+ br %r1
.size jump_fcontext,.-jump_fcontext
# Mark that we don't need executable stack.
diff --git a/src/asm/make_s390x_sysv_elf_gas.S b/src/asm/make_s390x_sysv_elf_gas.S
index f566533..d02856c 100644
--- a/libs/context/src/asm/make_s390x_sysv_elf_gas.S
+++ b/libs/context/src/asm/make_s390x_sysv_elf_gas.S
@@ -36,6 +36,14 @@
.global make_fcontext
.type make_fcontext, @function
+#define GR_OFFSET 0 /* start of the general-register save area */
+#define LR_OFFSET 64 /* return-address (r14) slot; set to finish below */
+#define SP_OFFSET 72 /* stack-pointer (r15) slot */
+#define FP_OFFSET 80 /* start of the floating-point save area */
+#define PC_OFFSET 112 /* entry address of the context-function */
+#define L_CTX 120 /* total size of the context-data */
+#define L_STACKFRAME 120 /* same value as L_CTX */
+
make_fcontext:
# make_fcontext takes in 3 arguments
@@ -56,40 +64,39 @@ make_fcontext:
# address is zero or not. If not AND it with `-8`.
# Here we AND the lower 16 bits of the memory address present in the
- # R2 with the bits 1111 1111 1111 1000 which when converted into
- # decimal is 65528
- nill 2,65528
+ # R2 with the bits 1111 1111 1111 1000
+ nill %r2,0xfff8 # 0xfff8 == 65528, the same 8-byte mask as before this patch
# Reserve space for context-data on context-stack.
# This is done by shifting the SP/address by 112 bytes.
- lay 2,-120(2)
+ aghi %r2,-L_CTX # L_CTX is 120 bytes, matching the removed lay
# third arg of make_fcontext() == address of the context-function
# Store the address as a PC to jump in, whenever we call the
# make_fcontext.
- stg 4,112(2)
+ stg %r4,PC_OFFSET(%r2)
# Save the address of finish as return-address for context-function
# This will be entered after context-function return
# The address of finish will be saved in Link register, this register
# specifies where we need to jump after the function executes
# completely.
- larl 1,finish
- stg 1,64(2)
+ larl %r1,finish
+ stg %r1,LR_OFFSET(%r2)
# Return pointer to context data
# R14 acts as the link register
# R2 holds the address of the context stack. When we return from the
# make_fcontext, R2 is passed back.
- br 14
+ br %r14
finish:
# In finish tasks, you load the exit code and exit the make_fcontext
# This is called when the context-function is entirely executed
- lghi 2,0
- brasl 14,_exit
+ lghi %r2,0 # exit status 0
+ brasl %r14,_exit@PLT # call through the PLT so the object stays position-independent
.size make_fcontext,.-make_fcontext
# Mark that we don't need executable stack.
diff --git a/src/asm/ontop_s390x_sysv_elf_gas.S b/src/asm/ontop_s390x_sysv_elf_gas.S
index 7ab2cf5..4488654 100644
--- a/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
+++ b/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
@@ -36,47 +36,32 @@
.global ontop_fcontext
.type ontop_fcontext, @function
+#define GR_OFFSET 0 /* r6-r15 save area, 10 x 8 bytes */
+#define LR_OFFSET 64 /* slot of r14 inside the GR save area */
+#define SP_OFFSET 72 /* slot of r15 inside the GR save area */
+#define FP_OFFSET 80 /* four 8-byte FPR slots: f1, f3, f5, f7 */
+#define PC_OFFSET 112 /* saved return address; skipped on the restore path */
+#define L_CTX 120 /* total size of the context-data */
+
ontop_fcontext:
# Reserved the space for stack to store the data of current context
# before we jump to the new context.
- lay 15,-120(15)
+ aghi %r15,-L_CTX
# save the registers to the stack
- stg 6, 0(15) # save R6
- stg 7, 8(15) # save R7
- stg 8, 16(15) # save R8
- stg 9, 24(15) # save R9
- stg 10, 32(15) # save R10
- stg 11, 40(15) # save R11
- stg 12, 48(15) # save R12
- stg 13, 56(15) # save R13
- stg 14, 64(15) # save R14
- stg 15, 72(15) # save R15
+ stmg %r6, %r15, GR_OFFSET(%r15) # r6-r15 with a single store-multiple
# save the floating point registers
- # Load the FPR into R0 then save it to the stack
- # Load F1 into R0
- lgdr 0,1
- stg 0,80(15) # save F1
-
- # Load F3 into R0
- lgdr 0,3
- stg 0,88(15) # save F3
-
- # Load F5 into R0
- lgdr 0,5
- stg 0,96(15) # save F5
-
- # Load F7 into R0
- lgdr 0,7
- stg 0,104(15) # save F7
-
+ std %f1,FP_OFFSET(%r15) # F1 (not F0): this slot is reloaded into f1 on restore
+ std %f3,FP_OFFSET+8(%r15)
+ std %f5,FP_OFFSET+16(%r15)
+ std %f7,FP_OFFSET+24(%r15) # NOTE(review): s390x ELF ABI lists f8-f15 as call-saved; confirm this set
# Save LR as PC
- stg 14,112(15)
+ stg %r14,PC_OFFSET(%r15)
# Store the SP pointing to the old context-data into R0
- lgr 0,15
+ lgr %r0,%r15
# Get the SP pointing to the new context-data
# Note: Since the return type of the jump_fcontext is struct whose
@@ -88,38 +73,22 @@ ontop_fcontext:
# R2 --> Address of the return transfer_t struct
# R3 --> Context we want to switch to
# R4 --> Data
- lgr 15,3
+ lgr %r15,%r3
# Load the registers with the data present in context-data of the
# context we are going to switch to
- lg 6, 0(15) # restore R6
- lg 7, 8(15) # restore R7
- lg 8, 16(15) # restore R8
- lg 9, 24(15) # restore R9
- lg 10, 32(15) # restore R10
- lg 11, 40(15) # restore R11
- lg 12, 48(15) # restore R12
- lg 13, 56(15) # restore R13
- lg 14, 64(15) # restore R14
- lg 15, 72(15) # restore R15
+ lmg %r6,%r15,GR_OFFSET(%r15) # r6-r15; r15 is reloaded from the SP_OFFSET slot
# Restore Floating point registers
- lg 1,80(15)
- ldgr 1,1 # restore F1
-
- lg 1,88(15)
- ldgr 1,3 # restore F3
-
- lg 1,96(15)
- ldgr 1,5 # restore F5
-
- lg 1,104(15)
- ldgr 1,7 # restore F7
+ ld %f1,FP_OFFSET(%r15)
+ ld %f3,FP_OFFSET+8(%r15)
+ ld %f5,FP_OFFSET+16(%r15)
+ ld %f7,FP_OFFSET+24(%r15)
# Skip PC
# Adjust the stack
- lay 15, 120(15)
+ aghi %r15,L_CTX
# R2 --> Address where the return transfer_t is stored
# R0 --> FCTX
@@ -127,8 +96,8 @@ ontop_fcontext:
# R5 --> Context function
# Store the elements to return transfer_t
- stg 15, 0(2)
- stg 4, 8(2)
+ stg %r15, 0(%r2) # NOTE(review): comments say R0 holds FCTX, yet r15 is stored (as before this patch) - confirm
+ stg %r4, 8(%r2)
# Note: The address in R2 points to the place where the return
# transfer_t is stored. Since context_function take transfer_t
@@ -136,7 +105,7 @@ ontop_fcontext:
# first parameter value.
#jump to context function
- br 5
+ br %r5
.size ontop_fcontext,.-ontop_fcontext
# Mark that we don't need executable stack.
--
2.18.1