assembly


Calcolatori Elettronici e Sistemi Operativi
ARM
Registers rules (ARM Procedure Call Standard [APCS]):
name register use
R0 <--> R0 function result (on function return) and first argument (on function entry)
R1 <--> R1 second function argument (caller saved)
R2 <--> R2 third function argument (caller saved)
R3 <--> R3 fourth function argument (caller saved)
R4 <--> R4 local variable (callee saved)
Assembly
...
R8 <--> R8 local variable (callee saved)
R9 <--> R9 local variable (callee saved) (or platform specific data as stack base [static])
R10 <--> R10 local variable (callee saved) (or platform specific data as stack limit [static])
fp <--> R11 frame pointer
ip <--> R12 intra-procedure call scratch (can be used for local vars)
sp <--> R13 stack pointer
lr <--> R14 return address (HW)
pc <--> R15 program counter (HW)
Arguments above 4th: on stack
MIPS Execution flow
Registers rules (MIPS call convention):
Examples
name register typical use
zero <--> R0 0 (HW constant)
at <--> R1
v0 <--> R2 function result (lowest 32 bits)
v1 <--> R3 function result (highest 32 bits, if needed)
a0 <--> R4 first function argument
MAX2
a1 <--> R5 second function argument
a2 <--> R6 third function argument
return maximum between 2 integers
a3 <--> R7 fourth function argument
t0 <--> R8 temporary (not saved)
...
t7 <--> R15 temporary (not saved)
s0 <--> R16 temporary (saved)
...
MAX3
s7 <--> R23 temporary (saved)
t8 <--> R24 temporary (not saved)
return maximum among 3 integers
t9 <--> R25 temporary (not saved)
k0 <--> R26 reserved for kernel
k1 <--> R27 reserved for kernel
Arguments above 4th: on stack
gp <--> R28 global data pointer
sp <--> R29 stack pointer
fp <--> R30 frame pointer
ra <--> R31 return address (HW)
MAX2: ARM - (1) MAX2: ARM - (2)
.global max2 .global max2
.type max2, %function // not mandatory .type max2, %function // not mandatory
int max2(int a, int b) int max2(int a, int b)
{ {
if (a>b) return a; if (a>b) return a;
max2: max2:
else return b; else return b;
} cmp r0, r1 // if a>b } cmp r0, r1 // if a>b
ble ELSE bgt ENDF // return a
THEN: // unused label mov r0, r1 // else return b
mov r0, r0 // return a ENDF:
mov pc, lr mov pc, lr
not needed
ELSE:
mov r0, r1 // return b
return to caller
mov pc, lr
MAX2: ARM - (3) ARM condition suffixes
.global max2
.type max2, %function // not mandatory
int max2(int a, int b)
{
Main mnemonics
if (a>b) return a;
else return b; max2:
} cmp r0, r1 // if ! a>b
EQ: equal
movle r0, r1 // return b
mov pc, lr
NE: not equal
LT: less than (for signed values)
conditional execution
LE: less or equal (for signed values)
GT: greater than (for signed values)
GE: greater or equal (for signed values)
HI: greater than (for unsigned values)
LS: less or equal (for unsigned values)
MAX2: MIPS - (1) MAX2: MIPS - (2)
.set noreorder .set noreorder
.globl max2 .globl max2
int max2(int a, int b) int max2(int a, int b)
{ {
if (a>b) return a; if (a>b) return a;
max2: max2:
else return b; else return b;
} slt $t0, $a1, $a0 // t0 <- (a>b) } slt $t0, $a1, $a0 // t0 <- (a>b)
beq $t0, $zero, ELSE beq $t0, $zero, ELSE
nop nop
THEN: // unused label THEN: // unused label
move $v0, $a0 // return a ($v0<-$a0) j $ra // j $ra
j $ra // j $ra move $v0, $a0 // return a ($v0<-$a0)
nop
use delay slot
ELSE: ELSE:
move $v0, $a1 // return b ($v0<-$a1) j $ra // j $ra
j $ra // j $ra move $v0, $a1 // return b ($v0<-$a1)
nop
zero <--> R0 0 (constant) zero <--> R0 0 (constant)
at <--> R1 at <--> R1
v0 <--> R2 retval (LO) v0 <--> R2 retval (LO)
v1 <--> R3 retval (HI) v1 <--> R3 retval (HI)
a0 <--> R4 arg 1 a0 <--> R4 arg 1
a1 <--> R5 arg 2 a1 <--> R5 arg 2
MAX2: MIPS - (3) MAX2: MIPS - (3)
.set noreorder .set noreorder
.globl max2 .globl max2
int max2(int a, int b) int max2(int a, int b)
{ {
if (a>b) return a; if (a>b) return a;
max2: max2:
else return b; else return b;
} slt $t0, $a1, $a0 // t0 <- (a>b) } slt $8, $5, $4 // t0 <- (a>b)
bne $t0, $zero, ENDF bne $8, $0, ENDF
move $v0, $a0 // return a ($v0<-$a0) move $2, $4 // return a ($v0<-$a0)
move $v0, $a1 // return b ($v0<-$a1) move $2, $5 // return b ($v0<-$a1)
use delay slot use delay slot
ENDF: ENDF:
j $ra // j $ra j $31 // j $ra
nop nop
zero <--> R0 0 (constant) zero <--> R0 0 (constant)
at <--> R1 at <--> R1
v0 <--> R2 retval (LO) v0 <--> R2 retval (LO)
Explicit register names
v1 <--> R3 retval (HI) v1 <--> R3 retval (HI)
a0 <--> R4 arg 1 a0 <--> R4 arg 1
a1 <--> R5 arg 2 a1 <--> R5 arg 2
MAX3 - (1) MAX3 - (2)
.set noreorder
MIPS
int max3a(int a, int b, int int max3a(int a, int b, int
.globl max3
c) c)
{ {
max3:
.set noreorder
if (a>b) /* m = MAX(a,b) */ MIPS
slt $t0, $a1, $a0 //t0 <- (a>b) (1)
.globl max3
if (a>c) return a; /* return MAX(m,c) */
beq $t0, $zero, ELSE1
else return c; int m;
nop
max3:
else if (a>b) m=a;
THEN1: // unused label
slt $t0, $a1, $a0 // t0 <- (a>b)
if (b>c) return b; else m=b;
slt $t0, $a2, $a0 //t0 <- (a>c) (2)
beq $t0, $zero, M1
else return c; if (m>c) return m;
beq $t0, $zero, ELSE2
move $v0, $a1
} else return c;
nop
move $v0, $a0 // $v0 <- max(a,b)
}
THEN2: // unused label
M1:
j $ra
slt $t0, $a2, $v0 // t0 <- (v0>c)
.global max3
move $v0, $a0 //return a
ARM
bne $t0, $zero, ENDF
ELSE2:
nop // move $v0, $v0
max3:
j $ra
move $v0, $a2 // $v0 <- max(v0,c)
cmp r0, r1 // if a>b
.global max3
move $v0, $a2 //return c
ENDF:
ARM
ble ELSE1
ELSE1:
j $ra
THEN: // unused label
max3:
slt $t0, $a2, $a1 //t0 <- (b>c) (3)
nop
cmp r0, r2
cmp r0, r1
beq $t0, $zero, ELSE3
movlt r0, r2 // if ! a>c r0 <- c
movlt r0, r1 // r0 <- max(a,b)
nop
mov pc, lr // return (res. in r0)
cmp r0, r2
THEN3: // unused label
ELSE1:
movlt r0, r2 // r0 <- max(r0,c)
j $ra
cmp r1, r2
mov pc, lr // return
move $v0, $a1 //return b
movge r0, r1 // if b >= c r0 <- b
ELSE3:
movlt r0, r2 // if b < c r0 <- c
j $ra
mov pc, lr // return (res. in r0)
move $v0, $a2 //return c
Arrays and stack ARM Addressing Modes
LDMDA (Decrement After) <==> LDMFA (Full Ascending)
LDMIA (Increment After) <==> LDMFD (Full Descending)
Examples
LDMDB (Decrement Before) <==> LDMEA (Empty Ascending)
strcpy
es. ldmdb fp,{r4, r5} <==> ldmea fp,{r4, r5}
uses strings LDMIB (Increment Before) <==> LDMED (Empty Descending)
factorial
function (recursive) call
STMDA (Decrement After) <==> STMED (Empty Descending)
Mul
STMIA (Increment After) <==> STMEA (Empty Ascending)
function call
STMDB (Decrement Before) <==> STMFD (Full Descending)
mcd es. stmdb sp!,{r4, r5} <==> stmfd sp!, {r4, r5}
STMIB (Increment Before) <==> STMFA (Full Ascending)
function call
Stack ARM Addressing Modes
stmfd sp! , {r4, r5, fp, ip, lr}
sp sp
sp
sp r4 r4 r4 r4
stmfd: store multiple full-descending (decrement before)
r5 r5 r5 r5
r11 (fp) r11 (fp) r11 (fp) r11 (fp)
start address = sp - 4·5 (5 registers to store)
r12 (ip) r12 (ip) r12 (ip) r12 (ip)
r14 (lr) r14 (lr) r14 (lr) r14 (lr)
! update index register
sp
sp
r15 (pc) r15 (pc) r15 (pc) r15 (pc)
sp sp
sp = sp - 4·5
save
Full Descending Empty Descending Empty Ascending Full Ascending
from r4 sp r4
r5 r5
here
ST: decrement before ST: decrement after ST: increment after ST: increment before
r11 (fp) r11 (fp)
LD: increment after LD: increment before LD: decrement before LD: decrement after
r12 (ip) r12 (ip)
r14 (lr) r14 (lr)
sp sp
STMFD STMED STMEA STMFA
LDMFD LDMED LDMEA LDMFA
STMDB STMDA STMIA STMIB
saving order is fixed (registers are saved in increasing order) and cannot be varied
e.g.: stmfd sp!, {r5, r4} is not valid
LDMIA LDMIB LDMDB LDMDA
ARM Addressing Modes ARM Addressing Modes
stmfd sp! , {r4, r5, fp, ip, lr} full-descending (decrement before) stmfd sp! , {r4, r5, fp, ip, lr} full-descending (decrement before)
start address = sp - 4·5 (5 registers to store) start address = sp - 4·5 (5 registers to store)
restore restore
sp r4 r4 sp r4 r4
from from
sp
r5 r5 r5 r5
here here
r11 (fp) r11 (fp) r11 (fp) r11 (fp)
r12 (ip) r12 (ip) r12 (ip) r12 (ip)
r14 (lr) r14 (lr) r14 (lr) r14 (lr)
sp sp fp
sp fp fp
ldmfd sp! , {r4, r5, fp, ip, lr} full-descending (increment after) ldmea fp , {r4, r5, fp, ip, lr} empty-ascending (decrement before)
start address = sp start address = fp - 4·5 (5 registers to restore)
sp = sp + 4·5 fp fixed (sp can change during the execution)
ARM stack frame ARM: function prologue/epilogue
No stack frame allocation: local vars in r0-r3 - no other functions called
...
function body
Register lr must be saved if
...
another function is called.
local-1
mov pc, lr return to caller
local
local-2
variables
local-3
For r4-r14, only actually
r4-r10
saved
(if changed) modified registers are saved.
changed
fp (old)
registers
Stack frame allocation (to save local registers): local vars in r0-r6 (with or without functions calling)
lr
Registers r1-r3 need not be saved.
fp (after frame allocation)
sp (on entry)
caller
stmfd sp!, {r4, r5, r6, lr}
save data (r4, r5, r6 are used by function save)
stack frame
Register r0 is the return value.
...
function body
...
ldmfd sp!, {r4, r5, r6, pc}
restore registers and return to caller
Function arguments above fourth
are in the caller stack frame.
sp r4
Position of fp is not a standard
r5 stack
r6
frame
lr
sp
ARM: function prologue/epilogue ARM: function prologue/epilogue
Stack frame allocation (save local registers, make room for other local variables, use the frame pointer)
Stack frame allocation: local vars in r0-r5 and on stack
mov ip, sp
save current value of sp
stmfd sp!, {r4, r5, lr} save data (r4, r5 are used by function save) stmfd sp!, {r4, r5, fp, lr} save data (r5, r4 are used by function save)
sub sp, sp, #4
allocate room on stack (4 bytes in this example)
sub fp, ip, #0
compute the stack-frame address
...
function body
sub sp, sp, #8 allocate local variables
...
...function body (sp may change)
add sp, sp, #4
unroll sp
add sp, sp, #8
ldmfd sp!, {r4, r5, pc} fp fp: restore old frame pointer
restore registers and return to caller
ldmfd sp!, {r4, r5, fp, pc} restore registers
lr pc: return to caller
sp
local-1
r4
stack
r5
frame
lr
sp
local2
sp
local1
sp r4 r4 stack
r5 r5
frame
Stack frame allocation (save lr only): local vars in r0-r3 another function is called
r11 (fp) r11 (fp)
r14 (lr) r14 (lr)
str lr, [sp, #-4]!
save lr
sp fp
...
function body
...
ldr pc, [sp], #4
return to caller
ip can be used as a scratch register on function entry
stack frame
increasing addresses
ARM: function prologue/epilogue ARM: function prologue/epilogue
Stack frame allocation (save local registers, make room for other local variables, use the frame pointer) Stack frame allocation (save local registers, make room for other local variables, use the frame pointer)
mov ip, sp stmfd sp!, {r4, r5, fp, lr}
save current value of sp
save data (r4, r5 are used by function save)
stmfd sp!, {r4, r5, fp, lr} save data (r5, r4 are used by function save)
add fp, sp, #16 compute the stack-frame address
sub fp, ip, #0 sub sp, sp, #8
allocate local variables
compute the stack-frame address
...
sub sp, sp, #8 allocate local variables
function body (sp may change)
...
...function body (sp may change)
add sp, sp, #8
add sp, sp, #8 fp fp: restore old frame pointer
fp fp: restore old frame pointer ldmfd sp!, {r4, r5, fp, pc}
restore registers
ldmfd sp!, {r4, r5, fp, pc} restore registers
lr pc: return to caller
lr pc: return to caller
sp sp
local2 local2
local1 local1
sp r4 r4 stack sp r4 r4 stack
r5 r5 r5 r5
frame frame
r11 (fp) r11 (fp) r11 (fp) r11 (fp)
fp
r14 (lr) r14 (lr) r14 (lr) r14 (lr)
sp sp fp fp
Using fp:
ip can be used as a scratch register on function entry
local variables are at fixed offsets from fp (local1: [fp-20] local2: [fp-24])
Rules for fp can differ (e.g., point to the upper used location)
ARM: post/pre - increment/decrement MIPS stack frame
The frame must be 8-byte aligned
stack frame
Access with no pointer changes:
fp can be used as frame pointer.
sp fp
arg 1
arg 2
outgoing
(after frame allocation)
ldr / ldrb / str / strb Rd, [Rs, #offset]
Outgoing args (if needed):
arg 3
arguments
arg 4
args passed in registers are not written
Example: ldr R4, [R6, #20] -- R4 <= MEM[R6+20]
arg 5
ptr to on stack by the caller (but room is
global ptr
global data
allocated).
local 1
local vars
local 2
Access with pre-change on pointer Ptr to global: not mandatory if not changed
saved
s0-s7 Space for local vars is 8-byte aligned
ldr / ldrb / str / strb Rd, [Rs, #offset]! registers fp
ra Space for saved registers is 8-byte aligned
Example: ldr R2, [R1, #4]! -- R1 <= R1+4 ; R2 <= MEM[R1]
arg 1
input sp (on entry)
Other blocks can be 8-byte aligned (not
arg 2
arguments
mandatory)
Space for s0-s7 must be reserved and used
Access with post-change on pointer
only for registers actually modified.
ldr / ldrb / str / strb Rd, [Rs], #offset ra must be saved for non-leaf functions.
Example: ldrb R7, [R0], #4 -- R7(lsb) <= MEM[R0] ; R0 <= R0+4
Position of fp is not a standard.
increasing addresses
MIPS: function prologue/epilogue MIPS: function prologue/epilogue
No stack frame allocation: local vars in $t0-$t9 (and $a0-$a3, $v0, $v1) - no other functions called Stack frame allocation: other functions called
subu $sp,$sp,24 allocate frame (8-byte aligned)
... sp
arg 1
function body
sw $ra,20($sp)
save $ra
...
arg 2
...
function body
arg 3
j $ra return to caller ...
stack
arg 4
restore $ra
lw $ra,20($sp)
frame
j $ra return to caller and restore $sp
addu $sp,$sp,24
$ra
sp
Stack frame allocation (to save local registers): a local var in $s0 - no other functions called
Stack frame allocation: save a local register ($s0) - other functions called
subu $sp,$sp,8 allocate frame (8-byte aligned) subu $sp,$sp,24 allocate frame (8-byte aligned)
sw $ra,20($sp)
save registers
sw $s0,4($sp) save $s0
sw $s0,16($sp)
...
function body ...
... function body
sp
arg 1
...
lw $s0,4($sp)
restore registers
arg 2
j $ra lw $ra,20($sp) restore registers
return to caller and restore $sp
arg 3
addu $sp,$sp,8 lw $s0,16($sp)
stack
arg 4
j $ra return to caller and restore $sp
frame
addu $sp,$sp,24
sp padding stack $s0
frame
$s0 $ra
sp
sp
MIPS: function prologue/epilogue MIPS: memory access
Stack frame allocation: save a local register ($s0)  use locals on stack - other functions called
subu $sp,$sp,32 allocate frame (8-byte aligned)
sp
arg 1
sw $ra,28($sp) lw / lb / sw / sb $Rd, offset($Rs)
save registers
arg 2
sw $s0,24($sp)
... arg 3
function body
stack
... Examples:
local_var is 24($sp) arg 4
frame
lw $ra,28($sp) restore registers
local_var
lw $s0,24($sp)
lw $a0, 12($a1) -- $a0 <= MEM[$a1+12]
j $ra $s0
return to caller and restore $sp
addu $sp,$sp,32 $ra
lb $a3, 0($a2) -- $a3(lsb) <= MEM[$a2]
sp
using a frame pointer: sw $v1, 4($t0) -- MEM[$t0+4] <= $v1
subu $sp,$sp,40 allocate frame and set frame ptr
sp fp
arg 1
sw $fp,32($sp)
arg 2
move $fp,$sp
arg 3
sw $ra,36($sp)
save registers
stack
sw $s0,28($sp)
arg 4
frame
...
function body
...
local_var is 20($fp)
local_var
A cycle delay is needed before loaded data are ready (use nop to
lw $ra,36($sp)
restore registers
avoid hazards)
$s0
lw $fp,32($sp)
lw $s0,28($sp)
$fp
j $ra
$ra
return to caller and restore $sp
addu $sp,$sp,40 sp
strcpy: C strcpy: ARM assembly
my_strcpy:
void my_strcpy(char *dest, char *src)
{
on function entry:
on function entry:
while (*src) *(dest++) = *(src++);
r0 = dest
r0 = dest
*dest=0;
r1 = src
} r1 = src
setup stack frame
setup stack frame
local_1 <= r0 [dest]
local_1 <- r0
local_2 <= r1 [src]
local_2 <- r1
loop:
loop:
if (*local_2 == 0) goto endloop
r1 <- local_2 ; r2 <- MEM[r1] ; if (r2== 0) goto endloop
*local_1 <= *local_2
r1 <- local_1 ; r2 <- local_2 ; r3 <- MEM[r2] ; MEM[r1] <- r3
local_1++
r1++ ; local_1 <- r1 ; r2++ ; local_2 <- r2
local_2++
goto loop
goto loop
endloop:
endloop:
*local_1 <= 0
r1 <-local_1 ; r2 <- 0 ; MEM[r1] <- r2
return (restore stack frame)
return (restore stack frame)
strcpy: ARM assembly strcpy: ARM assembly
my_strcpy: my_strcpy:
using local vars allocate stack frame
stmfd sp!, {fp, lr} ; save on stack regs that will be modified
choice: use frame pointer
add fp, sp, #8 ; compute fp (base of stack frame)
sub sp, sp, #8 ; reserve room on stack for local vars
save on stack frame:
; (8 bytes)
fp (frame pointer of caller)
; local_1: MEM[fp-12] local_2: MEM[fp-16]
lr return address (not really needed, since there are no function calls)
str r0, [fp, #-12] ; local_1 <- dest (dest is in r0)
allocate on stack frame:
str r1, [fp, #-16] ; local_2 <- src (src is in r1)
local1
loop:
local2
ldr r1, [fp, #-16] ; r1 <- local_2
ldrb r2, [r1, #0] ; r2 <- *local_2
cmp r2, #0 ; compare *local_2 and 0
beq endloop ; jump to endloop label
sp
local2
fp-16
local1
fp-12
stack
r11 (fp)
frame
r14 (lr)
sp sp fp
strcpy: ARM assembly strcpy: ARM assembly (opt)
ldr r2, [fp, #-16] ; r2 <- local_2 my_strcpy:
ldr r1, [fp, #-12] ; r1 <- local_1
ldrb r3, [r2, #0] ; r3 <- MEM[r2] (r3 <- *local_2
; load only one byte)
on function entry: on function entry:
strb r3, [r1, #0] ; MEM[r1] <- r3 (*local_1 <- *local_2
r0 = dest r0 = dest
; write only one byte)
r1 = src r1 = src
add r1, r1, #1 ; r1 <- r1+1 (r1 <- local_1+1)
str r1, [fp, #-12] ; local_1 <- r1 (update local_1)
add r2, r2, #1 ; r2 <- r2+1 (r2 <- local_2+1)
setup stack frame setup stack frame
str r2, [fp, #-16] ; local_2 <- r2 (update local_2)
if (*r1 == 0) goto endloop r3 <- MEM[r1] ; if (r3 == 0) goto endloop
b loop ; jump back to loop
loop: loop:
endloop:
ldr r1, [fp, #-12] ; r1 <- MEM[fp-12] (r1 <- local_1) *r0++ <= r3 MEM[r0++] <- r3
r3 <= *r1++ r3 <- MEM[r1++]
mov r2, #0 ; r2 <- 0
if (r3!=0) goto loop if (r3!=0) goto loop
strb r2, [r1, #0] ; MEM[r1] <- r2 (*local_1 <- 0)
return:
endloop: endloop:
add sp, sp, #8 ;  delete local vars
*r0 <= 0 r3 <- 0 ; MEM[r0] <- r3
ldmfd sp!, {fp, pc} ; restore registers from stack
; (pc is loaded with the previous value
return (restore stack frame) return (restore stack frame)
; of lr)
strcpy: ARM assembly (opt) strcpy: MIPS assembly
my_strcpy: my_strcpy:
choice: save args on stack
ldrb r3, [r1, #0] ; r3 <- MEM[r1] (r3 <- *src)
choice: use frame pointer
cmp r3, #0 ; compare *src and 0
save on stack frame:
beq endloop ; if r3==0 jump to endloop label
fp (frame pointer of caller)
loop:
strb r3, [r0], #1 ; MEM[r0] <- r3 ; r0 <- r0 + 1 (*(dest++) <-r3 )
save on stack:
ldrb r3, [r1, #1]! ; r3 <- MEM[r1+1] ; r1<-r1+1 (r3 <- *++src)
dest ($a0)
cmp r3, #0 ; compare r3 and 0
src ($a1)
bne loop ; if r3 != 0 jump back to loop label
endloop:
mov r3, #0 ; r3 <- 0
strb r3, [r0, #0] ; MEM[r0] <- r3 (*dest <- 0)
mov pc, lr ; return
no local vars, no function calls stack frame is not required
sp
dest
function
src
arguments
strcpy: MIPS assembly strcpy: MIPS assembly
my_strcpy: lw $a0,12($fp) ; a0 <- src
subu $sp,$sp,8 ; reserve room on stack (8 bytes: fp and ra)
lw $v0,8($fp) ; v0 <- dest
sw $fp,0($sp) ; save fp
lbu $a3,0($a0) ; a3 <- *src (load one byte)
nop
move $fp,$sp ; fp <- sp
sb $a3,0($v0) ; *dest <- a3 (*dest <- *src write one byte)
sw $a0,8($fp) ; save first function argument (in MEM[fp+8])
addu $a0,$a0,1 ; a0 <- a0+1
sw $a1,12($fp) ; save second function argument (in MEM[fp+12])
sw $a0,12($fp) ; src <- a0 (src++)
loop:
addu $v0,$v0,1 ; v0 <- v0+1
lw $v0,12($fp) ; v0 <- src
sw $v0,8($fp) ; dest <- v0 (dest++)
nop ; wait for memory read
j loop ; jump back to loop label
lb $v1,0($v0) ; v1 <- *src
nop
nop ; wait for memory read
endloop:
lw $v0,8($fp) ; v0 <- dest
beq $v1,$zero,endloop ; if v1==0 jump to endloop label
nop
nop ; delay slot (jump slot)
sb $zero,0($v0) ; *dest <- 0
return:
move $sp,$fp ; restore sp
sp
saved
fp
lw $fp,0($sp) ; restore fp from stack frame
fp
registers
ra
addu $sp,$sp,8 ; free the memory reserved on the stack
sp
dest: fp+8
dest
function
j $ra ; return
src: fp+12
src
arguments
nop
strcpy: MIPS assembly (opt) ARM: multiplication / division
my_strcpy:
loop:
Multiplication: instruction mul
lb $v0,0($a1) ; v0 <- MEM[a1] (v0 <- *src)
nop
mul R0, R1, R2 -- R0 <= R1*R2
beq $v0,$zero,endloop ; if v0==0 jump to endloop
addu $a1,$a1,1 ; a1 <- a1+1 (src++) (use delay slot)
sb $v0,0($a0) ; MEM[a0] <- v0 (*dest <- v0)
j loop ; jump back to loop
Division/modulo: functions (__divsi3 and __modsi3)
addu $a0,$a0,1 ; a0 <- a0+1 (dest++) (use delay slot)
endloop:
j $ra ; return
res = __divsi3(a,b) -- res <= a/b
sb $zero,0($a0) ; *dest <- 0 (use delay slot)
mov R0, dividend mov R0, dividend
mov R1, divisor mov R1, divisor
bl __divsi3 bl __modsi3
-- in R0 the division result -- in R0 the remainder
MIPS: multiplication / division factorial: C
int factorial(int n)
Multiplication: instruction mult
{
if (n<0) return 0;
else if (n==0) return 1;
64-bit result in special registers hi and lo
else return n*factorial(n-1);
}
mult $a0,$v1
mflo $a1 -- $a1 <= $a0 * $v1 (32 low bits)
Division/modulo: instruction div
Division result in lo, remainder in hi
div $t8,$t1
mflo $a1 -- $a1 <= $t8 / $t1
mfhi $a2 -- $a2 <= $t8 % $t1
factorial: ARM assembly factorial: MIPS assembly
factorial:
factorial:
subu $sp,$sp,24 ; reserve stack room
stmfd sp!, {r4, lr} ; save registers (r4 will be used)
; (memory for ra, s0, arguments for called f [4 word])
subs r4, r0, #0 ; r4 <- r0-0 and set flags
movlt r0, #0 ; if r0-0 < 0 (last operation that set flags) r0<-0
ldmltfd sp!, {r4, pc} ; if r0-0 < 0 (last operation that set flags) restore
; registers and return
compute:
sp
beq return ; if r0-0 = 0 jump to return
n
arguments
sp+4
sub r0, r4, #1 ; r0 <- r4-1 (argument for the next
sp+8
; function call)
function
sp+12
bl factorial ; recursive function call
saved
s0 sp+16
registers
mul r0, r4, r0 ; r0 <- r4·r0 (compute the result to return) ra
sp+20
sp sp
n n
ldmfd sp!, {r4, pc} ; restore registers and return function function
arguments arguments
return:
mov r0, #1 ; r0 <- 1
ldmfd sp!, {r4, pc} ; restore registers and return
factorial: MIPS assembly Mul: C
factorial:
subu $sp,$sp,24 ; reserve stack (room for ra, s0, args [4 word])
int Mul(unsigned int a, unsigned int b)
sw $s0,16($sp) ; save s0 (will be used)
{
if (b==0) return 0;
move $s0,$a0 ; s0 <- a0 (s0 <- n)
else if (b==1) return a;
bgez $s0,compute ; if s0>=0 jump to compute
else return a+Mul(a,b-1);
sw $ra,20($sp) ; save return address (use delay slot) }
j return ; jump to return
move $v0,$zero ; v0 <- 0 (use delay slot)
compute:
beq $s0,$zero,return ; if s0==zero jump to return
li $v0,1 ; v0 <- 1 (use delay slot)
jal factorial ; recursive function call
addu $a0,$s0,-1 ; a0 <- s0-1 (argument for the recursive function call)
; (use delay slot)
mult $s0,$v0 ; hi:lo <- s0·v0 (hi and lo: special registers)
mflo $v0 ; v0 <- lo
return:
lw $ra,20($sp) ; restore ra
lw $s0,16($sp) ; restore s0
j $ra ; return
addu $sp,$sp,24 ; free allocated stack space
; (use delay slot)
Mul: ARM assembly Mul: MIPS assembly
Mul: Mul:
stmfd sp!, {r4, lr} subu $sp,$sp,24
sw $s0,16($sp)
mov r4, r0 ; save r0 (r0 will be written by the recursive call) move $s0,$a0 ; save a0 (a0 will be overwritten by the recursive call)
sw $ra,20($sp)
cmp r1, #0 ; b ==0 ?
moveq r0, r1 ; return 0 beq $a1,$zero,return ; b ==0 ?
ldmeqfd sp!, {r4, pc} move $v0,$zero ; return 0 (use delay slot)
li $v0,1
cmp r1, #1 ; b==1 ? beq $a1,$v0,label ; b ==1 ?
beq return ; return a (a is still in r0) move $a0,$s0 ; return a (a is in s0) (use delay slot)
sub r1, r1, #1 jal Mul
bl Mul ; r0 = Mul(a,b-1) addu $a1,$a1,-1 ; v0 = Mul(a,b-1) (use delay slot)
add r0, r4, r0 ; r0 = r0+a j return
ldmfd sp!, {r4, pc} ; return addu $v0,$s0,$v0 ; compute the return value: a+v0 (use delay slot)
return: label:
ldmfd sp!, {r4, pc} move $v0,$s0
return:
lw $ra,20($sp)
lw $s0,16($sp)
j $ra
addu $sp,$sp,24
mcd: C mcd: ARM assembly
mcd:
int mcd(int a, int b) stmfd sp!, {r4, r5, lr}
{
if (b==0) return a; mov r5, r0
if (a>b) return mcd(b, a%b); mov r4, r1
else return mcd(a, b%a); label:
} cmp r4, #0
moveq r0, r5
ldmeqfd sp!, {r4, r5, pc} ; b==0? return a
cmp r5, r4 ; set flags
movgt r0, r5
tail-end recursion: can easily become a loop
movgt r1, r4
movle r0, r4
movle r1, r5 ; r0=max(r4,r5) r1=min(r4,r5)
movgt r5, r4 ; r5=min(r4,r5) (for next computation iterations)
bl __modsi3 ; r0 = r0 % r1
mov r4, r0 ; r4 = r0 % r1
b label ; recursion (with no function call: tail end recursion)
; loop computes mcd(r4,r5)
Note:
the library function "__modsi3" computes the remainder of the division of its first argument by its second
mcd: MIPS assembly Reversing
mcd_ricor:
label_3:
bne $a1,$zero,label_1 Notes:
To inspect:
slt $v0,$a1,$a0
1. when a division is performed, the quotient is
j $ra
stored into the special register  lo and the
move $v0,$a0
remainder is stored into the special register  hi
label_1:
unknown1
beq $v0,$zero,label_2
2. the instruction  mfhi loads the content of
nop
 hi into the indicated general purpose register
ARM assembly
div $zero,$a0,$a1
mfhi $v0
move $a0,$a1
j label_3
move $a1,$v0 ; a0=b; a1=a%b
unknown2
label_2: ; a1>=a0
MIPS assembly
div $zero,$a1,$a0
mfhi $v0
j label_3
move $a1,$v0 ; a0=a; a1=b%a
?: ARM assembly ?: MIPS assembly
unknown1: unknown2:
ldrb r2, [r0], #1 move $v0,$zero
ldrb r3, [r1], #1 lbu $v1,0($a0)
cmp r2, #0 nop
beq label_1
cmp r2, r3 beq $v1,$zero,label_3
beq unknown1 andi $a1,$a1,0x00ff
mvnls r0, #0 // ls: comparison result is <= (unsigned values); equality was already handled by the beq above, so here it means <
movhi r0, #1 // hi: comparison result is > (unsigned values) label_1:
mov pc, lr bne $v1,$a1,label_2
nop
label_1: addu $v0,$v0,1
mvn r0, #0 // mvn: mov negated (mov r0, not #0)
cmp r3, #0 label_2:
moveq r0, #0 addu $a0,$a0,1
mov pc, lr lbu $v1,0($a0)
nop
bne $v1,$zero,label_1
nop
label_3:
j $ra
nop


Wyszukiwarka

Podobne podstrony:
Assembly of outer membrane proteins in bacteria nad mitochondria
13 79 Pistons and connecting rods assembly
Assembly HOWTO pl 5 (2)
CA ARM Assembler Linker Testvector
Kurs Assemblera by Skowik3
bee hive 10 frame assembly jig
Assembler ENG
assembler intro
DUI0379C using the assembler
Assembly1
AVR Assembler User Guide
Rear Wheelhouse Assembly
bushwarbler assembly
assembly howto
assembly howto 5
instrukcje assemblera

więcej podobnych podstron