Part A
Task 2: ReLU
Concept
这一节需要我们实现 relu
函数,该函数输入一个数组和数组长度,需要把数组内的负数元素置为 0。
Code
loop_start:
addi sp,sp,-4
sw s0,0(sp)
li t0,0
loop:
beq t0,a1,loop_end # if t0 == length go
slli t1 , t0, 2 # offset, t1 = t0 * 4
add s0,a0,t1 # s0 = &a0[t0]
lw t2,0(s0) # t2 = a0[t0]
bge t2,x0,loop_continue # if t2 >= 0 go
sw x0,0(s0) # else a0[t0] = 0
loop_continue:
addi t0,t0,1 # t0 = t0 + 1
j loop
loop_end:
# Epilogue
lw s0,0(sp)
addi sp,sp,4
ret
error:
li a0,36
j exit
Task 3: Argmax
Concept
argmax
需要我们输入一个数组和数组长度,返回该数组中最大元素的下标。
Code
.globl argmax
.text
# =================================================================
# FUNCTION: Given a int array, return the index of the largest
# element. If there are multiple, return the one
# with the smallest index.
# Arguments:
# a0 (int*) is the pointer to the start of the array
# a1 (int) is the # of elements in the array
# Returns:
# a0 (int) is the first index of the largest element
# Exceptions:
# - If the length of the array is less than 1,
# this function terminates the program with error code 36
# =================================================================
argmax:
# Prologue
li t0,1
blt a1,t0,error
loop_start:
addi sp,sp,-4
sw s0,0(sp)
li t0,1
li t3,0 # max_index = 0
lw t4,0(a0) # max_value = num[0]
loop:
beq t0,a1,loop_end # if t0 == length go
slli t1 , t0, 2 # offset, t1 = t0 * 4
add s0,a0,t1 # s0 = &num[t0]
lw t2,0(s0) # t2 = num[t0]
blt t2,t4,loop_continue # if t2 < max_value go
mv t3,t0 # else max_index = t0
mv t4,t2 # max_value = t4
loop_continue:
addi,t0,t0,1
j loop
loop_end:
# Epilogue
mv a0,t3
lw s0,0(sp)
addi sp,sp,4
ret
error:
li a0,36
j exit
Task 4: Dot Product
Concept
这一节将输入两个数组,每个数组以输入的 stride 遍历 n 个元素,把每轮的两个元素相乘的结果最后相加返回。
Code
.globl dot
.text
# =======================================================
# FUNCTION: Dot product of 2 int arrays
# Arguments:
# a0 (int*) is the pointer to the start of arr0
# a1 (int*) is the pointer to the start of arr1
# a2 (int) is the number of elements to use
# a3 (int) is the stride of arr0
# a4 (int) is the stride of arr1
# Returns:
# a0 (int) is the dot product of arr0 and arr1
# Exceptions:
# - If the length of the array is less than 1,
# this function terminates the program with error code 36
# - If the stride of either array is less than 1,
# this function terminates the program with error code 37
# =======================================================
dot:
li t0,1
blt a2,t0,error1
blt a3,t0,error2
blt a4,t0,error2
loop_start:
addi sp,sp,-8
sw s0,0(sp)
sw s1,4(sp)
mv s0,a0
mv s1,a1
li t0,0 # for loop
li t1,4
mul t1,t1,a3 # offset for a0
li t2 4
mul t2,t2,a4 # offset for a1
li t6,0 # sum
loop:
beq t0,a2,loop_end
lw t3,0(s0)
add s0,s0,t1 # s0 = &a0[t0]
lw t4,0(s1)
add s1,s1,t2 # s1 = &a1[t0]
mul t5,t3,t4
add t6,t6,t5
addi t0,t0,1
j loop
loop_end:
mv a0,t6
lw s0,0(sp)
lw s1,4(sp)
addi sp,sp,8
ret
error1:
li a0,36
j exit
error2:
li a0,37
j exit
Task 5: Matrix Multiplication
Concept
这一节需要我们用汇编实现矩阵乘法。
对于一个矩阵而言,我们完全可以把它看作一个数组,只不过是会根据横向和纵向决定其排列顺序。
如上图,我们把矩阵看作一个数组,每轮矩阵相乘可以看作一次 dot 运算(见上一节),其中 a2 为 a0 的列和 a1 的行,a3 为 1,a4 为 a1 的行(说起来有些抽象,画个图能很好理解)。
这样我们可以通过两层循环来进行计算,每轮调用一次 dot
函数,再根据具体情况对数组进行偏移。
需要注意的是在调用外部函数之前,我们需要保存 temp 寄存器和 a 寄存器的一些值以防。
Code
.globl matmul
.text
# =======================================================
# FUNCTION: Matrix Multiplication of 2 integer matrices
# d = matmul(m0, m1)
# Arguments:
# a0 (int*) is the pointer to the start of m0
# a1 (int) is the # of rows (height) of m0
# a2 (int) is the # of columns (width) of m0
# a3 (int*) is the pointer to the start of m1
# a4 (int) is the # of rows (height) of m1
# a5 (int) is the # of columns (width) of m1
# a6 (int*) is the pointer to the the start of d
# Returns:
# None (void), sets d = matmul(m0, m1)
# Exceptions:
# Make sure to check in top to bottom order!
# - If the dimensions of m0 do not make sense,
# this function terminates the program with exit code 38
# - If the dimensions of m1 do not make sense,
# this function terminates the program with exit code 38
# - If the dimensions of m0 and m1 don't match,
# this function terminates the program with exit code 38
# =======================================================
matmul:
# Error checks
li t0,1
blt a1,t0,error
blt a2,t0,error
blt a4,t0,error
blt a5,t0,error
bne a2,a4,error
# Prologue
li t0,0 # t0 for all loop count
outer_loop_start:
#init
li t1,0 # t1 for outer_loop count
outer_loop:
beq t1,a1,outer_loop_end
inner_loop_start:
#init
li t2,0 # t2 for inner_loop count
mv t4,a3 # t4 = &a3
inner_loop:
beq t2,a5,inner_loop_end
addi sp,sp,-52
sw ra,0(sp)
sw a0,4(sp)
sw a1,8(sp)
sw a2,12(sp)
sw a3,16(sp)
sw a4,20(sp)
sw a5,24(sp)
sw t0,28(sp)
sw t1,32(sp)
sw t2,36(sp)
sw t3,40(sp)
sw t4,44(sp)
sw a6,48(sp)
mv a1,t4
li a3,1
mv a4,a5
jal ra,dot # dot(a0,a3,a2,1,a5)
lw t0,28(sp)
lw a6,48(sp)
slli t0,t0,2
add t3,a6,t0 # t3 = &a6[t0]
sw a0,0(t3) # a6[t0] = sum
srli t0,t0,2
lw ra,0(sp)
lw a0,4(sp)
lw a1,8(sp)
lw a2,12(sp)
lw a3,16(sp)
lw a4,20(sp)
lw a5,24(sp)
lw t1,32(sp)
lw t2,36(sp)
lw t3,40(sp)
lw t4,44(sp)
addi sp,sp,52
addi t0,t0,1
addi t2,t2,1
addi t4,t4,4
j inner_loop
inner_loop_end:
slli a2,a2,2
add a0,a0,a2
srli a2,a2,2
addi t1,t1,1
j outer_loop
outer_loop_end:
ret
error:
li a0,38
j exit
Task 6: Testing
这一节和之前相反,是给出了我们汇编代码,需要我们阅读之前的 test framework 来编写 python 调用 asm 的测试用例。
Part B
Part B 主要设计一些对文件读写的操作,
Task 7: Read Matrix
从前面几个小节我们知道,矩阵可以用数组的形式存储,只是我们需要提前给出行列值。下图为一个矩阵在文件中以16进制打开的形式,其中第一个字节为行数,第二个字节为列数:
🌀 read-matrix-1 [main] ⚡ xxd -e input.bin
00000000: 00000003 00000003 00000001 00000002 ................
00000010: 00000003 00000004 00000005 00000006 ................
00000020: 00000007 00000008 00000009 ............
Appendix 里有我们可能需要调用函数的全部 api ,对着写即可:
.globl read_matrix
.text
# ==============================================================================
# FUNCTION: Allocates memory and reads in a binary file as a matrix of integers
#
# FILE FORMAT:
# The first 8 bytes are two 4 byte ints representing the # of rows and columns
# in the matrix. Every 4 bytes afterwards is an element of the matrix in
# row-major order.
# Arguments:
# a0 (char*) is the pointer to string representing the filename
# a1 (int*) is a pointer to an integer, we will set it to the number of rows
# a2 (int*) is a pointer to an integer, we will set it to the number of columns
# Returns:
# a0 (int*) is the pointer to the matrix in memory
# Exceptions:
# - If malloc returns an error,
# this function terminates the program with error code 26
# - If you receive an fopen error or eof,
# this function terminates the program with error code 27
# - If you receive an fclose error or eof,
# this function terminates the program with error code 28
# - If you receive an fread error or eof,
# this function terminates the program with error code 29
# ==============================================================================
read_matrix:
# Prologue
addi sp, sp, -20
sw ra, 0(sp)
sw s0, 4(sp)
sw s1, 8(sp) # store the rows
sw s2,12(sp) # store the columns
sw s3,16(sp)
# Epilogue
mv s1,a1
mv s2,a2
li a1,0 #only-read
jal ra,fopen
li t0,-1
beq a0, t0, fopen_error # if a0 == t0 then fopen_error
#read the rows
mv s0,a0 #store the file descriptor
mv a1,s1
li a2,4
mv s3,a2
jal ra,fread
bne s3, a0, fread_error # if a2 != t0 then fread_error
#read the columns
mv a0,s0 #restore the file descriptor
mv a1,s2
li a2,4
mv s3,a2
jal ra,fread
bne s3,a0, fread_error # if a2 != t0 then fread_error
lw t1,0(s1) # rows
lw t2,0(s2) # columns
mul s1,t1,t2
slli s1,s1,2 # the size of the memory to be allocated
mv a0,s1
jal ra,malloc
li t0,0
beq a0,t0,malloc_error
mv s2,a0
mv a1,s2
mv a0,s0
mv a2,s1
jal ra,fread
bne s1,a0,fread_error
mv a0,s0
jal ra,fclose
bnez a0,fclose_error
mv a0,s2
lw ra, 0(sp)
lw s0, 4(sp)
lw s1, 8(sp)
lw s2,12(sp)
lw s3,16(sp)
addi sp, sp, 20
ret
fopen_error:
li a0, 27
j exit
fread_error:
li a0, 29
j exit
malloc_error:
li a0, 26
j exit
fclose_error:
li a0, 28
j exit
Task 8: Write Matrix
这一节和上节差不多,只是从读变成了写
.globl write_matrix
.text
# ==============================================================================
# FUNCTION: Writes a matrix of integers into a binary file
# FILE FORMAT:
# The first 8 bytes of the file will be two 4 byte ints representing the
# numbers of rows and columns respectively. Every 4 bytes thereafter is an
# element of the matrix in row-major order.
# Arguments:
# a0 (char*) is the pointer to string representing the filename
# a1 (int*) is the pointer to the start of the matrix in memory
# a2 (int) is the number of rows in the matrix
# a3 (int) is the number of columns in the matrix
# Returns:
# None
# Exceptions:
# - If you receive an fopen error or eof,
# this function terminates the program with error code 27
# - If you receive an fclose error or eof,
# this function terminates the program with error code 28
# - If you receive an fwrite error or eof,
# this function terminates the program with error code 30
# ==============================================================================
write_matrix:
# Prologue
addi sp, sp, -24
sw ra, 0(sp)
sw s0, 4(sp)
sw s1, 8(sp)
sw s2,12(sp)
sw s3,16(sp)
sw s4,20(sp)
# Epilogue
mv s1,a1 #pointer to matrix
mv s2,a2 #rows
mv s3,a3 #columns
li a1,1 #write
jal ra,fopen
li t0,-1
beq a0, t0, fopen_error
mv s0,a0
li a0,8
jal ra,malloc
beqz a0,malloc_error
#write the rows and columns
mv s4,a0
sw s2,0(s4)
sw s3,4(s4)
mv a1,s4
mv a0,s0
li a2,2
mv s4,a2
li a3,4
jal ra,fwrite
bne a0,s4,fwrite_error
mv a0,s0
mv a1,s1
mul a2,s2,s3
mv s4,a2
li a3,4
jal ra,fwrite
bne a0,s4,fwrite_error
mv a0,s0
jal ra,fclose
bnez a0,fclose_error
lw ra, 0(sp)
lw s0, 4(sp)
lw s1, 8(sp)
lw s2,12(sp)
lw s3,16(sp)
lw s4,20(sp)
addi sp, sp, 24
ret
malloc_error:
li a0, 26
j exit
fopen_error:
li a0, 27
j exit
fclose_error:
li a0, 28
j exit
fwrite_error:
li a0, 30
j exit
PREVIOUS『 CMU-15445 』课程小结
NEXT『 CS61C 』 Labs