『 CS61C 』Project 2 Classify

Part A

Task 2: ReLU

Concept

这一节需要我们实现 relu 函数,该函数输入一个数组和数组长度,需要把数组内的负数元素置为 0。

image-20221007122605265

Code

loop_start:
	addi sp,sp,-4
	sw s0,0(sp)
	li t0,0

loop:
	beq t0,a1,loop_end      # if  t0 == length go
	slli t1 , t0, 2	        # offset, t1 = t0 * 4
	add s0,a0,t1            # s0 = &a0[t0]
	lw t2,0(s0)	            # t2 = a0[t0]
	bge t2,x0,loop_continue # if t2 >= 0 go
	sw x0,0(s0)             # else a0[t0] = 0
loop_continue:
	addi t0,t0,1            # t0 = t0 + 1
	j loop
loop_end:
	# Epilogue
	lw s0,0(sp)
	addi sp,sp,4
	ret
error:
	li a0,36
	j exit

Task 3: Argmax

Concept

argmax 需要我们输入一个数组和数组长度,返回该数组中最大元素的下标。

Code

.globl argmax

.text
# =================================================================
# FUNCTION: Given a int array, return the index of the largest
#   element. If there are multiple, return the one
#   with the smallest index.
# Arguments:
#   a0 (int*) is the pointer to the start of the array
#   a1 (int)  is the # of elements in the array
# Returns:
#   a0 (int)  is the first index of the largest element
# Exceptions:
#   - If the length of the array is less than 1,
#     this function terminates the program with error code 36
# =================================================================
argmax:
	# Prologue
	li t0,1
	blt a1,t0,error

loop_start:
	addi sp,sp,-4
	sw s0,0(sp)
	li t0,1
	li t3,0	                # max_index = 0
	lw t4,0(a0)             # max_value = num[0]

loop:
	beq t0,a1,loop_end      # if  t0 == length go
	slli t1 , t0, 2	        # offset, t1 = t0 * 4
	add s0,a0,t1            # s0 = &num[t0]
	lw t2,0(s0)	            # t2 = num[t0]
	blt t2,t4,loop_continue # if t2 < max_value  go
	mv t3,t0                # else max_index = t0
	mv t4,t2                #      max_value = t4
loop_continue:
	addi,t0,t0,1
	j loop
loop_end:
	# Epilogue
	mv a0,t3
	lw s0,0(sp)
	addi sp,sp,4
	ret

error:
	li a0,36
	j exit

Task 4: Dot Product

Concept

这一节将输入两个数组,每个数组以输入的 stride 遍历 n 个元素,把每轮的两个元素相乘的结果最后相加返回。

image-20221007125341085

Code

.globl dot

.text
# =======================================================
# FUNCTION: Dot product of 2 int arrays
# Arguments:
#   a0 (int*) is the pointer to the start of arr0
#   a1 (int*) is the pointer to the start of arr1
#   a2 (int)  is the number of elements to use
#   a3 (int)  is the stride of arr0
#   a4 (int)  is the stride of arr1
# Returns:
#   a0 (int)  is the dot product of arr0 and arr1
# Exceptions:
#   - If the length of the array is less than 1,
#     this function terminates the program with error code 36
#   - If the stride of either array is less than 1,
#     this function terminates the program with error code 37
# =======================================================
dot:
    li t0,1
    blt a2,t0,error1
    blt a3,t0,error2
    blt a4,t0,error2


loop_start:
    addi sp,sp,-8
    sw s0,0(sp)     
    sw s1,4(sp)
    mv s0,a0
    mv s1,a1
    li t0,0           # for loop
    li t1,4
    mul t1,t1,a3      # offset for a0
    li t2 4
    mul t2,t2,a4      # offset for a1
    li t6,0           # sum
loop:
    beq t0,a2,loop_end 
    lw t3,0(s0)
    add s0,s0,t1      # s0 = &a0[t0]
    lw t4,0(s1)
    add s1,s1,t2      # s1 = &a1[t0]
    mul t5,t3,t4
    add t6,t6,t5
    addi t0,t0,1
    j loop 
loop_end:
    mv a0,t6
    lw s0,0(sp)
    lw s1,4(sp)
    addi sp,sp,8
    ret
error1:
	li a0,36
	j exit
error2:
	li a0,37
	j exit
    

Task 5: Matrix Multiplication

Concept

这一节需要我们用汇编实现矩阵乘法。

对于一个矩阵而言,我们完全可以把它看作一个数组,只不过是会根据横向和纵向决定其排列顺序。

image-20221013155628163

image-20221007211215736

如上图,我们把矩阵看作一个数组,每轮矩阵相乘可以看作一次 dot 运算(见上一节),其中 a2 为 a0 的列和 a1 的行,a3 为 1,a4 为 a1 的行(说起来有些抽象,画个图能很好理解)。

这样我们可以通过两层循环来进行计算,每轮调用一次 dot 函数,再根据具体情况对数组进行偏移。

需要注意的是在调用外部函数之前,我们需要保存 temp 寄存器和 a 寄存器的一些值以防。

Code

.globl matmul
.text
# =======================================================
# FUNCTION: Matrix Multiplication of 2 integer matrices
#   d = matmul(m0, m1)
# Arguments:
#   a0 (int*)  is the pointer to the start of m0
#   a1 (int)   is the # of rows (height) of m0
#   a2 (int)   is the # of columns (width) of m0
#   a3 (int*)  is the pointer to the start of m1
#   a4 (int)   is the # of rows (height) of m1
#   a5 (int)   is the # of columns (width) of m1
#   a6 (int*)  is the pointer to the the start of d
# Returns:
#   None (void), sets d = matmul(m0, m1)
# Exceptions:
#   Make sure to check in top to bottom order!
#   - If the dimensions of m0 do not make sense,
#     this function terminates the program with exit code 38
#   - If the dimensions of m1 do not make sense,
#     this function terminates the program with exit code 38
#   - If the dimensions of m0 and m1 don't match,
#     this function terminates the program with exit code 38
# =======================================================
matmul:

	# Error checks
   li t0,1
   blt a1,t0,error
   blt a2,t0,error
   blt a4,t0,error
   blt a5,t0,error
   bne a2,a4,error

	# Prologue
    li t0,0       # t0 for all loop count 

outer_loop_start:
    #init
    li t1,0       # t1 for outer_loop count

outer_loop:
    beq t1,a1,outer_loop_end

inner_loop_start:
    #init 
    li t2,0       # t2 for inner_loop count 
    mv t4,a3      # t4 = &a3
inner_loop:
    beq t2,a5,inner_loop_end
    addi sp,sp,-52
    sw ra,0(sp)
    sw a0,4(sp)
    sw a1,8(sp)
    sw a2,12(sp)
    sw a3,16(sp)
    sw a4,20(sp)
    sw a5,24(sp)
    sw t0,28(sp)
    sw t1,32(sp)
    sw t2,36(sp)
    sw t3,40(sp)
    sw t4,44(sp)
    sw a6,48(sp)
    mv a1,t4
    li a3,1
    mv a4,a5
    jal ra,dot    # dot(a0,a3,a2,1,a5) 
    lw t0,28(sp)
    lw a6,48(sp)
    slli t0,t0,2
    add t3,a6,t0  # t3 = &a6[t0]
    sw a0,0(t3)   # a6[t0] = sum
    srli t0,t0,2
    lw ra,0(sp)
    lw a0,4(sp)
    lw a1,8(sp)
    lw a2,12(sp)
    lw a3,16(sp)
    lw a4,20(sp)
    lw a5,24(sp)
    lw t1,32(sp)
    lw t2,36(sp)
    lw t3,40(sp)
    lw t4,44(sp)
    addi sp,sp,52
    addi t0,t0,1
    addi t2,t2,1 
    addi t4,t4,4
    j inner_loop 
inner_loop_end:
    slli a2,a2,2
    add a0,a0,a2
    srli a2,a2,2
    addi t1,t1,1
    j outer_loop 

outer_loop_end:
	ret
error:
    li a0,38
    j exit

Task 6: Testing

这一节和之前相反,是给出了我们汇编代码,需要我们阅读之前的 test framework 来编写 python 调用 asm 的测试用例。

Part B

Part B 主要设计一些对文件读写的操作,

Task 7: Read Matrix

​ 从前面几个小节我们知道,矩阵可以用数组的形式存储,只是我们需要提前给出行列值。下图为一个矩阵在文件中以16进制打开的形式,其中第一个字节为行数,第二个字节为列数:

🌀  read-matrix-1 [main] ⚡  xxd -e input.bin 
00000000: 00000003 00000003 00000001 00000002  ................
00000010: 00000003 00000004 00000005 00000006  ................
00000020: 00000007 00000008 00000009           ............

Appendix 里有我们可能需要调用函数的全部 api ,对着写即可:

.globl read_matrix

.text
# ==============================================================================
# FUNCTION: Allocates memory and reads in a binary file as a matrix of integers
#
# FILE FORMAT:
#   The first 8 bytes are two 4 byte ints representing the # of rows and columns
#   in the matrix. Every 4 bytes afterwards is an element of the matrix in
#   row-major order.
# Arguments:
#   a0 (char*) is the pointer to string representing the filename
#   a1 (int*)  is a pointer to an integer, we will set it to the number of rows
#   a2 (int*)  is a pointer to an integer, we will set it to the number of columns
# Returns:
#   a0 (int*)  is the pointer to the matrix in memory
# Exceptions:
#   - If malloc returns an error,
#     this function terminates the program with error code 26
#   - If you receive an fopen error or eof,
#     this function terminates the program with error code 27
#   - If you receive an fclose error or eof,
#     this function terminates the program with error code 28
#   - If you receive an fread error or eof,
#     this function terminates the program with error code 29
# ==============================================================================
read_matrix:

	# Prologue
	addi sp, sp, -20
	sw ra, 0(sp) 
	sw s0, 4(sp)
	sw s1, 8(sp)  # store the rows
	sw s2,12(sp)  # store the columns
	sw s3,16(sp)
		
	# Epilogue
	mv s1,a1
	mv s2,a2
	li a1,0    #only-read
	jal ra,fopen
	li t0,-1
	beq a0, t0, fopen_error # if a0 == t0 then fopen_error
	
	#read the rows
	mv s0,a0   #store the file descriptor 
	mv a1,s1
	li a2,4
	mv s3,a2
	jal ra,fread
	bne s3, a0, fread_error # if a2 != t0 then fread_error

	#read the columns
	mv a0,s0   #restore the file descriptor 
	mv a1,s2
	li a2,4
	mv s3,a2 
	jal ra,fread
	bne s3,a0, fread_error # if a2 != t0 then fread_error

	lw t1,0(s1)   # rows
	lw t2,0(s2)   # columns
	mul s1,t1,t2 
	slli s1,s1,2  # the size of the memory to be allocated 

	mv a0,s1
	jal ra,malloc
	li t0,0
	beq a0,t0,malloc_error

	mv s2,a0
	mv a1,s2
	mv a0,s0
	mv a2,s1
	jal ra,fread
	bne s1,a0,fread_error

	mv a0,s0
	jal ra,fclose
	bnez a0,fclose_error
	mv a0,s2

	lw ra, 0(sp) 
	lw s0, 4(sp)
	lw s1, 8(sp)  
	lw s2,12(sp) 
	lw s3,16(sp)
	addi sp, sp, 20

	ret

fopen_error:
	li a0, 27
	j exit

fread_error:
	li a0, 29
	j exit

malloc_error:
	li a0, 26
	j exit

fclose_error:
	li a0, 28
	j exit

Task 8: Write Matrix

这一节和上节差不多,只是从读变成了写

.globl write_matrix

.text
# ==============================================================================
# FUNCTION: Writes a matrix of integers into a binary file
# FILE FORMAT:
#   The first 8 bytes of the file will be two 4 byte ints representing the
#   numbers of rows and columns respectively. Every 4 bytes thereafter is an
#   element of the matrix in row-major order.
# Arguments:
#   a0 (char*) is the pointer to string representing the filename
#   a1 (int*)  is the pointer to the start of the matrix in memory
#   a2 (int)   is the number of rows in the matrix
#   a3 (int)   is the number of columns in the matrix
# Returns:
#   None
# Exceptions:
#   - If you receive an fopen error or eof,
#     this function terminates the program with error code 27
#   - If you receive an fclose error or eof,
#     this function terminates the program with error code 28
#   - If you receive an fwrite error or eof,
#     this function terminates the program with error code 30
# ==============================================================================
write_matrix:

	# Prologue
	addi sp, sp, -24
	sw ra, 0(sp) 
	sw s0, 4(sp)
	sw s1, 8(sp) 
	sw s2,12(sp)  
	sw s3,16(sp)
	sw s4,20(sp)

	# Epilogue
	mv s1,a1    #pointer to matrix
	mv s2,a2    #rows
	mv s3,a3    #columns
	li a1,1     #write
	jal ra,fopen
	li t0,-1
	beq a0, t0, fopen_error
	mv s0,a0

	li a0,8
	jal ra,malloc
	beqz a0,malloc_error

	#write the rows and columns
	mv s4,a0
	sw s2,0(s4)
	sw s3,4(s4)
	mv a1,s4
	mv a0,s0
	li a2,2
	mv s4,a2
	li a3,4
	jal ra,fwrite
	bne a0,s4,fwrite_error

	mv a0,s0
	mv a1,s1
	mul a2,s2,s3
	mv s4,a2
	li a3,4
	jal ra,fwrite
	bne a0,s4,fwrite_error

	mv a0,s0
	jal ra,fclose
	bnez a0,fclose_error

	lw ra, 0(sp) 
	lw s0, 4(sp)
	lw s1, 8(sp)  
	lw s2,12(sp) 
	lw s3,16(sp)
	lw s4,20(sp)
	addi sp, sp, 24
	ret

malloc_error:
	li a0, 26
	j exit
fopen_error:
	li a0, 27
	j exit

fclose_error:
	li a0, 28
	j exit

fwrite_error:
	li a0, 30
	j exit