Multithreaded matrix multiply closer to working, but segfaulting.
This commit is contained in:
@@ -4,11 +4,13 @@ HARPEM = ../harptool -E
|
|||||||
HARPDIS = ../harptool -D
|
HARPDIS = ../harptool -D
|
||||||
4BARCH = 4b16/16/2
|
4BARCH = 4b16/16/2
|
||||||
|
|
||||||
all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin
|
all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin \
|
||||||
|
matmul-mt.s
|
||||||
|
|
||||||
run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out
|
run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out\
|
||||||
|
matmul-mt.out
|
||||||
|
|
||||||
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d
|
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d
|
||||||
|
|
||||||
%.4b.out : %.4b.bin
|
%.4b.out : %.4b.bin
|
||||||
$(HARPEM) -a $(4BARCH) -c $< > $@
|
$(HARPEM) -a $(4BARCH) -c $< > $@
|
||||||
@@ -40,6 +42,9 @@ dotprod.bin : boot.HOF lib.HOF dotprod.HOF
|
|||||||
matmul.bin : boot.HOF lib.HOF matmul.HOF
|
matmul.bin : boot.HOF lib.HOF matmul.HOF
|
||||||
$(HARPLD) -o $@ $^
|
$(HARPLD) -o $@ $^
|
||||||
|
|
||||||
|
matmul-mt.bin : boot.HOF lib.HOF matmul-mt.HOF
|
||||||
|
$(HARPLD) -o $@ $^
|
||||||
|
|
||||||
simple.4b.bin : boot.4b.HOF lib.4b.HOF simple.4b.HOF
|
simple.4b.bin : boot.4b.HOF lib.4b.HOF simple.4b.HOF
|
||||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ ploop: ld %r7, %r1, #0;
|
|||||||
matgen: ldi %r2, #0;
|
matgen: ldi %r2, #0;
|
||||||
st %r5, %r2, retaddr;
|
st %r5, %r2, retaddr;
|
||||||
ldi %r2, #1;
|
ldi %r2, #1;
|
||||||
shli %r2, %r2, (`__WORD + 1); /* Multiply r0 by 2*__WORD */
|
shl %r2, %r2, %r1;
|
||||||
shl %r2, %r2, %r1;
|
shl %r2, %r2, %r1;
|
||||||
ori %r3, %r0, #0;
|
ori %r3, %r0, #0;
|
||||||
|
|
||||||
@@ -59,7 +59,8 @@ mgloop: jali %r5, randf;
|
|||||||
/* Write the matrix product of square matrix at (%r0) and (%r1) to (%r2). The
|
/* Write the matrix product of square matrix at (%r0) and (%r1) to (%r2). The
|
||||||
size of these matrices is 2^Nx2^N, where N = %r3 */
|
size of these matrices is 2^Nx2^N, where N = %r3 */
|
||||||
|
|
||||||
matmul: ldi %r4, #1;
|
matmul: ori %r22, %r5, #0;
|
||||||
|
ldi %r4, #1;
|
||||||
ldi %r10, (`__WORD); /* ` is the log base 2 operator */
|
ldi %r10, (`__WORD); /* ` is the log base 2 operator */
|
||||||
shl %r4, %r4, %r3;
|
shl %r4, %r4, %r3;
|
||||||
add %r10, %r10, %r3;
|
add %r10, %r10, %r3;
|
||||||
@@ -67,9 +68,25 @@ matmul: ldi %r4, #1;
|
|||||||
shl %r14, %r14, %r10;
|
shl %r14, %r14, %r10;
|
||||||
|
|
||||||
divi %r17, %r14, THREADS; /* Spawn threads */
|
divi %r17, %r14, THREADS; /* Spawn threads */
|
||||||
sloop:
|
ori %r18, %r0, #0;
|
||||||
|
ori %r19, %r2, #0;
|
||||||
|
ldi %r20, #0;
|
||||||
|
sloop: add %r0, %r0, %r17;
|
||||||
|
add %r2, %r2, %r17;
|
||||||
|
addi %r20, %r20, #1;
|
||||||
|
subi %r21, %r20, THREADS;
|
||||||
|
rtop @p0, %r21;
|
||||||
|
notp @p1, @p0;
|
||||||
|
@p1 ? clone %r20;
|
||||||
|
@p0 ? jmpi sloop;
|
||||||
|
|
||||||
jmpr %r5;
|
ori %r0, %r18, #0;
|
||||||
|
ori %r2, %r19, #0;
|
||||||
|
clone %r20;
|
||||||
|
|
||||||
|
jalis %r5, matmulthd;
|
||||||
|
|
||||||
|
jmpr %r22;
|
||||||
|
|
||||||
/* One thread of matrix multiplication. Expected register values at start:
|
/* One thread of matrix multiplication. Expected register values at start:
|
||||||
* %r0 - matrix a pointer (plus offset)
|
* %r0 - matrix a pointer (plus offset)
|
||||||
@@ -115,10 +132,11 @@ iloop: ld %r7, %r11, #0;
|
|||||||
|
|
||||||
jmprt %r5;
|
jmprt %r5;
|
||||||
|
|
||||||
.align 4096
|
|
||||||
.perm rw
|
.perm rw
|
||||||
matrix_a: .space 64;
|
.align 4096
|
||||||
matrix_b: .space 64;
|
|
||||||
|
matrix_a: .space 64
|
||||||
|
matrix_b: .space 64
|
||||||
matrix_r: .space 64
|
matrix_r: .space 64
|
||||||
|
|
||||||
retaddr: .word 0
|
retaddr: .word 0
|
||||||
|
|||||||
Reference in New Issue
Block a user