From c103a42fdb3860e267d328d3d9d126e396b3f7d9 Mon Sep 17 00:00:00 2001 From: felsabbagh3 Date: Sat, 9 Feb 2019 13:51:24 -0500 Subject: [PATCH] Added matadd.s and 2warp_matadd.s --- src/core.cpp | 3 ++ src/test/2warp_matadd.s | 82 +++++++++++++++++++++++++++++++++++++++++ src/test/Makefile | 6 +-- src/test/matadd.s | 66 +++++++++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 src/test/2warp_matadd.s create mode 100644 src/test/matadd.s diff --git a/src/core.cpp b/src/core.cpp index 32804c50..e64c1957 100644 --- a/src/core.cpp +++ b/src/core.cpp @@ -4,6 +4,9 @@ #include +// #define USE_DEBUG 7 +// #define PRINT_ACTIVE_THREADS + #include "include/types.h" #include "include/util.h" #include "include/archdef.h" diff --git a/src/test/2warp_matadd.s b/src/test/2warp_matadd.s new file mode 100644 index 00000000..f5abd25b --- /dev/null +++ b/src/test/2warp_matadd.s @@ -0,0 +1,82 @@ +/******************************************************************************* + Harptools by Chad D. Kersey, Summer 2011 +******************************************************************************** + + Sample HARP assembly program. + +*******************************************************************************/ +/* Two-warp element-wise add/subtract test. entry issues wspawn with target Begin and then clears %r20, so the original warp reaches Begin with %r20 = 0 (presumably the spawned warp starts at Begin with %r20 = 1 - TODO confirm wspawn operand semantics). Each warp runs the tc_loop clone loop and then calls dthread through jalis; dthread indexes Array1, Array2 and RESULT at byte offset (%r20 << 6) plus (%r0 << 3), subtracts where %r0 - 4 is negative and adds otherwise, with split/join around the divergent branch. After the bar, the warp whose %r20 was 1 traps and the other prints 2 * THREADS words of RESULT through printdec (not defined in this file). NOTE(review): the #3 and #6 shifts hard-code an 8-byte word and 8 lanes per warp while ploop uses __WORD and THREADS - confirm they stay in sync. NOTE(review): confirm that the bar operands (#55, #1) make the printing warp wait for the other warp's stores. */ +.def THREADS 8 + +.align 4096 +.perm x +.entry +.global +entry: ldi %r20, #1 + ldi %r21, Begin + wspawn %r20, %r21, %r20 + ldi %r20, #0 +Begin: ldi %r0, #1 + ldi %r1, THREADS +tc_loop: clone %r0 + + addi %r0, %r0, #1 + sub %r2, %r1, %r0 + rtop @p0, %r2 + @p0 ? jmpi tc_loop + + ldi %r0, #0 + jalis %r5, %r1, dthread; + + ldi %r25, #55 + ldi %r26, #1 + bar %r25, %r26 + + subi %r20, %r20, #1 + + iszero @p0, %r20 + + @p0 ? trap; + + ldi %r0, #0 + ldi %r1, (__WORD * THREADS) + shli %r1, %r1, #1 + +ploop: ld %r7, %r0, RESULT + jali %r5, printdec + + addi %r0, %r0, __WORD + sub %r7, %r1, %r0 + rtop @p0, %r7 + @p0 ? 
jmpi ploop + + trap; + + +dthread: shli %r15, %r20 , #6 + shli %r10, %r0 , #3 + add %r10, %r10, %r15 + ld %r11, %r10, Array1 + ld %r12, %r10, Array2 + + subi %r13, %r0, #4 + isneg @p0, %r13 + @p0 ? split + @p0 ? jmpi SUBT + add %r14, %r11, %r12 + jmpi after +SUBT: sub %r14, %r11, %r12 +after: join + + st %r14, %r10, RESULT + + jmprt %r5; + +.align 4096 +Array1: + .word 1 5 10 0 3 1 1 2 + .word 8 7 8 7 5 7 7 9 +Array2: + .word 0 2 2 0 5 0 1 1 + .word 4 2 2 0 3 2 3 2 +RESULT: .space 512 diff --git a/src/test/Makefile b/src/test/Makefile index bf9afbb0..fabd2fa2 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -5,14 +5,14 @@ HARPDIS = ../harptool -D 4BARCH = 4b16/16/2/1 all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin \ - matmul-mt.s lfsr.bin diverge.bin + matmul-mt.s lfsr.bin diverge.bin matadd.bin 2warp_matadd.bin run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out\ - matmul-mt.out lfsr.4b.out lfsr.out diverge.out + matmul-mt.out lfsr.4b.out lfsr.out diverge.out matadd.out 2warp_matadd.out disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d \ bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d lfsr.d \ -diverge.d +diverge.d matadd.d 2warp_matadd.d %.4b.out : %.4b.bin $(HARPEM) -a $(4BARCH) -c $< > $@ diff --git a/src/test/matadd.s b/src/test/matadd.s new file mode 100644 index 00000000..3cecdcd9 --- /dev/null +++ b/src/test/matadd.s @@ -0,0 +1,66 @@ +/******************************************************************************* + Harptools by Chad D. Kersey, Summer 2011 +******************************************************************************** + + Sample HARP assembly program. 
+ +*******************************************************************************/ +/* Element-wise add/subtract test with split/join divergence. After the tc_loop clone loop, jalis calls dthread, which loads Array1 and Array2 at byte offset %r0 << 3, subtracts where %r0 - 4 is negative and adds otherwise, and stores the result at the same offset in RESULT. ploop then prints THREADS words of RESULT through printdec (not defined in this file; presumably supplied by a linked library - TODO confirm). NOTE(review): the #3 shift hard-codes an 8-byte word while ploop strides by __WORD - confirm they stay in sync. */ +.def THREADS 8 + +.align 4096 +.perm x +.entry +.global +entry: + ldi %r0, #1 + ldi %r1, THREADS +tc_loop: clone %r0 + + addi %r0, %r0, #1 + sub %r2, %r1, %r0 + rtop @p0, %r2 + @p0 ? jmpi tc_loop + + ldi %r0, #0 + jalis %r5, %r1, dthread; + + ldi %r0, #0 + ldi %r1, (__WORD * THREADS) + +ploop: ld %r7, %r0, RESULT + jali %r5, printdec + + addi %r0, %r0, __WORD + sub %r7, %r1, %r0 + rtop @p0, %r7 + @p0 ? jmpi ploop + + trap; + + +dthread: shli %r10, %r0 , #3 + ld %r11, %r10, Array1 + ld %r12, %r10, Array2 + + subi %r13, %r0, #4 + isneg @p0, %r13 + @p0 ? split + @p0 ? jmpi SUBT + add %r14, %r11, %r12 + jmpi after +SUBT: sub %r14, %r11, %r12 +after: join + + st %r14, %r10, RESULT + + jmprt %r5; + +.align 4096 +Array1: + .word 1 5 10 0 + .word 3 1 1 2 +Array2: + .word 0 2 2 0 + .word 5 0 1 1 +RESULT: .space 512