rtl refactoring
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
|
||||
#include "../config.h"
|
||||
|
||||
# .section .FileIO
|
||||
|
||||
69
runtime/intrinsics/vx_intrinsics.S
Normal file
69
runtime/intrinsics/vx_intrinsics.S
Normal file
@@ -0,0 +1,69 @@
|
||||
#include "../config.h"
|
||||
|
||||
.section .text
|
||||
|
||||
# vx_wspawn(numWarps, PC_spawn): spawn warps.
# Arguments arrive in a0 (numWarps) and a1 (PC_spawn); the C prototype returns void.
.type vx_wspawn, @function
|
||||
.global vx_wspawn
|
||||
vx_wspawn:
|
||||
.word 0x00b5106b # hand-encoded wspawn a0(numWarps), a1(PC SPAWN); presumably emitted as a raw word because the assembler has no mnemonic for it
|
||||
ret
|
||||
|
||||
# vx_tmc(numThreads): set the thread mask from the value in a0.
# NOTE(review): the prototype names the argument numThreads, while callers in this change also pass 0x1/0x0 masks; confirm whether a0 is a count or a bitmask.
.type vx_tmc, @function
|
||||
.global vx_tmc
|
||||
vx_tmc:
|
||||
.word 0x0005006b # hand-encoded tmc a0
|
||||
ret
|
||||
|
||||
# vx_barrier(barrier id, numWarps): warp barrier.
# Arguments arrive in a0 (barrier id) and a1 (numWarps); the C prototype returns void.
.type vx_barrier, @function
|
||||
.global vx_barrier
|
||||
vx_barrier:
|
||||
.word 0x00b5406b # hand-encoded barrier a0(barrier id), a1(numWarps)
|
||||
ret
|
||||
|
||||
# vx_split(predicate): split on the predicate passed in a0.
# Used by the __if macro in the intrinsics header ahead of a divergent branch.
.type vx_split, @function
|
||||
.global vx_split
|
||||
vx_split:
|
||||
.word 0x0005206b # hand-encoded split a0
|
||||
ret
|
||||
|
||||
# vx_join(): issue the join instruction; takes no arguments and the C prototype returns void.
.type vx_join, @function
|
||||
.global vx_join
|
||||
vx_join:
|
||||
.word 0x0000306b # hand-encoded join (no operands)
|
||||
ret
|
||||
|
||||
# vx_warp_id(): return the core warp index in a0.
# CSR_LWID is not defined in this file; presumably it comes from ../config.h.
.type vx_warp_id, @function
|
||||
.global vx_warp_id
|
||||
vx_warp_id:
|
||||
.word_placeholder_removed
|
||||
|
||||
# vx_warp_gid(): return the processor-unique warp id in a0.
# CSR_GWID is not defined in this file; presumably it comes from ../config.h.
.type vx_warp_gid, @function
|
||||
.global vx_warp_gid
|
||||
vx_warp_gid:
|
||||
csrr a0, CSR_GWID # read global (processor-unique) warp id
|
||||
ret
|
||||
|
||||
# vx_thread_id(): return the warp thread index in a0.
# CSR_LTID is not defined in this file; presumably it comes from ../config.h.
.type vx_thread_id, @function
|
||||
.global vx_thread_id
|
||||
vx_thread_id:
|
||||
csrr a0, CSR_LTID # read thread index within the warp
|
||||
ret
|
||||
|
||||
# vx_thread_gid(): return the processor-unique thread id in a0.
# CSR_GTID is not defined in this file; presumably it comes from ../config.h.
.type vx_thread_gid, @function
|
||||
.global vx_thread_gid
|
||||
vx_thread_gid:
|
||||
csrr a0, CSR_GTID # read global (processor-unique) thread id
|
||||
ret
|
||||
|
||||
# vx_num_cycles(): return the number of cycles in a0.
# NOTE(review): the CSR name CSR_CYCLL suggests only the low word of the counter is read; confirm against ../config.h.
.type vx_num_cycles, @function
|
||||
.global vx_num_cycles
|
||||
vx_num_cycles:
|
||||
csrr a0, CSR_CYCLL # read cycle counter CSR into the return register
|
||||
ret
|
||||
|
||||
# vx_num_instrs(): return the number of instructions in a0.
# NOTE(review): the CSR name CSR_INSTL suggests only the low word of the counter is read; confirm against ../config.h.
.type vx_num_instrs, @function
|
||||
.global vx_num_instrs
|
||||
vx_num_instrs:
|
||||
csrr a0, CSR_INSTL # read instruction counter CSR into the return register
|
||||
ret
|
||||
@@ -7,35 +7,38 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Spawns Warps
|
||||
// Spawn warps
|
||||
void vx_wspawn(unsigned numWarps, unsigned PC_spawn);
|
||||
|
||||
// Changes thread mask (activated/deactivates threads)
|
||||
// Set thread mask
|
||||
void vx_tmc(unsigned numThreads);
|
||||
|
||||
// Warp Barrier
|
||||
void vx_barrier(unsigned barriedID, unsigned numWarps);
|
||||
|
||||
// split on a predicate
|
||||
// Split on a predicate
|
||||
void vx_split(unsigned predicate);
|
||||
|
||||
// Join
|
||||
void vx_join(void);
|
||||
|
||||
// Get Hardware thread ID
|
||||
unsigned vx_threadID(void);
|
||||
// Return the warp thread index
|
||||
unsigned vx_thread_id(void);
|
||||
|
||||
// Get hardware warp ID
|
||||
unsigned vx_warpID(void);
|
||||
// Return the core warp index
|
||||
unsigned vx_warp_id(void);
|
||||
|
||||
// Get global warp number
|
||||
unsigned vx_warpNum(void);
|
||||
// Return processor-unique thread id
|
||||
unsigned vx_thread_gid(void);
|
||||
|
||||
// Get Number cycles/Inst
|
||||
unsigned vx_getCycles(void);
|
||||
unsigned vx_getInst(void);
|
||||
// Return processor-unique warp id
|
||||
unsigned vx_warp_gid(void);
|
||||
|
||||
void vx_resetStack(void);
|
||||
// Return number cycles
|
||||
unsigned vx_num_cycles(void);
|
||||
|
||||
// Return number instructions
|
||||
unsigned vx_num_instrs(void);
|
||||
|
||||
// Divergent-if helper: issues vx_split() on the predicate, then opens an
// ordinary `if` on the same predicate.
// NOTE: `b` is expanded twice, so pass a side-effect-free expression.
// NOTE: the expansion is two statements (a call followed by an `if`), so it
// must not be used as the unbraced body of another if/else/loop.
#define __if(b) vx_split(b); \
|
||||
if (b)
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
.section .text
|
||||
|
||||
# vx_wspawn(numWarps, PC_spawn): spawn warps.
# Arguments arrive in a0 (numWarps) and a1 (PC_spawn); the C prototype returns void.
.type vx_wspawn, @function
|
||||
.global vx_wspawn
|
||||
vx_wspawn:
|
||||
.word 0x00b5106b # hand-encoded wspawn a0(numWarps), a1(PC SPAWN)
|
||||
ret
|
||||
|
||||
# vx_tmc(numThreads): change the thread mask (activates/deactivates threads) from the value in a0.
.type vx_tmc, @function
|
||||
.global vx_tmc
|
||||
vx_tmc:
|
||||
.word 0x0005006b # hand-encoded tmc a0
|
||||
ret
|
||||
|
||||
# vx_barrier(barrier id, numWarps): warp barrier.
# Arguments arrive in a0 (barrier id) and a1 (numWarps); the C prototype returns void.
.type vx_barrier, @function
|
||||
.global vx_barrier
|
||||
vx_barrier:
|
||||
.word 0x00b5406b # hand-encoded barrier a0(barrier id), a1(numWarps)
|
||||
ret
|
||||
|
||||
# vx_split(predicate): split on the predicate passed in a0.
.type vx_split, @function
|
||||
.global vx_split
|
||||
vx_split:
|
||||
.word 0x0005206b # hand-encoded split a0
|
||||
ret
|
||||
|
||||
# vx_join(): issue the join instruction; takes no arguments and the C prototype returns void.
.type vx_join, @function
|
||||
.global vx_join
|
||||
vx_join:
|
||||
.word 0x0000306b # hand-encoded join (no operands)
|
||||
ret
|
||||
|
||||
# vx_warpID(): return the hardware warp ID in a0, read from CSR 0x21.
.type vx_warpID, @function
|
||||
.global vx_warpID
|
||||
vx_warpID:
|
||||
csrr a0, 0x21 # read warp ID
|
||||
ret
|
||||
|
||||
# vx_warpNum(): return the global warp number in a0, read from CSR 0x22.
.type vx_warpNum, @function
|
||||
.global vx_warpNum
|
||||
vx_warpNum:
|
||||
csrr a0, 0x22 # read global warp number (not the per-core warp ID, which is CSR 0x21)
|
||||
ret
|
||||
|
||||
# vx_threadID(): return the hardware thread ID in a0, read from CSR 0x20.
.type vx_threadID, @function
|
||||
.global vx_threadID
|
||||
vx_threadID:
|
||||
csrr a0, 0x20 # read thread ID
|
||||
ret
|
||||
|
||||
# vx_getCycles(): return the number of cycles in a0, read from CSR 0x26.
.type vx_getCycles, @function
|
||||
.global vx_getCycles
|
||||
vx_getCycles:
|
||||
csrr a0, 0x26 # read cycle count
|
||||
ret
|
||||
|
||||
# vx_getInst(): return the number of instructions in a0, read from CSR 0x25.
.type vx_getInst, @function
|
||||
.global vx_getInst
|
||||
vx_getInst:
|
||||
csrr a0, 0x25 # read instruction count
|
||||
ret
|
||||
|
||||
# vx_resetStack(): give every thread its own stack pointer.
# Sets the thread mask to 4, then computes per thread
#   sp = 0x6ffff000 - 1024*tid - (wid << 15) + 4*tid
# and finally sets the thread mask to 0 on every warp other than warp 0.
# Clobbers a0-a3 and sp; takes no arguments and the C prototype returns void.
.type vx_resetStack, @function
|
||||
.global vx_resetStack
|
||||
vx_resetStack:
|
||||
li a0, 4
|
||||
.word 0x0005006b # tmc 4: set the thread mask from a0 so the sp setup below runs with that mask
|
||||
|
||||
csrr a3, 0x21 # get wid
|
||||
slli a3, a3, 15 # a3 = wid * 32768 (per-warp stack stride)
|
||||
csrr a2, 0x20 # get tid
|
||||
slli a1, a2, 10 # multiply tid by 1024
|
||||
slli a2, a2, 2 # multiply tid by 4
|
||||
lui sp, 0x6ffff # load base sp (sp = 0x6ffff000)
|
||||
sub sp, sp, a1 # sub sp - (1024*tid)
|
||||
sub sp, sp, a3 # subtract the per-warp offset
|
||||
add sp, sp, a2 # shift sp for better performance
|
||||
|
||||
csrr a3, 0x21 # get wid
|
||||
beqz a3, RETURN # warp 0 keeps its thread mask and returns
|
||||
li a0, 0
|
||||
.word 0x0005006b # tmc 0: clear the thread mask on all other warps
|
||||
RETURN:
|
||||
ret
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
|
||||
#include "../config.h"
|
||||
|
||||
.type vx_print_str, @function
|
||||
.global vx_print_str
|
||||
@@ -29,5 +29,5 @@ vx_printc:
|
||||
|
||||
.section .data
|
||||
print_addr:
|
||||
.word 0x00010000
|
||||
.word IO_BUS_ADDR
|
||||
|
||||
@@ -8,6 +8,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
|
||||
|
||||
void vx_print_hex(unsigned);
|
||||
void vx_printf(const char *, unsigned);
|
||||
|
||||
|
||||
@@ -313,7 +313,7 @@ void _kill()
|
||||
|
||||
unsigned _getpid()
|
||||
{
|
||||
return vx_threadID();
|
||||
return vx_thread_id();
|
||||
}
|
||||
|
||||
void _unlink()
|
||||
|
||||
@@ -38,20 +38,18 @@ vx_set_sp:
|
||||
addi gp, gp, %pcrel_lo(1b)
|
||||
.option pop
|
||||
|
||||
csrr a3, 0x22 # get global warp number
|
||||
slli a3, a3, 0x1a # shift by wid
|
||||
csrr a2, 0x20 # get tid
|
||||
slli a1, a2, 10 # multiply tid by 1024
|
||||
csrr a1, CSR_GTID # get gtid
|
||||
slli a1, a1, 10 # multiply tid by 1024
|
||||
csrr a2, CSR_LTID # get tid
|
||||
slli a2, a2, 2 # multiply tid by 4
|
||||
lui sp, 0x6ffff # load base sp
|
||||
sub sp, sp, a1 # sub sp - (1024*tid)
|
||||
sub sp, sp, a3 # shoft per warp
|
||||
add sp, sp, a2 # shift sp for better performance
|
||||
lui sp, STACK_BASE_ADDR # load base sp
|
||||
sub sp, sp, a1 # sub thread block
|
||||
add sp, sp, a2 # reduce addr collision for perf
|
||||
|
||||
csrr a3, 0x21 # get wid
|
||||
csrr a3, CSR_LWID # get wid
|
||||
beqz a3, RETURN
|
||||
li a0, 0
|
||||
.word 0x0005006b # tmc 0
|
||||
.word 0x0005006b # tmc 0
|
||||
RETURN:
|
||||
ret
|
||||
|
||||
|
||||
@@ -12,8 +12,8 @@ CPY = $(TOOLPATH)/riscv32-unknown-elf-objcopy
|
||||
|
||||
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_TEST = ../common/tests.c
|
||||
|
||||
|
||||
@@ -41,8 +41,8 @@ void mat_add_kernel(void * void_arguments)
|
||||
{
|
||||
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
|
||||
|
||||
unsigned wid = vx_warpID();
|
||||
unsigned tid = vx_threadID();
|
||||
unsigned wid = vx_warp_id();
|
||||
unsigned tid = vx_thread_id();
|
||||
|
||||
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
|
||||
|
||||
@@ -77,7 +77,7 @@ int main()
|
||||
// void * hellp = malloc(4);
|
||||
vx_print_str("Confirm Dev Main\n");
|
||||
|
||||
vx_print_str("vx_spawnWarps\n");
|
||||
vx_print_str("vx_spawn_warps\n");
|
||||
|
||||
mat_add_args_t arguments;
|
||||
arguments.x = x;
|
||||
@@ -91,7 +91,7 @@ int main()
|
||||
int numThreads = 4;
|
||||
|
||||
// First kernel call
|
||||
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_print_mat(z, arguments.numRows, arguments.numColums);
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ int main()
|
||||
arguments.numRows = 4;
|
||||
|
||||
// Second Kernel Call
|
||||
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_print_mat(z, arguments.numRows, arguments.numColums);
|
||||
|
||||
|
||||
|
||||
@@ -77,23 +77,23 @@ Disassembly of section .text:
|
||||
800000cc: 0000306b 0x306b
|
||||
800000d0: 00008067 ret
|
||||
|
||||
800000d4 <vx_warpID>:
|
||||
800000d4 <vx_warp_id>:
|
||||
800000d4: 02102573 csrr a0,0x21
|
||||
800000d8: 00008067 ret
|
||||
|
||||
800000dc <vx_threadID>:
|
||||
800000dc <vx_thread_id>:
|
||||
800000dc: 02002573 csrr a0,0x20
|
||||
800000e0: 00008067 ret
|
||||
|
||||
800000e4 <vx_getCycles>:
|
||||
800000e4 <vx_num_cycles>:
|
||||
800000e4: 02602573 csrr a0,0x26
|
||||
800000e8: 00008067 ret
|
||||
|
||||
800000ec <vx_getInst>:
|
||||
800000ec <vx_num_instrs>:
|
||||
800000ec: 02502573 csrr a0,0x25
|
||||
800000f0: 00008067 ret
|
||||
|
||||
800000f4 <vx_resetStack>:
|
||||
800000f4 <vx_reset_stack>:
|
||||
800000f4: 00400513 li a0,4
|
||||
800000f8: 0005006b 0x5006b
|
||||
800000fc: 021026f3 csrr a3,0x21
|
||||
@@ -219,7 +219,7 @@ Disassembly of section .text:
|
||||
8000029c: 3a01a783 lw a5,928(gp) # 80016ba8 <global_argument_struct>
|
||||
800002a0: 00078513 mv a0,a5
|
||||
800002a4: 000700e7 jalr a4
|
||||
800002a8: e2dff0ef jal ra,800000d4 <vx_warpID>
|
||||
800002a8: e2dff0ef jal ra,800000d4 <vx_warp_id>
|
||||
800002ac: fea42623 sw a0,-20(s0)
|
||||
800002b0: fec42783 lw a5,-20(s0)
|
||||
800002b4: 00078863 beqz a5,800002c4 <setup_call+0x48>
|
||||
@@ -234,7 +234,7 @@ Disassembly of section .text:
|
||||
800002d8: 02010113 addi sp,sp,32
|
||||
800002dc: 00008067 ret
|
||||
|
||||
800002e0 <vx_spawnWarps>:
|
||||
800002e0 <vx_spawn_warps>:
|
||||
800002e0: fe010113 addi sp,sp,-32
|
||||
800002e4: 00112e23 sw ra,28(sp)
|
||||
800002e8: 00812c23 sw s0,24(sp)
|
||||
@@ -269,10 +269,10 @@ Disassembly of section .text:
|
||||
80000354: 3b01a783 lw a5,944(gp) # 80016bb8 <pocl_threads>
|
||||
80000358: 00078513 mv a0,a5
|
||||
8000035c: d59ff0ef jal ra,800000b4 <vx_tmc>
|
||||
80000360: d7dff0ef jal ra,800000dc <vx_threadID>
|
||||
80000360: d7dff0ef jal ra,800000dc <vx_thread_id>
|
||||
80000364: 00050793 mv a5,a0
|
||||
80000368: fef42023 sw a5,-32(s0)
|
||||
8000036c: d69ff0ef jal ra,800000d4 <vx_warpID>
|
||||
8000036c: d69ff0ef jal ra,800000d4 <vx_warp_id>
|
||||
80000370: 00050793 mv a5,a0
|
||||
80000374: fcf42e23 sw a5,-36(s0)
|
||||
80000378: fe042623 sw zero,-20(s0)
|
||||
@@ -389,14 +389,14 @@ Disassembly of section .text:
|
||||
8000052c: 00078593 mv a1,a5
|
||||
80000530: 00070513 mv a0,a4
|
||||
80000534: b79ff0ef jal ra,800000ac <vx_wspawn>
|
||||
80000538: badff0ef jal ra,800000e4 <vx_getCycles>
|
||||
80000538: badff0ef jal ra,800000e4 <vx_num_cycles>
|
||||
8000053c: fea42623 sw a0,-20(s0)
|
||||
80000540: badff0ef jal ra,800000ec <vx_getInst>
|
||||
80000540: badff0ef jal ra,800000ec <vx_num_instrs>
|
||||
80000544: fea42423 sw a0,-24(s0)
|
||||
80000548: dfdff0ef jal ra,80000344 <pocl_spawn_real>
|
||||
8000054c: b99ff0ef jal ra,800000e4 <vx_getCycles>
|
||||
8000054c: b99ff0ef jal ra,800000e4 <vx_num_cycles>
|
||||
80000550: fea42223 sw a0,-28(s0)
|
||||
80000554: b99ff0ef jal ra,800000ec <vx_getInst>
|
||||
80000554: b99ff0ef jal ra,800000ec <vx_num_instrs>
|
||||
80000558: fea42023 sw a0,-32(s0)
|
||||
8000055c: fe442703 lw a4,-28(s0)
|
||||
80000560: fec42783 lw a5,-20(s0)
|
||||
@@ -424,7 +424,7 @@ Disassembly of section .text:
|
||||
800005b0: b85ff0ef jal ra,80000134 <vx_print_str>
|
||||
800005b4: 00400513 li a0,4
|
||||
800005b8: afdff0ef jal ra,800000b4 <vx_tmc>
|
||||
800005bc: b21ff0ef jal ra,800000dc <vx_threadID>
|
||||
800005bc: b21ff0ef jal ra,800000dc <vx_thread_id>
|
||||
800005c0: fea42623 sw a0,-20(s0)
|
||||
800005c4: fec42703 lw a4,-20(s0)
|
||||
800005c8: 88418693 addi a3,gp,-1916 # 8001608c <tmc_array>
|
||||
@@ -473,7 +473,7 @@ Disassembly of section .text:
|
||||
8000066c: 00112e23 sw ra,28(sp)
|
||||
80000670: 00812c23 sw s0,24(sp)
|
||||
80000674: 02010413 addi s0,sp,32
|
||||
80000678: a65ff0ef jal ra,800000dc <vx_threadID>
|
||||
80000678: a65ff0ef jal ra,800000dc <vx_thread_id>
|
||||
8000067c: fea42623 sw a0,-20(s0)
|
||||
80000680: fec42783 lw a5,-20(s0)
|
||||
80000684: 0027b793 sltiu a5,a5,2
|
||||
@@ -568,7 +568,7 @@ Disassembly of section .text:
|
||||
800007e0: 00112e23 sw ra,28(sp)
|
||||
800007e4: 00812c23 sw s0,24(sp)
|
||||
800007e8: 02010413 addi s0,sp,32
|
||||
800007ec: 8e9ff0ef jal ra,800000d4 <vx_warpID>
|
||||
800007ec: 8e9ff0ef jal ra,800000d4 <vx_warp_id>
|
||||
800007f0: fea42623 sw a0,-20(s0)
|
||||
800007f4: 3c418713 addi a4,gp,964 # 80016bcc <wsapwn_arr>
|
||||
800007f8: fec42783 lw a5,-20(s0)
|
||||
@@ -664,9 +664,9 @@ Disassembly of section .text:
|
||||
80000948: fca42623 sw a0,-52(s0)
|
||||
8000094c: fcc42783 lw a5,-52(s0)
|
||||
80000950: fef42623 sw a5,-20(s0)
|
||||
80000954: f80ff0ef jal ra,800000d4 <vx_warpID>
|
||||
80000954: f80ff0ef jal ra,800000d4 <vx_warp_id>
|
||||
80000958: fea42423 sw a0,-24(s0)
|
||||
8000095c: f80ff0ef jal ra,800000dc <vx_threadID>
|
||||
8000095c: f80ff0ef jal ra,800000dc <vx_thread_id>
|
||||
80000960: fea42223 sw a0,-28(s0)
|
||||
80000964: fec42783 lw a5,-20(s0)
|
||||
80000968: 0107a783 lw a5,16(a5)
|
||||
@@ -808,7 +808,7 @@ Disassembly of section .text:
|
||||
80000b78: 800017b7 lui a5,0x80001
|
||||
80000b7c: 93878613 addi a2,a5,-1736 # 80000938 <__BSS_END__+0xfffe9d48>
|
||||
80000b80: 00070513 mv a0,a4
|
||||
80000b84: f5cff0ef jal ra,800002e0 <vx_spawnWarps>
|
||||
80000b84: f5cff0ef jal ra,800002e0 <vx_spawn_warps>
|
||||
80000b88: fe442783 lw a5,-28(s0)
|
||||
80000b8c: 00078713 mv a4,a5
|
||||
80000b90: fe042783 lw a5,-32(s0)
|
||||
@@ -833,7 +833,7 @@ Disassembly of section .text:
|
||||
80000bdc: 800017b7 lui a5,0x80001
|
||||
80000be0: 93878613 addi a2,a5,-1736 # 80000938 <__BSS_END__+0xfffe9d48>
|
||||
80000be4: 00070513 mv a0,a4
|
||||
80000be8: ef8ff0ef jal ra,800002e0 <vx_spawnWarps>
|
||||
80000be8: ef8ff0ef jal ra,800002e0 <vx_spawn_warps>
|
||||
80000bec: fe442783 lw a5,-28(s0)
|
||||
80000bf0: 00078713 mv a4,a5
|
||||
80000bf4: fe042783 lw a5,-32(s0)
|
||||
|
||||
@@ -9,7 +9,7 @@ CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
NEWLIB = ../../newlib/newlib.c ../../newlib/newlib_notimp.c ../../newlib/newlib.s
|
||||
VX_STR =
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO =
|
||||
VX_API =
|
||||
VX_FIO =
|
||||
|
||||
@@ -328,7 +328,7 @@ Disassembly of section .text:
|
||||
80000488: 00112623 sw ra,12(sp)
|
||||
8000048c: 00812423 sw s0,8(sp)
|
||||
80000490: 01010413 addi s0,sp,16
|
||||
80000494: 0b4000ef jal ra,80000548 <vx_threadID>
|
||||
80000494: 0b4000ef jal ra,80000548 <vx_thread_id>
|
||||
80000498: 00050793 mv a5,a0
|
||||
8000049c: 00078513 mv a0,a5
|
||||
800004a0: 00c12083 lw ra,12(sp)
|
||||
@@ -388,15 +388,15 @@ Disassembly of section .text:
|
||||
80000538: 0000306b 0x306b
|
||||
8000053c: 00008067 ret
|
||||
|
||||
80000540 <vx_warpID>:
|
||||
80000540 <vx_warp_id>:
|
||||
80000540: 02102573 csrr a0,0x21
|
||||
80000544: 00008067 ret
|
||||
|
||||
80000548 <vx_threadID>:
|
||||
80000548 <vx_thread_id>:
|
||||
80000548: 02002573 csrr a0,0x20
|
||||
8000054c: 00008067 ret
|
||||
|
||||
80000550 <vx_resetStack>:
|
||||
80000550 <vx_reset_stack>:
|
||||
80000550: 00400513 li a0,4
|
||||
80000554: 0005006b 0x5006b
|
||||
80000558: 021026f3 csrr a3,0x21
|
||||
|
||||
@@ -13,10 +13,10 @@ CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
NEWLIB = ../../newlib/newlib.c
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
LIBS = -Wl,--whole-archive ./libs/libvecadd.a -Wl,--no-whole-archive ./libs/libOpenCL.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
VX_MAIN = vx_pocl_main
|
||||
|
||||
@@ -485,7 +485,7 @@ Disassembly of section .text:
|
||||
800006ec: 00112623 sw ra,12(sp)
|
||||
800006f0: 00812423 sw s0,8(sp)
|
||||
800006f4: 01010413 addi s0,sp,16
|
||||
800006f8: 0e8000ef jal ra,800007e0 <vx_threadID>
|
||||
800006f8: 0e8000ef jal ra,800007e0 <vx_thread_id>
|
||||
800006fc: 00050793 mv a5,a0
|
||||
80000700: 00078513 mv a0,a5
|
||||
80000704: 00c12083 lw ra,12(sp)
|
||||
@@ -558,15 +558,15 @@ Disassembly of section .text:
|
||||
800007d0: 0000306b 0x306b
|
||||
800007d4: 00008067 ret
|
||||
|
||||
800007d8 <vx_warpID>:
|
||||
800007d8 <vx_warp_id>:
|
||||
800007d8: 02102573 csrr a0,0x21
|
||||
800007dc: 00008067 ret
|
||||
|
||||
800007e0 <vx_threadID>:
|
||||
800007e0 <vx_thread_id>:
|
||||
800007e0: 02002573 csrr a0,0x20
|
||||
800007e4: 00008067 ret
|
||||
|
||||
800007e8 <vx_resetStack>:
|
||||
800007e8 <vx_reset_stack>:
|
||||
800007e8: 00400513 li a0,4
|
||||
800007ec: 0005006b 0x5006b
|
||||
800007f0: 021026f3 csrr a3,0x21
|
||||
|
||||
@@ -9,10 +9,10 @@ CPY = /opt/riscv/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
NEWLIB = ../../newlib/newlib.c ../../newlib/newlib_notimp.c ../../newlib/newlib.s
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
|
||||
VX_MAIN = ./vx_nl_main.c
|
||||
|
||||
|
||||
@@ -407,15 +407,15 @@ Disassembly of section .text:
|
||||
800005bc: 0000306b 0x306b
|
||||
800005c0: 00008067 ret
|
||||
|
||||
800005c4 <vx_warpID>:
|
||||
800005c4 <vx_warp_id>:
|
||||
800005c4: 02102573 csrr a0,0x21
|
||||
800005c8: 00008067 ret
|
||||
|
||||
800005cc <vx_threadID>:
|
||||
800005cc <vx_thread_id>:
|
||||
800005cc: 02002573 csrr a0,0x20
|
||||
800005d0: 00008067 ret
|
||||
|
||||
800005d4 <vx_resetStack>:
|
||||
800005d4 <vx_reset_stack>:
|
||||
800005d4: 00400513 li a0,4
|
||||
800005d8: 0005006b 0x5006b
|
||||
800005dc: 021026f3 csrr a3,0x21
|
||||
@@ -544,7 +544,7 @@ Disassembly of section .text:
|
||||
80000788: 9947a783 lw a5,-1644(a5) # 81001994 <_PathLocale+0xffffff9c>
|
||||
8000078c: 00078513 mv a0,a5
|
||||
80000790: 000700e7 jalr a4
|
||||
80000794: e31ff0ef jal ra,800005c4 <vx_warpID>
|
||||
80000794: e31ff0ef jal ra,800005c4 <vx_warp_id>
|
||||
80000798: fea42623 sw a0,-20(s0)
|
||||
8000079c: fec42783 lw a5,-20(s0)
|
||||
800007a0: 00078863 beqz a5,800007b0 <setup_call+0x54>
|
||||
@@ -559,7 +559,7 @@ Disassembly of section .text:
|
||||
800007c4: 02010113 addi sp,sp,32
|
||||
800007c8: 00008067 ret
|
||||
|
||||
800007cc <vx_spawnWarps>:
|
||||
800007cc <vx_spawn_warps>:
|
||||
800007cc: fe010113 addi sp,sp,-32
|
||||
800007d0: 00112e23 sw ra,28(sp)
|
||||
800007d4: 00812c23 sw s0,24(sp)
|
||||
@@ -599,7 +599,7 @@ Disassembly of section .text:
|
||||
80000854: dc1ff0ef jal ra,80000614 <vx_print_str>
|
||||
80000858: 00400513 li a0,4
|
||||
8000085c: d49ff0ef jal ra,800005a4 <vx_tmc>
|
||||
80000860: d6dff0ef jal ra,800005cc <vx_threadID>
|
||||
80000860: d6dff0ef jal ra,800005cc <vx_thread_id>
|
||||
80000864: fea42623 sw a0,-20(s0)
|
||||
80000868: fec42703 lw a4,-20(s0)
|
||||
8000086c: 810017b7 lui a5,0x81001
|
||||
@@ -652,7 +652,7 @@ Disassembly of section .text:
|
||||
80000920: 00112e23 sw ra,28(sp)
|
||||
80000924: 00812c23 sw s0,24(sp)
|
||||
80000928: 02010413 addi s0,sp,32
|
||||
8000092c: ca1ff0ef jal ra,800005cc <vx_threadID>
|
||||
8000092c: ca1ff0ef jal ra,800005cc <vx_thread_id>
|
||||
80000930: fea42623 sw a0,-20(s0)
|
||||
80000934: fec42783 lw a5,-20(s0)
|
||||
80000938: 0027b793 sltiu a5,a5,2
|
||||
@@ -754,7 +754,7 @@ Disassembly of section .text:
|
||||
80000ab0: 00112e23 sw ra,28(sp)
|
||||
80000ab4: 00812c23 sw s0,24(sp)
|
||||
80000ab8: 02010413 addi s0,sp,32
|
||||
80000abc: b09ff0ef jal ra,800005c4 <vx_warpID>
|
||||
80000abc: b09ff0ef jal ra,800005c4 <vx_warp_id>
|
||||
80000ac0: fea42623 sw a0,-20(s0)
|
||||
80000ac4: 810027b7 lui a5,0x81002
|
||||
80000ac8: fec42703 lw a4,-20(s0)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
COMP = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-g++
|
||||
#COMP = /opt/riscv-new/drops/bin/riscv32-unknown-elf-g++
|
||||
|
||||
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,../../startup/vx_link.ld -ffreestanding -nostdlib
|
||||
CC_FLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,../../startup/vx_link.ld -ffreestanding -nostdlib
|
||||
|
||||
DMP = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump
|
||||
CPY = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
||||
@@ -10,10 +10,10 @@ CPY = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
NEWLIB = ../../newlib/newlib.c
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
|
||||
VX_MAIN = vx_simple_main
|
||||
|
||||
|
||||
@@ -6,15 +6,19 @@
|
||||
|
||||
int tmc_array[4] = {5,5,5,5};
|
||||
|
||||
void test_tmc_impl()
|
||||
{
|
||||
unsigned tid = vx_thread_id(); // Get TID
|
||||
tmc_array[tid] = tid;
|
||||
}
|
||||
|
||||
void test_tmc()
|
||||
{
|
||||
//vx_print_str("testing_tmc\n");
|
||||
vx_print_str("testing_tmc\n");
|
||||
|
||||
vx_tmc(4);
|
||||
|
||||
unsigned tid = vx_threadID(); // Get TID
|
||||
|
||||
tmc_array[tid] = tid;
|
||||
test_tmc_impl();
|
||||
|
||||
vx_tmc(1);
|
||||
|
||||
@@ -34,7 +38,7 @@ int div_arr[4];
|
||||
|
||||
void test_divergence()
|
||||
{
|
||||
unsigned tid = vx_threadID(); // Get TID
|
||||
unsigned tid = vx_thread_id(); // Get TID
|
||||
|
||||
bool b = tid < 2;
|
||||
__if (b)
|
||||
@@ -73,20 +77,16 @@ void test_divergence()
|
||||
vx_print_str("\n");
|
||||
vx_print_hex(div_arr[3]);
|
||||
vx_print_str("\n");
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned wsapwn_arr[4];
|
||||
|
||||
|
||||
void simple_kernel()
|
||||
{
|
||||
unsigned wid = vx_warpID();
|
||||
unsigned wid = vx_warp_id();
|
||||
|
||||
wsapwn_arr[wid] = wid;
|
||||
|
||||
wid = vx_warpID();
|
||||
if (wid != 0)
|
||||
{
|
||||
vx_tmc(0);
|
||||
|
||||
@@ -24,7 +24,7 @@ unsigned y[] = {1, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
1, 1, 1, 1};
|
||||
|
||||
F
|
||||
unsigned z[] = {0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
@@ -34,8 +34,8 @@ void mat_add_kernel(void * void_arguments)
|
||||
{
|
||||
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
|
||||
|
||||
unsigned wid = vx_warpID();
|
||||
unsigned tid = vx_threadID();
|
||||
unsigned wid = vx_warp_id();
|
||||
unsigned tid = vx_thread_id();
|
||||
|
||||
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
|
||||
|
||||
@@ -50,7 +50,7 @@ void mat_add_kernel(void * void_arguments)
|
||||
|
||||
int main()
|
||||
{
|
||||
// Main is called with all threads active of warp 0
|
||||
// ensure single thread
|
||||
vx_tmc(1);
|
||||
|
||||
vx_print_str("Let's start... (This might take a while)\n");
|
||||
@@ -84,11 +84,9 @@ int main()
|
||||
vx_print_str("Wr->read and repeat(Wr) tests passed!\n");
|
||||
}
|
||||
|
||||
|
||||
vx_print_str("Simple Main\n");
|
||||
|
||||
|
||||
// // TMC test
|
||||
// TMC test
|
||||
test_tmc();
|
||||
|
||||
// Control Divergence Test
|
||||
@@ -118,7 +116,7 @@ int main()
|
||||
|
||||
}
|
||||
|
||||
vx_print_str("vx_spawnWarps mat_add_kernel\n");
|
||||
vx_print_str("vx_spawn_warps mat_add_kernel\n");
|
||||
|
||||
mat_add_args_t arguments;
|
||||
arguments.x = x;
|
||||
@@ -131,7 +129,7 @@ int main()
|
||||
int numWarps = 4;
|
||||
int numThreads = 4;
|
||||
|
||||
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
|
||||
vx_print_str("Waiting to ensure other warps are done... (Takes a while)\n");
|
||||
for (int i = 0; i < 5000; i++) {}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,7 @@
|
||||
|
||||
|
||||
#include "io/io.h" // Printing functions
|
||||
#include "intrinsics/instrinsics.h" // vx_threadID and vx_WarpID
|
||||
#include "intrinsics/instrinsics.h" // vx_thread_id and vx_WarpID
|
||||
|
||||
struct args
|
||||
{
|
||||
@@ -14,7 +14,7 @@ void function(void * arg)
|
||||
{
|
||||
struct args * real_arg = (struct args *) arg;
|
||||
|
||||
unsigned tid = vx_threadID();
|
||||
unsigned tid = vx_thread_id();
|
||||
unsigned wid = vx_WarpID();
|
||||
|
||||
__if(something) // Control divergent if
|
||||
@@ -36,7 +36,7 @@ int main()
|
||||
struct args arg;
|
||||
arg.data = data;
|
||||
|
||||
vx_spawnWarps(numWarps, numThreads, function, &data);
|
||||
vx_spawn_warps(numWarps, numThreads, function, &data);
|
||||
|
||||
|
||||
}
|
||||
@@ -13,10 +13,10 @@ CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
NEWLIB = ../../newlib/newlib.c
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
LIBS = -Wl,--whole-archive ./libs/libvecadd.a -Wl,--no-whole-archive ./libs/libOpenCL.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
VX_MAIN = vx_pocl_main
|
||||
|
||||
@@ -344,7 +344,7 @@ Disassembly of section .text:
|
||||
800004b8: 00112623 sw ra,12(sp)
|
||||
800004bc: 00812423 sw s0,8(sp)
|
||||
800004c0: 01010413 addi s0,sp,16
|
||||
800004c4: 0ac000ef jal ra,80000570 <vx_threadID>
|
||||
800004c4: 0ac000ef jal ra,80000570 <vx_thread_id>
|
||||
800004c8: 00050793 mv a5,a0
|
||||
800004cc: 00078513 mv a0,a5
|
||||
800004d0: 00c12083 lw ra,12(sp)
|
||||
@@ -406,11 +406,11 @@ Disassembly of section .text:
|
||||
80000568: 02102573 csrr a0,0x21
|
||||
8000056c: 00008067 ret
|
||||
|
||||
80000570 <vx_threadID>:
|
||||
80000570 <vx_thread_id>:
|
||||
80000570: 02002573 csrr a0,0x20
|
||||
80000574: 00008067 ret
|
||||
|
||||
80000578 <vx_resetStack>:
|
||||
80000578 <vx_reset_stack>:
|
||||
80000578: 00400513 li a0,4
|
||||
8000057c: 0005006b 0x5006b
|
||||
80000580: 021026f3 csrr a3,0x21
|
||||
|
||||
@@ -16,9 +16,9 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
|
||||
NEWLIB = ../../newlib/newlib.c
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
VX_VEC = vx_vec.s
|
||||
#LIBS = /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libc.a /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
@@ -500,7 +500,7 @@ Disassembly of section .text:
|
||||
80000718: 00112623 sw ra,12(sp)
|
||||
8000071c: 00812423 sw s0,8(sp)
|
||||
80000720: 01010413 addi s0,sp,16
|
||||
80000724: 0e8000ef jal ra,8000080c <vx_threadID>
|
||||
80000724: 0e8000ef jal ra,8000080c <vx_thread_id>
|
||||
80000728: 00050793 mv a5,a0
|
||||
8000072c: 00078513 mv a0,a5
|
||||
80000730: 00c12083 lw ra,12(sp)
|
||||
@@ -573,15 +573,15 @@ Disassembly of section .text:
|
||||
800007fc: 0000306b 0x306b
|
||||
80000800: 00008067 ret
|
||||
|
||||
80000804 <vx_warpID>:
|
||||
80000804 <vx_warp_id>:
|
||||
80000804: 02102573 csrr a0,0x21
|
||||
80000808: 00008067 ret
|
||||
|
||||
8000080c <vx_threadID>:
|
||||
8000080c <vx_thread_id>:
|
||||
8000080c: 02002573 csrr a0,0x20
|
||||
80000810: 00008067 ret
|
||||
|
||||
80000814 <vx_resetStack>:
|
||||
80000814 <vx_reset_stack>:
|
||||
80000814: 00400513 li a0,4
|
||||
80000818: 0005006b 0x5006b
|
||||
8000081c: 021026f3 csrr a3,0x21
|
||||
|
||||
@@ -19,12 +19,12 @@ void spawn_warp_runonce() {
|
||||
global_function_pointer(global_argument_struct);
|
||||
|
||||
// resume single-thread execution on exit
|
||||
unsigned wid = vx_warpID();
|
||||
unsigned wid = vx_warp_id();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
vx_tmc(tmask);
|
||||
}
|
||||
|
||||
void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) {
|
||||
void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) {
|
||||
global_function_pointer = func_ptr;
|
||||
global_argument_struct = args;
|
||||
global_num_threads = numThreads;
|
||||
@@ -43,15 +43,15 @@ void pocl_spawn_warp_runonce() {
|
||||
// activate all threads
|
||||
vx_tmc(pocl_threads);
|
||||
|
||||
int x = vx_threadID();
|
||||
int y = vx_warpNum();
|
||||
int x = vx_thread_id();
|
||||
int y = vx_warp_gid();
|
||||
|
||||
// call kernel routine
|
||||
(pocl_pfn)(pocl_args, pocl_ctx, x, y, 0);
|
||||
|
||||
// resume single-thread execution on exit
|
||||
int wid = vx_warpID();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
int wid = vx_warp_id();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
vx_tmc(tmask);
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ extern "C" {
|
||||
|
||||
typedef void (*func_t)(void *);
|
||||
|
||||
void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args);
|
||||
void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args);
|
||||
|
||||
struct context_t {
|
||||
uint32_t num_groups[3];
|
||||
|
||||
Reference in New Issue
Block a user