OpenCL benchmarks running

This commit is contained in:
fares
2019-11-23 20:37:14 -05:00
parent 69139e47bb
commit d2bd560593
10 changed files with 130 additions and 30 deletions

View File

@@ -31,6 +31,10 @@ unsigned vx_threadID(void);
// Get hardware warp ID
unsigned vx_warpID(void);
// Get the cycle count (vx_getCycles) and the instruction count (vx_getInst).
// NOTE(review): presumably hardware performance counters; confirm whether
// they are per-core or per-warp and whether they can wrap.
unsigned vx_getCycles(void);
unsigned vx_getInst(void);
// NOTE(review): undocumented here; presumably re-initializes the stack
// pointer. Confirm against the assembly implementation.
void vx_resetStack(void);

View File

@@ -49,6 +49,19 @@ vx_threadID:
csrr a0, 0x20 # read thread IDs
ret
# vx_getCycles: read CSR 0x26 into a0 and return.
# NOTE(review): presumably the cycle counter, going by the function name;
# confirm against the hardware CSR map.
.type vx_getCycles, @function
.global vx_getCycles
vx_getCycles:
csrr a0, 0x26 # read CSR 0x26 (cycle count, per the function name)
ret
# vx_getInst: read CSR 0x25 into a0 and return.
# NOTE(review): presumably the instruction counter, going by the function
# name; confirm against the hardware CSR map.
.type vx_getInst, @function
.global vx_getInst
vx_getInst:
csrr a0, 0x25 # read CSR 0x25 (instruction count, per the function name)
ret
.type vx_resetStack, @function
.global vx_resetStack

View File

@@ -148,7 +148,7 @@ int _fstat(int file, struct stat * st)
// Terminal check stub: ignores `file` and always returns 1, so every
// descriptor is reported as a terminal.
// NOTE(review): presumably the newlib _isatty syscall hook; confirm.
int _isatty (int file)
{
vx_print_str("Hello from _isatty\n");
// vx_print_str("Hello from _isatty\n");
return 1;
}
@@ -237,8 +237,8 @@ static int head_end = (int) 0x20000000;
void * _sbrk (int nbytes)
{
vx_print_str("Hello from _sbrk\n");
vx_printf("nbytes: ", nbytes);
// vx_print_str("Hello from _sbrk\n");
// vx_printf("nbytes: ", nbytes);
//if (nbytes < 0) //vx_print_str("nbytes less than zero\n");
// printf("nBytes: %d\n", nbytes);
@@ -248,7 +248,7 @@ void * _sbrk (int nbytes)
nbytes = nbytes * -1;
}
vx_printf("New nbytes: ", nbytes);
// vx_printf("New nbytes: ", nbytes);
// if (nbytes > 10240)
// {
@@ -260,9 +260,9 @@ void * _sbrk (int nbytes)
{
int base = heap_start;
heap_start += nbytes;
vx_print_str("_sbrk returning: ");
vx_print_hex((unsigned) base);
vx_print_str("\n");
// vx_print_str("_sbrk returning: ");
// vx_print_hex((unsigned) base);
// vx_print_str("\n");
return (void *) base;
}
else

View File

@@ -20,7 +20,7 @@ _start:
# Initialize SP
# la sp, __stack_top
la a1, vx_set_sp
li a0, 4
li a0, 32
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
jal vx_set_sp
li a0, 1
@@ -46,7 +46,7 @@ _start:
.type vx_set_sp, @function
.global vx_set_sp
vx_set_sp:
li a0, 4
li a0, 32
.word 0x0005006b # tmc 4
.option push

View File

@@ -7,6 +7,8 @@
extern "C" {
#endif
#define TOTAL_WARPS 2
#define TOTAL_THREADS 16
func_t global_function_pointer;
// void (func_t)(void *)
@@ -46,16 +48,39 @@ uint8_t * pocl_args;
uint8_t * pocl_ctx;
vx_pocl_workgroup_func pocl_pfn;
unsigned global_z;
unsigned global_y;
unsigned global_x;
void pocl_spawn_real()
{
vx_tmc(pocl_threads);
int x = vx_threadID();
int y = vx_warpID();
int base_x = vx_threadID();
int base_y = vx_warpID();
(pocl_pfn)( pocl_args, pocl_ctx, x, y, 0);
int local_x;
int local_y;
if (y != 0)
for (int iter_z = 0; iter_z < global_z; iter_z++)
{
for (int iter_x = 0; iter_x < global_x; iter_x++)
{
for (int iter_y = 0; iter_y < global_y; iter_y++)
{
local_x = (iter_x * TOTAL_THREADS) + base_x;
local_y = (iter_y * TOTAL_WARPS ) + base_y;
(pocl_pfn)( pocl_args, pocl_ctx, local_x, local_y, iter_z);
}
}
}
// (pocl_pfn)( pocl_args, pocl_ctx, x, y, 0);
if (base_y != 0)
{
vx_tmc(0);
}
@@ -66,24 +91,67 @@ void pocl_spawn_real()
void pocl_spawn(struct context_t * ctx, const void * pfn, void * arguments)
{
if (ctx->num_groups[2] > 1)
// printf("ctx->num_groups[0]: %d\n", ctx->num_groups[0]);
// printf("ctx->num_groups[1]: %d\n", ctx->num_groups[1]);
// printf("ctx->num_groups[2]: %d\n", ctx->num_groups[2]);
// printf("\n\n");
// printf("ctx->local_size[0]: %d\n", ctx->local_size[0]);
// printf("ctx->local_size[1]: %d\n", ctx->local_size[1]);
// printf("ctx->local_size[2]: %d\n", ctx->local_size[2]);
if (ctx->num_groups[0] > TOTAL_THREADS)
{
printf("ERROR: pocl_spawn doesn't support Z dimension yet!\n");
return;
pocl_threads = TOTAL_THREADS;
global_x = ctx->num_groups[0] / TOTAL_THREADS;
printf("pocl_threads: %d\n", pocl_threads);
// printf("global_x: %d\n", global_x);
}
else
{
pocl_threads = ctx->num_groups[0];
global_x = 1;
// printf("pocl_threads: %d\n", pocl_threads);
// printf("global_x: %d\n", global_x);
}
pocl_threads = ctx->num_groups[0];
global_z = ctx->num_groups[2];
pocl_pfn = (vx_pocl_workgroup_func) pfn;
pocl_ctx = (uint8_t *) ctx;
pocl_args = (uint8_t *) arguments;
if (ctx->num_groups[1] > 1)
{
vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real);
if (ctx->num_groups[1] > TOTAL_WARPS)
{
global_y = ctx->num_groups[1] / TOTAL_WARPS;
vx_wspawn(TOTAL_WARPS, (unsigned) &pocl_spawn_real);
// printf("global_y: %d\n", global_y);
// printf("Warps: %d\n", TOTAL_WARPS);
}
else
{
global_y = 1;
vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real);
// printf("global_y: %d\n", global_y);
// printf("Warps: %d\n", ctx->num_groups[1]);
}
}
unsigned starting_cycles = vx_getCycles();
unsigned starting_inst = vx_getInst();
pocl_spawn_real();
unsigned end_cycles = vx_getCycles();
unsigned end_inst = vx_getInst();
printf("pocl_spawn: Total Cycles: %d\n", (end_cycles - starting_cycles));
printf("pocl_spawn: Total Inst : %d\n", (end_inst - starting_inst ));
// int z;
// int y;
// int x;