From f7e0d1e4913cc7748c5d487eb3f444de3daa0fe5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Mar 2020 22:51:54 -0400 Subject: [PATCH] missing runtime changes from OPAE --- runtime/io/vx_io.c | 2 +- runtime/io/vx_io.h | 4 +- runtime/io/vx_io.s | 6 +- runtime/vx_api/vx_api.c | 161 +++++++--------------------------------- runtime/vx_api/vx_api.h | 33 ++------ 5 files changed, 40 insertions(+), 166 deletions(-) diff --git a/runtime/io/vx_io.c b/runtime/io/vx_io.c index 79f5f8ca..8da5a8c0 100644 --- a/runtime/io/vx_io.c +++ b/runtime/io/vx_io.c @@ -26,7 +26,7 @@ void vx_print_hex(unsigned f) } -void vx_printf(char * c, unsigned f) +void vx_printf(const char * c, unsigned f) { vx_print_str(c); vx_print_hex(f); diff --git a/runtime/io/vx_io.h b/runtime/io/vx_io.h index faf73d6d..f4c69c57 100644 --- a/runtime/io/vx_io.h +++ b/runtime/io/vx_io.h @@ -9,9 +9,9 @@ extern "C" { static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"}; void vx_print_hex(unsigned); -void vx_printf(char *, unsigned); +void vx_printf(const char *, unsigned); -void vx_print_str(char *); +void vx_print_str(const char *); void vx_printc(unsigned, char c); diff --git a/runtime/io/vx_io.s b/runtime/io/vx_io.s index 806f2790..86d90ed0 100644 --- a/runtime/io/vx_io.s +++ b/runtime/io/vx_io.s @@ -22,9 +22,11 @@ be: .type vx_printc, @function .global vx_printc vx_printc: - la t0, 0x00010000 + la t0, print_addr sw a1, 0(t0) ret - +.section .data +print_addr: + .word 0x00010000 diff --git a/runtime/vx_api/vx_api.c b/runtime/vx_api/vx_api.c index aeba2c10..98fa136a 100644 --- a/runtime/vx_api/vx_api.c +++ b/runtime/vx_api/vx_api.c @@ -1,182 +1,75 @@ -#pragma once - #include "../intrinsics/vx_intrinsics.h" #include "vx_api.h" #include -#include "../config.h" - #ifdef __cplusplus extern "C" { #endif func_t global_function_pointer; -// void (func_t)(void *) - -void * global_argument_struct; - +void * global_argument_struct; unsigned global_num_threads; -void setup_call() -{ + +void setup_call() { vx_tmc(global_num_threads); global_function_pointer(global_argument_struct); unsigned wid = vx_warpID(); - if (wid != 0) - { + if (wid != 0) { vx_tmc(0); // Halt Warp Execution - } - else - { + } else { vx_tmc(1); // Only activate one thread } } -void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) -{ +void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) { global_function_pointer = func_ptr; global_argument_struct = args; global_num_threads = numThreads; vx_wspawn(numWarps, (unsigned) setup_call); setup_call(); - } - unsigned pocl_threads; -uint8_t * pocl_args; -uint8_t * pocl_ctx; +struct context_t * pocl_ctx; vx_pocl_workgroup_func pocl_pfn; +const void * pocl_args; -unsigned global_z; -unsigned global_y; -unsigned global_x; - - -void pocl_spawn_real() -{ +void pocl_spawn_runonce() { + vx_tmc(pocl_threads); - int base_x = vx_threadID(); - int base_y = vx_warpID(); - int local_x; - int local_y; + int x = vx_threadID(); + int y = vx_warpID(); - for (int iter_z = 0; iter_z < global_z; iter_z++) - { - for (int iter_x = 0; iter_x < global_x; iter_x++) - { - for (int iter_y = 0; iter_y < global_y; iter_y++) - { + (pocl_pfn)(pocl_args, pocl_ctx, x, y, 0); - local_x = (iter_x * TOTAL_THREADS) + base_x; - local_y = (iter_y * TOTAL_WARPS ) + base_y; - - (pocl_pfn)( pocl_args, pocl_ctx, local_x, local_y, iter_z); - - } - } - } - - // (pocl_pfn)( pocl_args, pocl_ctx, x, y, 0); - - if (base_y != 0) - { + if (y != 0) { vx_tmc(0); } + vx_tmc(1); } - -void pocl_spawn(struct context_t * ctx, const void * pfn, void * arguments) -{ - - - // printf("ctx->num_groups[0]: %d\n", ctx->num_groups[0]); - // printf("ctx->num_groups[1]: %d\n", ctx->num_groups[1]); - // printf("ctx->num_groups[2]: %d\n", ctx->num_groups[2]); - - // printf("\n\n"); - - // printf("ctx->local_size[0]: %d\n", ctx->local_size[0]); - // printf("ctx->local_size[1]: %d\n", ctx->local_size[1]); - // printf("ctx->local_size[2]: %d\n", ctx->local_size[2]); - if (ctx->num_groups[0] > TOTAL_THREADS) - { - pocl_threads = TOTAL_THREADS; - global_x = ctx->num_groups[0] / TOTAL_THREADS; - // printf("pocl_threads: %d\n", pocl_threads); - // printf("global_x: %d\n", global_x); - } - else - { - pocl_threads = ctx->num_groups[0]; - global_x = 1; - // printf("pocl_threads: %d\n", pocl_threads); - // printf("global_x: %d\n", global_x); +void pocl_spawn(struct context_t * ctx, vx_pocl_workgroup_func pfn, const void * args) { + if (ctx->num_groups[2] > 1) { + printf("ERROR: pocl_spawn doesn't support Z dimension yet!\n"); + return; } + pocl_threads = ctx->num_groups[0]; + pocl_ctx = ctx; + pocl_pfn = pfn; + pocl_args = args; - global_z = ctx->num_groups[2]; - pocl_pfn = (vx_pocl_workgroup_func) pfn; - pocl_ctx = (uint8_t *) ctx; - pocl_args = (uint8_t *) arguments; - - if (ctx->num_groups[1] > 1) - { - if (ctx->num_groups[1] > TOTAL_WARPS) - { - global_y = ctx->num_groups[1] / TOTAL_WARPS; - vx_wspawn(TOTAL_WARPS, (unsigned) &pocl_spawn_real); - // printf("global_y: %d\n", global_y); - // printf("Warps: %d\n", TOTAL_WARPS); - } - else - { - global_y = 1; - vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real); - // printf("global_y: %d\n", global_y); - // printf("Warps: %d\n", ctx->num_groups[1]); - } + if (ctx->num_groups[1] > 1) { + vx_wspawn(ctx->num_groups[1], (unsigned)&pocl_spawn_runonce); } - unsigned starting_cycles = vx_getCycles(); - unsigned starting_inst = vx_getInst(); - - pocl_spawn_real(); - - unsigned end_cycles = vx_getCycles(); - unsigned end_inst = vx_getInst(); - - - unsigned total_cycles = (unsigned) (end_cycles - starting_cycles); - // float total_inst = (float) (end_inst - starting_inst ); - - // float ipc = total_inst/total_cycles; - - printf("%d\n", total_cycles); - - vx_tmc(0); - - // printf("pocl_spawn: Total Cycles: %d\n", ); - // printf("pocl_spawn: Total Inst : %d\n", (end_inst - starting_inst )); - - // int z; - // int y; - // int x; - // for (z = 0; z < ctx->num_groups[2]; ++z) - // { - // for (y = 0; y < ctx->num_groups[1]; ++y) - // { - // for (x = 0; x < ctx->num_groups[0]; ++x) - // { - // (use_pfn)((uint8_t *)arguments, (uint8_t *)ctx, x, y, z); - // } - // } - // } + pocl_spawn_runonce(); } #ifdef __cplusplus } -#endif +#endif \ No newline at end of file diff --git a/runtime/vx_api/vx_api.h b/runtime/vx_api/vx_api.h index 6737fac1..a4ffcb44 100644 --- a/runtime/vx_api/vx_api.h +++ b/runtime/vx_api/vx_api.h @@ -1,6 +1,5 @@ #ifndef VX_API_ - #define VX_API_ #include @@ -14,47 +13,27 @@ typedef void (*func_t)(void *); void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args); -// struct context_t { - -// unsigned num_groups[3]; // use {2, 1, 1} for vecadd - -// unsigned global_offset[3]; // use {0, 0, 0} for vecadd - -// unsigned local_size[3]; // use {2, 1, 1} for vecadd - -// unsigned char *printf_buffer; // zero for now - -// unsigned *printf_buffer_position; // initialized to zero - -// unsigned printf_buffer_capacity; // zero for now - -// unsigned work_dim; // use ‘1’ for vecadd - -// }; - struct context_t { uint32_t num_groups[3]; uint32_t global_offset[3]; - uint32_t local_size[3]; - uint8_t *printf_buffer; + uint32_t local_size[3]; + char * printf_buffer; uint32_t *printf_buffer_position; - uint32_t printf_buffer_capacity; + uint32_t printf_buffer_capacity; uint32_t work_dim; }; - /* The default work-group function prototype as generated by Workgroup.cc. */ -typedef void (*vx_pocl_workgroup_func) (uint8_t * /* args */, - uint8_t * /* pocl_context */, +typedef void (*vx_pocl_workgroup_func) (const void * /* args */, + const struct context_t * /* context */, uint32_t /* group_x */, uint32_t /* group_y */, uint32_t /* group_z */); -void pocl_spawn(struct context_t * ctx, const void * pfn, void * arguments); +void pocl_spawn(struct context_t * ctx, vx_pocl_workgroup_func pfn, const void * args); #ifdef __cplusplus } #endif - #endif \ No newline at end of file