From 6a1a506b6489db2b6a09e0e92cd9d26e31f5c056 Mon Sep 17 00:00:00 2001
From: Hansung Kim
Date: Mon, 12 Feb 2024 20:48:23 -0800
Subject: [PATCH] sgemm_wg: save args and input bin

---
Review notes (free-form text in this position is skipped when the patch is
applied):

What the patch does
 * Makefile: the default OPTS changes from -n256 to -n16.
 * common.h: KERNEL_ARG_DEV_MEM_ADDR moves from 0x7ffff000 to 0x7fff0000 and
   DEV_SMEM_START_ADDR (0xff000000) is introduced.
 * kernel.cpp: kernel_body() stores 2 * src at the mirrored index
   (num_points - 1 - task_id) of the buffer at DEV_SMEM_START_ADDR, calls
   vx_barrier(0, vx_num_warps()), then copies element task_id of that buffer
   to dst. main() spawns vx_num_warps() * vx_num_threads() tasks instead of
   arg->num_points.
 * main.cpp: logs both uploads, writes the kernel argument block to args.bin
   and the source buffer to input.bin, and derives the buffer sizes from the
   vectors' element size instead of sizeof(int32_t).

Changes made during review
 * The input.bin failure path reported "args.bin"; it now names input.bin.
 * dst_buf_size is computed from sizeof(ref_data[0]) rather than
   sizeof(src_data[0]), since it sizes ref_data.
 * Text lost in extraction was restored: <fstream> (needed by std::ofstream),
   static_cast<float> (the removed line was `(float)i`) and
   reinterpret_cast<const char *> (the pointer type ofstream::write takes).

Open items
 * NOTE(review): the three context #include lines in the first main.cpp hunk,
   the indentation of context/removed lines, and the From: address could not
   be recovered from the mangled text; they are best guesses. Regenerate the
   patch from the tree before applying it.
 * NOTE(review): kernel_body() still indexes with num_points while the task
   count is now warps * threads. num_points is uint32_t, so
   num_points - 1 - task_id wraps once task_id >= num_points. Presumably -n
   has to equal the per-core thread count; confirm.

 tests/regression/sgemm_wg/Makefile   |  2 +-
 tests/regression/sgemm_wg/common.h   |  3 ++-
 tests/regression/sgemm_wg/kernel.cpp | 18 ++++++++++----
 tests/regression/sgemm_wg/main.cpp   | 36 +++++++++++++++++++++++-----
 4 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/tests/regression/sgemm_wg/Makefile b/tests/regression/sgemm_wg/Makefile
index 6fbe650b..f57f6124 100644
--- a/tests/regression/sgemm_wg/Makefile
+++ b/tests/regression/sgemm_wg/Makefile
@@ -4,6 +4,6 @@ SRCS = main.cpp
 
 VX_SRCS = kernel.cpp
 
-OPTS ?= -n256
+OPTS ?= -n16
 
 include ../common.mk
diff --git a/tests/regression/sgemm_wg/common.h b/tests/regression/sgemm_wg/common.h
index c150a28f..b82ea12f 100644
--- a/tests/regression/sgemm_wg/common.h
+++ b/tests/regression/sgemm_wg/common.h
@@ -1,7 +1,8 @@
 #ifndef _COMMON_H_
 #define _COMMON_H_
 
-#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
+#define KERNEL_ARG_DEV_MEM_ADDR 0x7fff0000
+#define DEV_SMEM_START_ADDR 0xff000000
 
 typedef struct {
   uint32_t num_points;
diff --git a/tests/regression/sgemm_wg/kernel.cpp b/tests/regression/sgemm_wg/kernel.cpp
index da824888..574798f6 100644
--- a/tests/regression/sgemm_wg/kernel.cpp
+++ b/tests/regression/sgemm_wg/kernel.cpp
@@ -4,15 +4,23 @@
 #include "common.h"
 
 void kernel_body(int task_id, kernel_arg_t* __UNIFORM__ arg) {
-	uint32_t num_points = arg->num_points;
-	float* src_ptr = (float*)arg->src_addr;
-	float* dst_ptr = (float*)arg->dst_addr;
+  uint32_t num_points = arg->num_points;
+  float *src_ptr = (float *)arg->src_addr;
+  float *dst_ptr = (float *)arg->dst_addr;
 
-	dst_ptr[task_id] = 2 * src_ptr[task_id];
+  float *local_a = (float *)DEV_SMEM_START_ADDR;
+
+  local_a[num_points - 1 - task_id] = 2 * src_ptr[num_points - 1 - task_id];
+  // local_a[task_id] = 2 * src_ptr[task_id];
+
+  vx_barrier(0, vx_num_warps());
+
+  dst_ptr[task_id] = local_a[task_id];
 }
 
 int main() {
 	kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
-	vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg);
+	int threads_per_core = vx_num_warps() * vx_num_threads();
+	vx_spawn_tasks(threads_per_core, (vx_spawn_tasks_cb)kernel_body, arg);
 	return 0;
 }
diff --git a/tests/regression/sgemm_wg/main.cpp b/tests/regression/sgemm_wg/main.cpp
index b52b6e6a..f03a44c0 100644
--- a/tests/regression/sgemm_wg/main.cpp
+++ b/tests/regression/sgemm_wg/main.cpp
@@ -1,4 +1,5 @@
 #include <iostream>
+#include <fstream>
 #include <unistd.h>
 #include <string.h>
 #include <vector>
@@ -66,15 +67,15 @@ void gen_input_data(uint32_t len) {
   src_data.resize(len);
 
   for (uint32_t i = 0; i < len; ++i) {
-    src_data[i] = (float)i;
+    src_data[i] = static_cast<float>(i);
     std::cout << i << ": value=" << src_data[i] << std::endl;
   }
 }
 
-void gen_ref_data(uint32_t num_points) {
-  ref_data.resize(num_points);
+void gen_ref_data(uint32_t len) {
+  ref_data.resize(len);
 
-  for (uint32_t i = 0; i < num_points; ++i) {
+  for (uint32_t i = 0; i < len; ++i) {
     float ref_value = 2 * src_data.at(i);
     ref_data.at(i) = ref_value;
   }
@@ -141,8 +142,8 @@ int main(int argc, char *argv[]) {
   // generate reference data
   gen_ref_data(num_points);
 
-  uint32_t src_buf_size = src_data.size() * sizeof(int32_t);
-  uint32_t dst_buf_size = ref_data.size() * sizeof(int32_t);
+  uint32_t src_buf_size = src_data.size() * sizeof(src_data[0]);
+  uint32_t dst_buf_size = ref_data.size() * sizeof(ref_data[0]);
   std::cout << "number of points: " << num_points << std::endl;
   std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl;
 
@@ -176,6 +177,18 @@ int main(int argc, char *argv[]) {
     auto buf_ptr = staging_buf.data();
     memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
     RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t)));
+
+    std::cout << "uploading argument buffer to device, device mem address="
+              << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec
+              << sizeof(kernel_arg_t) << " bytes\n";
+    std::ofstream file("args.bin", std::ios::binary | std::ios::out);
+    if (!file) {
+      std::cerr << "error: failed to open args.bin for writing\n";
+      exit(EXIT_FAILURE);
+    }
+    file.write(reinterpret_cast<const char *>(staging_buf.data()),
+               sizeof(kernel_arg_t));
+    file.close();
   }
 
   // upload source buffer
@@ -184,6 +197,17 @@ int main(int argc, char *argv[]) {
     auto buf_ptr = staging_buf.data();
     memcpy(buf_ptr, src_data.data(), num_points * sizeof(float));
     RT_CHECK(vx_copy_to_dev(device, kernel_arg.src_addr, staging_buf.data(), src_buf_size));
+
+    std::cout << "uploading source buffer to device, device mem address="
+              << std::hex << kernel_arg.src_addr << ", size=" << std::dec
+              << src_buf_size << " bytes\n";
+    std::ofstream file("input.bin", std::ios::binary | std::ios::out);
+    if (!file) {
+      std::cerr << "error: failed to open input.bin for writing\n";
+      exit(EXIT_FAILURE);
+    }
+    file.write(reinterpret_cast<const char *>(buf_ptr), src_buf_size);
+    file.close();
   }
 
   // clear destination buffer