added debug print states or rtl
This commit is contained in:
@@ -46,7 +46,7 @@ run-ase: $(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
|
||||
run-rtlsim: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 4
|
||||
|
||||
run-simx: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
|
||||
@@ -4,8 +4,6 @@
|
||||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
struct kernel_arg_t {
|
||||
uint32_t num_warps;
|
||||
uint32_t num_threads;
|
||||
uint32_t stride;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
#define RT_CHECK(_expr) \
|
||||
do { \
|
||||
int _ret = _expr; \
|
||||
int _ret = _expr; \
|
||||
if (0 == _ret) \
|
||||
break; \
|
||||
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
|
||||
@@ -15,7 +15,7 @@
|
||||
} while (false)
|
||||
|
||||
const char* program_file = "kernel.bin";
|
||||
uint32_t data_stride = 0xffffffff;
|
||||
uint32_t data_stride = 0;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Driver Test." << std::endl;
|
||||
@@ -111,19 +111,22 @@ int main(int argc, char *argv[]) {
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
uint32_t block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
|
||||
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
|
||||
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
|
||||
|
||||
if (data_stride == 0xffffffff) {
|
||||
data_stride = block_size / sizeof(uint32_t);
|
||||
if (data_stride == 0) {
|
||||
data_stride = 1;
|
||||
}
|
||||
|
||||
uint32_t num_points = max_cores * max_warps * max_threads * data_stride;
|
||||
uint32_t buf_size = num_points * sizeof(uint32_t);
|
||||
kernel_arg.stride = data_stride;
|
||||
|
||||
uint32_t num_points = max_cores * max_warps * max_threads;
|
||||
uint32_t buf_size = num_points * data_stride * sizeof(uint32_t);
|
||||
|
||||
std::cout << "number of workitems: " << num_points << std::endl;
|
||||
std::cout << "workitem size: " << data_stride * sizeof(uint32_t) << " bytes" << std::endl;
|
||||
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
@@ -167,10 +170,6 @@ int main(int argc, char *argv[]) {
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
kernel_arg.num_warps = max_warps;
|
||||
kernel_arg.num_threads = max_threads;
|
||||
kernel_arg.stride = data_stride;
|
||||
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
|
||||
Binary file not shown.
@@ -6,27 +6,24 @@
|
||||
|
||||
void kernel_body(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
int* x = (int*)_arg->src0_ptr;
|
||||
int* y = (int*)_arg->src1_ptr;
|
||||
int* z = (int*)_arg->dst_ptr;
|
||||
int* src0_ptr = (int*)_arg->src0_ptr;
|
||||
int* src1_ptr = (int*)_arg->src1_ptr;
|
||||
int* dst_ptr = (int*)_arg->dst_ptr;
|
||||
|
||||
unsigned wid = vx_warp_gid();
|
||||
unsigned tid = vx_thread_id();
|
||||
unsigned offset = vx_thread_gid() * _arg->stride;
|
||||
|
||||
unsigned i = ((wid * _arg->num_threads) + tid) * _arg->stride;
|
||||
|
||||
for (unsigned j = 0; j < _arg->stride; ++j) {
|
||||
z[i+j] = x[i+j] + y[i+j];
|
||||
for (unsigned i = 0; i < _arg->stride; ++i) {
|
||||
dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i];
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
/*printf("num_warps=%d\n", arg->num_warps);
|
||||
printf("num_threads=%d\n", arg->num_threads);
|
||||
printf("stride=%d\n", arg->stride);
|
||||
printf("src0_ptr=0x%x\n", arg->src0_ptr);
|
||||
printf("src1_ptr=0x%x\n", arg->src1_ptr);
|
||||
printf("dst_ptr=0x%x\n", arg->dst_ptr);*/
|
||||
vx_spawn_warps(arg->num_warps, arg->num_threads, kernel_body, arg);
|
||||
/*printf("stride=%d\n", arg->stride);
|
||||
printf("src0_ptr=0x%src0\n", arg->src0_ptr);
|
||||
printf("src1_ptr=0x%src0\n", arg->src1_ptr);
|
||||
printf("dst_ptr=0x%src0\n", arg->dst_ptr);*/
|
||||
int num_warps = vx_num_warps();
|
||||
int num_threads = vx_num_threads();
|
||||
vx_spawn_warps(num_warps, num_threads, kernel_body, arg);
|
||||
}
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user