project directories reorganization
This commit is contained in:
@@ -1,67 +0,0 @@
|
||||
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
VX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
|
||||
VX_CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
||||
|
||||
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
|
||||
|
||||
VX_SRCS = kernel.c
|
||||
|
||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../../sw/include
|
||||
|
||||
LDFLAGS +=
|
||||
|
||||
PROJECT = basic
|
||||
|
||||
SRCS = basic.cpp
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
kernel.dump: kernel.elf
|
||||
$(VX_DMP) -D kernel.elf > kernel.dump
|
||||
|
||||
kernel.hex: kernel.elf
|
||||
$(VX_CPY) -O ihex kernel.elf kernel.hex
|
||||
|
||||
kernel.bin: kernel.elf
|
||||
$(VX_CPY) -O binary kernel.elf kernel.bin
|
||||
|
||||
kernel.elf: $(SRCS)
|
||||
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/dummy -lvortex -o $@
|
||||
|
||||
run-fpga: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-rtlsim: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../sw/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o .depend
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
||||
Binary file not shown.
@@ -1,233 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <vortex.h>
|
||||
|
||||
int test = -1;
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "t:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 't': {
|
||||
test = atoi(optarg);
|
||||
} break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
std::cout << "Test." << std::endl;
|
||||
std::cout << "Usage: [-t testno][-h: help]" << std::endl;
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t shuffle(int i, uint64_t value) {
|
||||
return (value << i) | (value & ((1 << i)-1));;
|
||||
}
|
||||
|
||||
int run_memcopy_test(vx_buffer_h sbuf,
|
||||
vx_buffer_h dbuf,
|
||||
uint32_t address,
|
||||
uint64_t value,
|
||||
int num_blocks) {
|
||||
int ret;
|
||||
int errors = 0;
|
||||
|
||||
// write sbuf data
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, value);
|
||||
}
|
||||
|
||||
// write buffer to local memory
|
||||
std::cout << "write buffer to local memory" << std::endl;
|
||||
ret = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// read buffer from local memory
|
||||
std::cout << "read buffer from local memory" << std::endl;
|
||||
ret = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
|
||||
auto ref = shuffle(i, value);
|
||||
if (curr != ref) {
|
||||
std::cout << "error @ " << std::hex << (address + 64 * i)
|
||||
<< ": actual " << curr << ", expected " << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
||||
if (errors != 0) {
|
||||
std::cout << "Found " << errors << " errors!" << std::endl;
|
||||
std::cout << "FAILED!" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int run_kernel_test(vx_device_h device,
|
||||
vx_buffer_h sbuf,
|
||||
vx_buffer_h dbuf,
|
||||
const char* program) {
|
||||
int ret;
|
||||
int errors = 0;
|
||||
|
||||
uint64_t seed = 0x0badf00d40ff40ff;
|
||||
int num_blocks = 4;
|
||||
|
||||
unsigned src_dev_addr = 0x10000000;
|
||||
unsigned dest_dev_addr = 0x20000000;
|
||||
|
||||
// write sbuf data
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed);
|
||||
}
|
||||
|
||||
// write buffer to local memory
|
||||
std::cout << "write buffer to local memory" << std::endl;
|
||||
ret = vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
ret = vx_upload_kernel_file(device, program);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
ret = vx_start(device);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
ret = vx_ready_wait(device, -1);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// flush the caches
|
||||
std::cout << "flush the caches" << std::endl;
|
||||
ret = vx_flush_caches(device, dest_dev_addr, 64 * num_blocks);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// read buffer from local memory
|
||||
std::cout << "read buffer from local memory" << std::endl;
|
||||
ret = vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
|
||||
auto ref = shuffle(i, seed);
|
||||
if (curr != ref) {
|
||||
std::cout << "error @ " << std::hex << (dest_dev_addr + 64 * i)
|
||||
<< ": actual " << curr << ", expected " << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
||||
if (errors != 0) {
|
||||
std::cout << "Found " << errors << " errors!" << std::endl;
|
||||
std::cout << "FAILED!" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h sbuf = nullptr;
|
||||
vx_buffer_h dbuf = nullptr;
|
||||
|
||||
void cleanup() {
|
||||
if (sbuf) {
|
||||
vx_buf_release(sbuf);
|
||||
}
|
||||
if (dbuf) {
|
||||
vx_buf_release(dbuf);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int ret;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
vx_device_h device;
|
||||
ret = vx_dev_open(&device);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// create source buffer
|
||||
std::cout << "create source buffer" << std::endl;
|
||||
ret = vx_alloc_shared_mem(device, 4096, &sbuf);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// create destination buffer
|
||||
std::cout << "create destination buffer" << std::endl;
|
||||
ret = vx_alloc_shared_mem(device, 4096, &dbuf);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// run tests
|
||||
if (0 == test || -1 == test) {
|
||||
std::cout << "run memcopy test" << std::endl;
|
||||
|
||||
ret = run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (1 == test || -1 == test) {
|
||||
std::cout << "run kernel test" << std::endl;
|
||||
ret = run_kernel_test(device, sbuf, dbuf, "kernel.bin");
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
std::cout << "Test PASSED" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
Binary file not shown.
@@ -1,9 +0,0 @@
|
||||
#include <stdint.h>
|
||||
|
||||
void main() {
|
||||
int64_t* x = (int64_t*)0x10000000;
|
||||
int64_t* y = (int64_t*)0x20000000;
|
||||
for (int i = 0; i < 8 * 4; ++i) {
|
||||
y[i] = x[i];
|
||||
}
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
VX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
|
||||
VX_CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
||||
|
||||
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
|
||||
|
||||
VX_SRCS = kernel.c
|
||||
|
||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../../sw/include
|
||||
|
||||
PROJECT = demo
|
||||
|
||||
SRCS = demo.cpp
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
kernel.dump: kernel.elf
|
||||
$(VX_DMP) -D kernel.elf > kernel.dump
|
||||
|
||||
kernel.hex: kernel.elf
|
||||
$(VX_CPY) -O ihex kernel.elf kernel.hex
|
||||
|
||||
kernel.bin: kernel.elf
|
||||
$(VX_CPY) -O binary kernel.elf kernel.bin
|
||||
|
||||
kernel.elf: $(SRCS)
|
||||
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/stub -lvortex -o $@
|
||||
|
||||
run-fpga: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
|
||||
run-ase: $(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
|
||||
run-rtlsim: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
|
||||
run-simx: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../sw/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o *.dump .depend
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
||||
@@ -1,15 +0,0 @@
|
||||
#ifndef _COMMON_H_
|
||||
#define _COMMON_H_
|
||||
|
||||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7fffff00
|
||||
|
||||
struct kernel_arg_t {
|
||||
uint32_t num_warps;
|
||||
uint32_t num_threads;
|
||||
uint32_t stride;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
uint32_t dst_ptr;
|
||||
};
|
||||
|
||||
#endif
|
||||
Binary file not shown.
@@ -1,241 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vortex.h>
|
||||
#include "common.h"
|
||||
|
||||
const char* program_file = "kernel.bin";
|
||||
uint32_t data_stride = 0xffffffff;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Driver Test." << std::endl;
|
||||
std::cout << "Usage: [-f: program] [-n stride] [-h: help]" << std::endl;
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:f:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
data_stride = atoi(optarg);
|
||||
break;
|
||||
case 'f':
|
||||
program_file = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (nullptr == program_file) {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
int run_test(vx_device_h device,
|
||||
vx_buffer_h buffer,
|
||||
const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t num_points) {
|
||||
int ret;
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
ret = vx_start(device);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
ret = vx_ready_wait(device, -1);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// flush the destination buffer caches
|
||||
std::cout << "flush the destination buffer caches" << std::endl;
|
||||
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
{
|
||||
int errors = 0;
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
int ref = i + i;
|
||||
int cur = buf_ptr[i];
|
||||
if (cur != ref) {
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
std::cout << "Found " << errors << " errors!" << std::endl;
|
||||
std::cout << "FAILED!" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
|
||||
void cleanup() {
|
||||
if (buffer) {
|
||||
vx_buf_release(buffer);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int ret;
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
uint32_t block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
|
||||
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
|
||||
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
|
||||
|
||||
if (data_stride == 0xffffffff) {
|
||||
data_stride = block_size / sizeof(uint32_t);
|
||||
}
|
||||
|
||||
uint32_t num_points = max_cores * max_warps * max_threads * data_stride;
|
||||
uint32_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
||||
std::cout << "number of workitems: " << num_points << std::endl;
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
ret = vx_dev_open(&device);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
ret = vx_upload_kernel_file(device, program_file);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
kernel_arg.src0_ptr = value;
|
||||
|
||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
kernel_arg.src1_ptr = value;
|
||||
|
||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
ret = vx_alloc_shared_mem(device, alloc_size, &buffer);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// populate source buffer values
|
||||
std::cout << "populate source buffer values" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
// upload source buffers
|
||||
std::cout << "upload source buffers" << std::endl;
|
||||
|
||||
ret = vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
kernel_arg.num_warps = max_warps;
|
||||
kernel_arg.num_threads = max_threads;
|
||||
kernel_arg.stride = data_stride;
|
||||
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
std::cout << "PASSED!" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
Binary file not shown.
@@ -1,32 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "intrinsics/vx_intrinsics.h"
|
||||
#include "vx_api/vx_api.h"
|
||||
#include "common.h"
|
||||
|
||||
void kernel_body(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
int* x = (int*)_arg->src0_ptr;
|
||||
int* y = (int*)_arg->src1_ptr;
|
||||
int* z = (int*)_arg->dst_ptr;
|
||||
|
||||
unsigned wNo = vx_warpNum();
|
||||
unsigned tid = vx_threadID();
|
||||
|
||||
unsigned i = ((wNo * _arg->num_threads) + tid) * _arg->stride;
|
||||
|
||||
for (unsigned j = 0; j < _arg->stride; ++j) {
|
||||
z[i+j] = x[i+j] + y[i+j];
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
/*printf("num_warps=%d\n", arg->num_warps);
|
||||
printf("num_threads=%d\n", arg->num_threads);
|
||||
printf("stride=%d\n", arg->stride);
|
||||
printf("src0_ptr=0x%x\n", arg->src0_ptr);
|
||||
printf("src1_ptr=0x%x\n", arg->src1_ptr);
|
||||
printf("dst_ptr=0x%x\n", arg->dst_ptr);*/
|
||||
vx_spawnWarps(arg->num_warps, arg->num_threads, kernel_body, arg);
|
||||
}
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user