diff --git a/kernel/Makefile b/kernel/Makefile index 575707f8..eac133c2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -34,7 +34,7 @@ DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections -CFLAGS += -I./include -I../hw +CFLAGS += -I./include CFLAGS += -DXLEN_$(XLEN) PROJECT = libvortexrt diff --git a/kernel/include/VX_config.h b/kernel/include/VX_config.h new file mode 100644 index 00000000..e7a6b559 --- /dev/null +++ b/kernel/include/VX_config.h @@ -0,0 +1,685 @@ +// auto-generated by gen_config.py. DO NOT EDIT +// Generated at 2024-05-07 13:55:58.398687 + +// Translated from ./rtl/VX_config.vh: + +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef VX_CONFIG_VH +#define VX_CONFIG_VH + +#ifndef MIN +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) +#endif + +#ifndef MAX +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#endif + +#ifndef CLAMP +#define CLAMP(x, lo, hi) (((x) > (hi)) ? (hi) : (((x) < (lo)) ? (lo) : (x))) +#endif + +#ifndef UP +#define UP(x) (((x) != 0) ? (x) : 1) +#endif + +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EXT_M_DISABLE +#define EXT_M_ENABLE +#endif + +#ifndef EXT_F_DISABLE +#define EXT_F_ENABLE +#endif + +#ifndef XLEN_32 +#ifndef XLEN_64 +#define XLEN_32 +#endif +#endif + +#ifdef XLEN_64 +#define XLEN 64 +#endif + +#ifdef XLEN_32 +#define XLEN 32 +#endif + +#ifdef EXT_D_ENABLE +#define FLEN_64 +#else +#define FLEN_32 +#endif + +#ifdef FLEN_64 +#define FLEN 64 +#endif + +#ifdef FLEN_32 +#define FLEN 32 +#endif + +#ifdef XLEN_64 +#ifdef FLEN_32 + #define FPU_RV64F +#endif +#endif + +#ifndef NUM_CLUSTERS +#define NUM_CLUSTERS 1 +#endif + +#ifndef NUM_CORES +#define NUM_CORES 8 +#endif + +#ifndef NUM_WARPS +#define NUM_WARPS 8 +#endif + +#ifndef NUM_THREADS +#define NUM_THREADS 8 +#endif + +#ifndef NUM_BARRIERS +#define NUM_BARRIERS 8 +#endif + +#ifndef SOCKET_SIZE +#define SOCKET_SIZE MIN(4, NUM_CORES) +#endif +#define NUM_SOCKETS UP(NUM_CORES / SOCKET_SIZE) + +#ifdef L2_ENABLE + #define L2_ENABLED 1 +#else + #define L2_ENABLED 0 +#endif + +#ifdef L3_ENABLE + #define L3_ENABLED 1 +#else + #define L3_ENABLED 0 +#endif + +#ifdef L1_DISABLE + #define ICACHE_DISABLE + #define DCACHE_DISABLE +#endif + +#ifndef MEM_BLOCK_SIZE +#define MEM_BLOCK_SIZE 64 +#endif + +#ifndef MEM_ADDR_WIDTH +#ifdef XLEN_64 +#define MEM_ADDR_WIDTH 48 +#else +#define MEM_ADDR_WIDTH 32 +#endif +#endif + +#ifndef L1_LINE_SIZE +#ifdef L1_DISABLE +#define L1_LINE_SIZE ((L2_ENABLED || L3_ENABLED) ? 4 : MEM_BLOCK_SIZE) +#else +#define L1_LINE_SIZE ((L2_ENABLED || L3_ENABLED) ? 16 : MEM_BLOCK_SIZE) +#endif +#endif + +#ifdef L2_ENABLE +#define L2_LINE_SIZE MEM_BLOCK_SIZE +#else +#define L2_LINE_SIZE L1_LINE_SIZE +#endif + +#ifdef L3_ENABLE +#define L3_LINE_SIZE MEM_BLOCK_SIZE +#else +#define L3_LINE_SIZE L2_LINE_SIZE +#endif + +#ifdef XLEN_64 + +#ifndef STARTUP_ADDR +#define STARTUP_ADDR 0x180000000 +#endif + +#ifndef STACK_BASE_ADDR +#define STACK_BASE_ADDR 0x1FF000000 +#endif + +#else + +#ifndef STARTUP_ADDR +#define STARTUP_ADDR 0x80000000 +#endif + +#ifndef STACK_BASE_ADDR +#define STACK_BASE_ADDR 0xFF000000 +#endif + +#endif + +#ifndef SMEM_BASE_ADDR +#define SMEM_BASE_ADDR STACK_BASE_ADDR +#endif + +#ifndef SMEM_LOG_SIZE +#define SMEM_LOG_SIZE 19 +#endif + +#ifndef IO_BASE_ADDR +#define IO_BASE_ADDR (SMEM_BASE_ADDR + (1 << SMEM_LOG_SIZE)) +#endif + +#ifndef IO_COUT_ADDR +#define IO_COUT_ADDR IO_BASE_ADDR +#endif +#define IO_COUT_SIZE MEM_BLOCK_SIZE + +#ifndef IO_CSR_ADDR +#define IO_CSR_ADDR (IO_COUT_ADDR + IO_COUT_SIZE) +#endif +#define IO_CSR_SIZE (4 * 64 * NUM_CORES * NUM_CLUSTERS) + +#ifndef STACK_LOG2_SIZE +#define STACK_LOG2_SIZE 13 +#endif +#define STACK_SIZE (1 << STACK_LOG2_SIZE) + +#define RESET_DELAY 8 + +#ifndef STALL_TIMEOUT +#define STALL_TIMEOUT (100000 * (1 ** (L2_ENABLED + L3_ENABLED))) +#endif + +#ifndef SV_DPI +#define DPI_DISABLE +#endif + +#ifndef FPU_FPNEW +#ifndef FPU_DSP +#ifndef FPU_DPI +#ifndef SYNTHESIS +#ifndef DPI_DISABLE +#define FPU_DPI +#else +#define FPU_DSP +#endif +#else +#define FPU_DSP +#endif +#endif +#endif +#endif + +#ifndef SYNTHESIS +#ifndef DPI_DISABLE +#define IMUL_DPI +#define IDIV_DPI +#endif +#endif + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 3 +#endif + +// Pipeline Configuration ///////////////////////////////////////////////////// + +// Issue width +#ifndef ISSUE_WIDTH +#define ISSUE_WIDTH NUM_WARPS +#endif + +// Number of ALU units +#ifndef NUM_ALU_LANES +#define NUM_ALU_LANES NUM_THREADS +#endif +#ifndef NUM_ALU_BLOCKS +#define NUM_ALU_BLOCKS 4 +#endif + +// Number of FPU units +#ifndef NUM_FPU_LANES +#define NUM_FPU_LANES NUM_THREADS +#endif +#ifndef NUM_FPU_BLOCKS +#define NUM_FPU_BLOCKS 2 +#endif + +// Number of LSU units +#ifndef NUM_LSU_LANES +#define NUM_LSU_LANES NUM_THREADS +#endif + +// Number of SFU units +#ifndef NUM_SFU_LANES +#define NUM_SFU_LANES MIN(NUM_THREADS, 4) +#endif + +// Size of Instruction Buffer +#ifndef IBUF_SIZE +#define IBUF_SIZE (4 * ISSUE_WIDTH) +#endif + +// Size of LSU Request Queue +#ifndef LSUQ_SIZE +#define LSUQ_SIZE (4 * NUM_WARPS * (NUM_THREADS / NUM_LSU_LANES)) +#endif + +// LSU Duplicate Address Check +#ifndef LSU_DUP_DISABLE +#define LSU_DUP_ENABLE +#endif +#ifdef LSU_DUP_ENABLE +#define LSU_DUP_ENABLED 1 +#else +#define LSU_DUP_ENABLED 0 +#endif + +#ifdef GBAR_ENABLE +#define GBAR_ENABLED 1 +#else +#define GBAR_ENABLED 0 +#endif + +#ifndef LATENCY_IMUL +#ifdef VIVADO +#define LATENCY_IMUL 4 +#endif +#ifdef QUARTUS +#define LATENCY_IMUL 3 +#endif +#ifndef LATENCY_IMUL +#define LATENCY_IMUL 4 +#endif +#endif + +// Floating-Point Units /////////////////////////////////////////////////////// + +// Size of FPU Request Queue +#ifndef FPUQ_SIZE +#define FPUQ_SIZE (2 * (NUM_THREADS / NUM_FPU_LANES)) +#endif + +// FNCP Latency +#ifndef LATENCY_FNCP +#define LATENCY_FNCP 2 +#endif + +// FMA Latency +#ifndef LATENCY_FMA +#ifdef FPU_DPI +#define LATENCY_FMA 4 +#endif +#ifdef FPU_FPNEW +#define LATENCY_FMA 4 +#endif +#ifdef FPU_DSP +#ifdef QUARTUS +#define LATENCY_FMA 4 +#endif +#ifdef VIVADO +#define LATENCY_FMA 16 +#endif +#ifndef LATENCY_FMA +#define LATENCY_FMA 4 +#endif +#endif +#endif + +// FDIV Latency +#ifndef LATENCY_FDIV +#ifdef FPU_DPI +#define LATENCY_FDIV 15 +#endif +#ifdef FPU_FPNEW +#define LATENCY_FDIV 16 +#endif +#ifdef FPU_DSP +#ifdef QUARTUS +#define LATENCY_FDIV 15 +#endif +#ifdef VIVADO +#define LATENCY_FDIV 28 +#endif +#ifndef LATENCY_FDIV +#define LATENCY_FDIV 16 +#endif +#endif +#endif + +// FSQRT Latency +#ifndef LATENCY_FSQRT +#ifdef FPU_DPI +#define LATENCY_FSQRT 10 +#endif +#ifdef FPU_FPNEW +#define LATENCY_FSQRT 16 +#endif +#ifdef FPU_DSP +#ifdef QUARTUS +#define LATENCY_FSQRT 10 +#endif +#ifdef VIVADO +#define LATENCY_FSQRT 28 +#endif +#ifndef LATENCY_FSQRT +#define LATENCY_FSQRT 16 +#endif +#endif +#endif + +// FCVT Latency +#ifndef LATENCY_FCVT +#define LATENCY_FCVT 5 +#endif + +// Icache Configurable Knobs ////////////////////////////////////////////////// + +// Cache Enable +#ifndef ICACHE_DISABLE +#define ICACHE_ENABLE +#endif +#ifdef ICACHE_ENABLE + #define ICACHE_ENABLED 1 +#else + #define ICACHE_ENABLED 0 + #define NUM_ICACHES 0 +#endif + +// Number of Cache Units +#ifndef NUM_ICACHES +#define NUM_ICACHES UP(SOCKET_SIZE / 4) +#endif + +// Cache Size +#ifndef ICACHE_SIZE +#define ICACHE_SIZE 16384 +#endif + +// Core Response Queue Size +#ifndef ICACHE_CRSQ_SIZE +#define ICACHE_CRSQ_SIZE 2 +#endif + +// Miss Handling Register Size +#ifndef ICACHE_MSHR_SIZE +#define ICACHE_MSHR_SIZE 16 +#endif + +// Memory Request Queue Size +#ifndef ICACHE_MREQ_SIZE +#define ICACHE_MREQ_SIZE 4 +#endif + +// Memory Response Queue Size +#ifndef ICACHE_MRSQ_SIZE +#define ICACHE_MRSQ_SIZE 0 +#endif + +// Number of Associative Ways +#ifndef ICACHE_NUM_WAYS +#define ICACHE_NUM_WAYS 1 +#endif + +// Dcache Configurable Knobs ////////////////////////////////////////////////// + +// Cache Enable +#ifndef DCACHE_DISABLE +#define DCACHE_ENABLE +#endif +#ifdef DCACHE_ENABLE + #define DCACHE_ENABLED 1 +#else + #define DCACHE_ENABLED 0 + #define NUM_DCACHES 0 + #define DCACHE_NUM_BANKS 1 +#endif + +// Number of Cache Units +#ifndef NUM_DCACHES +#define NUM_DCACHES UP(SOCKET_SIZE / 4) +#endif + +// Cache Size +#ifndef DCACHE_SIZE +#define DCACHE_SIZE 16384 +#endif + +// Number of Banks +#ifndef DCACHE_NUM_BANKS +#define DCACHE_NUM_BANKS NUM_LSU_LANES +#endif + +// Core Response Queue Size +#ifndef DCACHE_CRSQ_SIZE +#define DCACHE_CRSQ_SIZE 2 +#endif + +// Miss Handling Register Size +#ifndef DCACHE_MSHR_SIZE +#define DCACHE_MSHR_SIZE 8 +#endif + +// Memory Request Queue Size +#ifndef DCACHE_MREQ_SIZE +#define DCACHE_MREQ_SIZE 4 +#endif + +// Memory Response Queue Size +#ifndef DCACHE_MRSQ_SIZE +#define DCACHE_MRSQ_SIZE 0 +#endif + +// Number of Associative Ways +#ifndef DCACHE_NUM_WAYS +#define DCACHE_NUM_WAYS 1 +#endif + +// SM Configurable Knobs ////////////////////////////////////////////////////// + +#ifndef SM_DISABLE +#define SM_ENABLE +#endif + +#ifdef SM_ENABLE + #define SM_ENABLED 1 +#else + #define SM_ENABLED 0 + #define SMEM_NUM_BANKS 1 +#endif + +// Number of Banks +#ifndef SMEM_NUM_BANKS +#define SMEM_NUM_BANKS (NUM_LSU_LANES) +#endif + +// L2cache Configurable Knobs ///////////////////////////////////////////////// + +// Cache Size +#ifndef L2_CACHE_SIZE +#ifdef ALTERA_S10 +#define L2_CACHE_SIZE 2097152 +#else +#define L2_CACHE_SIZE 1048576 +#endif +#endif + +// Number of Banks +#ifndef L2_NUM_BANKS +#define L2_NUM_BANKS MIN(4, NUM_SOCKETS) +#endif + +// Core Response Queue Size +#ifndef L2_CRSQ_SIZE +#define L2_CRSQ_SIZE 2 +#endif + +// Miss Handling Register Size +#ifndef L2_MSHR_SIZE +#define L2_MSHR_SIZE 16 +#endif + +// Memory Request Queue Size +#ifndef L2_MREQ_SIZE +#define L2_MREQ_SIZE 4 +#endif + +// Memory Response Queue Size +#ifndef L2_MRSQ_SIZE +#define L2_MRSQ_SIZE 0 +#endif + +// Number of Associative Ways +#ifndef L2_NUM_WAYS +#define L2_NUM_WAYS 2 +#endif + +// L3cache Configurable Knobs ///////////////////////////////////////////////// + +// Cache Size +#ifndef L3_CACHE_SIZE +#ifdef ALTERA_S10 +#define L3_CACHE_SIZE 2097152 +#else +#define L3_CACHE_SIZE 1048576 +#endif +#endif + +// Number of Banks +#ifndef L3_NUM_BANKS +#define L3_NUM_BANKS MIN(4, NUM_CLUSTERS) +#endif + +// Core Response Queue Size +#ifndef L3_CRSQ_SIZE +#define L3_CRSQ_SIZE 2 +#endif + +// Miss Handling Register Size +#ifndef L3_MSHR_SIZE +#define L3_MSHR_SIZE 16 +#endif + +// Memory Request Queue Size +#ifndef L3_MREQ_SIZE +#define L3_MREQ_SIZE 4 +#endif + +// Memory Response Queue Size +#ifndef L3_MRSQ_SIZE +#define L3_MRSQ_SIZE 0 +#endif + +// Number of Associative Ways +#ifndef L3_NUM_WAYS +#define L3_NUM_WAYS 4 +#endif + +// ISA Extensions ///////////////////////////////////////////////////////////// + +#ifdef EXT_A_ENABLE + #define EXT_A_ENABLED 1 +#else + #define EXT_A_ENABLED 0 +#endif + +#ifdef EXT_C_ENABLE + #define EXT_C_ENABLED 1 +#else + #define EXT_C_ENABLED 0 +#endif + +#ifdef EXT_D_ENABLE + #define EXT_D_ENABLED 1 +#else + #define EXT_D_ENABLED 0 +#endif + +#ifdef EXT_F_ENABLE + #define EXT_F_ENABLED 1 +#else + #define EXT_F_ENABLED 0 +#endif + +#ifdef EXT_M_ENABLE + #define EXT_M_ENABLED 1 +#else + #define EXT_M_ENABLED 0 +#endif + +#define ISA_STD_A 0 +#define ISA_STD_C 2 +#define ISA_STD_D 3 +#define ISA_STD_E 4 +#define ISA_STD_F 5 +#define ISA_STD_H 7 +#define ISA_STD_I 8 +#define ISA_STD_N 13 +#define ISA_STD_Q 16 +#define ISA_STD_S 18 +#define ISA_STD_U 20 + +#define ISA_EXT_ICACHE 0 +#define ISA_EXT_DCACHE 1 +#define ISA_EXT_L2CACHE 2 +#define ISA_EXT_L3CACHE 3 +#define ISA_EXT_SMEM 4 + +#define MISA_EXT (ICACHE_ENABLED << ISA_EXT_ICACHE) \ + | (DCACHE_ENABLED << ISA_EXT_DCACHE) \ + | (L2_ENABLED << ISA_EXT_L2CACHE) \ + | (L3_ENABLED << ISA_EXT_L3CACHE) \ + | (SM_ENABLED << ISA_EXT_SMEM) + +#define MISA_STD (EXT_A_ENABLED << 0) /* A - Atomic Instructions extension */ \ + | (0 << 1) /* B - Tentatively reserved for Bit operations extension */ \ + | (EXT_C_ENABLED << 2) /* C - Compressed extension */ \ + | (EXT_D_ENABLED << 3) /* D - Double precsision floating-point extension */ \ + | (0 << 4) /* E - RV32E base ISA */ \ + | (EXT_F_ENABLED << 5) /* F - Single precsision floating-point extension */ \ + | (0 << 6) /* G - Additional standard extensions present */ \ + | (0 << 7) /* H - Hypervisor mode implemented */ \ + | (1 << 8) /* I - RV32I/64I/128I base ISA */ \ + | (0 << 9) /* J - Reserved */ \ + | (0 << 10) /* K - Reserved */ \ + | (0 << 11) /* L - Tentatively reserved for Bit operations extension */ \ + | (EXT_M_ENABLED << 12) /* M - Integer Multiply/Divide extension */ \ + | (0 << 13) /* N - User level interrupts supported */ \ + | (0 << 14) /* O - Reserved */ \ + | (0 << 15) /* P - Tentatively reserved for Packed-SIMD extension */ \ + | (0 << 16) /* Q - Quad-precision floating-point extension */ \ + | (0 << 17) /* R - Reserved */ \ + | (0 << 18) /* S - Supervisor mode implemented */ \ + | (0 << 19) /* T - Tentatively reserved for Transactional Memory extension */ \ + | (1 << 20) /* U - User mode implemented */ \ + | (0 << 21) /* V - Tentatively reserved for Vector extension */ \ + | (0 << 22) /* W - Reserved */ \ + | (1 << 23) /* X - Non-standard extensions present */ \ + | (0 << 24) /* Y - Reserved */ \ + | (0 << 25) /* Z - Reserved */ + +// Device identification ////////////////////////////////////////////////////// + +#define VENDOR_ID 0 +#define ARCHITECTURE_ID 0 +#define IMPLEMENTATION_ID 0 + +#endif // VX_CONFIG_VH + diff --git a/kernel/include/VX_types.h b/kernel/include/VX_types.h new file mode 100644 index 00000000..7ad1ca68 --- /dev/null +++ b/kernel/include/VX_types.h @@ -0,0 +1,193 @@ +// auto-generated by gen_config.py. DO NOT EDIT +// Generated at 2024-06-15 00:25:12.935689 + +// Translated from ./rtl/VX_types.vh: + +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef VX_TYPES_VH +#define VX_TYPES_VH + +// Device configuration registers + +#define VX_CSR_ADDR_BITS 12 +#define VX_DCR_ADDR_BITS 12 + +#define VX_DCR_BASE_STATE_BEGIN 0x001 +#define VX_DCR_BASE_STARTUP_ADDR0 0x001 +#define VX_DCR_BASE_STARTUP_ADDR1 0x002 +#define VX_DCR_BASE_MPM_CLASS 0x003 +#define VX_DCR_BASE_STATE_END 0x004 + +#define VX_DCR_BASE_STATE(addr) ((addr) - VX_DCR_BASE_STATE_BEGIN) +#define VX_DCR_BASE_STATE_COUNT (VX_DCR_BASE_STATE_END-VX_DCR_BASE_STATE_BEGIN) + +// Machine Performance-monitoring counters classes + +#define VX_DCR_MPM_CLASS_NONE 0 +#define VX_DCR_MPM_CLASS_CORE 1 +#define VX_DCR_MPM_CLASS_MEM 2 + +// User Floating-Point CSRs + +#define VX_CSR_FFLAGS 0x001 +#define VX_CSR_FRM 0x002 +#define VX_CSR_FCSR 0x003 + +#define VX_CSR_SATP 0x180 + +#define VX_CSR_PMPCFG0 0x3A0 +#define VX_CSR_PMPADDR0 0x3B0 + +#define VX_CSR_MSTATUS 0x300 +#define VX_CSR_MISA 0x301 +#define VX_CSR_MEDELEG 0x302 +#define VX_CSR_MIDELEG 0x303 +#define VX_CSR_MIE 0x304 +#define VX_CSR_MTVEC 0x305 + +#define VX_CSR_MEPC 0x341 + +#define VX_CSR_MNSTATUS 0x744 + +#define VX_CSR_MPM_BASE 0xB00 +#define VX_CSR_MPM_BASE_H 0xB80 +#define VX_CSR_MPM_USER 0xB03 +#define VX_CSR_MPM_USER_H 0xB83 + +// Machine Performance-monitoring core counters +// PERF: Standard +#define VX_CSR_MCYCLE 0xB00 +#define VX_CSR_MCYCLE_H 0xB80 +#define VX_CSR_MPM_RESERVED 0xB01 +#define VX_CSR_MPM_RESERVED_H 0xB81 +#define VX_CSR_MINSTRET 0xB02 +#define VX_CSR_MINSTRET_H 0xB82 +// PERF: pipeline +#define VX_CSR_MPM_SCHED_ID 0xB03 +#define VX_CSR_MPM_SCHED_ID_H 0xB83 +#define VX_CSR_MPM_SCHED_ST 0xB04 +#define VX_CSR_MPM_SCHED_ST_H 0xB84 +#define VX_CSR_MPM_IBUF_ST 0xB05 +#define VX_CSR_MPM_IBUF_ST_H 0xB85 +#define VX_CSR_MPM_SCRB_ST 0xB06 +#define VX_CSR_MPM_SCRB_ST_H 0xB86 +#define VX_CSR_MPM_SCRB_ALU 0xB07 +#define VX_CSR_MPM_SCRB_ALU_H 0xB87 +#define VX_CSR_MPM_SCRB_FPU 0xB08 +#define VX_CSR_MPM_SCRB_FPU_H 0xB88 +#define VX_CSR_MPM_SCRB_LSU 0xB09 +#define VX_CSR_MPM_SCRB_LSU_H 0xB89 +#define VX_CSR_MPM_SCRB_SFU 0xB0A +#define VX_CSR_MPM_SCRB_SFU_H 0xB8A +// PERF: memory +#define VX_CSR_MPM_IFETCHES 0xB0B +#define VX_CSR_MPM_IFETCHES_H 0xB8B +#define VX_CSR_MPM_LOADS 0xB0C +#define VX_CSR_MPM_LOADS_H 0xB8C +#define VX_CSR_MPM_STORES 0xB0D +#define VX_CSR_MPM_STORES_H 0xB8D +#define VX_CSR_MPM_IFETCH_LT 0xB0E +#define VX_CSR_MPM_IFETCH_LT_H 0xB8E +#define VX_CSR_MPM_LOAD_LT 0xB0F +#define VX_CSR_MPM_LOAD_LT_H 0xB8F +// SFU: scoreboard +#define VX_CSR_MPM_SCRB_WCTL 0xB10 +#define VX_CSR_MPM_SCRB_WCTL_H 0xB90 +#define VX_CSR_MPM_SCRB_CSRS 0xB11 +#define VX_CSR_MPM_SCRB_CSRS_H 0xB91 + +// Machine Performance-monitoring memory counters +// PERF: icache +#define VX_CSR_MPM_ICACHE_READS 0xB03 // total reads +#define VX_CSR_MPM_ICACHE_READS_H 0xB83 +#define VX_CSR_MPM_ICACHE_MISS_R 0xB04 // read misses +#define VX_CSR_MPM_ICACHE_MISS_R_H 0xB84 +#define VX_CSR_MPM_ICACHE_MSHR_ST 0xB05 // MSHR stalls +#define VX_CSR_MPM_ICACHE_MSHR_ST_H 0xB85 +// PERF: dcache +#define VX_CSR_MPM_DCACHE_READS 0xB06 // total reads +#define VX_CSR_MPM_DCACHE_READS_H 0xB86 +#define VX_CSR_MPM_DCACHE_WRITES 0xB07 // total writes +#define VX_CSR_MPM_DCACHE_WRITES_H 0xB87 +#define VX_CSR_MPM_DCACHE_MISS_R 0xB08 // read misses +#define VX_CSR_MPM_DCACHE_MISS_R_H 0xB88 +#define VX_CSR_MPM_DCACHE_MISS_W 0xB09 // write misses +#define VX_CSR_MPM_DCACHE_MISS_W_H 0xB89 +#define VX_CSR_MPM_DCACHE_BANK_ST 0xB0A // bank conflicts +#define VX_CSR_MPM_DCACHE_BANK_ST_H 0xB8A +#define VX_CSR_MPM_DCACHE_MSHR_ST 0xB0B // MSHR stalls +#define VX_CSR_MPM_DCACHE_MSHR_ST_H 0xB8B +// PERF: l2cache +#define VX_CSR_MPM_L2CACHE_READS 0xB0C // total reads +#define VX_CSR_MPM_L2CACHE_READS_H 0xB8C +#define VX_CSR_MPM_L2CACHE_WRITES 0xB0D // total writes +#define VX_CSR_MPM_L2CACHE_WRITES_H 0xB8D +#define VX_CSR_MPM_L2CACHE_MISS_R 0xB0E // read misses +#define VX_CSR_MPM_L2CACHE_MISS_R_H 0xB8E +#define VX_CSR_MPM_L2CACHE_MISS_W 0xB0F // write misses +#define VX_CSR_MPM_L2CACHE_MISS_W_H 0xB8F +#define VX_CSR_MPM_L2CACHE_BANK_ST 0xB10 // bank conflicts +#define VX_CSR_MPM_L2CACHE_BANK_ST_H 0xB90 +#define VX_CSR_MPM_L2CACHE_MSHR_ST 0xB11 // MSHR stalls +#define VX_CSR_MPM_L2CACHE_MSHR_ST_H 0xB91 +// PERF: l3cache +#define VX_CSR_MPM_L3CACHE_READS 0xB12 // total reads +#define VX_CSR_MPM_L3CACHE_READS_H 0xB92 +#define VX_CSR_MPM_L3CACHE_WRITES 0xB13 // total writes +#define VX_CSR_MPM_L3CACHE_WRITES_H 0xB93 +#define VX_CSR_MPM_L3CACHE_MISS_R 0xB14 // read misses +#define VX_CSR_MPM_L3CACHE_MISS_R_H 0xB94 +#define VX_CSR_MPM_L3CACHE_MISS_W 0xB15 // write misses +#define VX_CSR_MPM_L3CACHE_MISS_W_H 0xB95 +#define VX_CSR_MPM_L3CACHE_BANK_ST 0xB16 // bank conflicts +#define VX_CSR_MPM_L3CACHE_BANK_ST_H 0xB96 +#define VX_CSR_MPM_L3CACHE_MSHR_ST 0xB17 // MSHR stalls +#define VX_CSR_MPM_L3CACHE_MSHR_ST_H 0xB97 +// PERF: memory +#define VX_CSR_MPM_MEM_READS 0xB18 // total reads +#define VX_CSR_MPM_MEM_READS_H 0xB98 +#define VX_CSR_MPM_MEM_WRITES 0xB19 // total writes +#define VX_CSR_MPM_MEM_WRITES_H 0xB99 +#define VX_CSR_MPM_MEM_LT 0xB1A // memory latency +#define VX_CSR_MPM_MEM_LT_H 0xB9A +// PERF: smem +#define VX_CSR_MPM_SMEM_READS 0xB1B // memory reads +#define VX_CSR_MPM_SMEM_READS_H 0xB9B +#define VX_CSR_MPM_SMEM_WRITES 0xB1C // memory writes +#define VX_CSR_MPM_SMEM_WRITES_H 0xB9C +#define VX_CSR_MPM_SMEM_BANK_ST 0xB1D // bank conflicts +#define VX_CSR_MPM_SMEM_BANK_ST_H 0xB9D + +// Machine Information Registers + +#define VX_CSR_MVENDORID 0xF11 +#define VX_CSR_MARCHID 0xF12 +#define VX_CSR_MIMPID 0xF13 +#define VX_CSR_MHARTID 0xF14 + +// GPGU CSRs + +#define VX_CSR_THREAD_ID 0xCC0 +#define VX_CSR_WARP_ID 0xCC1 +#define VX_CSR_CORE_ID 0xCC2 +#define VX_CSR_WARP_MASK 0xCC3 +#define VX_CSR_THREAD_MASK 0xCC4 // warning! this value is also used in LLVM + +#define VX_CSR_NUM_THREADS 0xFC0 +#define VX_CSR_NUM_WARPS 0xFC1 +#define VX_CSR_NUM_CORES 0xFC2 + +#endif // VX_TYPES_VH + diff --git a/tests/regression/Makefile b/tests/regression/Makefile index d44c82c4..a19b87d7 100644 --- a/tests/regression/Makefile +++ b/tests/regression/Makefile @@ -1,89 +1,17 @@ -all: - $(MAKE) -C basic - $(MAKE) -C demo - $(MAKE) -C dogfood - $(MAKE) -C mstress - $(MAKE) -C io_addr - $(MAKE) -C printf - $(MAKE) -C diverge - $(MAKE) -C sort - $(MAKE) -C fence - $(MAKE) -C no_mf_ext - $(MAKE) -C no_smem - $(MAKE) -C vecaddx - $(MAKE) -C sgemmx +# Find all subdirectories containing a Makefile +SUBDIRS := $(shell find . -mindepth 1 -maxdepth 1 -type d -exec test -e {}/Makefile \; -print) -run-simx: - $(MAKE) -C basic run-simx - $(MAKE) -C demo run-simx - $(MAKE) -C dogfood run-simx - $(MAKE) -C mstress run-simx - $(MAKE) -C io_addr run-simx - $(MAKE) -C printf run-simx - $(MAKE) -C diverge run-simx - $(MAKE) -C sort run-simx - $(MAKE) -C fence run-simx - $(MAKE) -C no_mf_ext run-simx - $(MAKE) -C no_smem run-simx - $(MAKE) -C vecaddx run-simx - $(MAKE) -C sgemmx run-simx +.PHONY: all $(SUBDIRS) clean clean-all -run-rtlsim: - $(MAKE) -C basic run-rtlsim - $(MAKE) -C demo run-rtlsim - $(MAKE) -C dogfood run-rtlsim - $(MAKE) -C mstress run-rtlsim - $(MAKE) -C io_addr run-rtlsim - $(MAKE) -C printf run-rtlsim - $(MAKE) -C diverge run-rtlsim - $(MAKE) -C sort run-rtlsim - $(MAKE) -C fence run-rtlsim - $(MAKE) -C no_mf_ext run-rtlsim - $(MAKE) -C no_smem run-rtlsim - $(MAKE) -C vecaddx run-rtlsim - $(MAKE) -C sgemmx run-rtlsim +# Default target: run make in all subdirectories +all: $(SUBDIRS) -run-opae: - $(MAKE) -C basic run-opae - $(MAKE) -C demo run-opae - $(MAKE) -C dogfood run-opae - $(MAKE) -C mstress run-opae - $(MAKE) -C io_addr run-opae - $(MAKE) -C printf run-opae - $(MAKE) -C diverge run-opae - $(MAKE) -C sort run-opae - $(MAKE) -C fence run-opae - $(MAKE) -C no_mf_ext run-opae - $(MAKE) -C no_smem run-opae - $(MAKE) -C vecaddx run-opae - $(MAKE) -C sgemmx run-opae +$(SUBDIRS): + $(MAKE) -C $@ +# Clean target: run make clean in all subdirectories clean: - $(MAKE) -C basic clean - $(MAKE) -C demo clean - $(MAKE) -C dogfood clean - $(MAKE) -C mstress clean - $(MAKE) -C io_addr clean - $(MAKE) -C printf clean - $(MAKE) -C diverge clean - $(MAKE) -C sort clean - $(MAKE) -C fence clean - $(MAKE) -C no_mf_ext clean - $(MAKE) -C no_smem clean - $(MAKE) -C vecaddx clean - $(MAKE) -C sgemmx clean + for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean; done clean-all: - $(MAKE) -C basic clean-all - $(MAKE) -C demo clean-all - $(MAKE) -C dogfood clean-all - $(MAKE) -C mstress clean-all - $(MAKE) -C io_addr clean-all - $(MAKE) -C printf clean-all - $(MAKE) -C diverge clean-all - $(MAKE) -C sort clean-all - $(MAKE) -C fence clean-all - $(MAKE) -C no_mf_ext clean-all - $(MAKE) -C no_smem clean-all - $(MAKE) -C vecaddx clean-all - $(MAKE) -C sgemmx clean-all + for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean-all; done diff --git a/tests/regression/bad_apple/Makefile b/tests/regression/bad_apple/Makefile index e83a1a8e..e3ea9d26 100644 --- a/tests/regression/bad_apple/Makefile +++ b/tests/regression/bad_apple/Makefile @@ -1,7 +1,5 @@ PROJECT = bad_apple -SRCS = main.cpp common.h - VX_SRCS = kernel.cpp OPTS ?= -n16 diff --git a/tests/regression/bad_apple/bad_apple b/tests/regression/bad_apple/bad_apple deleted file mode 100755 index 67ade61b..00000000 Binary files a/tests/regression/bad_apple/bad_apple and /dev/null differ diff --git a/tests/regression/bad_apple/main.cpp b/tests/regression/bad_apple/main.cpp deleted file mode 100644 index 54531062..00000000 --- a/tests/regression/bad_apple/main.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_a_data; -std::vector src_b_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.addr_a); - vx_mem_free(device, kernel_arg.addr_b); - vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -void generate_source_matrix(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - src_a_data.resize(dim_m * dim_k); - src_b_data.resize(dim_k * dim_n); - - for (uint32_t i = 0; i < src_a_data.size(); ++i) { - src_a_data[i] = static_cast(i); - std::cout << "A: " << i << ": value=" << src_a_data[i] << std::endl; - } - for (uint32_t i = 0; i < src_b_data.size(); ++i) { - src_b_data[i] = static_cast(i); - std::cout << "B: " << i << ": value=" << src_b_data[i] << std::endl; - } -} - -void generate_reference_matmul(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - ref_data.resize(dim_m * dim_n); - - for (uint32_t i = 0; i < dim_m; ++i) { - for (uint32_t j = 0; j < dim_n; ++j) { - float ref = 0.0f; - for (uint32_t k = 0; k < dim_k; ++k) { - ref += src_a_data[dim_k * i + k] * src_b_data[dim_n * k + j]; - } - ref_data.at(dim_n * i + j) = ref; - } - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t dim_m, uint32_t dim_n) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_c, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - for (uint32_t i = 0; i < dim_m * dim_n; ++i) { - float ref = ref_data.at(i); - float cur = buf_ptr[i]; - if (std::abs((cur - ref) / ref) > 1e-6) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - // FIXME: hardcoded - uint32_t dim_m = 64; - uint32_t dim_n = 64; - uint32_t dim_k = 64; - - generate_source_matrix(dim_m, dim_n, dim_k); - generate_reference_matmul(dim_m, dim_n, dim_k); - - uint32_t src_a_buf_size = src_a_data.size() * sizeof(src_a_data[0]); - uint32_t src_b_buf_size = src_b_data.size() * sizeof(src_b_data[0]); - uint32_t dst_buf_size = ref_data.size() * sizeof(src_a_data[0]); - - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, src_a_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_a)); - RT_CHECK(vx_mem_alloc(device, src_b_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_b)); - RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_c)); - - kernel_arg.dim_m = dim_m; - kernel_arg.dim_n = dim_n; - kernel_arg.dim_k = dim_k; - - std::cout << "dev_addr_a=0x" << std::hex << kernel_arg.addr_a << std::endl; - std::cout << "dev_addr_b=0x" << std::hex << kernel_arg.addr_b << std::endl; - std::cout << "dev_addr_c=0x" << std::hex << kernel_arg.addr_c << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max( - src_a_buf_size, - std::max( - src_b_buf_size, - std::max(dst_buf_size, sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - kernel_arg.addr_a = (uint64_t) 0x20000; - kernel_arg.addr_b = (uint64_t) 0x28000; - kernel_arg.addr_c = (uint64_t) 0xc0000000ULL; - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - } - - // upload source buffer - { - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_a_data.data(), src_a_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_a, staging_buf.data(), - src_a_buf_size)); - - std::cout << "uploading source A matrix to device, device mem address=" - << std::hex << kernel_arg.addr_a << ", size=" << std::dec - << src_a_buf_size << " bytes\n"; - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_a_buf_size); - file.close(); - } - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_b_data.data(), src_b_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_b, staging_buf.data(), - src_b_buf_size)); - - std::cout << "uploading source B matrix to device, device mem address=" - << std::hex << kernel_arg.addr_b << ", size=" << std::dec - << src_b_buf_size << " bytes\n"; - std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_b_buf_size); - file.close(); - } - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < ref_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_c, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.dim_m, kernel_arg.dim_n)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/basic/Makefile b/tests/regression/basic/Makefile deleted file mode 100644 index 06d4c088..00000000 --- a/tests/regression/basic/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -PROJECT = basic - -SRCS = main.cpp - -VX_SRCS = kernel.cpp ../../../kernel/src/vx_perf.c start.S - -OPTS ?= -n256 - -include ../common.mk - -VX_LDFLAGS = -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) - -VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc -VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-g++ -VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump -VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy diff --git a/tests/regression/basic/common.h b/tests/regression/basic/common.h deleted file mode 100644 index 88748dc9..00000000 --- a/tests/regression/basic/common.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -typedef struct { - uint32_t count; - uint64_t src_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif \ No newline at end of file diff --git a/tests/regression/basic/kernel.cpp b/tests/regression/basic/kernel.cpp deleted file mode 100644 index fae6019b..00000000 --- a/tests/regression/basic/kernel.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include "common.h" - -int main() { - kernel_arg_t* __UNIFORM__ arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - uint32_t count = arg->count; - int32_t* src_ptr = (int32_t*)arg->src_addr; - int32_t* dst_ptr = (int32_t*)arg->dst_addr; - - uint32_t offset = vx_core_id() * count; - - for (uint32_t i = 0; i < count; ++i) { - dst_ptr[offset + i] = src_ptr[offset + i]; - } - - return 0; -} diff --git a/tests/regression/basic/main.cpp b/tests/regression/basic/main.cpp deleted file mode 100755 index 0f6f3bde..00000000 --- a/tests/regression/basic/main.cpp +++ /dev/null @@ -1,279 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -int test = -1; -uint32_t count = 0; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-t testno][-k: kernel][-n words][-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 't': - test = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -uint64_t shuffle(int i, uint64_t value) { - return (value << i) | (value & ((1 << i)-1));; -} - -int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) { - int errors = 0; - - auto time_start = std::chrono::high_resolution_clock::now(); - - int num_blocks_8 = (64 * num_blocks) / 8; - - // update source buffer - for (int i = 0; i < num_blocks_8; ++i) { - ((uint64_t*)staging_buf.data())[i] = shuffle(i, value); - } - - /*for (int i = 0; i < num_blocks; ++i) { - std::cout << "data[" << i << "]=0x"; - for (int j = 7; j >= 0; --j) { - std::cout << std::hex << ((uint64_t*)staging_buf.data())[i * 8 +j]; - } - std::cout << std::endl; - }*/ - - // write source buffer to local memory - std::cout << "write source buffer to local memory" << std::endl; - auto t0 = std::chrono::high_resolution_clock::now(); - RT_CHECK(vx_copy_to_dev(device, dev_addr, staging_buf.data(), 64 * num_blocks)); - auto t1 = std::chrono::high_resolution_clock::now(); - - // clear destination buffer - for (int i = 0; i < num_blocks_8; ++i) { - ((uint64_t*)staging_buf.data())[i] = 0; - } - - // read destination buffer from local memory - std::cout << "read destination buffer from local memory" << std::endl; - auto t2 = std::chrono::high_resolution_clock::now(); - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), dev_addr, 64 * num_blocks)); - auto t3 = std::chrono::high_resolution_clock::now(); - - // verify result - std::cout << "verify result" << std::endl; - for (int i = 0; i < num_blocks_8; ++i) { - auto curr = ((uint64_t*)staging_buf.data())[i]; - auto ref = shuffle(i, value); - if (curr != ref) { - std::cout << "error at 0x" << std::hex << (dev_addr + 8 * i) - << ": actual 0x" << curr << ", expected 0x" << ref << std::endl; - ++errors; - } - } - - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - - auto time_end = std::chrono::high_resolution_clock::now(); - - double elapsed; - elapsed = std::chrono::duration_cast(t1 - t0).count(); - printf("upload time: %lg ms\n", elapsed); - elapsed = std::chrono::duration_cast(t3 - t2).count(); - printf("download time: %lg ms\n", elapsed); - elapsed = std::chrono::duration_cast(time_end - time_start).count(); - printf("Total elapsed time: %lg ms\n", elapsed); - - return 0; -} - -int run_kernel_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { - int errors = 0; - - auto time_start = std::chrono::high_resolution_clock::now(); - - // update source buffer - { - std::cout << "upload source buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i; - } - } - auto t0 = std::chrono::high_resolution_clock::now(); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src_addr, staging_buf.data(), buf_size)); - auto t1 = std::chrono::high_resolution_clock::now(); - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), buf_size)); - } - - // start device - std::cout << "start execution" << std::endl; - auto t2 = std::chrono::high_resolution_clock::now(); - RT_CHECK(vx_start(device)); - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - auto t3 = std::chrono::high_resolution_clock::now(); - - // read destination buffer from local memory - std::cout << "read destination buffer from local memory" << std::endl; - auto t4 = std::chrono::high_resolution_clock::now(); - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size)); - auto t5 = std::chrono::high_resolution_clock::now(); - - - // verify result - std::cout << "verify result" << std::endl; - for (uint32_t i = 0; i < num_points; ++i) { - int32_t curr = ((int32_t*)staging_buf.data())[i]; - int32_t ref = i; - if (curr != ref) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual 0x" << curr << ", expected 0x" << ref << std::endl; - ++errors; - } - } - - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - - auto time_end = std::chrono::high_resolution_clock::now(); - - double elapsed; - elapsed = std::chrono::duration_cast(t1 - t0).count(); - printf("upload time: %lg ms\n", elapsed); - elapsed = std::chrono::duration_cast(t3 - t2).count(); - printf("execute time: %lg ms\n", elapsed); - elapsed = std::chrono::duration_cast(t5 - t4).count(); - printf("download time: %lg ms\n", elapsed); - elapsed = std::chrono::duration_cast(time_end - time_start).count(); - printf("Total elapsed time: %lg ms\n", elapsed); - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint64_t num_cores; - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); - - uint32_t num_points = count * num_cores; - uint32_t num_blocks = (num_points * sizeof(int32_t) + 63) / 64; - uint32_t buf_size = num_blocks * 64; - - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - - kernel_arg.count = num_points; - - std::cout << "dev_src=0x" << std::hex << kernel_arg.src_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - staging_buf.resize(alloc_size); - - // run tests - if (0 == test || -1 == test) { - std::cout << "run memcopy test" << std::endl; - RT_CHECK(run_memcopy_test(kernel_arg.src_addr, 0x0badf00d40ff40ff, num_blocks)); - } - - if (1 == test || -1 == test) { - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - std::cout << "run kernel test" << std::endl; - RT_CHECK(run_kernel_test(kernel_arg, buf_size, num_points)); - } - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "Test PASSED" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/basic/start.S b/tests/regression/basic/start.S deleted file mode 100644 index 0cc469b6..00000000 --- a/tests/regression/basic/start.S +++ /dev/null @@ -1,13 +0,0 @@ -.section .init, "ax" -.global _start -.type _start, @function -_start: - # call main routine - call main - - # dump perf counter - call vx_perf_dump - - # end execution - .insn r 0x0b, 0, 0, x0, x0, x0 -.size _start, .-_start \ No newline at end of file diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 96c5965a..76bac290 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -2,11 +2,6 @@ XLEN ?= 32 TOOLDIR ?= /opt -TARGET ?= opaesim - -XRT_SYN_DIR ?= ../../../hw/syn/xilinx/xrt -XRT_DEVICE_INDEX ?= 0 - ifeq ($(XLEN),64) RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain VX_CFLAGS += -march=rv64imafd -mabi=lp64d @@ -24,13 +19,12 @@ VORTEX_RT_PATH ?= $(realpath ../../../runtime) VORTEX_KN_PATH ?= $(realpath ../../../kernel) GEMMINI_SW_PATH ?= $(realpath ../../../gemmini) -FPGA_BIN_DIR ?= $(VORTEX_RT_PATH)/opae - LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex + #LLVM_CFLAGS += -mllvm -vortex-branch-divergence=2 #LLVM_CFLAGS += -mllvm -print-after-all #LLVM_CFLAGS += -I$(RISCV_SYSROOT)/include/c++/9.2.0/$(RISCV_PREFIX) @@ -53,14 +47,14 @@ VX_CFLAGS += -mcmodel=medany -fno-rtti -fno-exceptions -nostartfiles -fdata-sect # comment out below for regression/basic, which uses GCC that doesn't # understand these flags VX_CFLAGS += -mllvm -inline-threshold=262144 -VX_CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(VORTEX_KN_PATH)/../hw -I$(GEMMINI_SW_PATH) +VX_CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(GEMMINI_SW_PATH) VX_CFLAGS += -DNDEBUG -DLLVM_VORTEX # VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortexrt.a VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortexrt.a $(VORTEX_KN_PATH)/tohost.S CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_KN_PATH)/../hw +CXXFLAGS += -I$(VORTEX_RT_PATH)/include LDFLAGS += -L$(VORTEX_RT_PATH)/stub -lvortex @@ -71,53 +65,22 @@ else CXXFLAGS += -O2 -DNDEBUG endif -ifeq ($(TARGET), fpga) - OPAE_DRV_PATHS ?= libopae-c.so -else -ifeq ($(TARGET), asesim) - OPAE_DRV_PATHS ?= libopae-c-ase.so -else -ifeq ($(TARGET), opaesim) - OPAE_DRV_PATHS ?= libopae-c-sim.so -endif -endif -endif - # CONFIG is supplied from the command line to differentiate ELF files with custom suffixes CONFIGEXT = $(if $(CONFIG),.$(CONFIG),) -all: $(PROJECT) kernel.bin kernel.dump kernel.radiance.dump kernel$(CONFIGEXT).dump kernel.radiance$(CONFIGEXT).dump - -kernel.dump: kernel.elf - $(VX_DP) -D kernel.elf > kernel.dump +all: kernel.radiance.dump kernel.radiance$(CONFIGEXT).dump kernel.radiance.dump: kernel.radiance.elf $(VX_DP) -D kernel.radiance.elf > kernel.radiance.dump ifneq ($(CONFIG),) -kernel$(CONFIGEXT).dump: kernel$(CONFIGEXT).elf - $(VX_DP) -D kernel$(CONFIGEXT).elf > kernel$(CONFIGEXT).dump - kernel.radiance$(CONFIGEXT).dump: kernel.radiance$(CONFIGEXT).elf $(VX_DP) -D kernel.radiance$(CONFIGEXT).elf > kernel.radiance$(CONFIGEXT).dump endif -kernel.bin: kernel.elf kernel.radiance.elf - $(VX_CP) -O binary kernel.elf kernel.bin - OBJCOPY ?= $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy OBJCOPY_FLAGS ?= "LOAD,ALLOC,DATA,CONTENTS" BINFILES := args.bin input.a.bin input.b.bin input.c.bin -kernel.elf: $(VX_SRCS) $(VX_INCLUDES) $(BINFILES) - $(VX_CXX) $(VX_CFLAGS) -o $@ $(VX_SRCS) $(VX_LDFLAGS) - $(OBJCOPY) --set-section-flags .operand.a=$(OBJCOPY_FLAGS) $@ - $(OBJCOPY) --set-section-flags .operand.b=$(OBJCOPY_FLAGS) $@ - $(OBJCOPY) --set-section-flags .operand.c=$(OBJCOPY_FLAGS) $@ - $(OBJCOPY) --set-section-flags .args=$(OBJCOPY_FLAGS) $@ - $(OBJCOPY) --update-section .operand.a=input.a.bin $@ || true - $(OBJCOPY) --update-section .operand.b=input.b.bin $@ || true - $(OBJCOPY) --update-section .operand.c=input.c.bin $@ || true - $(OBJCOPY) --update-section .args=args.bin $@ || true kernel.radiance.elf: $(VX_SRCS) $(VX_INCLUDES) $(BINFILES) $(VX_CXX) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -DRADIANCE -o $@ @@ -131,41 +94,12 @@ kernel.radiance.elf: $(VX_SRCS) $(VX_INCLUDES) $(BINFILES) $(OBJCOPY) --update-section .args=args.bin $@ || true ifneq ($(CONFIG),) -kernel$(CONFIGEXT).elf: kernel.elf - cp $< $@ - kernel.radiance$(CONFIGEXT).elf: kernel.radiance.elf cp $< $@ endif -$(PROJECT): $(SRCS) - $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ - -run-simx: $(PROJECT) kernel.bin - LD_LIBRARY_PATH=$(VORTEX_RT_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) - -run-opae: $(PROJECT) kernel.bin - SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(VORTEX_RT_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) - -run-rtlsim: $(PROJECT) kernel.bin - LD_LIBRARY_PATH=$(VORTEX_RT_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) - -run-xrt: $(PROJECT) kernel.bin -ifeq ($(TARGET), hw) - SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH)/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) -else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH)/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) -endif - -.depend: $(SRCS) - $(CXX) $(CXXFLAGS) -MM $^ > .depend; - clean: - rm -rf $(PROJECT) *.o .depend + rm -rf *.o clean-all: clean - rm -rf kernel.elf kernel.dump - -ifneq ($(MAKECMDGOALS),clean) - -include .depend -endif + rm -rf kernel*.elf kernel*.dump diff --git a/tests/regression/common.mk.muon b/tests/regression/common.mk.muon new file mode 100644 index 00000000..35cfa5a5 --- /dev/null +++ b/tests/regression/common.mk.muon @@ -0,0 +1,145 @@ +XLEN ?= 32 + +TOOLDIR ?= /opt + +ifeq ($(XLEN),64) +RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain +VX_CFLAGS += -march=rv64imafd -mabi=lp64d +STARTUP_ADDR ?= 0x180000000 +else +RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain +VX_CFLAGS += -march=rv32imafd -mabi=ilp32f +STARTUP_ADDR ?= 0x80000000 +endif + +RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf +RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX) + +VORTEX_RT_PATH ?= $(realpath ../../../runtime) +VORTEX_KN_PATH ?= $(realpath ../../../kernel) +GEMMINI_SW_PATH ?= $(realpath ../../../third_party/gemmini-rocc-tests) + +LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex + +LLVM_MUON ?= $(TOOLDIR)/llvm-muon +LLVM_MUON_32R ?= $(TOOLDIR)/llvm-muon-baseline + +LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) +LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) +LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex + +#LLVM_CFLAGS += -mllvm -vortex-branch-divergence=2 +#LLVM_CFLAGS += -mllvm -print-after-all +#LLVM_CFLAGS += -I$(RISCV_SYSROOT)/include/c++/9.2.0/$(RISCV_PREFIX) +#LLVM_CFLAGS += -I$(RISCV_SYSROOT)/include/c++/9.2.0 +#LLVM_CFLAGS += -Wl,-L$(RISCV_TOOLCHAIN_PATH)/lib/gcc/$(RISCV_PREFIX)/9.2.0 +#LLVM_CFLAGS += --rtlib=libgcc + +VX_CC = $(LLVM_VORTEX)/bin/clang $(LLVM_CFLAGS) +VX_CXX = $(LLVM_VORTEX)/bin/clang++ $(LLVM_CFLAGS) +VX_DP = $(LLVM_VORTEX)/bin/llvm-objdump +VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy + +MU_CC = $(LLVM_MUON)/bin/clang $(LLVM_CFLAGS) +MU_CXX = $(LLVM_MUON)/bin/clang++ $(LLVM_CFLAGS) +MU_DP = $(LLVM_MUON)/bin/llvm-objdump +MU_CP = $(LLVM_MUON)/bin/llvm-objcopy + +MU_32R_CC = $(LLVM_MUON_32R)/bin/clang $(LLVM_CFLAGS) +MU_32R_CXX = $(LLVM_MUON_32R)/bin/clang++ $(LLVM_CFLAGS) +MU_32R_DP = $(LLVM_MUON_32R)/bin/llvm-objdump +MU_32R_CP = $(LLVM_MUON_32R)/bin/llvm-objcopy + +VX_CFLAGS += -v -O2 -std=c++17 +VX_CFLAGS += -mcmodel=medany -fno-rtti -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections +# comment out below for regression/basic, which uses GCC that doesn't +# understand these flags +VX_CFLAGS += -mllvm -inline-threshold=262144 +VX_CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(GEMMINI_SW_PATH) +VX_CFLAGS += -DNDEBUG -DLLVM_VORTEX + +MU_CFLAGS := $(VX_CFLAGS) +MU_CFLAGS += -fuse-ld=lld + +VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) +MU_LDFLAGS := $(VX_LDFLAGS) +VX_LDFLAGS += $(VORTEX_KN_PATH)/libvortexrt.a +MU_LDFLAGS += $(VORTEX_KN_PATH)/libvortexrtmuon.a $(VORTEX_KN_PATH)/tohost.S + +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -I$(VORTEX_RT_PATH)/include + +LDFLAGS += -L$(VORTEX_RT_PATH)/stub -lvortex + +# Debugigng +ifdef DEBUG + CXXFLAGS += -g -O0 +else + CXXFLAGS += -O2 -DNDEBUG +endif + +# CONFIG is supplied from the command line to differentiate ELF files with custom suffixes +CONFIGEXT = $(if $(CONFIG),.$(CONFIG),) + +all: kernel.radiance.dump kernel.radiance$(CONFIGEXT).dump kernel.radiance.32r.dump kernel.vortex.dump + +kernel.vortex.dump: kernel.vortex.elf + $(VX_DP) -D kernel.vortex.elf > kernel.vortex.dump +kernel.radiance.dump: kernel.radiance.elf + $(MU_DP) -D kernel.radiance.elf > kernel.radiance.dump +kernel.radiance.32r.dump: kernel.radiance.32r.elf + $(MU_32R_DP) -D kernel.radiance.32r.elf > kernel.radiance.32r.dump + +ifneq ($(CONFIG),) +kernel.radiance$(CONFIGEXT).dump: kernel.radiance$(CONFIGEXT).elf + $(MU_DP) -D kernel.radiance$(CONFIGEXT).elf > kernel.radiance$(CONFIGEXT).dump +endif + +OBJCOPY_FLAGS ?= "LOAD,ALLOC,DATA,CONTENTS" +BINFILES := args.bin input.a.bin input.b.bin input.c.bin + +kernel.vortex.elf: $(VX_SRCS) $(VX_INCLUDES) $(BINFILES) + $(VX_CXX) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -DRADIANCE -o $@ + $(VX_CP) --set-section-flags .operand.a=$(OBJCOPY_FLAGS) $@ + $(VX_CP) --set-section-flags .operand.b=$(OBJCOPY_FLAGS) $@ + $(VX_CP) --set-section-flags .operand.c=$(OBJCOPY_FLAGS) $@ + $(VX_CP) --set-section-flags .args=$(OBJCOPY_FLAGS) $@ + $(VX_CP) --update-section .operand.a=input.a.bin $@ || true + $(VX_CP) --update-section .operand.b=input.b.bin $@ || true + $(VX_CP) --update-section .operand.c=input.c.bin $@ || true + $(VX_CP) --update-section .args=args.bin $@ || true + +kernel.radiance.elf: $(VX_SRCS) $(VX_INCLUDES) $(BINFILES) + $(MU_CXX) $(MU_CFLAGS) $(VX_SRCS) $(MU_LDFLAGS) -DRADIANCE -S + $(MU_CXX) $(MU_CFLAGS) $(VX_SRCS) $(MU_LDFLAGS) -DRADIANCE -c + $(MU_CXX) $(MU_CFLAGS) $(VX_SRCS) $(MU_LDFLAGS) -DRADIANCE -o $@ + # $(MU_CP) --set-section-flags .operand.a=$(OBJCOPY_FLAGS) $@ + # $(MU_CP) --set-section-flags .operand.b=$(OBJCOPY_FLAGS) $@ + # $(MU_CP) --set-section-flags .operand.c=$(OBJCOPY_FLAGS) $@ + # $(MU_CP) --set-section-flags .args=$(OBJCOPY_FLAGS) $@ + # $(MU_CP) --update-section .operand.a=input.a.bin $@ || true + # $(MU_CP) --update-section .operand.b=input.b.bin $@ || true + # $(MU_CP) --update-section .operand.c=input.c.bin $@ || true + # $(MU_CP) --update-section .args=args.bin $@ || true + +kernel.radiance.32r.elf: $(VX_SRCS) $(VX_INCLUDES) $(BINFILES) + $(MU_32R_CXX) $(MU_CFLAGS) $(VX_SRCS) $(MU_LDFLAGS) -DRADIANCE -o $@ + $(MU_32R_CP) --set-section-flags .operand.a=$(OBJCOPY_FLAGS) $@ + $(MU_32R_CP) --set-section-flags .operand.b=$(OBJCOPY_FLAGS) $@ + $(MU_32R_CP) --set-section-flags .operand.c=$(OBJCOPY_FLAGS) $@ + $(MU_32R_CP) --set-section-flags .args=$(OBJCOPY_FLAGS) $@ + $(MU_32R_CP) --update-section .operand.a=input.a.bin $@ || true + $(MU_32R_CP) --update-section .operand.b=input.b.bin $@ || true + $(MU_32R_CP) --update-section .operand.c=input.c.bin $@ || true + $(MU_32R_CP) --update-section .args=args.bin $@ || true + +ifneq ($(CONFIG),) +kernel.radiance$(CONFIGEXT).elf: kernel.radiance.elf + cp $< $@ +endif + +clean: + rm -rf *.o + +clean-all: clean + rm -rf kernel*.elf kernel*.dump diff --git a/tests/regression/demo/Makefile b/tests/regression/demo/Makefile deleted file mode 100644 index 349f7ba4..00000000 --- a/tests/regression/demo/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = demo - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n64 - -include ../common.mk \ No newline at end of file diff --git a/tests/regression/demo/common.h b/tests/regression/demo/common.h deleted file mode 100644 index 941983ac..00000000 --- a/tests/regression/demo/common.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -#ifndef TYPE -#define TYPE float -#endif - -typedef struct { - uint32_t num_tasks; - uint32_t task_size; - uint64_t src0_addr; - uint64_t src1_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif diff --git a/tests/regression/demo/kernel.cpp b/tests/regression/demo/kernel.cpp deleted file mode 100644 index 49945440..00000000 --- a/tests/regression/demo/kernel.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include -#include -#include "common.h" - -void kernel_body(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto src0_ptr = reinterpret_cast(arg->src0_addr); - auto src1_ptr = reinterpret_cast(arg->src1_addr); - auto dst_ptr = reinterpret_cast(arg->dst_addr); - - uint32_t count = arg->task_size; - uint32_t offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i]; - } -} - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, arg); - return 0; -} diff --git a/tests/regression/demo/main.cpp b/tests/regression/demo/main.cpp deleted file mode 100644 index f14f66c3..00000000 --- a/tests/regression/demo/main.cpp +++ /dev/null @@ -1,245 +0,0 @@ -#include -#include -#include -#include -#include -#include "common.h" - -#define FLOAT_ULP 6 - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -template -class Comparator {}; - -template <> -class Comparator { -public: - static const char* type_str() { - return "integer"; - } - static int generate() { - return rand(); - } - static bool compare(int a, int b, int index, int errors) { - if (a != b) { - if (errors < 100) { - printf("*** error: [%d] expected=%d, actual=%d\n", index, a, b); - } - return false; - } - return true; - } -}; - -template <> -class Comparator { -private: - union Float_t { float f; int i; }; -public: - static const char* type_str() { - return "float"; - } - static int generate() { - return static_cast(rand()) / RAND_MAX; - } - static bool compare(float a, float b, int index, int errors) { - union fi_t { float f; int32_t i; }; - fi_t fa, fb; - fa.f = a; - fb.f = b; - auto d = std::abs(fa.i - fb.i); - if (d > FLOAT_ULP) { - if (errors < 100) { - printf("*** error: [%d] expected=%f, actual=%f\n", index, a, b); - } - return false; - } - return true; - } -}; - -const char* kernel_file = "kernel.bin"; -uint32_t count = 16; - -vx_device_h device = nullptr; -std::vector source_data; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src0_addr); - vx_mem_free(device, kernel_arg.src1_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - auto ref = source_data[2 * i + 0] + source_data[2 * i + 1]; - auto cur = buf_ptr[i]; - if (!Comparator::compare(cur, ref, i, errors)) { - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint64_t num_cores, num_warps, num_threads; - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - - uint32_t num_tasks = num_cores * num_warps * num_threads; - uint32_t num_points = count * num_tasks; - uint32_t buf_size = num_points * sizeof(TYPE); - - std::cout << "data type: " << Comparator::type_str() << std::endl; - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - - kernel_arg.num_tasks = num_tasks; - kernel_arg.task_size = count; - - std::cout << "dev_src0=0x" << std::hex << kernel_arg.src0_addr << std::endl; - std::cout << "dev_src1=0x" << std::hex << kernel_arg.src1_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - staging_buf.resize(alloc_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // generate source data - source_data.resize(2 * num_points); - for (uint32_t i = 0; i < source_data.size(); ++i) { - source_data[i] = Comparator::generate(); - } - - // upload source buffer0 - { - std::cout << "upload source buffer0" << std::endl; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = source_data[2 * i + 0]; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src0_addr, staging_buf.data(), buf_size)); - } - - // upload source buffer1 - { - std::cout << "upload source buffer1" << std::endl; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = source_data[2 * i + 1]; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src1_addr, staging_buf.data(), buf_size)); - } - - // clear destination buffer - std::cout << "clear destination buffer" << std::endl; - memset(staging_buf.data(), 0, num_points * sizeof(TYPE)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), buf_size)); - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, buf_size, num_points)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/diverge/Makefile b/tests/regression/diverge/Makefile deleted file mode 100644 index 8c56f28d..00000000 --- a/tests/regression/diverge/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = diverge - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n16 - -include ../common.mk diff --git a/tests/regression/diverge/common.h b/tests/regression/diverge/common.h deleted file mode 100644 index 5e5bf23f..00000000 --- a/tests/regression/diverge/common.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -typedef struct { - uint32_t num_points; - uint64_t src_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif \ No newline at end of file diff --git a/tests/regression/diverge/kernel.cpp b/tests/regression/diverge/kernel.cpp deleted file mode 100644 index 3924b920..00000000 --- a/tests/regression/diverge/kernel.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include -#include -#include -#include "common.h" - -// Parallel Selection sort - -void kernel_body(int task_id, kernel_arg_t* __UNIFORM__ arg) { - int32_t* src_ptr = (int32_t*)arg->src_addr; - int32_t* dst_ptr = (int32_t*)arg->dst_addr; - - int value = src_ptr[task_id]; - - // none taken - if (task_id >= 0x7fffffff) { - value = 0; - } else { - value += 2; - } - - // diverge - if (task_id > 1) { - if (task_id > 2) { - value += 6; - } else { - value += 5; - } - } else { - if (task_id > 0) { - value += 4; - } else { - value += 3; - } - } - - // all taken - if (task_id >= 0) { - value += 7; - } else { - value = 0; - } - - // loop - for (int i = 0, n = task_id; i < n; ++i) { - value += src_ptr[i]; - } - - // switch - switch (task_id) { - case 0: - value += 1; - break; - case 1: - value -= 1; - break; - case 2: - value *= 3; - break; - case 3: - value *= 5; - break; - default: - assert(task_id < arg->num_points); - break; - } - - // select - value += (task_id >= 0) ? ((task_id > 5) ? src_ptr[0] : task_id) : ((task_id < 5) ? src_ptr[1] : -task_id); - - // min/max - value += std::min(src_ptr[task_id], value); - value += std::max(src_ptr[task_id], value); - - dst_ptr[task_id] = value; -} - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg); - return 0; -} diff --git a/tests/regression/diverge/main.cpp b/tests/regression/diverge/main.cpp deleted file mode 100644 index d5de1bc1..00000000 --- a/tests/regression/diverge/main.cpp +++ /dev/null @@ -1,268 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -void gen_input_data(uint32_t num_points) { - src_data.resize(num_points); - - for (uint32_t i = 0; i < src_data.size(); ++i) { - int value = std::rand(); - src_data[i] = value; - //std::cout << std::dec << i << ": value=0x" << std::hex << value << std::endl; - } -} - -void gen_ref_data(uint32_t num_points) { - ref_data.resize(num_points); - - for (int i = 0; i < (int)ref_data.size(); ++i) { - int value = src_data.at(i); - - // none taken - if (i >= 0x7fffffff) { - value = 0; - } else { - value += 2; - } - - // diverge - if (i > 1) { - if (i > 2) { - value += 6; - } else { - value += 5; - } - } else { - if (i > 0) { - value += 4; - } else { - value += 3; - } - } - - // all taken - if (i >= 0) { - value += 7; - } else { - value = 0; - } - - // loop - for (int j = 0, n = i; j < n; ++j) { - value += src_data.at(j); - } - - // switch - switch (i) { - case 0: - value += 1; - break; - case 1: - value -= 1; - break; - case 2: - value *= 3; - break; - case 3: - value *= 5; - break; - default: - assert(i < (int)num_points); - break; - } - - // select - value += (i >= 0) ? ((i > 5) ? src_data.at(0) : i) : ((i < 5) ? src_data.at(1) : -i); - - // min/max - value += std::min(src_data.at(i), value); - value += std::max(src_data.at(i), value); - - ref_data[i] = value; - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - int ref = ref_data.at(i); - int cur = buf_ptr[i]; - if (cur != ref) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint32_t num_points = count; - - // generate input data - gen_input_data(num_points); - - // generate reference data - gen_ref_data(num_points); - - uint32_t src_buf_size = src_data.size() * sizeof(int32_t); - uint32_t dst_buf_size = ref_data.size() * sizeof(int32_t); - - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr)); - RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - - kernel_arg.num_points = num_points; - - std::cout << "dev_src=0x" << std::hex << kernel_arg.src_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max(src_buf_size, - std::max(dst_buf_size, - sizeof(kernel_arg_t))); - staging_buf.resize(staging_buf_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // upload source buffer - { - std::cout << "upload source buffer" << std::endl; - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_data.data(), num_points * sizeof(int32_t)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src_addr, staging_buf.data(), src_buf_size)); - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/dogfood/Makefile b/tests/regression/dogfood/Makefile deleted file mode 100644 index e36be4fc..00000000 --- a/tests/regression/dogfood/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = dogfood - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n64 -x19 -x20 - -include ../common.mk \ No newline at end of file diff --git a/tests/regression/dogfood/common.h b/tests/regression/dogfood/common.h deleted file mode 100644 index 35f30d42..00000000 --- a/tests/regression/dogfood/common.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -typedef struct { - uint32_t testid; - uint32_t num_tasks; - uint32_t task_size; - uint64_t src0_addr; - uint64_t src1_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif \ No newline at end of file diff --git a/tests/regression/dogfood/kernel.cpp b/tests/regression/dogfood/kernel.cpp deleted file mode 100644 index 58bf0f6a..00000000 --- a/tests/regression/dogfood/kernel.cpp +++ /dev/null @@ -1,396 +0,0 @@ -#include -#include -#include -#include -#include "common.h" - -typedef void (*PFN_Kernel)(int task_id, kernel_arg_t* __UNIFORM__ arg); - -inline float __ieee754_sqrtf (float x) { - asm ("fsqrt.s %0, %1" : "=f" (x) : "f" (x)); - return x; -} - -void kernel_iadd(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (int32_t*)arg->src0_addr; - auto src1_ptr = (int32_t*)arg->src1_addr; - auto dst_ptr = (int32_t*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - int32_t a = src0_ptr[offset+i]; - int32_t b = src1_ptr[offset+i]; - int32_t c = a + b; - dst_ptr[offset+i] = c; - } -} - -void kernel_imul(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (int32_t*)arg->src0_addr; - auto src1_ptr = (int32_t*)arg->src1_addr; - auto dst_ptr = (int32_t*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a * b; - dst_ptr[offset+i] = c; - } -} - -void kernel_idiv(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (int32_t*)arg->src0_addr; - auto src1_ptr = (int32_t*)arg->src1_addr; - auto dst_ptr = (int32_t*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a / b; - dst_ptr[offset+i] = c; - } -} - -void kernel_idiv_mul(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (int32_t*)arg->src0_addr; - auto src1_ptr = (int32_t*)arg->src1_addr; - auto dst_ptr = (int32_t*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a / b; - auto d = a * b; - auto e = c + d; - dst_ptr[offset+i] = e; - } -} - -void kernel_fadd(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - float a = src0_ptr[offset+i]; - float b = src1_ptr[offset+i]; - float c = a + b; - dst_ptr[offset+i] = c; - } -} - -void kernel_fsub(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a - b; - dst_ptr[offset+i] = c; - } -} - -void kernel_fmul(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a * b; - dst_ptr[offset+i] = c; - } -} - -void kernel_fmadd(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a * b + b; - dst_ptr[offset+i] = c; - } -} - -void kernel_fmsub(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a * b - b; - dst_ptr[offset+i] = c; - } -} - -void kernel_fnmadd(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c =-a * b - b; - dst_ptr[offset+i] = c; - } -} - -void kernel_fnmsub(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c =-a * b + b; - dst_ptr[offset+i] = c; - } -} - -void kernel_fnmadd_madd(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c =-a * b - b; - auto d = a * b + b; - auto e = c + d; - dst_ptr[offset+i] = e; - } -} - -void kernel_fdiv(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a / b; - dst_ptr[offset+i] = c; - } -} - -void kernel_fdiv2(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a / b; - auto d = b / a; - auto e = c + d; - dst_ptr[offset+i] = e; - } -} - -void kernel_fsqrt(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = __ieee754_sqrtf(a * b); - dst_ptr[offset+i] = c; - } -} - -void kernel_ftoi(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (int32_t*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a + b; - auto d = (int32_t)c; - dst_ptr[offset+i] = d; - } -} - -void kernel_ftou(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (float*)arg->src0_addr; - auto src1_ptr = (float*)arg->src1_addr; - auto dst_ptr = (uint32_t*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a + b; - auto d = (uint32_t)c; - dst_ptr[offset+i] = d; - } -} - -void kernel_itof(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (int32_t*)arg->src0_addr; - auto src1_ptr = (int32_t*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a + b; - auto d = (float)c; - dst_ptr[offset+i] = d; - } -} - -void kernel_utof(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto count = arg->task_size; - auto src0_ptr = (int32_t*)arg->src0_addr; - auto src1_ptr = (int32_t*)arg->src1_addr; - auto dst_ptr = (float*)arg->dst_addr; - auto offset = task_id * count; - - for (uint32_t i = 0; i < count; ++i) { - auto a = src0_ptr[offset+i]; - auto b = src1_ptr[offset+i]; - auto c = a + b; - auto d = (float)c; - dst_ptr[offset+i] = d; - } -} - -void kernel_bar(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto num_warps = vx_num_warps(); - auto num_threads = vx_num_threads(); - - auto cid = vx_core_id(); - auto wid = vx_warp_id(); - auto tid = vx_thread_id(); - - auto src0_ptr = (uint32_t*)arg->src0_addr; - auto dst_ptr = (uint32_t*)arg->dst_addr; - - // per warp delay - uint32_t barrier_stall = 0; - for (int i = 0; i <= wid; ++i) { - barrier_stall += src0_ptr[0] * src0_ptr[i]; - } - - // memory fence - vx_fence(); - - // local barrier - vx_barrier(0, num_warps); - - // update destination - auto src_idx = (cid * num_warps + (num_warps - 1 - wid)) * num_threads + tid; - dst_ptr[task_id] = src0_ptr[src_idx] + barrier_stall; -} - -void kernel_gbar(int task_id, kernel_arg_t* __UNIFORM__ arg) { - auto num_cores = vx_num_cores(); - auto num_warps = vx_num_warps(); - auto num_threads = vx_num_threads(); - - auto cid = vx_core_id(); - auto wid = vx_warp_id(); - auto tid = vx_thread_id(); - - auto src0_ptr = (uint32_t*)arg->src0_addr; - auto dst_ptr = (uint32_t*)arg->dst_addr; - - // per core delay - uint32_t barrier_stall = 0; - for (int i = 0; i <= cid; ++i) { - for (int j = 0; j <= wid; ++j) { - barrier_stall += src0_ptr[0] * src0_ptr[i + j]; - } - } - - // memory fence - vx_fence(); - - // global barrier - vx_barrier(0x80000000, num_cores); - - // update destination - auto src_idx = ((num_cores - 1 - cid) * num_warps + (num_warps - 1 - wid)) * num_threads + tid; - dst_ptr[task_id] = src0_ptr[src_idx] + barrier_stall; -} - -static const PFN_Kernel sc_tests[] = { - kernel_iadd, - kernel_imul, - kernel_idiv, - kernel_idiv_mul, - kernel_fadd, - kernel_fsub, - kernel_fmul, - kernel_fmadd, - kernel_fmsub, - kernel_fnmadd, - kernel_fnmsub, - kernel_fnmadd_madd, - kernel_fdiv, - kernel_fdiv2, - kernel_fsqrt, - kernel_ftoi, - kernel_ftou, - kernel_itof, - kernel_utof, - kernel_bar, - kernel_gbar -}; - -int main() { - auto arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)sc_tests[arg->testid], arg); - return 0; -} diff --git a/tests/regression/dogfood/main.cpp b/tests/regression/dogfood/main.cpp deleted file mode 100644 index 25baf736..00000000 --- a/tests/regression/dogfood/main.cpp +++ /dev/null @@ -1,218 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "testcases.h" -#include "common.h" - -/////////////////////////////////////////////////////////////////////////////// - -TestSuite* testSuite = nullptr; -const char* kernel_file = "kernel.bin"; -int count = 0; -std::unordered_set included; -std::unordered_set excluded; -int testid_s = 0; -int testid_e = 0; -bool stop_on_error = true; - -vx_device_h device = nullptr; -std::vector arg_buf; -std::vector src1_buf; -std::vector src2_buf; -std::vector dst_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-t: selected test] [-s: start test] [-e: end test] [-x: excluded tests]" << std::endl; - std::cout << " [-k] [-n] [-c] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:t:x:s:e:k:ch?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 't': - included.insert(atoi(optarg)); - break; - case 'x': - excluded.insert(atoi(optarg)); - break; - case 's': - testid_s = atoi(optarg); - break; - case 'e': - testid_e = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'c': - stop_on_error = false; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (testSuite) { - delete testSuite; - } - if (device) { - vx_mem_free(device, kernel_arg.src0_addr); - vx_mem_free(device, kernel_arg.src1_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -int main(int argc, char *argv[]) { - int exitcode = 0; - - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::cout << std::dec; - - std::cout << "test ids: " << testid_s << " - " << testid_e << std::endl; - std::cout << "workitem size: " << count << std::endl; - std::cout << "using kernel: " << kernel_file << std::endl; - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint64_t num_cores, num_warps, num_threads; - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - - int num_tasks = num_cores * num_warps * num_threads; - int num_points = count * num_tasks; - size_t buf_size = num_points * sizeof(uint32_t); - - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload kernel" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - - kernel_arg.num_tasks = num_tasks; - kernel_arg.task_size = count; - - std::cout << "dev_src0=0x" << std::hex << kernel_arg.src0_addr << std::dec << std::endl; - std::cout << "dev_src1=0x" << std::hex << kernel_arg.src1_addr << std::dec << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::dec << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - arg_buf.resize(sizeof(kernel_arg_t)); - src1_buf.resize(buf_size); - src2_buf.resize(buf_size); - dst_buf.resize(buf_size); - - // allocate test suite - testSuite = new TestSuite(device); - if (testid_e == 0) { - testid_e = (testSuite->size() - 1); - } - // execute tests - for (int t = testid_s; t <= testid_e; ++t) { - if (!included.empty()) { - if (included.count(t) == 0) - continue; - } - if (!excluded.empty()) { - if (excluded.count(t) != 0) - continue; - } - auto test = testSuite->get_test(t); - auto name = test->name(); - - std::cout << "Test" << t << ": " << name << std::endl; - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - kernel_arg.testid = t; - memcpy(arg_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, arg_buf.data(), sizeof(kernel_arg_t))); - - // get test arguments - std::cout << "get test arguments" << std::endl; - RT_CHECK(test->setup(num_points, (void*)src1_buf.data(), (void*)src2_buf.data())); - - // upload source buffer0 - std::cout << "upload source buffer0" << std::endl; - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src0_addr, src1_buf.data(), buf_size)); - - // upload source buffer1 - std::cout << "upload source buffer1" << std::endl; - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src1_addr, src2_buf.data(), buf_size)); - - // clear destination buffer - std::cout << "clear destination buffer" << std::endl; - for (int i = 0; i < num_points; ++i) { - ((uint32_t*)dst_buf.data())[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, dst_buf.data(), buf_size)); - - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, dst_buf.data(), kernel_arg.dst_addr, buf_size)); - - // verify destination - std::cout << "verify test result" << std::endl; - int errors = test->verify(num_points, dst_buf.data(), src1_buf.data(), src2_buf.data()); - if (errors != 0) { - std::cout << "found " << std::dec << errors << " errors!" << std::endl; - std::cout << "Test" << t << "-" << name << " FAILED!" << std::endl << std::flush; - if (stop_on_error) { - cleanup(); - exit(1); - } - exitcode = 1; - } else { - std::cout << "Test" << t << "-" << name << " PASSED!" << std::endl << std::flush; - } - } - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return exitcode; -} \ No newline at end of file diff --git a/tests/regression/dogfood/testcases.h b/tests/regression/dogfood/testcases.h deleted file mode 100644 index b2cd55e1..00000000 --- a/tests/regression/dogfood/testcases.h +++ /dev/null @@ -1,821 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -void cleanup(); - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -union Float_t { - float f; - int i; - struct { - uint32_t man : 23; - uint32_t exp : 8; - uint32_t sign : 1; - } parts; -}; - -inline float fround(float x, int32_t precision = 8) { - auto power_of_10 = std::pow(10, precision); - return std::round(x * power_of_10) / power_of_10; -} - -inline bool almost_equal_eps(float a, float b, int ulp = 128) { - auto eps = std::numeric_limits::epsilon() * (std::max(fabs(a), fabs(b)) * ulp); - auto d = fabs(a - b); - if (d > eps) { - std::cout << "*** almost_equal_eps: d=" << d << ", eps=" << eps << std::endl; - return false; - } - return true; -} - -inline bool almost_equal_ulp(float a, float b, int32_t ulp = 6) { - Float_t fa{a}, fb{b}; - auto d = std::abs(fa.i - fb.i); - if (d > ulp) { - std::cout << "*** almost_equal_ulp: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl; - return false; - } - return true; -} - -inline bool almost_equal(float a, float b) { - if (a == b) - return true; - /*if (almost_equal_eps(a, b)) - return true;*/ - return almost_equal_ulp(a, b); -} - -class ITestCase; - -class TestSuite { -public: - TestSuite(vx_device_h device); - ~TestSuite(); - - ITestCase* get_test(int testid) const; - - void add_test(ITestCase* test); - - size_t size() const; - - vx_device_h device() const; - -private: - std::vector _tests; - vx_device_h device_; -}; - -class ITestCase { -public: - ITestCase(TestSuite* suite, const char* name) - : suite_(suite) - , name_(name) - {} - - virtual ~ITestCase() {} - - TestSuite* suite() const { - return suite_; - } - - const char* name() const { - return name_; - } - - virtual int setup(uint32_t n, void* src1, void* src2) = 0; - - virtual int verify(uint32_t n, void* dst, const void* src1, const void* src2) = 0; - -protected: - TestSuite* suite_; - const char* const name_; -}; - -class Test_IADD : public ITestCase { -public: - Test_IADD(TestSuite* suite) : ITestCase(suite, "iadd") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = n/2 - i; - b[i] = n/2 + i; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - auto c = (int32_t*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = a[i] + b[i]; - if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_IMUL : public ITestCase { -public: - Test_IMUL(TestSuite* suite) : ITestCase(suite, "imul") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = n/2 - i; - b[i] = n/2 + i; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - auto c = (int32_t*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = a[i] * b[i]; - if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_IDIV : public ITestCase { -public: - Test_IDIV(TestSuite* suite) : ITestCase(suite, "idiv") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = n/2 - i; - b[i] = n/2 + i; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - auto c = (int32_t*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = a[i] / b[i]; - if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_IDIV_MUL : public ITestCase { -public: - Test_IDIV_MUL(TestSuite* suite) : ITestCase(suite, "idiv-mul") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = n/2 - i; - b[i] = n/2 + i; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - auto c = (int32_t*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto x = a[i] / b[i]; - auto y = a[i] * b[i]; - auto ref = x + y; - if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FADD : public ITestCase { -public: - Test_FADD(TestSuite* suite) : ITestCase(suite, "fadd") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = a[i] + b[i]; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FSUB : public ITestCase { -public: - Test_FSUB(TestSuite* suite) : ITestCase(suite, "fsub") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = a[i] - b[i]; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FMUL : public ITestCase { -public: - Test_FMUL(TestSuite* suite) : ITestCase(suite, "fmul") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = a[i] * b[i]; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FMADD : public ITestCase { -public: - Test_FMADD(TestSuite* suite) : ITestCase(suite, "fmadd") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = a[i] * b[i] + b[i]; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FMSUB : public ITestCase { -public: - Test_FMSUB(TestSuite* suite) : ITestCase(suite, "fmsub") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = a[i] * b[i] - b[i]; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FNMADD : public ITestCase { -public: - Test_FNMADD(TestSuite* suite) : ITestCase(suite, "fnmadd") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = -a[i] * b[i] - b[i]; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FNMSUB : public ITestCase { -public: - Test_FNMSUB(TestSuite* suite) : ITestCase(suite, "fnmsub") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = -a[i] * b[i] + b[i]; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FNMADD_MADD : public ITestCase { -public: - Test_FNMADD_MADD(TestSuite* suite) : ITestCase(suite, "fnmadd-madd") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto x = -a[i] * b[i] - b[i]; - auto y = a[i] * b[i] + b[i]; - auto ref = x + y; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FDIV : public ITestCase { -public: - Test_FDIV(TestSuite* suite) : ITestCase(suite, "fdiv") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = a[i] / b[i]; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FDIV2 : public ITestCase { -public: - Test_FDIV2(TestSuite* suite) : ITestCase(suite, "fdiv2") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = fround((n - i) * (1.0f/n)); - b[i] = fround((n + i) * (1.0f/n)); - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto x = a[i] / b[i]; - auto y = b[i] / a[i]; - auto ref = x + y; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FSQRT : public ITestCase { -public: - Test_FSQRT(TestSuite* suite) : ITestCase(suite, "fsqrt") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - float q = 1.0f + (i % 64); - a[i] = q; - b[i] = q; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto ref = sqrt(a[i] * b[i]); - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FTOI : public ITestCase { -public: - Test_FTOI(TestSuite* suite) : ITestCase(suite, "ftoi") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - float q = fround(float(n/2) - i + (float(i) / n)); - a[i] = q; - b[i] = q; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (int32_t*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto x = a[i] + b[i]; - auto ref = (int32_t)x; - if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_FTOU : public ITestCase { -public: - Test_FTOU(TestSuite* suite) : ITestCase(suite, "ftou") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (float*)src1; - auto b = (float*)src2; - for (uint32_t i = 0; i < n; ++i) { - float q = fround(i + (float(i) / n)); - a[i] = q; - b[i] = q; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (float*)src1; - auto b = (float*)src2; - auto c = (uint32_t*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto x = a[i] + b[i]; - auto ref = (uint32_t)x; - if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_ITOF : public ITestCase { -public: - Test_ITOF(TestSuite* suite) : ITestCase(suite, "itof") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = n/2 - i; - b[i] = n/2 - i; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (int32_t*)src1; - auto b = (int32_t*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto x = a[i] + b[i]; - auto ref = (float)x; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_UTOF : public ITestCase { -public: - Test_UTOF(TestSuite* suite) : ITestCase(suite, "utof") {} - - int setup(uint32_t n, void* src1, void* src2) override { - auto a = (uint32_t*)src1; - auto b = (uint32_t*)src2; - for (uint32_t i = 0; i < n; ++i) { - a[i] = i; - b[i] = i; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* src2) override { - int errors = 0; - auto a = (uint32_t*)src1; - auto b = (uint32_t*)src2; - auto c = (float*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto x = a[i] + b[i]; - auto ref = (float)x; - if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; - ++errors; - } - } - return errors; - } -}; - -class Test_BAR : public ITestCase { -public: - Test_BAR(TestSuite* suite) : ITestCase(suite, "bar") {} - - int setup(uint32_t n, void* src1, void* /*src2*/) override { - RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_WARPS, &num_warps_)); - if (num_warps_ == 1) { - std::cout << "Error: multiple warps configuration required!" << std::endl; - return -1; - } - RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_THREADS, &num_threads_)); - auto a = (uint32_t*)src1; - for (uint32_t i = 0; i < n; ++i) { - a[i] = i; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* /*src2*/) override { - int errors = 0; - auto a = (uint32_t*)src1; - auto c = (uint32_t*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto tid = i % num_threads_; - auto wid = (i / num_threads_) % num_warps_; - auto cid = i / (num_warps_ * num_threads_); - auto src_idx = (cid * num_warps_ + (num_warps_ - 1 - wid)) * num_threads_ + tid; - uint32_t ref = a[src_idx]; - if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; - ++errors; - } - } - return errors; - } - - uint64_t num_warps_; - uint64_t num_threads_; -}; - -class Test_GBAR : public ITestCase { -public: - Test_GBAR(TestSuite* suite) : ITestCase(suite, "gbar") {} - - int setup(uint32_t n, void* src1, void* /*src2*/) override { - RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_CORES, &num_cores_)); - if (num_cores_ == 1) { - std::cout << "Error: multiple cores configuration required!" << std::endl; - return -1; - } - RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_WARPS, &num_warps_)); - RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_THREADS, &num_threads_)); - auto a = (uint32_t*)src1; - for (uint32_t i = 0; i < n; ++i) { - a[i] = i; - } - return 0; - } - - int verify(uint32_t n, void* dst, const void* src1, const void* /*src2*/) override { - int errors = 0; - auto a = (uint32_t*)src1; - auto c = (uint32_t*)dst; - for (uint32_t i = 0; i < n; ++i) { - auto tid = i % num_threads_; - auto wid = (i / num_threads_) % num_warps_; - auto cid = i / (num_warps_ * num_threads_); - auto src_idx = ((num_cores_ - 1 - cid) * num_warps_ + (num_warps_ - 1 - wid)) * num_threads_ + tid; - uint32_t ref = a[src_idx]; - if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; - ++errors; - } - } - return errors; - } - - uint64_t num_cores_; - uint64_t num_warps_; - uint64_t num_threads_; -}; - -/////////////////////////////////////////////////////////////////////////////// - -TestSuite::TestSuite(vx_device_h device) - : device_(device) { - this->add_test(new Test_IADD(this)); - this->add_test(new Test_IMUL(this)); - this->add_test(new Test_IDIV(this)); - this->add_test(new Test_IDIV_MUL(this)); - this->add_test(new Test_FADD(this)); - this->add_test(new Test_FSUB(this)); - this->add_test(new Test_FMUL(this)); - this->add_test(new Test_FMADD(this)); - this->add_test(new Test_FMSUB(this)); - this->add_test(new Test_FNMADD(this)); - this->add_test(new Test_FNMSUB(this)); - this->add_test(new Test_FNMADD_MADD(this)); - this->add_test(new Test_FDIV(this)); - this->add_test(new Test_FDIV2(this)); - this->add_test(new Test_FSQRT(this)); - this->add_test(new Test_FTOI(this)); - this->add_test(new Test_FTOU(this)); - this->add_test(new Test_ITOF(this)); - this->add_test(new Test_UTOF(this)); - this->add_test(new Test_BAR(this)); - this->add_test(new Test_GBAR(this)); -} - -TestSuite::~TestSuite() { - for (size_t i = 0; i < _tests.size(); ++i) { - delete _tests[i]; - } -} - -ITestCase* TestSuite::get_test(int testid) const { - return _tests.at(testid); -} - -void TestSuite::add_test(ITestCase* test) { - _tests.push_back(test); -} - -size_t TestSuite::size() const { - return _tests.size(); -} - -vx_device_h TestSuite::device() const { - return device_; -} \ No newline at end of file diff --git a/tests/regression/fence/Makefile b/tests/regression/fence/Makefile deleted file mode 100644 index b2c0bddf..00000000 --- a/tests/regression/fence/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = fence - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n64 - -include ../common.mk diff --git a/tests/regression/fence/common.h b/tests/regression/fence/common.h deleted file mode 100644 index a57e5484..00000000 --- a/tests/regression/fence/common.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -typedef struct { - uint32_t num_tasks; - uint32_t task_size; - uint64_t src0_addr; - uint64_t src1_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif \ No newline at end of file diff --git a/tests/regression/fence/kernel.cpp b/tests/regression/fence/kernel.cpp deleted file mode 100644 index 15e1c25e..00000000 --- a/tests/regression/fence/kernel.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include -#include -#include -#include "common.h" - -void kernel_body(int task_id, kernel_arg_t* __UNIFORM__ arg) { - uint32_t count = arg->task_size; - int32_t* src0_ptr = (int32_t*)arg->src0_addr; - int32_t* src1_ptr = (int32_t*)arg->src1_addr; - int32_t* dst_ptr = (int32_t*)arg->dst_addr; - - uint32_t offset = task_id * count; - for (uint32_t i = 0; i < count; ++i) { - dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i]; - } - - vx_fence(); -} - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, arg); - return 0; -} diff --git a/tests/regression/fence/main.cpp b/tests/regression/fence/main.cpp deleted file mode 100644 index c9225edc..00000000 --- a/tests/regression/fence/main.cpp +++ /dev/null @@ -1,194 +0,0 @@ -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src0_addr); - vx_mem_free(device, kernel_arg.src1_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - int ref = i + i; - int cur = buf_ptr[i]; - if (cur != ref) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint64_t num_cores, num_warps, num_threads; - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - - uint32_t num_tasks = num_cores * num_warps * num_threads; - uint32_t num_points = count * num_tasks; - uint32_t buf_size = num_points * sizeof(int32_t); - - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - - kernel_arg.num_tasks = num_tasks; - kernel_arg.task_size = count; - - std::cout << "dev_src0=0x" << std::hex << kernel_arg.src0_addr << std::endl; - std::cout << "dev_src1=0x" << std::hex << kernel_arg.src1_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - staging_buf.resize(alloc_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // upload source buffer0 - { - std::cout << "upload source buffer0" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i-1; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src0_addr, staging_buf.data(), buf_size)); - } - - // upload source buffer1 - { - std::cout << "upload source buffer1" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i+1; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src1_addr, staging_buf.data(), buf_size)); - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, buf_size, num_points)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/flash_attention/Makefile b/tests/regression/flash_attention/Makefile index 3a25e4f3..b7818e74 100644 --- a/tests/regression/flash_attention/Makefile +++ b/tests/regression/flash_attention/Makefile @@ -1,7 +1,5 @@ PROJECT = flash_attention -SRCS = main.cpp common.h - # VX_SRCS = kernel.cpp # VX_SRCS = kernel.gemmini.warpspec.cpp VX_SRCS = kernel.gemmini.cpp diff --git a/tests/regression/flash_attention/main.cpp b/tests/regression/flash_attention/main.cpp deleted file mode 100644 index b1b8d522..00000000 --- a/tests/regression/flash_attention/main.cpp +++ /dev/null @@ -1,166 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "common.h" -#include "half.hpp" - -using half_float::half; -using half_float::half_cast; - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - // vx_mem_free(device, kernel_arg.addr_a); - // vx_mem_free(device, kernel_arg.addr_b); - // vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_o, buf_size)); - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint32_t dim_seqlen = 128; - uint32_t dim_headdim = 64; - - using float_type = half; - - uint32_t dst_buf_size = - dim_seqlen * dim_headdim * sizeof(ref_data[0]); - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - kernel_arg.addr_q = 0xa0000000; - kernel_arg.addr_k = 0xa1000000; - kernel_arg.addr_v = 0xa2000000; - kernel_arg.addr_o = 0xc0000000; - - kernel_arg.dim_seqlen = dim_seqlen; - kernel_arg.dim_headdim = dim_headdim; - - std::cout << "dev_addr_q=0x" << std::hex << kernel_arg.addr_q << std::endl; - std::cout << "dev_addr_k=0x" << std::hex << kernel_arg.addr_k << std::endl; - std::cout << "dev_addr_v=0x" << std::hex << kernel_arg.addr_v << std::endl; - std::cout << "dev_addr_o=0x" << std::hex << kernel_arg.addr_o << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = sizeof(kernel_arg_t); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/flops/.gitignore b/tests/regression/flops/.gitignore deleted file mode 100644 index c791df5d..00000000 --- a/tests/regression/flops/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.bin -*.dump -*.elf -flops -.depend diff --git a/tests/regression/flops/Makefile b/tests/regression/flops/Makefile deleted file mode 100644 index b5d37285..00000000 --- a/tests/regression/flops/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = flops - -SRCS = main.cpp common.h - -VX_SRCS = kernel.cpp - -OPTS ?= -n16 - -include ../common.mk diff --git a/tests/regression/flops/common.h b/tests/regression/flops/common.h deleted file mode 100644 index a609a0b4..00000000 --- a/tests/regression/flops/common.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#include - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7fff0000 -#define DEV_SMEM_START_ADDR 0xff000000 - -typedef struct { - uint32_t size; - uint32_t addr_src; - uint32_t addr_dst; -} kernel_arg_t; - -#endif diff --git a/tests/regression/flops/flops b/tests/regression/flops/flops deleted file mode 100755 index dfd6a6c8..00000000 Binary files a/tests/regression/flops/flops and /dev/null differ diff --git a/tests/regression/flops/kernel.cpp b/tests/regression/flops/kernel.cpp deleted file mode 100644 index 773e4b95..00000000 --- a/tests/regression/flops/kernel.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include -#include -#include -#include "common.h" - -void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) { - const float *A = (const float *)arg->addr_src; - float *C = (float *)arg->addr_dst; - - int incr = A[task_id]; - float sum = 0.0f; - float sum1 = 0.0f; - float sum2 = 0.0f; - float sum3 = 0.0f; - float sum4 = 0.0f; - float sum5 = 0.0f; -#pragma unroll 8 - for (int i = 0; i < 5000; i++) { - sum1 = sum2 + 5.0f; - sum2 = sum3 + 5.0f; - sum3 = sum4 + 5.0f; - sum4 = sum5 + 5.0f; - sum5 = sum1 + 5.0f; - } - - sum = sum1 + sum2 + sum3 + sum4 + sum5; - C[task_id] = static_cast(sum); -} - -int main() { - kernel_arg_t *arg = (kernel_arg_t *)KERNEL_ARG_DEV_MEM_ADDR; - const uint32_t grid_size = arg->size; -#ifdef RADIANCE - vx_spawn_tasks_cluster(grid_size, (vx_spawn_tasks_cb)kernel_body, arg); -#else - // NOTE: This kernel assumes contiguous thread scheduling for efficient shared - // memory allocation, and therefore does not work with original vx_spawn_tasks - vx_spawn_tasks_contiguous(grid_size, (vx_spawn_tasks_cb)kernel_body, arg); -#endif - return 0; -} diff --git a/tests/regression/flops/main.cpp b/tests/regression/flops/main.cpp deleted file mode 100644 index 72aa56ba..00000000 --- a/tests/regression/flops/main.cpp +++ /dev/null @@ -1,252 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - // vx_mem_free(device, kernel_arg.addr_a); - // vx_mem_free(device, kernel_arg.addr_b); - // vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -void generate_source_data(size_t size) { - src_data.resize(size); - - for (uint32_t i = 0; i < src_data.size(); ++i) { - src_data[i] = static_cast(i); - } -} - -void generate_reference_data(size_t size) { - ref_data.resize(size); - - for (uint32_t i = 0; i < ref_data.size(); ++i) { - ref_data[i] = static_cast(i) * 1000.0f; - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t size) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_dst, buf_size)); - - std::cout << "downloading result C matrix from device, device mem address=" - << std::hex << kernel_arg.addr_dst << ", size=" << std::dec - << buf_size << " bytes\n"; - std::ofstream file("output.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open output.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), buf_size); - file.close(); - - std::ofstream ref_file("reference.bin", std::ios::binary | std::ios::out); - if (!ref_file) { - std::cerr << "error: failed to open reference.bin for writing\n"; - exit(EXIT_FAILURE); - } - ref_file.write(reinterpret_cast(ref_data.data()), buf_size); - ref_file.close(); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - for (uint32_t i = 0; i < size; ++i) { - float ref = ref_data.at(i); - float cur = buf_ptr[i]; - if (std::abs((cur - ref) / ref) > 1e-6) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - size_t size = 64; - - generate_source_data(size); - generate_reference_data(size); - - uint32_t src_buf_size = src_data.size() * sizeof(src_data[0]); - uint32_t dst_buf_size = ref_data.size() * sizeof(ref_data[0]); - - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - // RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_src)); - // RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_dst)); - kernel_arg.addr_src = 0x20000UL; - kernel_arg.addr_dst = 0xc0000000UL; - kernel_arg.size = size; - - std::cout << "dev_addr_src=0x" << std::hex << kernel_arg.addr_src << std::endl; - std::cout << "dev_addr_dst=0x" << std::hex << kernel_arg.addr_dst << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max( - src_buf_size, - std::max( - src_buf_size, - std::max(dst_buf_size, sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - } - - // upload source buffer - { - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_data.data(), src_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_src, staging_buf.data(), - src_buf_size)); - - std::cout << "uploading source data to device, device mem address=" - << std::hex << kernel_arg.addr_src << ", size=" << std::dec - << src_buf_size << " bytes\n"; - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open input.a.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_buf_size); - file.close(); - } - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < ref_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_dst, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.size)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/idle/Makefile b/tests/regression/idle/Makefile index b5c78ec0..b81729a2 100644 --- a/tests/regression/idle/Makefile +++ b/tests/regression/idle/Makefile @@ -1,7 +1,5 @@ PROJECT = idle -SRCS = main.cpp common.h - VX_SRCS = kernel.cpp OPTS ?= -n16 diff --git a/tests/regression/idle/main.cpp b/tests/regression/idle/main.cpp deleted file mode 100644 index 45548d91..00000000 --- a/tests/regression/idle/main.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_a_data; -std::vector src_b_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.addr_a); - vx_mem_free(device, kernel_arg.addr_b); - vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -void generate_source_matrix(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - src_a_data.resize(dim_m * dim_k); - src_b_data.resize(dim_k * dim_n); - - for (uint32_t i = 0; i < src_a_data.size(); ++i) { - src_a_data[i] = static_cast(i); - std::cout << "A: " << i << ": value=" << src_a_data[i] << std::endl; - } - for (uint32_t i = 0; i < src_b_data.size(); ++i) { - src_b_data[i] = static_cast(i); - std::cout << "B: " << i << ": value=" << src_b_data[i] << std::endl; - } -} - -void generate_reference_matmul(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - ref_data.resize(dim_m * dim_n); - - for (uint32_t i = 0; i < dim_m; ++i) { - for (uint32_t j = 0; j < dim_n; ++j) { - float ref = 0.0f; - for (uint32_t k = 0; k < dim_k; ++k) { - ref += src_a_data[dim_k * i + k] * src_b_data[dim_n * k + j]; - } - ref_data.at(dim_n * i + j) = ref; - } - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t dim_m, uint32_t dim_n) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_c, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - for (uint32_t i = 0; i < dim_m * dim_n; ++i) { - float ref = ref_data.at(i); - float cur = buf_ptr[i]; - if (std::abs((cur - ref) / ref) > 1e-6) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - // FIXME: hardcoded - uint32_t dim_m = 64; - uint32_t dim_n = 64; - uint32_t dim_k = 64; - - generate_source_matrix(dim_m, dim_n, dim_k); - generate_reference_matmul(dim_m, dim_n, dim_k); - - uint32_t src_a_buf_size = src_a_data.size() * sizeof(src_a_data[0]); - uint32_t src_b_buf_size = src_b_data.size() * sizeof(src_b_data[0]); - uint32_t dst_buf_size = ref_data.size() * sizeof(src_a_data[0]); - - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, src_a_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_a)); - RT_CHECK(vx_mem_alloc(device, src_b_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_b)); - RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_c)); - - kernel_arg.dim_m = dim_m; - kernel_arg.dim_n = dim_n; - kernel_arg.dim_k = dim_k; - - std::cout << "dev_addr_a=0x" << std::hex << kernel_arg.addr_a << std::endl; - std::cout << "dev_addr_b=0x" << std::hex << kernel_arg.addr_b << std::endl; - std::cout << "dev_addr_c=0x" << std::hex << kernel_arg.addr_c << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max( - src_a_buf_size, - std::max( - src_b_buf_size, - std::max(dst_buf_size, sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - kernel_arg.addr_a = (uint64_t) 0xa0000000ULL; - kernel_arg.addr_b = (uint64_t) 0xa1000000ULL; - kernel_arg.addr_c = (uint64_t) 0xc0000000ULL; - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - } - - // upload source buffer - { - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_a_data.data(), src_a_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_a, staging_buf.data(), - src_a_buf_size)); - - std::cout << "uploading source A matrix to device, device mem address=" - << std::hex << kernel_arg.addr_a << ", size=" << std::dec - << src_a_buf_size << " bytes\n"; - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_a_buf_size); - file.close(); - } - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_b_data.data(), src_b_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_b, staging_buf.data(), - src_b_buf_size)); - - std::cout << "uploading source B matrix to device, device mem address=" - << std::hex << kernel_arg.addr_b << ", size=" << std::dec - << src_b_buf_size << " bytes\n"; - std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_b_buf_size); - file.close(); - } - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < ref_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_c, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.dim_m, kernel_arg.dim_n)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/io_addr/Makefile b/tests/regression/io_addr/Makefile deleted file mode 100644 index d7ace893..00000000 --- a/tests/regression/io_addr/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = io_addr - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n16 - -include ../common.mk diff --git a/tests/regression/io_addr/common.h b/tests/regression/io_addr/common.h deleted file mode 100644 index 5e5bf23f..00000000 --- a/tests/regression/io_addr/common.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -typedef struct { - uint32_t num_points; - uint64_t src_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif \ No newline at end of file diff --git a/tests/regression/io_addr/kernel.cpp b/tests/regression/io_addr/kernel.cpp deleted file mode 100644 index 5328e2b9..00000000 --- a/tests/regression/io_addr/kernel.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include -#include -#include "common.h" - -void kernel_body(int task_id, kernel_arg_t* __UNIFORM__ arg) { - uint64_t* src_ptr = (uint64_t*)arg->src_addr; - uint32_t* dst_ptr = (uint32_t*)arg->dst_addr; - - int32_t* addr_ptr = (int32_t*)(src_ptr[task_id]); - - dst_ptr[task_id] = *addr_ptr; -} - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg); - return 0; -} diff --git a/tests/regression/io_addr/main.cpp b/tests/regression/io_addr/main.cpp deleted file mode 100644 index 0272bfbc..00000000 --- a/tests/regression/io_addr/main.cpp +++ /dev/null @@ -1,237 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define NUM_ADDRS 16 - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -static uint64_t io_base_addr = IO_CSR_ADDR + IO_CSR_SIZE; - -uint64_t usr_test_mem; - -std::vector src_addrs; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_mem_free(device, usr_test_mem); - vx_dev_close(device); - } -} - -void gen_src_addrs(uint32_t num_points) { - src_addrs.resize(num_points); - - uint32_t u = 0, k = 0; - for (uint32_t i = 0; i < num_points; ++i) { - if (0 ==(i % 4)) { - k = (i + u) % NUM_ADDRS; - ++u; - } - uint32_t j = i % NUM_ADDRS; - uint64_t a = ((j == k) ? usr_test_mem : io_base_addr) + j * sizeof(uint32_t); - std::cout << std::dec << i << "," << k << ": value=0x" << std::hex << a << std::endl; - src_addrs[i] = a; - } -} - -void gen_ref_data(uint32_t num_points) { - ref_data.resize(num_points); - - for (uint32_t i = 0; i < num_points; ++i) { - int32_t j = i % NUM_ADDRS; - ref_data[i] = j * j; - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - int ref = ref_data.at(i); - int cur = buf_ptr[i]; - if (cur != ref) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - uint64_t value; - - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint32_t num_points = count; - - RT_CHECK(vx_mem_alloc(device, NUM_ADDRS * sizeof(int32_t), VX_MEM_TYPE_GLOBAL, &usr_test_mem)); - - // generate input data - gen_src_addrs(num_points); - - // generate reference data - gen_ref_data(num_points); - - uint32_t src_buf_size = num_points * sizeof(uint64_t); - uint32_t dst_buf_size = num_points * sizeof(int32_t); - - std::cout << "number of points: " << std::dec << num_points << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - - RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &value)); - kernel_arg.src_addr = value; - RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &value)); - kernel_arg.dst_addr = value; - kernel_arg.num_points = num_points; - - std::cout << "dev_src=0x" << std::hex << kernel_arg.src_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max(NUM_ADDRS * sizeof(uint64_t), - std::max(src_buf_size, - std::max(dst_buf_size, - sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // upload test address data - { - std::cout << "upload test address data" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < NUM_ADDRS; ++i) { - buf_ptr[i] = i * i; - } - RT_CHECK(vx_copy_to_dev(device, io_base_addr, staging_buf.data(), NUM_ADDRS * sizeof(int32_t))); - RT_CHECK(vx_copy_to_dev(device, usr_test_mem, staging_buf.data(), NUM_ADDRS * sizeof(int32_t))); - } - - // upload source buffer - { - std::cout << "upload source buffer" << std::endl; - auto buf_ptr = (uint64_t*)staging_buf.data(); - memcpy(buf_ptr, src_addrs.data(), src_buf_size); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src_addr, staging_buf.data(), src_buf_size)); - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/mstress/Makefile b/tests/regression/mstress/Makefile deleted file mode 100644 index c87839a0..00000000 --- a/tests/regression/mstress/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = mstress - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n64 - -include ../common.mk diff --git a/tests/regression/mstress/common.h b/tests/regression/mstress/common.h deleted file mode 100644 index 3fb4169d..00000000 --- a/tests/regression/mstress/common.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -#define NUM_LOADS 8 - -typedef struct { - uint32_t num_tasks; - uint32_t size; - uint32_t stride; - uint64_t src0_addr; - uint64_t src1_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif \ No newline at end of file diff --git a/tests/regression/mstress/kernel.cpp b/tests/regression/mstress/kernel.cpp deleted file mode 100644 index 535dfd10..00000000 --- a/tests/regression/mstress/kernel.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include -#include -#include "common.h" - -void kernel_body(int task_id, kernel_arg_t* __UNIFORM__ arg) { - uint32_t stride = arg->stride; - uint32_t* addr_ptr = (uint32_t*)arg->src0_addr; - float* src_ptr = (float*)arg->src1_addr; - float* dst_ptr = (float*)arg->dst_addr; - - uint32_t offset = task_id * stride; - - for (uint32_t i = 0; i < stride; ++i) { - float value = 0.0f; - for (uint32_t j = 0; j < NUM_LOADS; ++j) { - uint32_t addr = offset + i + j; - uint32_t index = addr_ptr[addr]; - value *= src_ptr[index]; - } - dst_ptr[offset+i] = value; - } -} - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, arg); - return 0; -} diff --git a/tests/regression/mstress/main.cpp b/tests/regression/mstress/main.cpp deleted file mode 100644 index 9b527126..00000000 --- a/tests/regression/mstress/main.cpp +++ /dev/null @@ -1,280 +0,0 @@ -#include -#include -#include -#include -#include "common.h" -#include -#include -#include -#include - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -union Float_t { - float f; - int i; - struct { - uint32_t man : 23; - uint32_t exp : 8; - uint32_t sign : 1; - } parts; -}; - -inline float fround(float x, int32_t precision = 8) { - auto power_of_10 = std::pow(10, precision); - return std::round(x * power_of_10) / power_of_10; -} - -inline bool almost_equal_eps(float a, float b, int ulp = 128) { - auto eps = std::numeric_limits::epsilon() * (std::max(fabs(a), fabs(b)) * ulp); - auto d = fabs(a - b); - if (d > eps) { - std::cout << "*** almost_equal_eps: d=" << d << ", eps=" << eps << std::endl; - return false; - } - return true; -} - -inline bool almost_equal_ulp(float a, float b, int32_t ulp = 6) { - Float_t fa{a}, fb{b}; - auto d = std::abs(fa.i - fb.i); - if (d > ulp) { - std::cout << "*** almost_equal_ulp: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl; - return false; - } - return true; -} - -inline bool almost_equal(float a, float b) { - if (a == b) - return true; - /*if (almost_equal_eps(a, b)) - return true;*/ - return almost_equal_ulp(a, b); -} - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector test_data; -std::vector addr_table; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src0_addr); - vx_mem_free(device, kernel_arg.src1_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -void gen_input_data(uint32_t num_points) { - test_data.resize(num_points); - addr_table.resize(num_points + NUM_LOADS - 1); - - for (uint32_t i = 0; i < num_points; ++i) { - float r = static_cast(std::rand()) / RAND_MAX; - test_data[i] = r; - } - - for (uint32_t i = 0; i < addr_table.size(); ++i) { - float r = static_cast(std::rand()) / RAND_MAX; - uint32_t index = static_cast(r * num_points); - assert(index < num_points); - addr_table[i] = index; - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t dst_buf_size, - uint32_t num_points) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, dst_buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - - for (uint32_t i = 0; i < num_points; ++i) { - - float ref = 0.0f; - for (uint32_t j = 0; j < NUM_LOADS; ++j) { - uint32_t addr = i + j; - uint32_t index = addr_table.at(addr); - float value = test_data.at(index); - //printf("*** [%d] addr=%d, index=%d, value=%f\n", i, addr, index, value); - ref *= value; - } - - float cur = buf_ptr[i]; - if (!almost_equal(cur, ref)) { - std::cout << "error at result #" << std::dec << i - << ": actual " << cur << ", expected " << ref << std::endl; - ++errors; - } - } - - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint64_t num_cores, num_warps, num_threads; - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - - uint32_t num_tasks = num_cores * num_warps * num_threads; - uint32_t num_points = count * num_tasks; - - // generate input data - gen_input_data(num_points); - - uint32_t addr_buf_size = addr_table.size() * sizeof(int32_t); - uint32_t src_buf_size = test_data.size() * sizeof(int32_t); - uint32_t dst_buf_size = test_data.size() * sizeof(int32_t); - - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, addr_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr)); - RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr)); - RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - - kernel_arg.num_tasks = num_tasks; - kernel_arg.stride = count; - - std::cout << "dev_addr=0x" << std::hex << kernel_arg.src0_addr << std::endl; - std::cout << "dev_src=0x" << std::hex << kernel_arg.src1_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max(src_buf_size, - std::max(addr_buf_size, - std::max(dst_buf_size, - sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // upload source buffer0 - { - std::cout << "upload address buffer" << std::endl; - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, addr_table.data(), addr_table.size() * sizeof(int32_t)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src0_addr, staging_buf.data(), addr_buf_size)); - } - - // upload source buffer1 - { - std::cout << "upload source buffer" << std::endl; - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, test_data.data(), test_data.size() * sizeof(int32_t)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src1_addr, staging_buf.data(), src_buf_size)); - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < test_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/no_mf_ext/Makefile b/tests/regression/no_mf_ext/Makefile deleted file mode 100644 index 58fcfab7..00000000 --- a/tests/regression/no_mf_ext/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = no_mf_ext - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n8 - -include ../common.mk diff --git a/tests/regression/no_mf_ext/common.h b/tests/regression/no_mf_ext/common.h deleted file mode 100644 index 05a76155..00000000 --- a/tests/regression/no_mf_ext/common.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -typedef struct { - uint32_t size; - uint64_t src_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif \ No newline at end of file diff --git a/tests/regression/no_mf_ext/kernel.cpp b/tests/regression/no_mf_ext/kernel.cpp deleted file mode 100644 index b378f953..00000000 --- a/tests/regression/no_mf_ext/kernel.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include -#include "common.h" - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - - uint32_t size = arg->size; - int32_t* src_ptr = (int32_t*)arg->src_addr; - int32_t* dst_ptr = (int32_t*)arg->dst_addr; - - for (uint32_t i = 0; i < size; ++i) { - dst_ptr[i] = src_ptr[i]; - } - - return 0; -} diff --git a/tests/regression/no_mf_ext/main.cpp b/tests/regression/no_mf_ext/main.cpp deleted file mode 100644 index e711b99a..00000000 --- a/tests/regression/no_mf_ext/main.cpp +++ /dev/null @@ -1,174 +0,0 @@ -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - int ref = i-1; - int cur = buf_ptr[i]; - if (cur != ref) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint32_t num_points = count; - uint32_t buf_size = num_points * sizeof(int32_t); - - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - - kernel_arg.size = num_points; - - std::cout << "dev_src=0x" << std::hex << kernel_arg.src_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - staging_buf.resize(alloc_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // upload source buffer0 - { - std::cout << "upload source buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i-1; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src_addr, staging_buf.data(), buf_size)); - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, buf_size, num_points)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/no_smem/Makefile b/tests/regression/no_smem/Makefile deleted file mode 100644 index 8c8e619c..00000000 --- a/tests/regression/no_smem/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT = no_smem - -OPTS ?= -n8 - -VX_SRCS = kernel.cpp $(VORTEX_KN_PATH)/src/vx_perf.c $(VORTEX_KN_PATH)/src/vx_syscalls.c $(VORTEX_KN_PATH)/src/vx_print.S $(VORTEX_KN_PATH)/src/vx_start.S - -SRCS = main.cpp - -include ../common.mk - -VX_CFLAGS += -DSM_DISABLE -VX_LDFLAGS = -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) - -VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc -VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-g++ -VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump -VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy \ No newline at end of file diff --git a/tests/regression/no_smem/common.h b/tests/regression/no_smem/common.h deleted file mode 100644 index 05a76155..00000000 --- a/tests/regression/no_smem/common.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -typedef struct { - uint32_t size; - uint64_t src_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif \ No newline at end of file diff --git a/tests/regression/no_smem/kernel.cpp b/tests/regression/no_smem/kernel.cpp deleted file mode 100644 index b378f953..00000000 --- a/tests/regression/no_smem/kernel.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include -#include "common.h" - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - - uint32_t size = arg->size; - int32_t* src_ptr = (int32_t*)arg->src_addr; - int32_t* dst_ptr = (int32_t*)arg->dst_addr; - - for (uint32_t i = 0; i < size; ++i) { - dst_ptr[i] = src_ptr[i]; - } - - return 0; -} diff --git a/tests/regression/no_smem/main.cpp b/tests/regression/no_smem/main.cpp deleted file mode 100644 index 53db0465..00000000 --- a/tests/regression/no_smem/main.cpp +++ /dev/null @@ -1,174 +0,0 @@ -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - int ref = i-1; - int cur = buf_ptr[i]; - if (cur != ref) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint32_t num_points = count; - uint32_t buf_size = num_points * sizeof(int32_t); - - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - - kernel_arg.size = num_points; - - std::cout << "dev_src=0x" << std::hex << kernel_arg.src_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - staging_buf.resize(alloc_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // upload source buffer0 - { - std::cout << "upload source buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i-1; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src_addr, staging_buf.data(), buf_size)); - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, buf_size, num_points)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/printf/Makefile b/tests/regression/printf/Makefile deleted file mode 100644 index 09793ab4..00000000 --- a/tests/regression/printf/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = printf - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n4 - -include ../common.mk diff --git a/tests/regression/printf/common.h b/tests/regression/printf/common.h deleted file mode 100644 index ac100949..00000000 --- a/tests/regression/printf/common.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -typedef struct { - uint32_t num_points; - uint64_t src_addr; -} kernel_arg_t; - -#endif \ No newline at end of file diff --git a/tests/regression/printf/kernel.cpp b/tests/regression/printf/kernel.cpp deleted file mode 100644 index 8f35de7e..00000000 --- a/tests/regression/printf/kernel.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include -#include -#include "common.h" - -void kernel_body(int task_id, kernel_arg_t* __UNIFORM__ arg) { - int cid = vx_core_id(); - int* src_ptr = (int*)arg->src_addr; - char value = 'A' + src_ptr[task_id]; - vx_printf("cid=%d: task=%d, value=%c\n", cid, task_id, value); -} - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg); - return 0; -} diff --git a/tests/regression/printf/main.cpp b/tests/regression/printf/main.cpp deleted file mode 100644 index 3a920294..00000000 --- a/tests/regression/printf/main.cpp +++ /dev/null @@ -1,138 +0,0 @@ -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 4; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src_addr); - vx_dev_close(device); - } -} - -int run_test() { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint64_t num_warps, num_threads; - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - - uint32_t num_points = count; - uint32_t buf_size = count * sizeof(int32_t); - - std::cout << "number of points: " << count << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr)); - - kernel_arg.num_points = num_points; - - std::cout << "dev_src=0x" << std::hex << kernel_arg.src_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - staging_buf.resize(alloc_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // upload source buffer0 - { - std::cout << "upload source buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = i; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src_addr, staging_buf.data(), buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test()); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/rickroll/Makefile b/tests/regression/rickroll/Makefile index 7717878d..45489f69 100644 --- a/tests/regression/rickroll/Makefile +++ b/tests/regression/rickroll/Makefile @@ -1,7 +1,5 @@ PROJECT = rickroll -SRCS = main.cpp common.h - VX_SRCS = kernel.cpp OPTS ?= -n16 diff --git a/tests/regression/rickroll/main.cpp b/tests/regression/rickroll/main.cpp deleted file mode 100644 index 54531062..00000000 --- a/tests/regression/rickroll/main.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_a_data; -std::vector src_b_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.addr_a); - vx_mem_free(device, kernel_arg.addr_b); - vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -void generate_source_matrix(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - src_a_data.resize(dim_m * dim_k); - src_b_data.resize(dim_k * dim_n); - - for (uint32_t i = 0; i < src_a_data.size(); ++i) { - src_a_data[i] = static_cast(i); - std::cout << "A: " << i << ": value=" << src_a_data[i] << std::endl; - } - for (uint32_t i = 0; i < src_b_data.size(); ++i) { - src_b_data[i] = static_cast(i); - std::cout << "B: " << i << ": value=" << src_b_data[i] << std::endl; - } -} - -void generate_reference_matmul(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - ref_data.resize(dim_m * dim_n); - - for (uint32_t i = 0; i < dim_m; ++i) { - for (uint32_t j = 0; j < dim_n; ++j) { - float ref = 0.0f; - for (uint32_t k = 0; k < dim_k; ++k) { - ref += src_a_data[dim_k * i + k] * src_b_data[dim_n * k + j]; - } - ref_data.at(dim_n * i + j) = ref; - } - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t dim_m, uint32_t dim_n) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_c, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - for (uint32_t i = 0; i < dim_m * dim_n; ++i) { - float ref = ref_data.at(i); - float cur = buf_ptr[i]; - if (std::abs((cur - ref) / ref) > 1e-6) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - // FIXME: hardcoded - uint32_t dim_m = 64; - uint32_t dim_n = 64; - uint32_t dim_k = 64; - - generate_source_matrix(dim_m, dim_n, dim_k); - generate_reference_matmul(dim_m, dim_n, dim_k); - - uint32_t src_a_buf_size = src_a_data.size() * sizeof(src_a_data[0]); - uint32_t src_b_buf_size = src_b_data.size() * sizeof(src_b_data[0]); - uint32_t dst_buf_size = ref_data.size() * sizeof(src_a_data[0]); - - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, src_a_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_a)); - RT_CHECK(vx_mem_alloc(device, src_b_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_b)); - RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_c)); - - kernel_arg.dim_m = dim_m; - kernel_arg.dim_n = dim_n; - kernel_arg.dim_k = dim_k; - - std::cout << "dev_addr_a=0x" << std::hex << kernel_arg.addr_a << std::endl; - std::cout << "dev_addr_b=0x" << std::hex << kernel_arg.addr_b << std::endl; - std::cout << "dev_addr_c=0x" << std::hex << kernel_arg.addr_c << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max( - src_a_buf_size, - std::max( - src_b_buf_size, - std::max(dst_buf_size, sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - kernel_arg.addr_a = (uint64_t) 0x20000; - kernel_arg.addr_b = (uint64_t) 0x28000; - kernel_arg.addr_c = (uint64_t) 0xc0000000ULL; - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - } - - // upload source buffer - { - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_a_data.data(), src_a_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_a, staging_buf.data(), - src_a_buf_size)); - - std::cout << "uploading source A matrix to device, device mem address=" - << std::hex << kernel_arg.addr_a << ", size=" << std::dec - << src_a_buf_size << " bytes\n"; - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_a_buf_size); - file.close(); - } - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_b_data.data(), src_b_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_b, staging_buf.data(), - src_b_buf_size)); - - std::cout << "uploading source B matrix to device, device mem address=" - << std::hex << kernel_arg.addr_b << ", size=" << std::dec - << src_b_buf_size << " bytes\n"; - std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_b_buf_size); - file.close(); - } - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < ref_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_c, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.dim_m, kernel_arg.dim_n)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/rickroll/rickroll b/tests/regression/rickroll/rickroll deleted file mode 100755 index 67ade61b..00000000 Binary files a/tests/regression/rickroll/rickroll and /dev/null differ diff --git a/tests/regression/sgemm_gemmini/Makefile b/tests/regression/sgemm_gemmini/Makefile index a36f6d21..d501f0df 100644 --- a/tests/regression/sgemm_gemmini/Makefile +++ b/tests/regression/sgemm_gemmini/Makefile @@ -1,7 +1,5 @@ PROJECT = sgemm_gemmini -SRCS = main.cpp common.h - VX_SRCS = kernel.cpp OPTS ?= -n16 diff --git a/tests/regression/sgemm_gemmini/main.cpp b/tests/regression/sgemm_gemmini/main.cpp deleted file mode 100644 index 54531062..00000000 --- a/tests/regression/sgemm_gemmini/main.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_a_data; -std::vector src_b_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.addr_a); - vx_mem_free(device, kernel_arg.addr_b); - vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -void generate_source_matrix(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - src_a_data.resize(dim_m * dim_k); - src_b_data.resize(dim_k * dim_n); - - for (uint32_t i = 0; i < src_a_data.size(); ++i) { - src_a_data[i] = static_cast(i); - std::cout << "A: " << i << ": value=" << src_a_data[i] << std::endl; - } - for (uint32_t i = 0; i < src_b_data.size(); ++i) { - src_b_data[i] = static_cast(i); - std::cout << "B: " << i << ": value=" << src_b_data[i] << std::endl; - } -} - -void generate_reference_matmul(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - ref_data.resize(dim_m * dim_n); - - for (uint32_t i = 0; i < dim_m; ++i) { - for (uint32_t j = 0; j < dim_n; ++j) { - float ref = 0.0f; - for (uint32_t k = 0; k < dim_k; ++k) { - ref += src_a_data[dim_k * i + k] * src_b_data[dim_n * k + j]; - } - ref_data.at(dim_n * i + j) = ref; - } - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t dim_m, uint32_t dim_n) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_c, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - for (uint32_t i = 0; i < dim_m * dim_n; ++i) { - float ref = ref_data.at(i); - float cur = buf_ptr[i]; - if (std::abs((cur - ref) / ref) > 1e-6) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - // FIXME: hardcoded - uint32_t dim_m = 64; - uint32_t dim_n = 64; - uint32_t dim_k = 64; - - generate_source_matrix(dim_m, dim_n, dim_k); - generate_reference_matmul(dim_m, dim_n, dim_k); - - uint32_t src_a_buf_size = src_a_data.size() * sizeof(src_a_data[0]); - uint32_t src_b_buf_size = src_b_data.size() * sizeof(src_b_data[0]); - uint32_t dst_buf_size = ref_data.size() * sizeof(src_a_data[0]); - - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, src_a_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_a)); - RT_CHECK(vx_mem_alloc(device, src_b_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_b)); - RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_c)); - - kernel_arg.dim_m = dim_m; - kernel_arg.dim_n = dim_n; - kernel_arg.dim_k = dim_k; - - std::cout << "dev_addr_a=0x" << std::hex << kernel_arg.addr_a << std::endl; - std::cout << "dev_addr_b=0x" << std::hex << kernel_arg.addr_b << std::endl; - std::cout << "dev_addr_c=0x" << std::hex << kernel_arg.addr_c << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max( - src_a_buf_size, - std::max( - src_b_buf_size, - std::max(dst_buf_size, sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - kernel_arg.addr_a = (uint64_t) 0x20000; - kernel_arg.addr_b = (uint64_t) 0x28000; - kernel_arg.addr_c = (uint64_t) 0xc0000000ULL; - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - } - - // upload source buffer - { - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_a_data.data(), src_a_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_a, staging_buf.data(), - src_a_buf_size)); - - std::cout << "uploading source A matrix to device, device mem address=" - << std::hex << kernel_arg.addr_a << ", size=" << std::dec - << src_a_buf_size << " bytes\n"; - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_a_buf_size); - file.close(); - } - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_b_data.data(), src_b_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_b, staging_buf.data(), - src_b_buf_size)); - - std::cout << "uploading source B matrix to device, device mem address=" - << std::hex << kernel_arg.addr_b << ", size=" << std::dec - << src_b_buf_size << " bytes\n"; - std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_b_buf_size); - file.close(); - } - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < ref_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_c, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.dim_m, kernel_arg.dim_n)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/sgemm_gemmini/sgemm_gemmini b/tests/regression/sgemm_gemmini/sgemm_gemmini deleted file mode 100755 index 2204a038..00000000 Binary files a/tests/regression/sgemm_gemmini/sgemm_gemmini and /dev/null differ diff --git a/tests/regression/sgemm_gemmini_dma/Makefile b/tests/regression/sgemm_gemmini_dma/Makefile index 3a8ffb18..da758845 100644 --- a/tests/regression/sgemm_gemmini_dma/Makefile +++ b/tests/regression/sgemm_gemmini_dma/Makefile @@ -1,7 +1,5 @@ PROJECT = sgemm_gemmini_dma -SRCS = main.cpp common.h - VX_SRCS = kernel.cpp OPTS ?= -n16 diff --git a/tests/regression/sgemm_gemmini_dma/compile_debug.sh b/tests/regression/sgemm_gemmini_dma/compile_debug.sh new file mode 100755 index 00000000..13451c0d --- /dev/null +++ b/tests/regression/sgemm_gemmini_dma/compile_debug.sh @@ -0,0 +1,11 @@ +rm kernel.radiance.elf +rm -rf binaries +mkdir binaries +for a in args/*; do + cp -f $a args.bin + aa=$(basename "$a") + cp -f input.a/"$aa" input.a.bin + cp -f input.b/"$aa" input.b.bin + make > /dev/null + mv kernel.radiance.elf binaries/gemmini_debug_dma"$aa".elf +done diff --git a/tests/regression/sgemm_gemmini_dma/input.a/1024 b/tests/regression/sgemm_gemmini_dma/input.a/1024 new file mode 100644 index 00000000..ed28b455 Binary files /dev/null and b/tests/regression/sgemm_gemmini_dma/input.a/1024 differ diff --git a/tests/regression/sgemm_gemmini_dma/input.a/128 b/tests/regression/sgemm_gemmini_dma/input.a/128 new file mode 100644 index 00000000..66035f64 Binary files /dev/null and b/tests/regression/sgemm_gemmini_dma/input.a/128 differ diff --git a/tests/regression/sgemm_gemmini_dma/input.a/256 b/tests/regression/sgemm_gemmini_dma/input.a/256 new file mode 100644 index 00000000..683fe95c Binary files /dev/null and b/tests/regression/sgemm_gemmini_dma/input.a/256 differ diff --git a/tests/regression/sgemm_gemmini_dma/input.a/512 b/tests/regression/sgemm_gemmini_dma/input.a/512 new file mode 100644 index 00000000..91dc2070 Binary files /dev/null and b/tests/regression/sgemm_gemmini_dma/input.a/512 differ diff --git a/tests/regression/sgemm_gemmini_dma/input.b/1024 b/tests/regression/sgemm_gemmini_dma/input.b/1024 new file mode 100644 index 00000000..86ce9db2 Binary files /dev/null and b/tests/regression/sgemm_gemmini_dma/input.b/1024 differ diff --git a/tests/regression/sgemm_gemmini_dma/input.b/128 b/tests/regression/sgemm_gemmini_dma/input.b/128 new file mode 100644 index 00000000..5b6ff306 Binary files /dev/null and b/tests/regression/sgemm_gemmini_dma/input.b/128 differ diff --git a/tests/regression/sgemm_gemmini_dma/input.b/256 b/tests/regression/sgemm_gemmini_dma/input.b/256 new file mode 100644 index 00000000..7b5a0e5b Binary files /dev/null and b/tests/regression/sgemm_gemmini_dma/input.b/256 differ diff --git a/tests/regression/sgemm_gemmini_dma/input.b/512 b/tests/regression/sgemm_gemmini_dma/input.b/512 new file mode 100644 index 00000000..78242202 Binary files /dev/null and b/tests/regression/sgemm_gemmini_dma/input.b/512 differ diff --git a/tests/regression/sgemm_gemmini_dma/main.cpp b/tests/regression/sgemm_gemmini_dma/main.cpp deleted file mode 100644 index 45548d91..00000000 --- a/tests/regression/sgemm_gemmini_dma/main.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_a_data; -std::vector src_b_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.addr_a); - vx_mem_free(device, kernel_arg.addr_b); - vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -void generate_source_matrix(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - src_a_data.resize(dim_m * dim_k); - src_b_data.resize(dim_k * dim_n); - - for (uint32_t i = 0; i < src_a_data.size(); ++i) { - src_a_data[i] = static_cast(i); - std::cout << "A: " << i << ": value=" << src_a_data[i] << std::endl; - } - for (uint32_t i = 0; i < src_b_data.size(); ++i) { - src_b_data[i] = static_cast(i); - std::cout << "B: " << i << ": value=" << src_b_data[i] << std::endl; - } -} - -void generate_reference_matmul(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - ref_data.resize(dim_m * dim_n); - - for (uint32_t i = 0; i < dim_m; ++i) { - for (uint32_t j = 0; j < dim_n; ++j) { - float ref = 0.0f; - for (uint32_t k = 0; k < dim_k; ++k) { - ref += src_a_data[dim_k * i + k] * src_b_data[dim_n * k + j]; - } - ref_data.at(dim_n * i + j) = ref; - } - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t dim_m, uint32_t dim_n) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_c, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - for (uint32_t i = 0; i < dim_m * dim_n; ++i) { - float ref = ref_data.at(i); - float cur = buf_ptr[i]; - if (std::abs((cur - ref) / ref) > 1e-6) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - // FIXME: hardcoded - uint32_t dim_m = 64; - uint32_t dim_n = 64; - uint32_t dim_k = 64; - - generate_source_matrix(dim_m, dim_n, dim_k); - generate_reference_matmul(dim_m, dim_n, dim_k); - - uint32_t src_a_buf_size = src_a_data.size() * sizeof(src_a_data[0]); - uint32_t src_b_buf_size = src_b_data.size() * sizeof(src_b_data[0]); - uint32_t dst_buf_size = ref_data.size() * sizeof(src_a_data[0]); - - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, src_a_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_a)); - RT_CHECK(vx_mem_alloc(device, src_b_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_b)); - RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_c)); - - kernel_arg.dim_m = dim_m; - kernel_arg.dim_n = dim_n; - kernel_arg.dim_k = dim_k; - - std::cout << "dev_addr_a=0x" << std::hex << kernel_arg.addr_a << std::endl; - std::cout << "dev_addr_b=0x" << std::hex << kernel_arg.addr_b << std::endl; - std::cout << "dev_addr_c=0x" << std::hex << kernel_arg.addr_c << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max( - src_a_buf_size, - std::max( - src_b_buf_size, - std::max(dst_buf_size, sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - kernel_arg.addr_a = (uint64_t) 0xa0000000ULL; - kernel_arg.addr_b = (uint64_t) 0xa1000000ULL; - kernel_arg.addr_c = (uint64_t) 0xc0000000ULL; - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - } - - // upload source buffer - { - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_a_data.data(), src_a_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_a, staging_buf.data(), - src_a_buf_size)); - - std::cout << "uploading source A matrix to device, device mem address=" - << std::hex << kernel_arg.addr_a << ", size=" << std::dec - << src_a_buf_size << " bytes\n"; - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_a_buf_size); - file.close(); - } - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_b_data.data(), src_b_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_b, staging_buf.data(), - src_b_buf_size)); - - std::cout << "uploading source B matrix to device, device mem address=" - << std::hex << kernel_arg.addr_b << ", size=" << std::dec - << src_b_buf_size << " bytes\n"; - std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_b_buf_size); - file.close(); - } - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < ref_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_c, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.dim_m, kernel_arg.dim_n)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/sgemm_gemmini_duo/Makefile b/tests/regression/sgemm_gemmini_duo/Makefile index 05737084..ab1f1b9d 100644 --- a/tests/regression/sgemm_gemmini_duo/Makefile +++ b/tests/regression/sgemm_gemmini_duo/Makefile @@ -1,7 +1,5 @@ PROJECT = sgemm_gemmini_duo -SRCS = main.cpp common.h - VX_SRCS = kernel.cpp OPTS ?= -n16 diff --git a/tests/regression/sgemm_gemmini_duo/main.cpp b/tests/regression/sgemm_gemmini_duo/main.cpp deleted file mode 100644 index 84283992..00000000 --- a/tests/regression/sgemm_gemmini_duo/main.cpp +++ /dev/null @@ -1,282 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_a_data; -std::vector src_b_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - // vx_mem_free(device, kernel_arg.addr_a); - // vx_mem_free(device, kernel_arg.addr_b); - // vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -void generate_source_matrix(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - src_a_data.resize(dim_m * dim_k); - src_b_data.resize(dim_k * dim_n); - - for (uint32_t i = 0; i < src_a_data.size(); ++i) { - src_a_data[i] = static_cast(i); - std::cout << "A: " << i << ": value=" << src_a_data[i] << std::endl; - } - for (uint32_t i = 0; i < src_b_data.size(); ++i) { - src_b_data[i] = static_cast(i); - std::cout << "B: " << i << ": value=" << src_b_data[i] << std::endl; - } -} - -void generate_reference_matmul(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - ref_data.resize(dim_m * dim_n); - - for (uint32_t i = 0; i < dim_m; ++i) { - for (uint32_t j = 0; j < dim_n; ++j) { - float ref = 0.0f; - for (uint32_t k = 0; k < dim_k; ++k) { - ref += src_a_data[dim_k * i + k] * src_b_data[dim_n * k + j]; - } - ref_data.at(dim_n * i + j) = ref; - } - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t dim_m, uint32_t dim_n) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_c, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - for (uint32_t i = 0; i < dim_m * dim_n; ++i) { - float ref = ref_data.at(i); - float cur = buf_ptr[i]; - if (std::abs((cur - ref) / ref) > 1e-6) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - // FIXME: hardcoded - uint32_t dim_m = 128; - uint32_t dim_n = 128; - uint32_t dim_k = 128; - - generate_source_matrix(dim_m, dim_n, dim_k); - generate_reference_matmul(dim_m, dim_n, dim_k); - - std::cout << "write reference output" << std::endl; - std::ofstream ref_file("reference.c.bin", std::ios::binary | std::ios::out); - if (!ref_file) { - std::cerr << "error: failed to open reference.c.bin for writing\n"; - exit(EXIT_FAILURE); - } - ref_file.write(reinterpret_cast(ref_data.data()), ref_data.size() * sizeof(ref_data[0])); - ref_file.close(); - - uint32_t src_a_buf_size = src_a_data.size() * sizeof(src_a_data[0]); - uint32_t src_b_buf_size = src_b_data.size() * sizeof(src_b_data[0]); - uint32_t dst_buf_size = ref_data.size() * sizeof(src_a_data[0]); - - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - // RT_CHECK(vx_mem_alloc(device, src_a_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_a)); - // RT_CHECK(vx_mem_alloc(device, src_b_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_b)); - // RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_c)); - kernel_arg.addr_a = 0xa0000000; - kernel_arg.addr_b = 0xa1000000; - kernel_arg.addr_c = 0xc0000000; - - kernel_arg.dim_m = dim_m; - kernel_arg.dim_n = dim_n; - kernel_arg.dim_k = dim_k; - - std::cout << "dev_addr_a=0x" << std::hex << kernel_arg.addr_a << std::endl; - std::cout << "dev_addr_b=0x" << std::hex << kernel_arg.addr_b << std::endl; - std::cout << "dev_addr_c=0x" << std::hex << kernel_arg.addr_c << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max( - src_a_buf_size, - std::max( - src_b_buf_size, - std::max(dst_buf_size, sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - } - - // upload source buffer - { - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_a_data.data(), src_a_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_a, staging_buf.data(), - src_a_buf_size)); - - std::cout << "uploading source A matrix to device, device mem address=" - << std::hex << kernel_arg.addr_a << ", size=" << std::dec - << src_a_buf_size << " bytes\n"; - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_a_buf_size); - file.close(); - } - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_b_data.data(), src_b_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_b, staging_buf.data(), - src_b_buf_size)); - - std::cout << "uploading source B matrix to device, device mem address=" - << std::hex << kernel_arg.addr_b << ", size=" << std::dec - << src_b_buf_size << " bytes\n"; - std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_b_buf_size); - file.close(); - } - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < ref_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_c, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.dim_m, kernel_arg.dim_n)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/sgemm_tcore/Makefile b/tests/regression/sgemm_tcore/Makefile index 30655c95..f57ffbf6 100644 --- a/tests/regression/sgemm_tcore/Makefile +++ b/tests/regression/sgemm_tcore/Makefile @@ -1,7 +1,5 @@ PROJECT = sgemm_tcore -SRCS = main.cpp common.h - VX_SRCS = kernel.cpp VX_INCLUDES = sgemm_impl.hpp diff --git a/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m256n256k256.bm128bn64bk128.dma.elf b/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m256n256k256.bm128bn64bk128.dma.elf deleted file mode 100755 index b908b3d2..00000000 Binary files a/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m256n256k256.bm128bn64bk128.dma.elf and /dev/null differ diff --git a/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m256n256k256.bm128bn64bk128.nodma.elf b/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m256n256k256.bm128bn64bk128.nodma.elf deleted file mode 100755 index cd99eb13..00000000 Binary files a/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m256n256k256.bm128bn64bk128.nodma.elf and /dev/null differ diff --git a/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m512n512k512.bm128bn64bk128.dma.elf b/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m512n512k512.bm128bn64bk128.dma.elf deleted file mode 100755 index aa2035bd..00000000 Binary files a/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m512n512k512.bm128bn64bk128.dma.elf and /dev/null differ diff --git a/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m512n512k512.bm128bn64bk128.nodma.elf b/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m512n512k512.bm128bn64bk128.nodma.elf deleted file mode 100755 index d361cc03..00000000 Binary files a/tests/regression/sgemm_tcore/kernel.radiance.tcore.fp16.m512n512k512.bm128bn64bk128.nodma.elf and /dev/null differ diff --git a/tests/regression/sgemm_tcore/main.cpp b/tests/regression/sgemm_tcore/main.cpp deleted file mode 100644 index 8e4e6061..00000000 --- a/tests/regression/sgemm_tcore/main.cpp +++ /dev/null @@ -1,308 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "common.h" -#include "half.hpp" - -using half_float::half; -using half_float::half_cast; - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -template std::vector src_a_data; -template std::vector src_b_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - // vx_mem_free(device, kernel_arg.addr_a); - // vx_mem_free(device, kernel_arg.addr_b); - // vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -template -void generate_source_matrix(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - static_assert(std::is_same_v || std::is_same_v, - "unsupported floating point datatype"); - - src_a_data.resize(dim_m * dim_k); - src_b_data.resize(dim_k * dim_n); - - for (uint32_t i = 0; i < src_a_data.size(); ++i) { - if constexpr (std::is_same_v) { - src_a_data[i] = half_cast(static_cast(i)); - } else if (std::is_same_v) { - src_a_data[i] = static_cast(i); - } - std::cout << "A: " << i << ": value=" << src_a_data[i] << std::endl; - } - for (uint32_t i = 0; i < src_b_data.size(); ++i) { - if constexpr (std::is_same_v) { - src_b_data[i] = half_cast(static_cast(i)); - } else if (std::is_same_v) { - src_b_data[i] = static_cast(i); - } - std::cout << "B: " << i << ": value=" << src_b_data[i] << std::endl; - } -} - -template -void generate_reference_matmul(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - static_assert(std::is_same_v || std::is_same_v, - "unsupported floating point datatype"); - - ref_data.resize(dim_m * dim_n); - - for (uint32_t i = 0; i < dim_m; ++i) { - for (uint32_t j = 0; j < dim_n; ++j) { - float ref = 0.0f; - for (uint32_t k = 0; k < dim_k; ++k) { - ref += static_cast(src_a_data[dim_k * i + k]) * - static_cast(src_b_data[dim_n * k + j]); - } - ref_data.at(dim_n * i + j) = ref; - } - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t dim_m, uint32_t dim_n) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_c, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - for (uint32_t i = 0; i < dim_m * dim_n; ++i) { - float ref = ref_data.at(i); - float cur = buf_ptr[i]; - if (std::abs((cur - ref) / ref) > 1e-6) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - // FIXME: hardcoded - uint32_t dim_m = 64; - uint32_t dim_n = 64; - uint32_t dim_k = 64; - - using float_type = half; - - generate_source_matrix(dim_m, dim_n, dim_k); - generate_reference_matmul(dim_m, dim_n, dim_k); - - std::cout << "write reference output" << std::endl; - std::ofstream ref_file("reference.c.bin", std::ios::binary | std::ios::out); - if (!ref_file) { - std::cerr << "error: failed to open reference.c.bin for writing\n"; - exit(EXIT_FAILURE); - } - ref_file.write(reinterpret_cast(ref_data.data()), ref_data.size() * sizeof(ref_data[0])); - ref_file.close(); - - uint32_t src_a_buf_size = src_a_data.size() * sizeof(src_a_data[0]); - uint32_t src_b_buf_size = src_b_data.size() * sizeof(src_b_data[0]); - uint32_t dst_buf_size = ref_data.size() * sizeof(src_a_data[0]); - - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - // RT_CHECK(vx_mem_alloc(device, src_a_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_a)); - // RT_CHECK(vx_mem_alloc(device, src_b_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_b)); - // RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_c)); - kernel_arg.addr_a = 0xa0000000; - kernel_arg.addr_b = 0xa1000000; - kernel_arg.addr_c = 0xc0000000; - - kernel_arg.dim_m = dim_m; - kernel_arg.dim_n = dim_n; - kernel_arg.dim_k = dim_k; - - std::cout << "dev_addr_a=0x" << std::hex << kernel_arg.addr_a << std::endl; - std::cout << "dev_addr_b=0x" << std::hex << kernel_arg.addr_b << std::endl; - std::cout << "dev_addr_c=0x" << std::hex << kernel_arg.addr_c << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max( - src_a_buf_size, - std::max( - src_b_buf_size, - std::max(dst_buf_size, sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - } - - // upload source buffer - { - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_a_data.data(), - src_a_data.size() * sizeof(float_type)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_a, staging_buf.data(), - src_a_buf_size)); - - std::cout << "uploading source A matrix to device, device mem address=" - << std::hex << kernel_arg.addr_a << ", size=" << std::dec - << src_a_buf_size << " bytes\n"; - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_a_buf_size); - file.close(); - } - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_b_data.data(), - src_b_data.size() * sizeof(float_type)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_b, staging_buf.data(), - src_b_buf_size)); - - std::cout << "uploading source B matrix to device, device mem address=" - << std::hex << kernel_arg.addr_b << ", size=" << std::dec - << src_b_buf_size << " bytes\n"; - std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_b_buf_size); - file.close(); - } - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < ref_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_c, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.dim_m, kernel_arg.dim_n)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/sgemm_wg/Makefile b/tests/regression/sgemm_wg/Makefile index 289369d2..ccd249aa 100644 --- a/tests/regression/sgemm_wg/Makefile +++ b/tests/regression/sgemm_wg/Makefile @@ -1,7 +1,5 @@ PROJECT = sgemm_wg -SRCS = main.cpp common.h - VX_SRCS = kernel.cpp OPTS ?= -n16 diff --git a/tests/regression/sgemm_wg/main.cpp b/tests/regression/sgemm_wg/main.cpp deleted file mode 100644 index 62625c44..00000000 --- a/tests/regression/sgemm_wg/main.cpp +++ /dev/null @@ -1,292 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_a_data; -std::vector src_b_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - // vx_mem_free(device, kernel_arg.addr_a); - // vx_mem_free(device, kernel_arg.addr_b); - // vx_mem_free(device, kernel_arg.addr_c); - vx_dev_close(device); - } -} - -void generate_source_matrix(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - src_a_data.resize(dim_m * dim_k); - src_b_data.resize(dim_k * dim_n); - - for (uint32_t i = 0; i < src_a_data.size(); ++i) { - src_a_data[i] = static_cast(i); - std::cout << "A: " << i << ": value=" << src_a_data[i] << std::endl; - } - for (uint32_t i = 0; i < src_b_data.size(); ++i) { - src_b_data[i] = static_cast(i); - std::cout << "B: " << i << ": value=" << src_b_data[i] << std::endl; - } -} - -void generate_reference_matmul(uint32_t dim_m, uint32_t dim_n, uint32_t dim_k) { - ref_data.resize(dim_m * dim_n); - - for (uint32_t i = 0; i < dim_m; ++i) { - for (uint32_t j = 0; j < dim_n; ++j) { - float ref = 0.0f; - for (uint32_t k = 0; k < dim_k; ++k) { - ref += src_a_data[dim_k * i + k] * src_b_data[dim_n * k + j]; - } - ref_data.at(dim_n * i + j) = ref; - } - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t dim_m, uint32_t dim_n) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.addr_c, buf_size)); - - std::cout << "downloading result C matrix from device, device mem address=" - << std::hex << kernel_arg.addr_c << ", size=" << std::dec - << buf_size << " bytes\n"; - std::ofstream file("output.c.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open output.c.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), buf_size); - file.close(); - - std::ofstream ref_file("reference.c.bin", std::ios::binary | std::ios::out); - if (!ref_file) { - std::cerr << "error: failed to open reference.c.bin for writing\n"; - exit(EXIT_FAILURE); - } - ref_file.write(reinterpret_cast(ref_data.data()), buf_size); - ref_file.close(); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (float*)staging_buf.data(); - for (uint32_t i = 0; i < dim_m * dim_n; ++i) { - float ref = ref_data.at(i); - float cur = buf_ptr[i]; - if (std::abs((cur - ref) / ref) > 1e-6) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - // FIXME: hardcoded - uint32_t dim_m = 128; - uint32_t dim_n = 128; - uint32_t dim_k = 128; - - generate_source_matrix(dim_m, dim_n, dim_k); - generate_reference_matmul(dim_m, dim_n, dim_k); - - uint32_t src_a_buf_size = src_a_data.size() * sizeof(src_a_data[0]); - uint32_t src_b_buf_size = src_b_data.size() * sizeof(src_b_data[0]); - uint32_t dst_buf_size = ref_data.size() * sizeof(src_a_data[0]); - - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - // RT_CHECK(vx_mem_alloc(device, src_a_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_a)); - // RT_CHECK(vx_mem_alloc(device, src_b_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_b)); - // RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.addr_c)); - kernel_arg.addr_a = 0x20000UL; - kernel_arg.addr_b = 0x28000UL; - kernel_arg.addr_c = 0xc0000000UL; - - kernel_arg.dim_m = dim_m; - kernel_arg.dim_n = dim_n; - kernel_arg.dim_k = dim_k; - - std::cout << "dev_addr_a=0x" << std::hex << kernel_arg.addr_a << std::endl; - std::cout << "dev_addr_b=0x" << std::hex << kernel_arg.addr_b << std::endl; - std::cout << "dev_addr_c=0x" << std::hex << kernel_arg.addr_c << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max( - src_a_buf_size, - std::max( - src_b_buf_size, - std::max(dst_buf_size, sizeof(kernel_arg_t)))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - { - std::cout << "upload kernel argument" << std::endl; - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - std::cout << "uploading argument buffer to device, device mem address=" - << std::hex << KERNEL_ARG_DEV_MEM_ADDR << ", size=" << std::dec - << sizeof(kernel_arg_t) << " bytes\n"; - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), - sizeof(kernel_arg_t)); - file.close(); - } - - // upload source buffer - { - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_a_data.data(), src_a_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_a, staging_buf.data(), - src_a_buf_size)); - - std::cout << "uploading source A matrix to device, device mem address=" - << std::hex << kernel_arg.addr_a << ", size=" << std::dec - << src_a_buf_size << " bytes\n"; - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open input.a.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_a_buf_size); - file.close(); - } - { - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_b_data.data(), src_b_data.size() * sizeof(float)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_b, staging_buf.data(), - src_b_buf_size)); - - std::cout << "uploading source B matrix to device, device mem address=" - << std::hex << kernel_arg.addr_b << ", size=" << std::dec - << src_b_buf_size << " bytes\n"; - std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open input.b.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), src_b_buf_size); - file.close(); - } - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < ref_data.size(); ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.addr_c, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, kernel_arg.dim_m, kernel_arg.dim_n)); - std::cout << "PASSED!" << std::endl; - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/sgemmx/Makefile b/tests/regression/sgemmx/Makefile deleted file mode 100644 index 2e72b32e..00000000 --- a/tests/regression/sgemmx/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = sgemmx - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n32 - -include ../common.mk \ No newline at end of file diff --git a/tests/regression/sgemmx/common.h b/tests/regression/sgemmx/common.h deleted file mode 100644 index 75cfc340..00000000 --- a/tests/regression/sgemmx/common.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -#ifndef TYPE -#define TYPE float -#endif - -typedef struct { - uint32_t num_tasks; - uint32_t size; - uint64_t A_addr; - uint64_t B_addr; - uint64_t C_addr; -} kernel_arg_t; - -#endif diff --git a/tests/regression/sgemmx/kernel.cpp b/tests/regression/sgemmx/kernel.cpp deleted file mode 100644 index b0e8f69e..00000000 --- a/tests/regression/sgemmx/kernel.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include -#include -#include -#include "common.h" - -inline char is_log2(uint32_t x) { - return ((x & (x-1)) == 0); -} - -inline uint32_t log2_fast(uint32_t x) { - return 31 - __builtin_clz (x); -} - -void kernel_body(uint32_t task_id, kernel_arg_t* __UNIFORM__ arg) { - auto A = reinterpret_cast(arg->A_addr); - auto B = reinterpret_cast(arg->B_addr); - auto C = reinterpret_cast(arg->C_addr); - auto size = arg->size; - - uint32_t row, col; - if (is_log2(size)) { - uint32_t log_size = log2_fast(size); - row = task_id >> log_size; - col = task_id & (size-1); - } else { - row = task_id / size; - col = task_id % size; - } - - TYPE sum (0); - for (int e = 0; e < size; ++e) { - sum += A[row * size + e] * B[e * size + col]; - } - C[row * size + col] = sum; -} - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_tasks, (vx_spawn_tasks_cb)kernel_body, arg); - return 0; -} diff --git a/tests/regression/sgemmx/main.cpp b/tests/regression/sgemmx/main.cpp deleted file mode 100644 index 23008011..00000000 --- a/tests/regression/sgemmx/main.cpp +++ /dev/null @@ -1,251 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define FLOAT_ULP 6 - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -template -class Comparator {}; - -template <> -class Comparator { -public: - static const char* type_str() { - return "integer"; - } - static int generate() { - return rand(); - } - static bool compare(int a, int b, int index, int errors) { - if (a != b) { - if (errors < 100) { - printf("*** error: [%d] expected=%d, actual=%d\n", index, a, b); - } - return false; - } - return true; - } -}; - -template <> -class Comparator { -public: - static const char* type_str() { - return "float"; - } - static int generate() { - return static_cast(rand()) / RAND_MAX; - } - static bool compare(float a, float b, int index, int errors) { - union fi_t { float f; int32_t i; }; - fi_t fa, fb; - fa.f = a; - fb.f = b; - auto d = std::abs(fa.i - fb.i); - if (d > FLOAT_ULP) { - if (errors < 100) { - printf("*** error: [%d] expected=%f, actual=%f\n", index, a, b); - } - return false; - } - return true; - } -}; - -static void matmul_cpu(TYPE* out, const TYPE* A, const TYPE* B, uint32_t width, uint32_t height) { - for (uint32_t row = 0; row < height; ++row) { - for (uint32_t col = 0; col < width; ++col) { - TYPE sum(0); - for (uint32_t e = 0; e < width; ++e) { - sum += A[row * width + e] * B[e * width + col]; - } - out[row * width + col] = sum; - } - } -} - -const char* kernel_file = "kernel.bin"; -uint32_t size = 32; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n size] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - size = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.A_addr); - vx_mem_free(device, kernel_arg.B_addr); - vx_mem_free(device, kernel_arg.C_addr); - vx_dev_close(device); - } -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint32_t num_points = size * size; - uint32_t buf_size = num_points * sizeof(TYPE); - - std::cout << "data type: " << Comparator::type_str() << std::endl; - std::cout << "matrix size: " << size << "x" << size << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.A_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.B_addr)); - RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.C_addr)); - - kernel_arg.num_tasks = num_points; - kernel_arg.size = size; - - std::cout << "dev_src0=0x" << std::hex << kernel_arg.A_addr << std::endl; - std::cout << "dev_src1=0x" << std::hex << kernel_arg.B_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.C_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - staging_buf.resize(alloc_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // generate source data - std::vector src_A(num_points); - std::vector src_B(num_points); - std::vector refs(num_points); - for (uint32_t i = 0; i < num_points; ++i) { - auto a = static_cast(std::rand()) / RAND_MAX; - auto b = static_cast(std::rand()) / RAND_MAX; - src_A[i] = static_cast(a * size); - src_B[i] = static_cast(b * size); - } - matmul_cpu(refs.data(), src_A.data(), src_B.data(), size, size); - - // upload source buffer0 - { - std::cout << "upload source buffer0" << std::endl; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = src_A[i]; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.A_addr, staging_buf.data(), buf_size)); - } - - // upload source buffer1 - { - std::cout << "upload source buffer1" << std::endl; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = src_B[i]; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.B_addr, staging_buf.data(), buf_size)); - } - - // clear destination buffer - std::cout << "clear destination buffer" << std::endl; - memset(staging_buf.data(), 0, num_points * sizeof(TYPE)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.C_addr, staging_buf.data(), buf_size)); - - auto time_start = std::chrono::high_resolution_clock::now(); - - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - auto time_end = std::chrono::high_resolution_clock::now(); - double elapsed = std::chrono::duration_cast(time_end - time_start).count(); - printf("Elapsed time: %lg ms\n", elapsed); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.C_addr, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < refs.size(); ++i) { - auto ref = refs[i]; - auto cur = buf_ptr[i]; - if (!Comparator::compare(cur, ref, i, errors)) { - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/sort/Makefile b/tests/regression/sort/Makefile deleted file mode 100644 index b11df5dd..00000000 --- a/tests/regression/sort/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -PROJECT = sort - -SRCS = main.cpp - -VX_SRCS = kernel.cpp - -OPTS ?= -n16 - -include ../common.mk \ No newline at end of file diff --git a/tests/regression/sort/common.h b/tests/regression/sort/common.h deleted file mode 100644 index 92ceeb91..00000000 --- a/tests/regression/sort/common.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _COMMON_H_ -#define _COMMON_H_ - -#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 - -#ifndef TYPE -#define TYPE int -#endif - -typedef struct { - uint32_t num_points; - uint64_t src_addr; - uint64_t dst_addr; -} kernel_arg_t; - -#endif diff --git a/tests/regression/sort/kernel.cpp b/tests/regression/sort/kernel.cpp deleted file mode 100644 index 2e9d3453..00000000 --- a/tests/regression/sort/kernel.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include -#include -#include "common.h" - -void kernel_body(int task_id, kernel_arg_t* __UNIFORM__ arg) { - uint32_t num_points = arg->num_points; - auto src_ptr = (TYPE*)arg->src_addr; - auto dst_ptr = (TYPE*)arg->dst_addr; - - auto ref_value = src_ptr[task_id]; - - uint32_t pos = 0; - for (uint32_t i = 0; i < num_points; ++i) { - auto cur_value = src_ptr[i]; - pos += (cur_value < ref_value) || ((cur_value == ref_value) && (i < task_id)); - } - dst_ptr[pos] = ref_value; -} - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - vx_spawn_tasks(arg->num_points, (vx_spawn_tasks_cb)kernel_body, arg); - return 0; -} diff --git a/tests/regression/sort/main.cpp b/tests/regression/sort/main.cpp deleted file mode 100644 index 38d5d4d4..00000000 --- a/tests/regression/sort/main.cpp +++ /dev/null @@ -1,214 +0,0 @@ -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -std::vector src_data; -std::vector ref_data; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_mem_free(device, kernel_arg.src_addr); - vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -void gen_input_data(uint32_t num_points) { - src_data.resize(num_points); - - for (uint32_t i = 0; i < num_points; ++i) { - auto r = static_cast(std::rand()) / RAND_MAX; - auto value = static_cast(r * num_points); - src_data[i] = value; - std::cout << std::dec << i << ": value=" << value << std::endl; - } -} - -void gen_ref_data(uint32_t num_points) { - ref_data.resize(num_points); - - for (uint32_t i = 0; i < num_points; ++i) { - TYPE ref_value = src_data.at(i); - uint32_t pos = 0; - for (uint32_t j = 0; j < num_points; ++j) { - TYPE cur_value = src_data.at(j); - pos += (cur_value < ref_value) || (cur_value == ref_value && j < i); - } - ref_data.at(pos) = ref_value; - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - TYPE ref = ref_data.at(i); - TYPE cur = buf_ptr[i]; - if (cur != ref) { - std::cout << "error at result #" << std::dec << i - << std::hex << ": actual=" << cur << ", expected=" << ref << std::endl; - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint32_t num_points = count; - - // generate input data - gen_input_data(num_points); - - // generate reference data - gen_ref_data(num_points); - - uint32_t src_buf_size = src_data.size() * sizeof(int32_t); - uint32_t dst_buf_size = ref_data.size() * sizeof(int32_t); - - std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - RT_CHECK(vx_mem_alloc(device, src_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src_addr)); - RT_CHECK(vx_mem_alloc(device, dst_buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - - kernel_arg.num_points = num_points; - - std::cout << "dev_src=0x" << std::hex << kernel_arg.src_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - { - std::cout << "allocate staging buffer" << std::endl; - uint32_t staging_buf_size = std::max(src_buf_size, - std::max(dst_buf_size, - sizeof(kernel_arg_t))); - staging_buf.resize(staging_buf_size); - } - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - // upload source buffer - { - std::cout << "upload source buffer" << std::endl; - auto buf_ptr = staging_buf.data(); - memcpy(buf_ptr, src_data.data(), num_points * sizeof(TYPE)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src_addr, staging_buf.data(), src_buf_size)); - } - - // clear destination buffer - { - std::cout << "clear destination buffer" << std::endl; - auto buf_ptr = (int32_t*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = 0xdeadbeef; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), dst_buf_size)); - } - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -} \ No newline at end of file diff --git a/tests/regression/unaligned/Makefile b/tests/regression/unaligned/Makefile index a314fc4b..b38078d2 100644 --- a/tests/regression/unaligned/Makefile +++ b/tests/regression/unaligned/Makefile @@ -1,7 +1,5 @@ PROJECT = unaligned -SRCS = main.cpp common.h - VX_SRCS = kernel.cpp OPTS ?= -n16 diff --git a/tests/regression/unaligned/main.cpp b/tests/regression/unaligned/main.cpp deleted file mode 100644 index a1b1d384..00000000 --- a/tests/regression/unaligned/main.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -const char* kernel_file = "kernel.bin"; -uint32_t count = 0; - -vx_device_h device = nullptr; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - count = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - vx_dev_close(device); - } -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - if (count == 0) { - count = 1; - } - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - return 0; -} diff --git a/tests/regression/unaligned/unaligned b/tests/regression/unaligned/unaligned deleted file mode 100755 index 7fa0b6c0..00000000 Binary files a/tests/regression/unaligned/unaligned and /dev/null differ diff --git a/tests/regression/vecaddx/Makefile b/tests/regression/vecaddx/Makefile index af43d3c7..2b8072e1 100644 --- a/tests/regression/vecaddx/Makefile +++ b/tests/regression/vecaddx/Makefile @@ -1,9 +1,7 @@ PROJECT = vecaddx -SRCS = main.cpp - VX_SRCS = kernel.cpp OPTS ?= -n64 -include ../common.mk \ No newline at end of file +include ../common.mk diff --git a/tests/regression/vecaddx/main.cpp b/tests/regression/vecaddx/main.cpp deleted file mode 100644 index e25ad5b4..00000000 --- a/tests/regression/vecaddx/main.cpp +++ /dev/null @@ -1,275 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define FLOAT_ULP 6 - -#define RT_CHECK(_expr) \ - do { \ - int _ret = _expr; \ - if (0 == _ret) \ - break; \ - printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ - cleanup(); \ - exit(-1); \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// - -template -class Comparator {}; - -template <> -class Comparator { -public: - static const char* type_str() { - return "integer"; - } - static int generate() { - return rand(); - } - static bool compare(int a, int b, int index, int errors) { - if (a != b) { - if (errors < 100) { - printf("*** error: [%d] expected=%d, actual=%d\n", index, a, b); - } - return false; - } - return true; - } -}; - -template <> -class Comparator { -private: - union Float_t { float f; int i; }; -public: - static const char* type_str() { - return "float"; - } - static int generate() { - return static_cast(rand()) / RAND_MAX; - } - static bool compare(float a, float b, int index, int errors) { - union fi_t { float f; int32_t i; }; - fi_t fa, fb; - fa.f = a; - fb.f = b; - auto d = std::abs(fa.i - fb.i); - if (d > FLOAT_ULP) { - if (errors < 100) { - printf("*** error: [%d] expected=%f, actual=%f\n", index, a, b); - } - return false; - } - return true; - } -}; - -const char* kernel_file = "kernel.bin"; -uint32_t size = 16; - -vx_device_h device = nullptr; -std::vector source_data; -std::vector staging_buf; -kernel_arg_t kernel_arg = {}; - -static void show_usage() { - std::cout << "Vortex Test." << std::endl; - std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { - switch (c) { - case 'n': - size = atoi(optarg); - break; - case 'k': - kernel_file = optarg; - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } -} - -void cleanup() { - if (device) { - // vx_mem_free(device, kernel_arg.src0_addr); - // vx_mem_free(device, kernel_arg.src1_addr); - // vx_mem_free(device, kernel_arg.dst_addr); - vx_dev_close(device); - } -} - -int run_test(const kernel_arg_t& kernel_arg, - uint32_t buf_size, - uint32_t num_points) { - // start device - std::cout << "start device" << std::endl; - RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; - RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - RT_CHECK(vx_copy_from_dev(device, staging_buf.data(), kernel_arg.dst_addr, buf_size)); - - // verify result - std::cout << "verify result" << std::endl; - { - int errors = 0; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - auto ref = source_data[2 * i + 0] + source_data[2 * i + 1]; - auto cur = buf_ptr[i]; - if (!Comparator::compare(cur, ref, i, errors)) { - ++errors; - } - } - if (errors != 0) { - std::cout << "Found " << std::dec << errors << " errors!" << std::endl; - std::cout << "FAILED!" << std::endl; - return 1; - } - } - - return 0; -} - -int main(int argc, char *argv[]) { - // parse command arguments - parse_args(argc, argv); - - std::srand(50); - - // open device connection - std::cout << "open device connection" << std::endl; - RT_CHECK(vx_dev_open(&device)); - - uint64_t num_cores, num_warps, num_threads; - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - std::cout << "number of cores: " << num_cores << std::endl; - std::cout << "number of warps: " << num_warps << std::endl; - std::cout << "number of threads: " << num_threads << std::endl; - - uint32_t num_points = size; - uint32_t buf_size = num_points * sizeof(TYPE); - - std::cout << "number of points: " << num_points << std::endl; - std::cout << "data type: " << Comparator::type_str() << std::endl; - std::cout << "buffer size: " << buf_size << " bytes" << std::endl; - - // upload program - std::cout << "upload program" << std::endl; - RT_CHECK(vx_upload_kernel_file(device, kernel_file)); - - // allocate device memory - std::cout << "allocate device memory" << std::endl; - // RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src0_addr)); - // RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.src1_addr)); - // RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_TYPE_GLOBAL, &kernel_arg.dst_addr)); - kernel_arg.src0_addr = 0x20000UL; - kernel_arg.src1_addr = 0x28000UL; - kernel_arg.dst_addr = 0xc0000000UL; - - kernel_arg.num_points = num_points; - - std::cout << "dev_src0=0x" << std::hex << kernel_arg.src0_addr << std::endl; - std::cout << "dev_src1=0x" << std::hex << kernel_arg.src1_addr << std::endl; - std::cout << "dev_dst=0x" << std::hex << kernel_arg.dst_addr << std::endl; - - // allocate staging buffer - std::cout << "allocate staging buffer" << std::endl; - uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); - staging_buf.resize(alloc_size); - - // upload kernel argument - std::cout << "upload kernel argument" << std::endl; - memcpy(staging_buf.data(), &kernel_arg, sizeof(kernel_arg_t)); - RT_CHECK(vx_copy_to_dev(device, KERNEL_ARG_DEV_MEM_ADDR, staging_buf.data(), sizeof(kernel_arg_t))); - - std::ofstream file("args.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open args.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(staging_buf.data()), sizeof(kernel_arg_t)); - file.close(); - - // generate source data - source_data.resize(2 * num_points); - for (uint32_t i = 0; i < source_data.size(); ++i) { - // source_data[i] = Comparator::generate(); - source_data[i] = static_cast(i); - } - - // upload source buffer0 - { - std::cout << "upload source buffer0" << std::endl; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = source_data[2 * i + 0]; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src0_addr, staging_buf.data(), buf_size)); - - std::ofstream file("input.a.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open input.a.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), buf_size); - file.close(); - } - - // upload source buffer1 - { - std::cout << "upload source buffer1" << std::endl; - auto buf_ptr = (TYPE*)staging_buf.data(); - for (uint32_t i = 0; i < num_points; ++i) { - buf_ptr[i] = source_data[2 * i + 1]; - } - RT_CHECK(vx_copy_to_dev(device, kernel_arg.src1_addr, staging_buf.data(), buf_size)); - - std::ofstream file("input.b.bin", std::ios::binary | std::ios::out); - if (!file) { - std::cerr << "error: failed to open input.b.bin for writing\n"; - exit(EXIT_FAILURE); - } - file.write(reinterpret_cast(buf_ptr), buf_size); - file.close(); - } - - // clear destination buffer - std::cout << "clear destination buffer" << std::endl; - memset(staging_buf.data(), 0, num_points * sizeof(TYPE)); - RT_CHECK(vx_copy_to_dev(device, kernel_arg.dst_addr, staging_buf.data(), buf_size)); - - // run tests - std::cout << "run tests" << std::endl; - RT_CHECK(run_test(kernel_arg, buf_size, num_points)); - - // cleanup - std::cout << "cleanup" << std::endl; - cleanup(); - - std::cout << "PASSED!" << std::endl; - - return 0; -}