From 3cc1190cd746518faa0cfb4ee9178bf60d712d41 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 11 Jun 2021 03:08:07 -0700 Subject: [PATCH] CSRs I/O refactoring --- driver/common/vx_utils.cpp | 115 ++- driver/include/vortex.h | 6 - driver/opae/vortex.cpp | 73 +- driver/opae/vortex_afu.h | 7 +- driver/rtlsim/vortex.cpp | 40 - driver/simx/vortex.cpp | 18 - driver/stub/vortex.cpp | 8 - driver/tests/demo/kernel.bin | Bin 6964 -> 7500 bytes driver/tests/demo/kernel.dump | 1039 ++++++++++++++---------- driver/tests/demo/kernel.elf | Bin 9016 -> 9612 bytes hw/rtl/VX_cluster.v | 70 -- hw/rtl/VX_config.vh | 74 +- hw/rtl/VX_core.v | 26 +- hw/rtl/VX_csr_arb.v | 82 -- hw/rtl/VX_csr_data.v | 56 +- hw/rtl/VX_csr_unit.v | 94 +-- hw/rtl/VX_execute.v | 8 +- hw/rtl/VX_pipeline.v | 37 +- hw/rtl/Vortex.v | 74 -- hw/rtl/afu/vortex_afu.sv | 141 +--- hw/rtl/afu/vortex_afu.vh | 7 +- hw/rtl/interfaces/VX_csr_io_req_if.v | 16 - hw/rtl/interfaces/VX_csr_io_rsp_if.v | 14 - hw/rtl/interfaces/VX_csr_pipe_req_if.v | 22 - hw/simulate/simulator.cpp | 83 -- hw/simulate/simulator.h | 12 +- hw/syn/opae/vortex_afu.json | 7 +- hw/syn/quartus/project.tcl | 38 +- runtime/Makefile | 2 +- runtime/include/vx_intrinsics.h | 59 +- runtime/src/vx_perf.c | 27 + runtime/src/vx_start.S | 3 + simX/core.cpp | 8 +- 33 files changed, 881 insertions(+), 1385 deletions(-) delete mode 100644 hw/rtl/VX_csr_arb.v delete mode 100644 hw/rtl/interfaces/VX_csr_io_req_if.v delete mode 100644 hw/rtl/interfaces/VX_csr_io_rsp_if.v delete mode 100644 hw/rtl/interfaces/VX_csr_pipe_req_if.v create mode 100644 runtime/src/vx_perf.c diff --git a/driver/common/vx_utils.cpp b/driver/common/vx_utils.cpp index 754cea4b..635e1ff1 100644 --- a/driver/common/vx_utils.cpp +++ b/driver/common/vx_utils.cpp @@ -76,21 +76,20 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) { return err; } -int vx_csr_get_l(vx_device_h device, int core_id, int addr, int addr_h, uint64_t* value) { - int ret = 0; - unsigned value_lo, value_hi; - ret |= vx_csr_get(device, core_id, addr, &value_lo); - ret |= vx_csr_get(device, core_id, addr_h, &value_hi); - *value = (uint64_t(value_hi) << 32) | value_lo; - return ret; +/*static uint32_t get_csr_32(const uint32_t* buffer, int addr) { + uint32_t value_lo = buffer[addr - CSR_MPM_BASE]; + return value_lo; +}*/ + +static uint64_t get_csr_64(const uint32_t* buffer, int addr) { + uint32_t value_lo = buffer[addr - CSR_MPM_BASE]; + uint32_t value_hi = buffer[addr - CSR_MPM_BASE + 32]; + return (uint64_t(value_hi) << 32) | value_lo; } extern int vx_dump_perf(vx_device_h device, FILE* stream) { int ret = 0; - unsigned num_cores; - vx_csr_get(device, 0, CSR_NC, &num_cores); - uint64_t instrs = 0; uint64_t cycles = 0; @@ -127,12 +126,23 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) { uint64_t mem_stalls = 0; uint64_t mem_lat = 0; #endif - - for (unsigned core_id = 0; core_id < num_cores; ++core_id) { - uint64_t instrs_per_core, cycles_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_INSTRET, CSR_INSTRET_H, &instrs_per_core); - ret |= vx_csr_get_l(device, core_id, CSR_CYCLE, CSR_CYCLE_H, &cycles_per_core); + unsigned num_cores; + ret = vx_dev_caps(device, VX_CAPS_MAX_CORES, &num_cores); + if (ret) + return ret; + + vx_buffer_h staging_buf; + ret |= vx_alloc_shared_mem(device, 64 * sizeof(uint32_t), &staging_buf); + if (ret) + return ret; + + auto staging_ptr = (uint32_t*)vx_host_ptr(staging_buf); + + for (unsigned core_id = 0; core_id < num_cores; ++core_id) { + ret |= vx_copy_from_dev(staging_buf, IO_ADDR_CSR + 64 * sizeof(uint32_t) * core_id, 64 * sizeof(uint32_t), 0); + uint64_t instrs_per_core = get_csr_64(staging_ptr, CSR_MINSTRET); + uint64_t cycles_per_core = get_csr_64(staging_ptr, CSR_MCYCLE); float IPC = (float)(double(instrs_per_core) / double(cycles_per_core)); if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC); instrs += instrs_per_core; @@ -141,133 +151,110 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) { #ifdef PERF_ENABLE // PERF: pipeline // ibuffer_stall - uint64_t ibuffer_stalls_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_IBUF_ST, CSR_MPM_IBUF_ST_H, &ibuffer_stalls_per_core); + uint64_t ibuffer_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_IBUF_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: ibuffer stalls=%ld\n", core_id, ibuffer_stalls_per_core); ibuffer_stalls += ibuffer_stalls_per_core; // scoreboard_stall - uint64_t scoreboard_stalls_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_SCRB_ST, CSR_MPM_SCRB_ST_H, &scoreboard_stalls_per_core); + uint64_t scoreboard_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_SCRB_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: scoreboard stalls=%ld\n", core_id, scoreboard_stalls_per_core); scoreboard_stalls += scoreboard_stalls_per_core; // alu_stall - uint64_t alu_stalls_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_ALU_ST, CSR_MPM_ALU_ST_H, &alu_stalls_per_core); + uint64_t alu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_ALU_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: alu unit stalls=%ld\n", core_id, alu_stalls_per_core); alu_stalls += alu_stalls_per_core; // lsu_stall - uint64_t lsu_stalls_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_LSU_ST, CSR_MPM_LSU_ST_H, &lsu_stalls_per_core); + uint64_t lsu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_LSU_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: lsu unit stalls=%ld\n", core_id, lsu_stalls_per_core); lsu_stalls += lsu_stalls_per_core; // csr_stall - uint64_t csr_stalls_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_CSR_ST, CSR_MPM_CSR_ST_H, &csr_stalls_per_core); + uint64_t csr_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_CSR_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core); csr_stalls += csr_stalls_per_core; // fpu_stall - uint64_t fpu_stalls_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_FPU_ST, CSR_MPM_FPU_ST_H, &fpu_stalls_per_core); + uint64_t fpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_FPU_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: fpu unit stalls=%ld\n", core_id, fpu_stalls_per_core); fpu_stalls += fpu_stalls_per_core; // gpu_stall - uint64_t gpu_stalls_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_GPU_ST, CSR_MPM_GPU_ST_H, &gpu_stalls_per_core); + uint64_t gpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_GPU_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu unit stalls=%ld\n", core_id, gpu_stalls_per_core); gpu_stalls += gpu_stalls_per_core; // PERF: Icache // total reads - uint64_t icache_reads_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_READS, CSR_MPM_ICACHE_READS_H, &icache_reads_per_core); + uint64_t icache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_READS); if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reads=%ld\n", core_id, icache_reads_per_core); icache_reads += icache_reads_per_core; // read misses - uint64_t icache_miss_r_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_MISS_R, CSR_MPM_ICACHE_MISS_R_H, &icache_miss_r_per_core); + uint64_t icache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_MISS_R); int icache_read_hit_ratio = (int)((1.0 - (double(icache_miss_r_per_core) / double(icache_reads_per_core))) * 100); if (num_cores > 1) fprintf(stream, "PERF: core%d: icache read misses=%ld (hit ratio=%d%%)\n", core_id, icache_miss_r_per_core, icache_read_hit_ratio); icache_read_misses += icache_miss_r_per_core; // pipeline stalls - uint64_t icache_pipe_st_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_PIPE_ST, CSR_MPM_ICACHE_PIPE_ST_H, &icache_pipe_st_per_core); + uint64_t icache_pipe_st_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_PIPE_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: icache pipeline stalls=%ld\n", core_id, icache_pipe_st_per_core); icache_pipe_stalls += icache_pipe_st_per_core; // response stalls - uint64_t icache_crsp_st_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_CRSP_ST, CSR_MPM_ICACHE_CRSP_ST_H, &icache_crsp_st_per_core); + uint64_t icache_crsp_st_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_CRSP_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reponse stalls=%ld\n", core_id, icache_crsp_st_per_core); icache_rsp_stalls += icache_crsp_st_per_core; // PERF: Dcache // total reads - uint64_t dcache_reads_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_READS, CSR_MPM_DCACHE_READS_H, &dcache_reads_per_core); + uint64_t dcache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_READS); if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reads=%ld\n", core_id, dcache_reads_per_core); dcache_reads += dcache_reads_per_core; // total write - uint64_t dcache_writes_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_WRITES, CSR_MPM_DCACHE_WRITES_H, &dcache_writes_per_core); + uint64_t dcache_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_WRITES); if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache writes=%ld\n", core_id, dcache_writes_per_core); dcache_writes += dcache_writes_per_core; // read misses - uint64_t dcache_miss_r_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_R, CSR_MPM_DCACHE_MISS_R_H, &dcache_miss_r_per_core); + uint64_t dcache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_R); int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_miss_r_per_core) / double(dcache_reads_per_core))) * 100); if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache read misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_r_per_core, dcache_read_hit_ratio); dcache_read_misses += dcache_miss_r_per_core; // read misses - uint64_t dcache_miss_w_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_W, CSR_MPM_DCACHE_MISS_W_H, &dcache_miss_w_per_core); + uint64_t dcache_miss_w_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_W); int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_miss_w_per_core) / double(dcache_writes_per_core))) * 100); if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache write misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_w_per_core, dcache_write_hit_ratio); dcache_write_misses += dcache_miss_w_per_core; // bank_stalls - uint64_t dcache_bank_st_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_BANK_ST, CSR_MPM_DCACHE_BANK_ST_H, &dcache_bank_st_per_core); + uint64_t dcache_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_BANK_ST); int dcache_bank_utilization = (int)((double(dcache_reads_per_core + dcache_writes_per_core) / double(dcache_reads_per_core + dcache_writes_per_core + dcache_bank_st_per_core)) * 100); if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache bank stalls=%ld (utilization=%d%%)\n", core_id, dcache_bank_st_per_core, dcache_bank_utilization); dcache_bank_stalls += dcache_bank_st_per_core; // mshr_stalls - uint64_t dcache_mshr_st_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MSHR_ST, CSR_MPM_DCACHE_MSHR_ST_H, &dcache_mshr_st_per_core); + uint64_t dcache_mshr_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MSHR_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache mshr stalls=%ld\n", core_id, dcache_mshr_st_per_core); dcache_mshr_stalls += dcache_mshr_st_per_core; // pipeline stalls - uint64_t dcache_pipe_st_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_PIPE_ST, CSR_MPM_DCACHE_PIPE_ST_H, &dcache_pipe_st_per_core); + uint64_t dcache_pipe_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_PIPE_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache pipeline stalls=%ld\n", core_id, dcache_pipe_st_per_core); dcache_pipe_stalls += dcache_pipe_st_per_core; // response stalls - uint64_t dcache_crsp_st_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_CRSP_ST, CSR_MPM_DCACHE_CRSP_ST_H, &dcache_crsp_st_per_core); + uint64_t dcache_crsp_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_CRSP_ST); if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reponse stalls=%ld\n", core_id, dcache_crsp_st_per_core); dcache_rsp_stalls += dcache_crsp_st_per_core; // PERF: SMEM // total reads - uint64_t smem_reads_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_READS, CSR_MPM_SMEM_READS_H, &smem_reads_per_core); + uint64_t smem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_READS); if (num_cores > 1) fprintf(stream, "PERF: core%d: smem reads=%ld\n", core_id, smem_reads_per_core); smem_reads += smem_reads_per_core; // total write - uint64_t smem_writes_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_WRITES, CSR_MPM_SMEM_WRITES_H, &smem_writes_per_core); + uint64_t smem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_WRITES); if (num_cores > 1) fprintf(stream, "PERF: core%d: smem writes=%ld\n", core_id, smem_writes_per_core); smem_writes += smem_writes_per_core; // bank_stalls - uint64_t smem_bank_st_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_BANK_ST, CSR_MPM_SMEM_BANK_ST_H, &smem_bank_st_per_core); + uint64_t smem_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_BANK_ST); int smem_bank_utilization = (int)((double(smem_reads_per_core + smem_writes_per_core) / double(smem_reads_per_core + smem_writes_per_core + smem_bank_st_per_core)) * 100); if (num_cores > 1) fprintf(stream, "PERF: core%d: smem bank stalls=%ld (utilization=%d%%)\n", core_id, smem_bank_st_per_core, smem_bank_utilization); smem_bank_stalls += smem_bank_st_per_core; // PERF: memory - uint64_t mem_reads_per_core, mem_writes_per_core, mem_stalls_per_core, mem_lat_per_core; - ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_READS, CSR_MPM_MEM_READS_H, &mem_reads_per_core); - ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_WRITES, CSR_MPM_MEM_WRITES_H, &mem_writes_per_core); - ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_ST, CSR_MPM_MEM_ST_H, &mem_stalls_per_core); - ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_LAT, CSR_MPM_MEM_LAT_H, &mem_lat_per_core); + uint64_t mem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_READS); + uint64_t mem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_WRITES); + uint64_t mem_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_ST); + uint64_t mem_lat_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_LAT); int mem_utilization = (int)((double(mem_reads_per_core + mem_writes_per_core) / double(mem_reads_per_core + mem_writes_per_core + mem_stalls_per_core)) * 100); int mem_avg_lat = (int)(double(mem_lat_per_core) / double(mem_reads_per_core)); if (num_cores > 1) fprintf(stream, "PERF: core%d: memory requests=%ld (reads=%ld, writes=%ld)\n", core_id, (mem_reads_per_core + mem_writes_per_core), mem_reads_per_core, mem_writes_per_core); diff --git a/driver/include/vortex.h b/driver/include/vortex.h index 3f341165..05648671 100644 --- a/driver/include/vortex.h +++ b/driver/include/vortex.h @@ -59,12 +59,6 @@ int vx_start(vx_device_h hdevice); // Wait for device ready with milliseconds timeout int vx_ready_wait(vx_device_h hdevice, long long timeout); -// set device constant registers -int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value); - -// get device constant registers -int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value); - ////////////////////////////// UTILITY FUNCIONS /////////////////////////////// // upload kernel bytes to device diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index d830f00a..29a61062 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -37,25 +37,20 @@ #define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ #define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE #define CMD_RUN AFU_IMAGE_CMD_RUN -#define CMD_CSR_READ AFU_IMAGE_CMD_CSR_READ -#define CMD_CSR_WRITE AFU_IMAGE_CMD_CSR_WRITE #define MMIO_CMD_TYPE (AFU_IMAGE_MMIO_CMD_TYPE * 4) #define MMIO_IO_ADDR (AFU_IMAGE_MMIO_IO_ADDR * 4) #define MMIO_MEM_ADDR (AFU_IMAGE_MMIO_MEM_ADDR * 4) #define MMIO_DATA_SIZE (AFU_IMAGE_MMIO_DATA_SIZE * 4) +#define MMIO_DEV_CAPS (AFU_IMAGE_MMIO_DEV_CAPS * 4) #define MMIO_STATUS (AFU_IMAGE_MMIO_STATUS * 4) -#define MMIO_CSR_CORE (AFU_IMAGE_MMIO_CSR_CORE * 4) -#define MMIO_CSR_ADDR (AFU_IMAGE_MMIO_CSR_ADDR * 4) -#define MMIO_CSR_DATA (AFU_IMAGE_MMIO_CSR_DATA * 4) -#define MMIO_CSR_READ (AFU_IMAGE_MMIO_CSR_READ * 4) /////////////////////////////////////////////////////////////////////////////// typedef struct vx_device_ { fpga_handle fpga; size_t mem_allocation; - unsigned implementation_id; + unsigned version; unsigned num_cores; unsigned num_warps; unsigned num_threads; @@ -89,7 +84,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) { switch (caps_id) { case VX_CAPS_VERSION: - *value = device->implementation_id; + *value = device->version; break; case VX_CAPS_MAX_CORES: *value = device->num_cores; @@ -195,21 +190,22 @@ extern int vx_dev_open(vx_device_h* hdevice) { device->fpga = accel_handle; device->mem_allocation = ALLOC_BASE_ADDR; - + { // Load device CAPS - int ret = 0; - ret |= vx_csr_get(device, 0, CSR_MIMPID, &device->implementation_id); - ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores); - ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps); - ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads); + uint64_t dev_caps; + int ret = fpgaReadMMIO64(device->fpga, 0, MMIO_DEV_CAPS, &dev_caps); if (ret != FPGA_OK) { fpgaClose(accel_handle); return ret; } + device->version = (dev_caps >> 0) & 0xffff; + device->num_cores = (dev_caps >> 16) & 0xffff; + device->num_warps = (dev_caps >> 32) & 0xffff; + device->num_threads = (dev_caps >> 48) & 0xffff; #ifndef NDEBUG fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n", - device->implementation_id, device->num_cores, device->num_warps, device->num_threads); + device->version, device->num_cores, device->num_warps, device->num_threads); #endif } @@ -470,52 +466,5 @@ extern int vx_start(vx_device_h hdevice) { // start execution CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN)); - return 0; -} - -// set device constant registers -extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) { - if (nullptr == hdevice) - return -1; - - vx_device_t *device = ((vx_device_t*)hdevice); - - // Ensure ready for new command - if (vx_ready_wait(hdevice, -1) != 0) - return -1; - - // write CSR value - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE)); - - return 0; -} - -// get device constant registers -extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) { - if (nullptr == hdevice || nullptr == value) - return -1; - - vx_device_t *device = ((vx_device_t*)hdevice); - - // Ensure ready for new command - if (vx_ready_wait(hdevice, -1) != 0) - return -1; - - // write CSR value - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ)); - - // Ensure ready for new command - if (vx_ready_wait(hdevice, -1) != 0) - return -1; - - uint64_t value64; - CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_READ, &value64)); - *value = (unsigned)value64; - return 0; } \ No newline at end of file diff --git a/driver/opae/vortex_afu.h b/driver/opae/vortex_afu.h index 8a4b7826..40836450 100644 --- a/driver/opae/vortex_afu.h +++ b/driver/opae/vortex_afu.h @@ -7,21 +7,16 @@ #define AFU_ACCEL_NAME "vortex_afu" #define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C" -#define AFU_IMAGE_CMD_CSR_READ 4 -#define AFU_IMAGE_CMD_CSR_WRITE 5 #define AFU_IMAGE_CMD_MEM_READ 1 #define AFU_IMAGE_CMD_MEM_WRITE 2 #define AFU_IMAGE_CMD_RUN 3 #define AFU_IMAGE_MMIO_CMD_TYPE 10 -#define AFU_IMAGE_MMIO_CSR_ADDR 26 -#define AFU_IMAGE_MMIO_CSR_CORE 24 -#define AFU_IMAGE_MMIO_CSR_DATA 28 -#define AFU_IMAGE_MMIO_CSR_READ 30 #define AFU_IMAGE_MMIO_DATA_SIZE 16 #define AFU_IMAGE_MMIO_IO_ADDR 12 #define AFU_IMAGE_MMIO_MEM_ADDR 14 #define AFU_IMAGE_MMIO_SCOPE_READ 20 #define AFU_IMAGE_MMIO_SCOPE_WRITE 22 +#define AFU_IMAGE_MMIO_DEV_CAPS 24 #define AFU_IMAGE_MMIO_STATUS 18 #define AFU_IMAGE_POWER 0 #define AFU_TOP_IFC "ccip_std_afu_avalon_mm" diff --git a/driver/rtlsim/vortex.cpp b/driver/rtlsim/vortex.cpp index c80f2adf..544f0f15 100644 --- a/driver/rtlsim/vortex.cpp +++ b/driver/rtlsim/vortex.cpp @@ -144,28 +144,6 @@ public: return 0; } - int set_csr(int core_id, int addr, unsigned value) { - if (future_.valid()) { - future_.wait(); // ensure prior run completed - } - simulator_.set_csr(core_id, addr, value); - while (simulator_.csr_req_active()) { - simulator_.step(); - }; - return 0; - } - - int get_csr(int core_id, int addr, unsigned *value) { - if (future_.valid()) { - future_.wait(); // ensure prior run completed - } - simulator_.get_csr(core_id, addr, value); - while (simulator_.csr_req_active()) { - simulator_.step(); - }; - return 0; - } - private: size_t mem_allocation_; @@ -330,22 +308,4 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { vx_device *device = ((vx_device*)hdevice); return device->wait(timeout); -} - -extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) { - if (nullptr == hdevice) - return -1; - - vx_device *device = ((vx_device*)hdevice); - - return device->set_csr(core_id, addr, value); -} - -extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) { - if (nullptr == hdevice) - return -1; - - vx_device *device = ((vx_device*)hdevice); - - return device->get_csr(core_id, addr, value); } \ No newline at end of file diff --git a/driver/simx/vortex.cpp b/driver/simx/vortex.cpp index f4c87bc9..d2a6ca76 100644 --- a/driver/simx/vortex.cpp +++ b/driver/simx/vortex.cpp @@ -376,22 +376,4 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { vx_device *device = ((vx_device*)hdevice); return device->wait(timeout); -} - -extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) { - if (nullptr == hdevice) - return -1; - - vx_device *device = ((vx_device*)hdevice); - - return device->set_csr(core_id, addr, value); -} - -extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned *value) { - if (nullptr == hdevice) - return -1; - - vx_device *device = ((vx_device*)hdevice); - - return device->get_csr(core_id, addr, value); } \ No newline at end of file diff --git a/driver/stub/vortex.cpp b/driver/stub/vortex.cpp index 7585a656..f5079500 100644 --- a/driver/stub/vortex.cpp +++ b/driver/stub/vortex.cpp @@ -42,12 +42,4 @@ extern int vx_start(vx_device_h /*hdevice*/) { extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) { return -1; -} - -extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) { - return -1; -} - -extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) { - return -1; } \ No newline at end of file diff --git a/driver/tests/demo/kernel.bin b/driver/tests/demo/kernel.bin index e2e6ad9f9a1184eabeae9540477d2e428e3e3fe4..5c6dd6e8c820028d66c25a609e7e4c88586db272 100755 GIT binary patch delta 934 zcmZ9KF=!KE7>2*g<#MLP5Itxt3XNWpBZ~wJ2As|z(4pW74lOtYQgF!7z&Q%JVl=vW z92hr4ks>1UpRstiMQD*ihJbMqhiqAd9CWCaKz+Y%I{ZABKmYUa4d17m!&_EYA@ZJ7 z({>>rCw}gj%gMxPqw1v<0EJl^_2RZ zDS6!vK^J;&3TA}9!^{c&4YMrtU8X7Yw@gdu@0c~Azh^du{(?be)`(TfK zP_Un9*~WkaxEOH&oNxe)H~>@Q#d z_Q4+epkTj+j{yh35eL8t2f&B}AcY8|FhGa^mC2XDAcD3YlcO`jZ>ctDBUvo*@D4BIi#UEoMGGD3D0C3I2ptS$6-?2hYxu!B zI61g$l;Y-K(IKn2IEag5Dhjm_(2I)>$K$x~-h1xSRk79~0IW@-W>)r_@-=t>W0c6y z1OiGHfRgrwqM{1IO+h%<8S==iD5#W_3m)isoJ9{T*n!);U5m#pg^2)-BN0jjEJ}Js zyv*6V^%TXgz}d`#A803y=`HS6;b4z)+`H|Ur`4MKk(+l-4O~+NzpH`8j#XfXYmQUr zMz~PLVcVh0=zXjy55VguUv~P*0b3>udC}R|df~=_2yd2%_4wfYgvQ^Q*%ovauu2nH zh+j3(&0wcwL&Lq4zyy&us3aK$X{cw^7-VK_pqn|p0aA$ayuqr%WGZ<6gDb!Af5@o{ r<6KB|*p_$}2p@S2^TRLaIkGN~equ^}3amREmX=POhlCa8BiQK&K>Tee diff --git a/driver/tests/demo/kernel.dump b/driver/tests/demo/kernel.dump index 6a877677..4f7824c6 100644 --- a/driver/tests/demo/kernel.dump +++ b/driver/tests/demo/kernel.dump @@ -6,25 +6,25 @@ Disassembly of section .init: 80000000 <_start>: 80000000: 00000597 auipc a1,0x0 -80000004: 0e058593 addi a1,a1,224 # 800000e0 +80000004: 0e458593 addi a1,a1,228 # 800000e4 80000008: fc102573 csrr a0,0xfc1 8000000c: 00b5106b 0xb5106b -80000010: 0d0000ef jal ra,800000e0 +80000010: 0d4000ef jal ra,800000e4 80000014: 00100513 li a0,1 80000018: 0005006b 0x5006b 8000001c: 00002517 auipc a0,0x2 -80000020: b1850513 addi a0,a0,-1256 # 80001b34 +80000020: d3050513 addi a0,a0,-720 # 80001d4c 80000024: 00002617 auipc a2,0x2 -80000028: b9060613 addi a2,a2,-1136 # 80001bb4 <__BSS_END__> +80000028: da860613 addi a2,a2,-600 # 80001dcc <__BSS_END__> 8000002c: 40a60633 sub a2,a2,a0 80000030: 00000593 li a1,0 -80000034: 430000ef jal ra,80000464 +80000034: 648000ef jal ra,8000067c 80000038: 00000517 auipc a0,0x0 -8000003c: 33450513 addi a0,a0,820 # 8000036c <__libc_fini_array> -80000040: 2e4000ef jal ra,80000324 -80000044: 384000ef jal ra,800003c8 <__libc_init_array> +8000003c: 54c50513 addi a0,a0,1356 # 80000584 <__libc_fini_array> +80000040: 4fc000ef jal ra,8000053c +80000044: 59c000ef jal ra,800005e0 <__libc_init_array> 80000048: 008000ef jal ra,80000050
-8000004c: 2ec0006f j 80000338 +8000004c: 5040006f j 80000550 Disassembly of section .text: @@ -34,14 +34,14 @@ Disassembly of section .text: 80000058: 800005b7 lui a1,0x80000 8000005c: 7ffff637 lui a2,0x7ffff 80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080> -80000064: 1780006f j 800001dc +80000064: 17c0006f j 800001e0 80000068 : 80000068: 00000793 li a5,0 8000006c: 00078863 beqz a5,8000007c 80000070: 80000537 lui a0,0x80000 -80000074: 36c50513 addi a0,a0,876 # 8000036c <__stack_top+0x8100036c> -80000078: 2ac0006f j 80000324 +80000074: 58450513 addi a0,a0,1412 # 80000584 <__stack_top+0x81000584> +80000078: 4c40006f j 8000053c 8000007c: 00008067 ret 80000080 : @@ -69,463 +69,599 @@ Disassembly of section .text: 800000d4: 00008067 ret 800000d8 <_exit>: -800000d8: 00000513 li a0,0 -800000dc: 0005006b 0x5006b +800000d8: 250000ef jal ra,80000328 +800000dc: 00000513 li a0,0 +800000e0: 0005006b 0x5006b -800000e0 : -800000e0: fc002573 csrr a0,0xfc0 -800000e4: 0005006b 0x5006b -800000e8: 00002197 auipc gp,0x2 -800000ec: e2018193 addi gp,gp,-480 # 80001f08 <__global_pointer> -800000f0: 7f000117 auipc sp,0x7f000 -800000f4: f1010113 addi sp,sp,-240 # ff000000 <__stack_top> -800000f8: 40000593 li a1,1024 -800000fc: cc102673 csrr a2,0xcc1 -80000100: 02c585b3 mul a1,a1,a2 -80000104: 40b10133 sub sp,sp,a1 -80000108: cc3026f3 csrr a3,0xcc3 -8000010c: 00068663 beqz a3,80000118 -80000110: 00000513 li a0,0 -80000114: 0005006b 0x5006b +800000e4 : +800000e4: fc002573 csrr a0,0xfc0 +800000e8: 0005006b 0x5006b +800000ec: 00002197 auipc gp,0x2 +800000f0: 03418193 addi gp,gp,52 # 80002120 <__global_pointer> +800000f4: 7f000117 auipc sp,0x7f000 +800000f8: f0c10113 addi sp,sp,-244 # ff000000 <__stack_top> +800000fc: 40000593 li a1,1024 +80000100: cc102673 csrr a2,0xcc1 +80000104: 02c585b3 mul a1,a1,a2 +80000108: 40b10133 sub sp,sp,a1 +8000010c: cc3026f3 csrr a3,0xcc3 +80000110: 00068663 beqz a3,8000011c +80000114: 00000513 li a0,0 +80000118: 0005006b 0x5006b -80000118 : -80000118: 00008067 ret +8000011c : +8000011c: 00008067 ret -8000011c : -8000011c: fe010113 addi sp,sp,-32 -80000120: 00112e23 sw ra,28(sp) -80000124: 00812c23 sw s0,24(sp) -80000128: 00912a23 sw s1,20(sp) -8000012c: 01212823 sw s2,16(sp) -80000130: 01312623 sw s3,12(sp) -80000134: fc0027f3 csrr a5,0xfc0 -80000138: 0007806b 0x7806b -8000013c: cc5026f3 csrr a3,0xcc5 -80000140: cc3029f3 csrr s3,0xcc3 -80000144: cc002773 csrr a4,0xcc0 -80000148: fc002673 csrr a2,0xfc0 -8000014c: 800027b7 lui a5,0x80002 -80000150: 00269693 slli a3,a3,0x2 -80000154: b3478793 addi a5,a5,-1228 # 80001b34 <__stack_top+0x81001b34> -80000158: 00d787b3 add a5,a5,a3 -8000015c: 0007a483 lw s1,0(a5) -80000160: 0104a403 lw s0,16(s1) -80000164: 00c4a683 lw a3,12(s1) -80000168: 0089a933 slt s2,s3,s0 -8000016c: 00040793 mv a5,s0 -80000170: 00d90933 add s2,s2,a3 -80000174: 03368433 mul s0,a3,s3 -80000178: 00f9d463 bge s3,a5,80000180 -8000017c: 00098793 mv a5,s3 -80000180: 00f40433 add s0,s0,a5 -80000184: 0084a683 lw a3,8(s1) -80000188: 02c40433 mul s0,s0,a2 -8000018c: 02e907b3 mul a5,s2,a4 -80000190: 00d40433 add s0,s0,a3 -80000194: 00f40433 add s0,s0,a5 -80000198: 00890933 add s2,s2,s0 -8000019c: 01245e63 bge s0,s2,800001b8 -800001a0: 0004a783 lw a5,0(s1) -800001a4: 0044a583 lw a1,4(s1) -800001a8: 00040513 mv a0,s0 -800001ac: 00140413 addi s0,s0,1 -800001b0: 000780e7 jalr a5 -800001b4: fe8916e3 bne s2,s0,800001a0 -800001b8: 0019b993 seqz s3,s3 -800001bc: 0009806b 0x9806b -800001c0: 01c12083 lw ra,28(sp) -800001c4: 01812403 lw s0,24(sp) -800001c8: 01412483 lw s1,20(sp) -800001cc: 01012903 lw s2,16(sp) -800001d0: 00c12983 lw s3,12(sp) -800001d4: 02010113 addi sp,sp,32 -800001d8: 00008067 ret +80000120 : +80000120: fe010113 addi sp,sp,-32 +80000124: 00112e23 sw ra,28(sp) +80000128: 00812c23 sw s0,24(sp) +8000012c: 00912a23 sw s1,20(sp) +80000130: 01212823 sw s2,16(sp) +80000134: 01312623 sw s3,12(sp) +80000138: fc0027f3 csrr a5,0xfc0 +8000013c: 0007806b 0x7806b +80000140: cc5027f3 csrr a5,0xcc5 +80000144: cc3029f3 csrr s3,0xcc3 +80000148: cc002773 csrr a4,0xcc0 +8000014c: fc002673 csrr a2,0xfc0 +80000150: 00279693 slli a3,a5,0x2 +80000154: 800027b7 lui a5,0x80002 +80000158: d4c78793 addi a5,a5,-692 # 80001d4c <__stack_top+0x81001d4c> +8000015c: 00d787b3 add a5,a5,a3 +80000160: 0007a483 lw s1,0(a5) +80000164: 0104a403 lw s0,16(s1) +80000168: 00c4a683 lw a3,12(s1) +8000016c: 0089a933 slt s2,s3,s0 +80000170: 00040793 mv a5,s0 +80000174: 00d90933 add s2,s2,a3 +80000178: 03368433 mul s0,a3,s3 +8000017c: 00f9d463 bge s3,a5,80000184 +80000180: 00098793 mv a5,s3 +80000184: 00f40433 add s0,s0,a5 +80000188: 0084a683 lw a3,8(s1) +8000018c: 02c40433 mul s0,s0,a2 +80000190: 02e907b3 mul a5,s2,a4 +80000194: 00d40433 add s0,s0,a3 +80000198: 00f40433 add s0,s0,a5 +8000019c: 00890933 add s2,s2,s0 +800001a0: 01245e63 bge s0,s2,800001bc +800001a4: 0004a783 lw a5,0(s1) +800001a8: 0044a583 lw a1,4(s1) +800001ac: 00040513 mv a0,s0 +800001b0: 00140413 addi s0,s0,1 +800001b4: 000780e7 jalr a5 +800001b8: fe8916e3 bne s2,s0,800001a4 +800001bc: 0019b993 seqz s3,s3 +800001c0: 0009806b 0x9806b +800001c4: 01c12083 lw ra,28(sp) +800001c8: 01812403 lw s0,24(sp) +800001cc: 01412483 lw s1,20(sp) +800001d0: 01012903 lw s2,16(sp) +800001d4: 00c12983 lw s3,12(sp) +800001d8: 02010113 addi sp,sp,32 +800001dc: 00008067 ret -800001dc : -800001dc: fc010113 addi sp,sp,-64 -800001e0: 02112e23 sw ra,60(sp) -800001e4: 02812c23 sw s0,56(sp) -800001e8: 02912a23 sw s1,52(sp) -800001ec: 03212823 sw s2,48(sp) -800001f0: 03312623 sw s3,44(sp) -800001f4: fc2026f3 csrr a3,0xfc2 -800001f8: fc102873 csrr a6,0xfc1 -800001fc: fc002473 csrr s0,0xfc0 -80000200: cc5027f3 csrr a5,0xcc5 -80000204: 01f00713 li a4,31 -80000208: 0cf74463 blt a4,a5,800002d0 -8000020c: 030408b3 mul a7,s0,a6 -80000210: 00100713 li a4,1 -80000214: 00a8d463 bge a7,a0,8000021c -80000218: 03154733 div a4,a0,a7 -8000021c: 0ce6c863 blt a3,a4,800002ec -80000220: 0ae7d863 bge a5,a4,800002d0 -80000224: fff68693 addi a3,a3,-1 -80000228: 02e54333 div t1,a0,a4 -8000022c: 00030893 mv a7,t1 -80000230: 00f69663 bne a3,a5,8000023c -80000234: 02e56533 rem a0,a0,a4 -80000238: 006508b3 add a7,a0,t1 -8000023c: 0288c4b3 div s1,a7,s0 -80000240: 0288e933 rem s2,a7,s0 -80000244: 0b04ca63 blt s1,a6,800002f8 -80000248: 00100693 li a3,1 -8000024c: 0304c733 div a4,s1,a6 -80000250: 00070663 beqz a4,8000025c -80000254: 00070693 mv a3,a4 -80000258: 0304e733 rem a4,s1,a6 -8000025c: 800029b7 lui s3,0x80002 -80000260: b3498993 addi s3,s3,-1228 # 80001b34 <__stack_top+0x81001b34> -80000264: 00e12e23 sw a4,28(sp) -80000268: 00c10713 addi a4,sp,12 -8000026c: 00b12623 sw a1,12(sp) -80000270: 00c12823 sw a2,16(sp) -80000274: 00d12c23 sw a3,24(sp) -80000278: 02f30333 mul t1,t1,a5 -8000027c: 00279793 slli a5,a5,0x2 -80000280: 00f987b3 add a5,s3,a5 -80000284: 00e7a023 sw a4,0(a5) -80000288: 00612a23 sw t1,20(sp) -8000028c: 06904c63 bgtz s1,80000304 -80000290: 04090063 beqz s2,800002d0 -80000294: 02848433 mul s0,s1,s0 -80000298: 00812a23 sw s0,20(sp) -8000029c: 0009006b 0x9006b -800002a0: cc5027f3 csrr a5,0xcc5 -800002a4: cc202573 csrr a0,0xcc2 -800002a8: 00279793 slli a5,a5,0x2 -800002ac: 00f989b3 add s3,s3,a5 -800002b0: 0009a783 lw a5,0(s3) -800002b4: 0087a683 lw a3,8(a5) -800002b8: 0007a703 lw a4,0(a5) -800002bc: 0047a583 lw a1,4(a5) -800002c0: 00d50533 add a0,a0,a3 -800002c4: 000700e7 jalr a4 -800002c8: 00100793 li a5,1 -800002cc: 0007806b 0x7806b -800002d0: 03c12083 lw ra,60(sp) -800002d4: 03812403 lw s0,56(sp) -800002d8: 03412483 lw s1,52(sp) -800002dc: 03012903 lw s2,48(sp) -800002e0: 02c12983 lw s3,44(sp) -800002e4: 04010113 addi sp,sp,64 -800002e8: 00008067 ret -800002ec: 00068713 mv a4,a3 -800002f0: f2e7cae3 blt a5,a4,80000224 -800002f4: fddff06f j 800002d0 -800002f8: 00000713 li a4,0 -800002fc: 00100693 li a3,1 -80000300: f5dff06f j 8000025c -80000304: 00048713 mv a4,s1 -80000308: 00985463 bge a6,s1,80000310 -8000030c: 00080713 mv a4,a6 -80000310: 800007b7 lui a5,0x80000 -80000314: 11c78793 addi a5,a5,284 # 8000011c <__stack_top+0x8100011c> -80000318: 00f7106b 0xf7106b -8000031c: e01ff0ef jal ra,8000011c -80000320: f71ff06f j 80000290 +800001e0 : +800001e0: fc010113 addi sp,sp,-64 +800001e4: 02112e23 sw ra,60(sp) +800001e8: 02812c23 sw s0,56(sp) +800001ec: 02912a23 sw s1,52(sp) +800001f0: 03212823 sw s2,48(sp) +800001f4: 03312623 sw s3,44(sp) +800001f8: fc2026f3 csrr a3,0xfc2 +800001fc: fc102873 csrr a6,0xfc1 +80000200: fc002473 csrr s0,0xfc0 +80000204: cc5027f3 csrr a5,0xcc5 +80000208: 01f00713 li a4,31 +8000020c: 0cf74463 blt a4,a5,800002d4 +80000210: 030408b3 mul a7,s0,a6 +80000214: 00100713 li a4,1 +80000218: 00a8d463 bge a7,a0,80000220 +8000021c: 03154733 div a4,a0,a7 +80000220: 0ce6c863 blt a3,a4,800002f0 +80000224: 0ae7d863 bge a5,a4,800002d4 +80000228: fff68693 addi a3,a3,-1 +8000022c: 02e54333 div t1,a0,a4 +80000230: 00030893 mv a7,t1 +80000234: 00f69663 bne a3,a5,80000240 +80000238: 02e56533 rem a0,a0,a4 +8000023c: 006508b3 add a7,a0,t1 +80000240: 0288c4b3 div s1,a7,s0 +80000244: 0288e933 rem s2,a7,s0 +80000248: 0b04ca63 blt s1,a6,800002fc +8000024c: 00100693 li a3,1 +80000250: 0304c733 div a4,s1,a6 +80000254: 00070663 beqz a4,80000260 +80000258: 00070693 mv a3,a4 +8000025c: 0304e733 rem a4,s1,a6 +80000260: 800029b7 lui s3,0x80002 +80000264: d4c98993 addi s3,s3,-692 # 80001d4c <__stack_top+0x81001d4c> +80000268: 00e12e23 sw a4,28(sp) +8000026c: 00c10713 addi a4,sp,12 +80000270: 00b12623 sw a1,12(sp) +80000274: 00c12823 sw a2,16(sp) +80000278: 00d12c23 sw a3,24(sp) +8000027c: 02f30333 mul t1,t1,a5 +80000280: 00279793 slli a5,a5,0x2 +80000284: 00f987b3 add a5,s3,a5 +80000288: 00e7a023 sw a4,0(a5) +8000028c: 00612a23 sw t1,20(sp) +80000290: 06904c63 bgtz s1,80000308 +80000294: 04090063 beqz s2,800002d4 +80000298: 02848433 mul s0,s1,s0 +8000029c: 00812a23 sw s0,20(sp) +800002a0: 0009006b 0x9006b +800002a4: cc5027f3 csrr a5,0xcc5 +800002a8: cc202573 csrr a0,0xcc2 +800002ac: 00279793 slli a5,a5,0x2 +800002b0: 00f989b3 add s3,s3,a5 +800002b4: 0009a783 lw a5,0(s3) +800002b8: 0087a683 lw a3,8(a5) +800002bc: 0007a703 lw a4,0(a5) +800002c0: 0047a583 lw a1,4(a5) +800002c4: 00d50533 add a0,a0,a3 +800002c8: 000700e7 jalr a4 +800002cc: 00100793 li a5,1 +800002d0: 0007806b 0x7806b +800002d4: 03c12083 lw ra,60(sp) +800002d8: 03812403 lw s0,56(sp) +800002dc: 03412483 lw s1,52(sp) +800002e0: 03012903 lw s2,48(sp) +800002e4: 02c12983 lw s3,44(sp) +800002e8: 04010113 addi sp,sp,64 +800002ec: 00008067 ret +800002f0: 00068713 mv a4,a3 +800002f4: f2e7cae3 blt a5,a4,80000228 +800002f8: fddff06f j 800002d4 +800002fc: 00000713 li a4,0 +80000300: 00100693 li a3,1 +80000304: f5dff06f j 80000260 +80000308: 00048713 mv a4,s1 +8000030c: 00985463 bge a6,s1,80000314 +80000310: 00080713 mv a4,a6 +80000314: 800007b7 lui a5,0x80000 +80000318: 12078793 addi a5,a5,288 # 80000120 <__stack_top+0x81000120> +8000031c: 00f7106b 0xf7106b +80000320: e01ff0ef jal ra,80000120 +80000324: f71ff06f j 80000294 -80000324 : -80000324: 00050593 mv a1,a0 -80000328: 00000693 li a3,0 -8000032c: 00000613 li a2,0 -80000330: 00000513 li a0,0 -80000334: 20c0006f j 80000540 <__register_exitproc> +80000328 : +80000328: cc5027f3 csrr a5,0xcc5 +8000032c: 00ff0737 lui a4,0xff0 +80000330: 00e787b3 add a5,a5,a4 +80000334: 00879793 slli a5,a5,0x8 +80000338: b0002773 csrr a4,mcycle +8000033c: 00e7a023 sw a4,0(a5) +80000340: b0102773 csrr a4,0xb01 +80000344: 00e7a223 sw a4,4(a5) +80000348: b0202773 csrr a4,minstret +8000034c: 00e7a423 sw a4,8(a5) +80000350: b0302773 csrr a4,mhpmcounter3 +80000354: 00e7a623 sw a4,12(a5) +80000358: b0402773 csrr a4,mhpmcounter4 +8000035c: 00e7a823 sw a4,16(a5) +80000360: b0502773 csrr a4,mhpmcounter5 +80000364: 00e7aa23 sw a4,20(a5) +80000368: b0602773 csrr a4,mhpmcounter6 +8000036c: 00e7ac23 sw a4,24(a5) +80000370: b0702773 csrr a4,mhpmcounter7 +80000374: 00e7ae23 sw a4,28(a5) +80000378: b0802773 csrr a4,mhpmcounter8 +8000037c: 02e7a023 sw a4,32(a5) +80000380: b0902773 csrr a4,mhpmcounter9 +80000384: 02e7a223 sw a4,36(a5) +80000388: b0a02773 csrr a4,mhpmcounter10 +8000038c: 02e7a423 sw a4,40(a5) +80000390: b0b02773 csrr a4,mhpmcounter11 +80000394: 02e7a623 sw a4,44(a5) +80000398: b0c02773 csrr a4,mhpmcounter12 +8000039c: 02e7a823 sw a4,48(a5) +800003a0: b0d02773 csrr a4,mhpmcounter13 +800003a4: 02e7aa23 sw a4,52(a5) +800003a8: b0e02773 csrr a4,mhpmcounter14 +800003ac: 02e7ac23 sw a4,56(a5) +800003b0: b0f02773 csrr a4,mhpmcounter15 +800003b4: 02e7ae23 sw a4,60(a5) +800003b8: b1002773 csrr a4,mhpmcounter16 +800003bc: 04e7a023 sw a4,64(a5) +800003c0: b1102773 csrr a4,mhpmcounter17 +800003c4: 04e7a223 sw a4,68(a5) +800003c8: b1202773 csrr a4,mhpmcounter18 +800003cc: 04e7a423 sw a4,72(a5) +800003d0: b1302773 csrr a4,mhpmcounter19 +800003d4: 04e7a623 sw a4,76(a5) +800003d8: b1402773 csrr a4,mhpmcounter20 +800003dc: 04e7a823 sw a4,80(a5) +800003e0: b1502773 csrr a4,mhpmcounter21 +800003e4: 04e7aa23 sw a4,84(a5) +800003e8: b1602773 csrr a4,mhpmcounter22 +800003ec: 04e7ac23 sw a4,88(a5) +800003f0: b1702773 csrr a4,mhpmcounter23 +800003f4: 04e7ae23 sw a4,92(a5) +800003f8: b1802773 csrr a4,mhpmcounter24 +800003fc: 06e7a023 sw a4,96(a5) +80000400: b1902773 csrr a4,mhpmcounter25 +80000404: 06e7a223 sw a4,100(a5) +80000408: b1a02773 csrr a4,mhpmcounter26 +8000040c: 06e7a423 sw a4,104(a5) +80000410: b1b02773 csrr a4,mhpmcounter27 +80000414: 06e7a623 sw a4,108(a5) +80000418: b1c02773 csrr a4,mhpmcounter28 +8000041c: 06e7a823 sw a4,112(a5) +80000420: b1d02773 csrr a4,mhpmcounter29 +80000424: 06e7aa23 sw a4,116(a5) +80000428: b1e02773 csrr a4,mhpmcounter30 +8000042c: 06e7ac23 sw a4,120(a5) +80000430: b1f02773 csrr a4,mhpmcounter31 +80000434: 06e7ae23 sw a4,124(a5) +80000438: b8002773 csrr a4,mcycleh +8000043c: 08e7a023 sw a4,128(a5) +80000440: b8102773 csrr a4,0xb81 +80000444: 08e7a223 sw a4,132(a5) +80000448: b8202773 csrr a4,minstreth +8000044c: 08e7a423 sw a4,136(a5) +80000450: b8302773 csrr a4,mhpmcounter3h +80000454: 08e7a623 sw a4,140(a5) +80000458: b8402773 csrr a4,mhpmcounter4h +8000045c: 08e7a823 sw a4,144(a5) +80000460: b8502773 csrr a4,mhpmcounter5h +80000464: 08e7aa23 sw a4,148(a5) +80000468: b8602773 csrr a4,mhpmcounter6h +8000046c: 08e7ac23 sw a4,152(a5) +80000470: b8702773 csrr a4,mhpmcounter7h +80000474: 08e7ae23 sw a4,156(a5) +80000478: b8802773 csrr a4,mhpmcounter8h +8000047c: 0ae7a023 sw a4,160(a5) +80000480: b8902773 csrr a4,mhpmcounter9h +80000484: 0ae7a223 sw a4,164(a5) +80000488: b8a02773 csrr a4,mhpmcounter10h +8000048c: 0ae7a423 sw a4,168(a5) +80000490: b8b02773 csrr a4,mhpmcounter11h +80000494: 0ae7a623 sw a4,172(a5) +80000498: b8c02773 csrr a4,mhpmcounter12h +8000049c: 0ae7a823 sw a4,176(a5) +800004a0: b8d02773 csrr a4,mhpmcounter13h +800004a4: 0ae7aa23 sw a4,180(a5) +800004a8: b8e02773 csrr a4,mhpmcounter14h +800004ac: 0ae7ac23 sw a4,184(a5) +800004b0: b8f02773 csrr a4,mhpmcounter15h +800004b4: 0ae7ae23 sw a4,188(a5) +800004b8: b9002773 csrr a4,mhpmcounter16h +800004bc: 0ce7a023 sw a4,192(a5) +800004c0: b9102773 csrr a4,mhpmcounter17h +800004c4: 0ce7a223 sw a4,196(a5) +800004c8: b9202773 csrr a4,mhpmcounter18h +800004cc: 0ce7a423 sw a4,200(a5) +800004d0: b9302773 csrr a4,mhpmcounter19h +800004d4: 0ce7a623 sw a4,204(a5) +800004d8: b9402773 csrr a4,mhpmcounter20h +800004dc: 0ce7a823 sw a4,208(a5) +800004e0: b9502773 csrr a4,mhpmcounter21h +800004e4: 0ce7aa23 sw a4,212(a5) +800004e8: b9602773 csrr a4,mhpmcounter22h +800004ec: 0ce7ac23 sw a4,216(a5) +800004f0: b9702773 csrr a4,mhpmcounter23h +800004f4: 0ce7ae23 sw a4,220(a5) +800004f8: b9802773 csrr a4,mhpmcounter24h +800004fc: 0ee7a023 sw a4,224(a5) +80000500: b9902773 csrr a4,mhpmcounter25h +80000504: 0ee7a223 sw a4,228(a5) +80000508: b9a02773 csrr a4,mhpmcounter26h +8000050c: 0ee7a423 sw a4,232(a5) +80000510: b9b02773 csrr a4,mhpmcounter27h +80000514: 0ee7a623 sw a4,236(a5) +80000518: b9c02773 csrr a4,mhpmcounter28h +8000051c: 0ee7a823 sw a4,240(a5) +80000520: b9d02773 csrr a4,mhpmcounter29h +80000524: 0ee7aa23 sw a4,244(a5) +80000528: b9e02773 csrr a4,mhpmcounter30h +8000052c: 0ee7ac23 sw a4,248(a5) +80000530: b9f02773 csrr a4,mhpmcounter31h +80000534: 0ee7ae23 sw a4,252(a5) +80000538: 00008067 ret -80000338 : -80000338: ff010113 addi sp,sp,-16 -8000033c: 00000593 li a1,0 -80000340: 00812423 sw s0,8(sp) -80000344: 00112623 sw ra,12(sp) -80000348: 00050413 mv s0,a0 -8000034c: 290000ef jal ra,800005dc <__call_exitprocs> -80000350: 800027b7 lui a5,0x80002 -80000354: b307a503 lw a0,-1232(a5) # 80001b30 <__stack_top+0x81001b30> -80000358: 03c52783 lw a5,60(a0) -8000035c: 00078463 beqz a5,80000364 -80000360: 000780e7 jalr a5 -80000364: 00040513 mv a0,s0 -80000368: d71ff0ef jal ra,800000d8 <_exit> +8000053c : +8000053c: 00050593 mv a1,a0 +80000540: 00000693 li a3,0 +80000544: 00000613 li a2,0 +80000548: 00000513 li a0,0 +8000054c: 20c0006f j 80000758 <__register_exitproc> -8000036c <__libc_fini_array>: -8000036c: ff010113 addi sp,sp,-16 -80000370: 00812423 sw s0,8(sp) -80000374: 800017b7 lui a5,0x80001 -80000378: 80001437 lui s0,0x80001 -8000037c: 70440413 addi s0,s0,1796 # 80001704 <__stack_top+0x81001704> -80000380: 70478793 addi a5,a5,1796 # 80001704 <__stack_top+0x81001704> -80000384: 408787b3 sub a5,a5,s0 -80000388: 00912223 sw s1,4(sp) -8000038c: 00112623 sw ra,12(sp) -80000390: 4027d493 srai s1,a5,0x2 -80000394: 02048063 beqz s1,800003b4 <__libc_fini_array+0x48> -80000398: ffc78793 addi a5,a5,-4 -8000039c: 00878433 add s0,a5,s0 -800003a0: 00042783 lw a5,0(s0) -800003a4: fff48493 addi s1,s1,-1 -800003a8: ffc40413 addi s0,s0,-4 -800003ac: 000780e7 jalr a5 -800003b0: fe0498e3 bnez s1,800003a0 <__libc_fini_array+0x34> -800003b4: 00c12083 lw ra,12(sp) -800003b8: 00812403 lw s0,8(sp) -800003bc: 00412483 lw s1,4(sp) -800003c0: 01010113 addi sp,sp,16 -800003c4: 00008067 ret +80000550 : +80000550: ff010113 addi sp,sp,-16 +80000554: 00000593 li a1,0 +80000558: 00812423 sw s0,8(sp) +8000055c: 00112623 sw ra,12(sp) +80000560: 00050413 mv s0,a0 +80000564: 290000ef jal ra,800007f4 <__call_exitprocs> +80000568: 800027b7 lui a5,0x80002 +8000056c: d487a503 lw a0,-696(a5) # 80001d48 <__stack_top+0x81001d48> +80000570: 03c52783 lw a5,60(a0) +80000574: 00078463 beqz a5,8000057c +80000578: 000780e7 jalr a5 +8000057c: 00040513 mv a0,s0 +80000580: b59ff0ef jal ra,800000d8 <_exit> -800003c8 <__libc_init_array>: -800003c8: ff010113 addi sp,sp,-16 -800003cc: 00812423 sw s0,8(sp) -800003d0: 01212023 sw s2,0(sp) -800003d4: 80001437 lui s0,0x80001 -800003d8: 80001937 lui s2,0x80001 -800003dc: 70040793 addi a5,s0,1792 # 80001700 <__stack_top+0x81001700> -800003e0: 70090913 addi s2,s2,1792 # 80001700 <__stack_top+0x81001700> -800003e4: 40f90933 sub s2,s2,a5 -800003e8: 00112623 sw ra,12(sp) -800003ec: 00912223 sw s1,4(sp) -800003f0: 40295913 srai s2,s2,0x2 -800003f4: 02090063 beqz s2,80000414 <__libc_init_array+0x4c> -800003f8: 70040413 addi s0,s0,1792 -800003fc: 00000493 li s1,0 -80000400: 00042783 lw a5,0(s0) -80000404: 00148493 addi s1,s1,1 -80000408: 00440413 addi s0,s0,4 -8000040c: 000780e7 jalr a5 -80000410: fe9918e3 bne s2,s1,80000400 <__libc_init_array+0x38> -80000414: 80001437 lui s0,0x80001 -80000418: 80001937 lui s2,0x80001 -8000041c: 70040793 addi a5,s0,1792 # 80001700 <__stack_top+0x81001700> -80000420: 70490913 addi s2,s2,1796 # 80001704 <__stack_top+0x81001704> -80000424: 40f90933 sub s2,s2,a5 -80000428: 40295913 srai s2,s2,0x2 -8000042c: 02090063 beqz s2,8000044c <__libc_init_array+0x84> -80000430: 70040413 addi s0,s0,1792 -80000434: 00000493 li s1,0 -80000438: 00042783 lw a5,0(s0) -8000043c: 00148493 addi s1,s1,1 -80000440: 00440413 addi s0,s0,4 -80000444: 000780e7 jalr a5 -80000448: fe9918e3 bne s2,s1,80000438 <__libc_init_array+0x70> -8000044c: 00c12083 lw ra,12(sp) -80000450: 00812403 lw s0,8(sp) -80000454: 00412483 lw s1,4(sp) -80000458: 00012903 lw s2,0(sp) -8000045c: 01010113 addi sp,sp,16 -80000460: 00008067 ret +80000584 <__libc_fini_array>: +80000584: ff010113 addi sp,sp,-16 +80000588: 00812423 sw s0,8(sp) +8000058c: 800027b7 lui a5,0x80002 +80000590: 80002437 lui s0,0x80002 +80000594: 91c40413 addi s0,s0,-1764 # 8000191c <__stack_top+0x8100191c> +80000598: 91c78793 addi a5,a5,-1764 # 8000191c <__stack_top+0x8100191c> +8000059c: 408787b3 sub a5,a5,s0 +800005a0: 00912223 sw s1,4(sp) +800005a4: 00112623 sw ra,12(sp) +800005a8: 4027d493 srai s1,a5,0x2 +800005ac: 02048063 beqz s1,800005cc <__libc_fini_array+0x48> +800005b0: ffc78793 addi a5,a5,-4 +800005b4: 00878433 add s0,a5,s0 +800005b8: 00042783 lw a5,0(s0) +800005bc: fff48493 addi s1,s1,-1 +800005c0: ffc40413 addi s0,s0,-4 +800005c4: 000780e7 jalr a5 +800005c8: fe0498e3 bnez s1,800005b8 <__libc_fini_array+0x34> +800005cc: 00c12083 lw ra,12(sp) +800005d0: 00812403 lw s0,8(sp) +800005d4: 00412483 lw s1,4(sp) +800005d8: 01010113 addi sp,sp,16 +800005dc: 00008067 ret -80000464 : -80000464: 00f00313 li t1,15 -80000468: 00050713 mv a4,a0 -8000046c: 02c37e63 bgeu t1,a2,800004a8 -80000470: 00f77793 andi a5,a4,15 -80000474: 0a079063 bnez a5,80000514 -80000478: 08059263 bnez a1,800004fc -8000047c: ff067693 andi a3,a2,-16 -80000480: 00f67613 andi a2,a2,15 -80000484: 00e686b3 add a3,a3,a4 -80000488: 00b72023 sw a1,0(a4) -8000048c: 00b72223 sw a1,4(a4) -80000490: 00b72423 sw a1,8(a4) -80000494: 00b72623 sw a1,12(a4) -80000498: 01070713 addi a4,a4,16 -8000049c: fed766e3 bltu a4,a3,80000488 -800004a0: 00061463 bnez a2,800004a8 -800004a4: 00008067 ret -800004a8: 40c306b3 sub a3,t1,a2 -800004ac: 00269693 slli a3,a3,0x2 -800004b0: 00000297 auipc t0,0x0 -800004b4: 005686b3 add a3,a3,t0 -800004b8: 00c68067 jr 12(a3) -800004bc: 00b70723 sb a1,14(a4) -800004c0: 00b706a3 sb a1,13(a4) -800004c4: 00b70623 sb a1,12(a4) -800004c8: 00b705a3 sb a1,11(a4) -800004cc: 00b70523 sb a1,10(a4) -800004d0: 00b704a3 sb a1,9(a4) -800004d4: 00b70423 sb a1,8(a4) -800004d8: 00b703a3 sb a1,7(a4) -800004dc: 00b70323 sb a1,6(a4) -800004e0: 00b702a3 sb a1,5(a4) -800004e4: 00b70223 sb a1,4(a4) -800004e8: 00b701a3 sb a1,3(a4) -800004ec: 00b70123 sb a1,2(a4) -800004f0: 00b700a3 sb a1,1(a4) -800004f4: 00b70023 sb a1,0(a4) -800004f8: 00008067 ret -800004fc: 0ff5f593 andi a1,a1,255 -80000500: 00859693 slli a3,a1,0x8 -80000504: 00d5e5b3 or a1,a1,a3 -80000508: 01059693 slli a3,a1,0x10 -8000050c: 00d5e5b3 or a1,a1,a3 -80000510: f6dff06f j 8000047c -80000514: 00279693 slli a3,a5,0x2 -80000518: 00000297 auipc t0,0x0 -8000051c: 005686b3 add a3,a3,t0 -80000520: 00008293 mv t0,ra -80000524: fa0680e7 jalr -96(a3) -80000528: 00028093 mv ra,t0 -8000052c: ff078793 addi a5,a5,-16 -80000530: 40f70733 sub a4,a4,a5 -80000534: 00f60633 add a2,a2,a5 -80000538: f6c378e3 bgeu t1,a2,800004a8 -8000053c: f3dff06f j 80000478 +800005e0 <__libc_init_array>: +800005e0: ff010113 addi sp,sp,-16 +800005e4: 00812423 sw s0,8(sp) +800005e8: 01212023 sw s2,0(sp) +800005ec: 80002437 lui s0,0x80002 +800005f0: 80002937 lui s2,0x80002 +800005f4: 91840793 addi a5,s0,-1768 # 80001918 <__stack_top+0x81001918> +800005f8: 91890913 addi s2,s2,-1768 # 80001918 <__stack_top+0x81001918> +800005fc: 40f90933 sub s2,s2,a5 +80000600: 00112623 sw ra,12(sp) +80000604: 00912223 sw s1,4(sp) +80000608: 40295913 srai s2,s2,0x2 +8000060c: 02090063 beqz s2,8000062c <__libc_init_array+0x4c> +80000610: 91840413 addi s0,s0,-1768 +80000614: 00000493 li s1,0 +80000618: 00042783 lw a5,0(s0) +8000061c: 00148493 addi s1,s1,1 +80000620: 00440413 addi s0,s0,4 +80000624: 000780e7 jalr a5 +80000628: fe9918e3 bne s2,s1,80000618 <__libc_init_array+0x38> +8000062c: 80002437 lui s0,0x80002 +80000630: 80002937 lui s2,0x80002 +80000634: 91840793 addi a5,s0,-1768 # 80001918 <__stack_top+0x81001918> +80000638: 91c90913 addi s2,s2,-1764 # 8000191c <__stack_top+0x8100191c> +8000063c: 40f90933 sub s2,s2,a5 +80000640: 40295913 srai s2,s2,0x2 +80000644: 02090063 beqz s2,80000664 <__libc_init_array+0x84> +80000648: 91840413 addi s0,s0,-1768 +8000064c: 00000493 li s1,0 +80000650: 00042783 lw a5,0(s0) +80000654: 00148493 addi s1,s1,1 +80000658: 00440413 addi s0,s0,4 +8000065c: 000780e7 jalr a5 +80000660: fe9918e3 bne s2,s1,80000650 <__libc_init_array+0x70> +80000664: 00c12083 lw ra,12(sp) +80000668: 00812403 lw s0,8(sp) +8000066c: 00412483 lw s1,4(sp) +80000670: 00012903 lw s2,0(sp) +80000674: 01010113 addi sp,sp,16 +80000678: 00008067 ret -80000540 <__register_exitproc>: -80000540: 800027b7 lui a5,0x80002 -80000544: b307a703 lw a4,-1232(a5) # 80001b30 <__stack_top+0x81001b30> -80000548: 14872783 lw a5,328(a4) -8000054c: 04078c63 beqz a5,800005a4 <__register_exitproc+0x64> -80000550: 0047a703 lw a4,4(a5) -80000554: 01f00813 li a6,31 -80000558: 06e84e63 blt a6,a4,800005d4 <__register_exitproc+0x94> -8000055c: 00271813 slli a6,a4,0x2 -80000560: 02050663 beqz a0,8000058c <__register_exitproc+0x4c> -80000564: 01078333 add t1,a5,a6 -80000568: 08c32423 sw a2,136(t1) -8000056c: 1887a883 lw a7,392(a5) -80000570: 00100613 li a2,1 -80000574: 00e61633 sll a2,a2,a4 -80000578: 00c8e8b3 or a7,a7,a2 -8000057c: 1917a423 sw a7,392(a5) -80000580: 10d32423 sw a3,264(t1) -80000584: 00200693 li a3,2 -80000588: 02d50463 beq a0,a3,800005b0 <__register_exitproc+0x70> -8000058c: 00170713 addi a4,a4,1 -80000590: 00e7a223 sw a4,4(a5) -80000594: 010787b3 add a5,a5,a6 -80000598: 00b7a423 sw a1,8(a5) -8000059c: 00000513 li a0,0 -800005a0: 00008067 ret -800005a4: 14c70793 addi a5,a4,332 -800005a8: 14f72423 sw a5,328(a4) -800005ac: fa5ff06f j 80000550 <__register_exitproc+0x10> -800005b0: 18c7a683 lw a3,396(a5) -800005b4: 00170713 addi a4,a4,1 -800005b8: 00e7a223 sw a4,4(a5) -800005bc: 00c6e633 or a2,a3,a2 -800005c0: 18c7a623 sw a2,396(a5) -800005c4: 010787b3 add a5,a5,a6 -800005c8: 00b7a423 sw a1,8(a5) -800005cc: 00000513 li a0,0 -800005d0: 00008067 ret -800005d4: fff00513 li a0,-1 -800005d8: 00008067 ret +8000067c : +8000067c: 00f00313 li t1,15 +80000680: 00050713 mv a4,a0 +80000684: 02c37e63 bgeu t1,a2,800006c0 +80000688: 00f77793 andi a5,a4,15 +8000068c: 0a079063 bnez a5,8000072c +80000690: 08059263 bnez a1,80000714 +80000694: ff067693 andi a3,a2,-16 +80000698: 00f67613 andi a2,a2,15 +8000069c: 00e686b3 add a3,a3,a4 +800006a0: 00b72023 sw a1,0(a4) # ff0000 <__stack_size+0xfefc00> +800006a4: 00b72223 sw a1,4(a4) +800006a8: 00b72423 sw a1,8(a4) +800006ac: 00b72623 sw a1,12(a4) +800006b0: 01070713 addi a4,a4,16 +800006b4: fed766e3 bltu a4,a3,800006a0 +800006b8: 00061463 bnez a2,800006c0 +800006bc: 00008067 ret +800006c0: 40c306b3 sub a3,t1,a2 +800006c4: 00269693 slli a3,a3,0x2 +800006c8: 00000297 auipc t0,0x0 +800006cc: 005686b3 add a3,a3,t0 +800006d0: 00c68067 jr 12(a3) +800006d4: 00b70723 sb a1,14(a4) +800006d8: 00b706a3 sb a1,13(a4) +800006dc: 00b70623 sb a1,12(a4) +800006e0: 00b705a3 sb a1,11(a4) +800006e4: 00b70523 sb a1,10(a4) +800006e8: 00b704a3 sb a1,9(a4) +800006ec: 00b70423 sb a1,8(a4) +800006f0: 00b703a3 sb a1,7(a4) +800006f4: 00b70323 sb a1,6(a4) +800006f8: 00b702a3 sb a1,5(a4) +800006fc: 00b70223 sb a1,4(a4) +80000700: 00b701a3 sb a1,3(a4) +80000704: 00b70123 sb a1,2(a4) +80000708: 00b700a3 sb a1,1(a4) +8000070c: 00b70023 sb a1,0(a4) +80000710: 00008067 ret +80000714: 0ff5f593 andi a1,a1,255 +80000718: 00859693 slli a3,a1,0x8 +8000071c: 00d5e5b3 or a1,a1,a3 +80000720: 01059693 slli a3,a1,0x10 +80000724: 00d5e5b3 or a1,a1,a3 +80000728: f6dff06f j 80000694 +8000072c: 00279693 slli a3,a5,0x2 +80000730: 00000297 auipc t0,0x0 +80000734: 005686b3 add a3,a3,t0 +80000738: 00008293 mv t0,ra +8000073c: fa0680e7 jalr -96(a3) +80000740: 00028093 mv ra,t0 +80000744: ff078793 addi a5,a5,-16 +80000748: 40f70733 sub a4,a4,a5 +8000074c: 00f60633 add a2,a2,a5 +80000750: f6c378e3 bgeu t1,a2,800006c0 +80000754: f3dff06f j 80000690 -800005dc <__call_exitprocs>: -800005dc: fd010113 addi sp,sp,-48 -800005e0: 800027b7 lui a5,0x80002 -800005e4: 01412c23 sw s4,24(sp) -800005e8: b307aa03 lw s4,-1232(a5) # 80001b30 <__stack_top+0x81001b30> -800005ec: 03212023 sw s2,32(sp) -800005f0: 02112623 sw ra,44(sp) -800005f4: 148a2903 lw s2,328(s4) -800005f8: 02812423 sw s0,40(sp) -800005fc: 02912223 sw s1,36(sp) -80000600: 01312e23 sw s3,28(sp) -80000604: 01512a23 sw s5,20(sp) -80000608: 01612823 sw s6,16(sp) -8000060c: 01712623 sw s7,12(sp) -80000610: 01812423 sw s8,8(sp) -80000614: 04090063 beqz s2,80000654 <__call_exitprocs+0x78> -80000618: 00050b13 mv s6,a0 -8000061c: 00058b93 mv s7,a1 -80000620: 00100a93 li s5,1 -80000624: fff00993 li s3,-1 -80000628: 00492483 lw s1,4(s2) -8000062c: fff48413 addi s0,s1,-1 -80000630: 02044263 bltz s0,80000654 <__call_exitprocs+0x78> -80000634: 00249493 slli s1,s1,0x2 -80000638: 009904b3 add s1,s2,s1 -8000063c: 040b8463 beqz s7,80000684 <__call_exitprocs+0xa8> -80000640: 1044a783 lw a5,260(s1) -80000644: 05778063 beq a5,s7,80000684 <__call_exitprocs+0xa8> -80000648: fff40413 addi s0,s0,-1 -8000064c: ffc48493 addi s1,s1,-4 -80000650: ff3416e3 bne s0,s3,8000063c <__call_exitprocs+0x60> -80000654: 02c12083 lw ra,44(sp) -80000658: 02812403 lw s0,40(sp) -8000065c: 02412483 lw s1,36(sp) -80000660: 02012903 lw s2,32(sp) -80000664: 01c12983 lw s3,28(sp) -80000668: 01812a03 lw s4,24(sp) -8000066c: 01412a83 lw s5,20(sp) -80000670: 01012b03 lw s6,16(sp) -80000674: 00c12b83 lw s7,12(sp) -80000678: 00812c03 lw s8,8(sp) -8000067c: 03010113 addi sp,sp,48 -80000680: 00008067 ret -80000684: 00492783 lw a5,4(s2) -80000688: 0044a683 lw a3,4(s1) -8000068c: fff78793 addi a5,a5,-1 -80000690: 04878e63 beq a5,s0,800006ec <__call_exitprocs+0x110> -80000694: 0004a223 sw zero,4(s1) -80000698: fa0688e3 beqz a3,80000648 <__call_exitprocs+0x6c> -8000069c: 18892783 lw a5,392(s2) -800006a0: 008a9733 sll a4,s5,s0 -800006a4: 00492c03 lw s8,4(s2) -800006a8: 00f777b3 and a5,a4,a5 -800006ac: 02079263 bnez a5,800006d0 <__call_exitprocs+0xf4> -800006b0: 000680e7 jalr a3 -800006b4: 00492703 lw a4,4(s2) -800006b8: 148a2783 lw a5,328(s4) -800006bc: 01871463 bne a4,s8,800006c4 <__call_exitprocs+0xe8> -800006c0: f8f904e3 beq s2,a5,80000648 <__call_exitprocs+0x6c> -800006c4: f80788e3 beqz a5,80000654 <__call_exitprocs+0x78> -800006c8: 00078913 mv s2,a5 -800006cc: f5dff06f j 80000628 <__call_exitprocs+0x4c> -800006d0: 18c92783 lw a5,396(s2) -800006d4: 0844a583 lw a1,132(s1) -800006d8: 00f77733 and a4,a4,a5 -800006dc: 00071c63 bnez a4,800006f4 <__call_exitprocs+0x118> -800006e0: 000b0513 mv a0,s6 -800006e4: 000680e7 jalr a3 -800006e8: fcdff06f j 800006b4 <__call_exitprocs+0xd8> -800006ec: 00892223 sw s0,4(s2) -800006f0: fa9ff06f j 80000698 <__call_exitprocs+0xbc> -800006f4: 00058513 mv a0,a1 -800006f8: 000680e7 jalr a3 -800006fc: fb9ff06f j 800006b4 <__call_exitprocs+0xd8> +80000758 <__register_exitproc>: +80000758: 800027b7 lui a5,0x80002 +8000075c: d487a703 lw a4,-696(a5) # 80001d48 <__stack_top+0x81001d48> +80000760: 14872783 lw a5,328(a4) +80000764: 04078c63 beqz a5,800007bc <__register_exitproc+0x64> +80000768: 0047a703 lw a4,4(a5) +8000076c: 01f00813 li a6,31 +80000770: 06e84e63 blt a6,a4,800007ec <__register_exitproc+0x94> +80000774: 00271813 slli a6,a4,0x2 +80000778: 02050663 beqz a0,800007a4 <__register_exitproc+0x4c> +8000077c: 01078333 add t1,a5,a6 +80000780: 08c32423 sw a2,136(t1) +80000784: 1887a883 lw a7,392(a5) +80000788: 00100613 li a2,1 +8000078c: 00e61633 sll a2,a2,a4 +80000790: 00c8e8b3 or a7,a7,a2 +80000794: 1917a423 sw a7,392(a5) +80000798: 10d32423 sw a3,264(t1) +8000079c: 00200693 li a3,2 +800007a0: 02d50463 beq a0,a3,800007c8 <__register_exitproc+0x70> +800007a4: 00170713 addi a4,a4,1 +800007a8: 00e7a223 sw a4,4(a5) +800007ac: 010787b3 add a5,a5,a6 +800007b0: 00b7a423 sw a1,8(a5) +800007b4: 00000513 li a0,0 +800007b8: 00008067 ret +800007bc: 14c70793 addi a5,a4,332 +800007c0: 14f72423 sw a5,328(a4) +800007c4: fa5ff06f j 80000768 <__register_exitproc+0x10> +800007c8: 18c7a683 lw a3,396(a5) +800007cc: 00170713 addi a4,a4,1 +800007d0: 00e7a223 sw a4,4(a5) +800007d4: 00c6e633 or a2,a3,a2 +800007d8: 18c7a623 sw a2,396(a5) +800007dc: 010787b3 add a5,a5,a6 +800007e0: 00b7a423 sw a1,8(a5) +800007e4: 00000513 li a0,0 +800007e8: 00008067 ret +800007ec: fff00513 li a0,-1 +800007f0: 00008067 ret + +800007f4 <__call_exitprocs>: +800007f4: fd010113 addi sp,sp,-48 +800007f8: 800027b7 lui a5,0x80002 +800007fc: 01412c23 sw s4,24(sp) +80000800: d487aa03 lw s4,-696(a5) # 80001d48 <__stack_top+0x81001d48> +80000804: 03212023 sw s2,32(sp) +80000808: 02112623 sw ra,44(sp) +8000080c: 148a2903 lw s2,328(s4) +80000810: 02812423 sw s0,40(sp) +80000814: 02912223 sw s1,36(sp) +80000818: 01312e23 sw s3,28(sp) +8000081c: 01512a23 sw s5,20(sp) +80000820: 01612823 sw s6,16(sp) +80000824: 01712623 sw s7,12(sp) +80000828: 01812423 sw s8,8(sp) +8000082c: 04090063 beqz s2,8000086c <__call_exitprocs+0x78> +80000830: 00050b13 mv s6,a0 +80000834: 00058b93 mv s7,a1 +80000838: 00100a93 li s5,1 +8000083c: fff00993 li s3,-1 +80000840: 00492483 lw s1,4(s2) +80000844: fff48413 addi s0,s1,-1 +80000848: 02044263 bltz s0,8000086c <__call_exitprocs+0x78> +8000084c: 00249493 slli s1,s1,0x2 +80000850: 009904b3 add s1,s2,s1 +80000854: 040b8463 beqz s7,8000089c <__call_exitprocs+0xa8> +80000858: 1044a783 lw a5,260(s1) +8000085c: 05778063 beq a5,s7,8000089c <__call_exitprocs+0xa8> +80000860: fff40413 addi s0,s0,-1 +80000864: ffc48493 addi s1,s1,-4 +80000868: ff3416e3 bne s0,s3,80000854 <__call_exitprocs+0x60> +8000086c: 02c12083 lw ra,44(sp) +80000870: 02812403 lw s0,40(sp) +80000874: 02412483 lw s1,36(sp) +80000878: 02012903 lw s2,32(sp) +8000087c: 01c12983 lw s3,28(sp) +80000880: 01812a03 lw s4,24(sp) +80000884: 01412a83 lw s5,20(sp) +80000888: 01012b03 lw s6,16(sp) +8000088c: 00c12b83 lw s7,12(sp) +80000890: 00812c03 lw s8,8(sp) +80000894: 03010113 addi sp,sp,48 +80000898: 00008067 ret +8000089c: 00492783 lw a5,4(s2) +800008a0: 0044a683 lw a3,4(s1) +800008a4: fff78793 addi a5,a5,-1 +800008a8: 04878e63 beq a5,s0,80000904 <__call_exitprocs+0x110> +800008ac: 0004a223 sw zero,4(s1) +800008b0: fa0688e3 beqz a3,80000860 <__call_exitprocs+0x6c> +800008b4: 18892783 lw a5,392(s2) +800008b8: 008a9733 sll a4,s5,s0 +800008bc: 00492c03 lw s8,4(s2) +800008c0: 00f777b3 and a5,a4,a5 +800008c4: 02079263 bnez a5,800008e8 <__call_exitprocs+0xf4> +800008c8: 000680e7 jalr a3 +800008cc: 00492703 lw a4,4(s2) +800008d0: 148a2783 lw a5,328(s4) +800008d4: 01871463 bne a4,s8,800008dc <__call_exitprocs+0xe8> +800008d8: f92784e3 beq a5,s2,80000860 <__call_exitprocs+0x6c> +800008dc: f80788e3 beqz a5,8000086c <__call_exitprocs+0x78> +800008e0: 00078913 mv s2,a5 +800008e4: f5dff06f j 80000840 <__call_exitprocs+0x4c> +800008e8: 18c92783 lw a5,396(s2) +800008ec: 0844a583 lw a1,132(s1) +800008f0: 00f77733 and a4,a4,a5 +800008f4: 00071c63 bnez a4,8000090c <__call_exitprocs+0x118> +800008f8: 000b0513 mv a0,s6 +800008fc: 000680e7 jalr a3 +80000900: fcdff06f j 800008cc <__call_exitprocs+0xd8> +80000904: 00892223 sw s0,4(s2) +80000908: fa9ff06f j 800008b0 <__call_exitprocs+0xbc> +8000090c: 00058513 mv a0,a1 +80000910: 000680e7 jalr a3 +80000914: fb9ff06f j 800008cc <__call_exitprocs+0xd8> Disassembly of section .init_array: -80001700 <__init_array_start>: -80001700: 0068 addi a0,sp,12 -80001702: 8000 0x8000 +80001918 <__init_array_start>: +80001918: 0068 addi a0,sp,12 +8000191a: 8000 0x8000 Disassembly of section .data: -80001708 : -80001708: 0000 unimp -8000170a: 0000 unimp -8000170c: 19f4 addi a3,sp,252 -8000170e: 8000 0x8000 -80001710: 1a5c addi a5,sp,308 -80001712: 8000 0x8000 -80001714: 1ac4 addi s1,sp,372 -80001716: 8000 0x8000 +80001920 : +80001920: 0000 unimp +80001922: 0000 unimp +80001924: 1c0c addi a1,sp,560 +80001926: 8000 0x8000 +80001928: 1c74 addi a3,sp,572 +8000192a: 8000 0x8000 +8000192c: 1cdc addi a5,sp,628 +8000192e: 8000 0x8000 ... -800017b0: 0001 nop -800017b2: 0000 unimp -800017b4: 0000 unimp -800017b6: 0000 unimp -800017b8: 330e fld ft6,224(sp) -800017ba: abcd j 80001dac <__BSS_END__+0x1f8> -800017bc: 1234 addi a3,sp,296 -800017be: e66d bnez a2,800018a8 -800017c0: deec sw a1,124(a3) -800017c2: 0005 c.nop 1 -800017c4: 0000000b 0xb +800019c8: 0001 nop +800019ca: 0000 unimp +800019cc: 0000 unimp +800019ce: 0000 unimp +800019d0: 330e fld ft6,224(sp) +800019d2: abcd j 80001fc4 <__BSS_END__+0x1f8> +800019d4: 1234 addi a3,sp,296 +800019d6: e66d bnez a2,80001ac0 +800019d8: deec sw a1,124(a3) +800019da: 0005 c.nop 1 +800019dc: 0000000b 0xb ... Disassembly of section .sdata: -80001b30 <_global_impure_ptr>: -80001b30: 1708 addi a0,sp,928 -80001b32: 8000 0x8000 +80001d48 <_global_impure_ptr>: +80001d48: 1920 addi s0,sp,184 +80001d4a: 8000 0x8000 Disassembly of section .bss: -80001b34 : +80001d4c : ... Disassembly of section .comment: @@ -534,21 +670,20 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3920 fld fs0,112(a0) - c: 322e fld ft4,232(sp) - e: 302e fld ft0,232(sp) - ... + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2541 jal 680 <__stack_size+0x280> + 0: 2941 jal 490 <__stack_size+0x90> 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14> - c: 0000001b 0x1b - 10: 1004 addi s1,sp,32 + c: 001f 0000 1004 0x10040000001f 12: 7205 lui tp,0xfffe1 14: 3376 fld ft6,376(sp) 16: 6932 flw fs2,12(sp) @@ -557,3 +692,5 @@ Disassembly of section .riscv.attributes: 1c: 326d jal fffff9c6 <__stack_top+0xfff9c6> 1e: 3070 fld fa2,224(s0) 20: 665f 7032 0030 0x307032665f + 26: 0108 addi a0,sp,128 + 28: 0b0a slli s6,s6,0x2 diff --git a/driver/tests/demo/kernel.elf b/driver/tests/demo/kernel.elf index c4af06b4b2276641383bd964948c5e05af7b0c72..26213022f9dbefc7bf491e857c6410abe24e876e 100755 GIT binary patch delta 1923 zcmZ9MZ)h839LIl`na=SG8;r6gnBU*jgE-EHtz1 z#UNJ}Z3SO6JhJ&hF_bw(I=NVe$qOaIh88MRtsr6{OrZ!uGN#N)rG9^D0yR&#e4g)f z&-Z)3d+xc*J@d}zNbXgtHI0piC)@cUJnw9#gk62L`hBhsx1CiM>;~W%?AVF z%|wVc6MeKNu7&9H5LsgB>+SXN)+WiZu`Q-OpC#R%S@j{6%7Nm=a^T?u*oNKvw7#@U z;`$Q7Zkw51P_@lfrf!=zm|@$z$&A|O$IO^*e!`rz%{6A+Ha}%r>~|zWE9?g|><4Z3 zI}*uIfdeqG#Q|WE1Hdu|fRsgm3k75m0P0x;fZ;3xz-ShJFqVZMoXo-x#4giZB0G2rbq#OboP(Tg=pq@hj7|tO8jOO46V>$T2$sGJ( zJO@8$vA+ogupi8@AGFz@M5n+3;1&mfMGgSV8~{=t0nI2Nj{s25BLEEN5dcQ>@Pn~D z{NQ9BelVVgAGFxtf&$nNX4ntf@b_`2zyaVE2Y^Km0LvWE^16lX)uTlQm$t8l?mDMZ zZrZin*u5@nn-6bmlC*F3ELL{ca1XYt>rNWGEG<&n3nA#?0qi<=Dy=oXogQj@6gB%= z$u!cWog0WwQKLh1rP5+g^9TMnccU`5ZjMik9s9?zv@J&4RvCY^{pd`E9*H~cqs`5o zWL?7X3y$>Sd+WB>az=GOS*K5(dQ$H{edeVB{djmVG8m?l19*eyFC?ZGND83qQsw!D zp~!_uJUlVSe>3=_2C3;tdeJMcc63GXx?>TDocM9$T&9y;3>xjKX}p?fV_o9=juTR+ zXze_k)FrYC71Cp1rP_WJDpjkk&EX5a z3(;A<8@`{9>U6G7Z$Q^pgXizm>2jS`B$QOd3XI~j>A?!L@#nMJR1{#cK3GM3T<`CP zp~tYC7KU3%p%c)x@*xmr7zFg#liA9=HaL zw$FbYunPSf9R%|EHRwOxWc7fo?;XsyVwms28h!yij`Ot(dozG)C& z2D=r0;6A^&7c>+uO!tc>!%+CCjCP3#x_rOb^Di5HGP>juZyKij3VM%=TZVab8NC&} gIZdcAAa;xn*%u@_F8(!4-?JvsB*tqEH(zQ00o9uKcmMzZ delta 1311 zcmZ9LOK1~O6o$_vGkMguPCD92jcwCpGHC@vZHui^BvNaAp>-iB6cOX1nxes=h)6T_ zfg)X4E@;7mAmU2II!bk=3q==Q2r4QB6c@S>tX1&=f%u<1u;dOmXTE!$_sq%o)(0Ce z1s%IvCPJYmaue}ZAV?*WSn7pskv_fu#^NnXZ91anlIne$BdXC9m69;mBSe~VZdx}X zU9N|nuA6azD6r|%AvLAHbB^LM?4CKY=m~w^3EU&Z>i81M;E>T94qsd<(4zuag{ZTkO-XvylC-o$}v5BJ1B&{S!%smXY-Ydtz)-R7X7ai-r@xi|}dNiMo9!w{WlN1oi#)gU0esQg_u>+sZf{L7Yc(FH2C&?_R zJy2$}8mlWHzBaCt{9Y!%tf5brJ}j)s}+2^f**m)muCBy75u4!bKorQxe0?_hkGt63_Kh0_|IqZ z4SY~+79ZqIa2L8};n+tm>H?R$sA5JpfIZE{uCo2ElAc5OpdSW5cak*)=7cA~BVcYI z^EenQ&p#?<=JS&{_&~*gY;YZ%#XrS>WPV$8um<5!hu+!XIP~QN&%pOepIJr0FTs2P zm?MFU-h&UwWMvEf4*CQ}EX&V)013X}z(9pdN(SG-`X2aq zmHa*6w+k**E4(d9d$i@2B)Y>>;#IiUyyJpxX>sFtMX_tK3xmuPp^Mc<$i= `CSR_MPM_BASE && read_addr < (`CSR_MPM_BASE + 32)) + | (read_addr >= `CSR_MPM_BASE_H && read_addr < (`CSR_MPM_BASE_H + 32)))) begin + assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr); + end end endcase end diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index a282afe1..b14a7e71 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -7,49 +7,29 @@ module VX_csr_unit #( input wire reset, `ifdef PERF_ENABLE - VX_perf_memsys_if perf_memsys_if, + VX_perf_memsys_if perf_memsys_if, VX_perf_pipeline_if perf_pipeline_if, `endif VX_cmt_to_csr_if cmt_to_csr_if, - VX_fpu_to_csr_if fpu_to_csr_if, - - VX_csr_io_req_if csr_io_req_if, - VX_csr_io_rsp_if csr_io_rsp_if, + VX_fpu_to_csr_if fpu_to_csr_if, VX_csr_req_if csr_req_if, VX_commit_if csr_commit_if, input wire busy, + input wire[`NUM_WARPS-1:0] fpu_pending, output wire[`NUM_WARPS-1:0] pending -); - VX_csr_pipe_req_if csr_pipe_req_if(); - VX_commit_if csr_pipe_rsp_if(); - - wire select_io_rsp; - - VX_csr_io_arb csr_io_arb ( - .clk (clk), - .reset (reset), - - .select_io_rsp (select_io_rsp), - - .csr_core_req_if (csr_req_if), - .csr_io_req_if (csr_io_req_if), - .csr_pipe_req_if (csr_pipe_req_if), - - .csr_pipe_rsp_if (csr_pipe_rsp_if), - .csr_io_rsp_if (csr_io_rsp_if), - .csr_commit_if (csr_commit_if) - ); - +); wire csr_we_s1; wire [`CSR_ADDR_BITS-1:0] csr_addr_s1; wire [31:0] csr_read_data, csr_read_data_s1; wire [31:0] csr_updated_data_s1; - wire write_enable = csr_pipe_rsp_if.valid && csr_we_s1; + wire write_enable = csr_commit_if.valid && csr_we_s1; + + wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.rs1) : csr_req_if.rs1_data; VX_csr_data #( .CORE_ID(CORE_ID) @@ -62,20 +42,20 @@ module VX_csr_unit #( `endif .cmt_to_csr_if (cmt_to_csr_if), .fpu_to_csr_if (fpu_to_csr_if), - .read_enable (csr_pipe_req_if.valid), - .read_addr (csr_pipe_req_if.addr), - .read_wid (csr_pipe_req_if.wid), + .read_enable (csr_req_if.valid), + .read_addr (csr_req_if.addr), + .read_wid (csr_req_if.wid), .read_data (csr_read_data), .write_enable (write_enable), .write_addr (csr_addr_s1), - .write_wid (csr_pipe_rsp_if.wid), + .write_wid (csr_commit_if.wid), .write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]), .busy (busy) ); - wire write_hazard = (csr_addr_s1 == csr_pipe_req_if.addr) - && (csr_pipe_rsp_if.wid == csr_pipe_req_if.wid) - && csr_pipe_rsp_if.valid; + wire write_hazard = (csr_addr_s1 == csr_req_if.addr) + && (csr_commit_if.wid == csr_req_if.wid) + && csr_commit_if.valid; wire [31:0] csr_read_data_qual = write_hazard ? csr_updated_data_s1 : csr_read_data; @@ -83,53 +63,55 @@ module VX_csr_unit #( reg csr_we_s0_unqual; - always @(*) begin - csr_we_s0_unqual = 0; - case (csr_pipe_req_if.op_type) + always @(*) begin + case (csr_req_if.op_type) `CSR_RW: begin - csr_updated_data = csr_pipe_req_if.data; + csr_updated_data = csr_req_data; csr_we_s0_unqual = 1; end `CSR_RS: begin - csr_updated_data = csr_read_data_qual | csr_pipe_req_if.data; - csr_we_s0_unqual = (csr_pipe_req_if.data != 0); + csr_updated_data = csr_read_data_qual | csr_req_data; + csr_we_s0_unqual = (csr_req_data != 0); end `CSR_RC: begin - csr_updated_data = csr_read_data_qual & ~csr_pipe_req_if.data; - csr_we_s0_unqual = (csr_pipe_req_if.data != 0); + csr_updated_data = csr_read_data_qual & ~csr_req_data; + csr_we_s0_unqual = (csr_req_data != 0); + end + default: begin + csr_updated_data = 'x; + csr_we_s0_unqual = 0; end - default: csr_updated_data = 'x; endcase end - wire stall_in = !csr_pipe_req_if.is_io && fpu_pending[csr_pipe_req_if.wid]; + wire stall_in = fpu_pending[csr_req_if.wid]; - wire pipe_req_valid_qual = csr_pipe_req_if.valid && !stall_in; + wire csr_req_valid = csr_req_if.valid && !stall_in; - wire stall_out = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid; + wire stall_out = ~csr_commit_if.ready && csr_commit_if.valid; VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32), + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32), .RESETW (1) ) pipe_reg ( .clk (clk), .reset (reset), .enable (!stall_out), - .data_in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}), - .data_out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1}) + .data_in ({csr_req_valid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.rd, csr_req_if.wb, csr_we_s0_unqual, csr_req_if.addr, csr_read_data_qual, csr_updated_data}), + .data_out ({csr_commit_if.valid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_we_s1, csr_addr_s1, csr_read_data_s1, csr_updated_data_s1}) ); for (genvar i = 0; i < `NUM_THREADS; i++) begin - assign csr_pipe_rsp_if.data[i] = (csr_addr_s1 == `CSR_WTID) ? i : + assign csr_commit_if.data[i] = (csr_addr_s1 == `CSR_WTID) ? i : (csr_addr_s1 == `CSR_LTID || csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) : csr_read_data_s1; end - assign csr_pipe_rsp_if.eop = 1'b1; + assign csr_commit_if.eop = 1'b1; // can accept new request? - assign csr_pipe_req_if.ready = ~(stall_out || stall_in); + assign csr_req_if.ready = ~(stall_out || stall_in); // pending request reg [`NUM_WARPS-1:0] pending_r; @@ -137,11 +119,11 @@ module VX_csr_unit #( if (reset) begin pending_r <= 0; end else begin - if (csr_pipe_rsp_if.valid && csr_pipe_rsp_if.ready) begin - pending_r[csr_pipe_rsp_if.wid] <= 0; + if (csr_commit_if.valid && csr_commit_if.ready) begin + pending_r[csr_commit_if.wid] <= 0; end - if (csr_pipe_req_if.valid && csr_pipe_req_if.ready) begin - pending_r[csr_pipe_req_if.wid] <= 1; + if (csr_req_if.valid && csr_req_if.ready) begin + pending_r[csr_req_if.wid] <= 1; end end end diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 6b65c227..f195d75e 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -6,11 +6,7 @@ module VX_execute #( `SCOPE_IO_VX_execute input wire clk, - input wire reset, - - // CSR io interface - VX_csr_io_req_if csr_io_req_if, - VX_csr_io_rsp_if csr_io_rsp_if, + input wire reset, // Dcache interface VX_dcache_core_req_if dcache_req_if, @@ -81,8 +77,6 @@ module VX_execute #( `endif .cmt_to_csr_if (cmt_to_csr_if), .fpu_to_csr_if (fpu_to_csr_if), - .csr_io_req_if (csr_io_req_if), - .csr_io_rsp_if (csr_io_rsp_if), .csr_req_if (csr_req_if), .csr_commit_if (csr_commit_if), .fpu_pending (fpu_pending), diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 720e9ffc..be31cb29 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -34,19 +34,7 @@ module VX_pipeline #( input wire icache_rsp_valid, input wire [31:0] icache_rsp_data, input wire [`ICORE_TAG_WIDTH-1:0] icache_rsp_tag, - output wire icache_rsp_ready, - - // CSR I/O Request - input wire csr_req_valid, - input wire[11:0] csr_req_addr, - input wire csr_req_rw, - input wire[31:0] csr_req_data, - output wire csr_req_ready, - - // CSR I/O Response - output wire csr_rsp_valid, - output wire[31:0] csr_rsp_data, - input wire csr_rsp_ready, + output wire icache_rsp_ready, `ifdef PERF_ENABLE VX_perf_memsys_if perf_memsys_if, @@ -116,26 +104,6 @@ module VX_pipeline #( assign icache_core_rsp_if.tag = icache_rsp_tag; assign icache_rsp_ready = icache_core_rsp_if.ready; - // - // CSR IO request - // - - VX_csr_io_req_if csr_io_req_if(); - assign csr_io_req_if.valid = csr_req_valid; - assign csr_io_req_if.rw = csr_req_rw; - assign csr_io_req_if.addr = csr_req_addr; - assign csr_io_req_if.data = csr_req_data; - assign csr_req_ready = csr_io_req_if.ready; - - // - // CSR IO response - // - - VX_csr_io_rsp_if csr_io_rsp_if(); - assign csr_rsp_valid = csr_io_rsp_if.valid; - assign csr_rsp_data = csr_io_rsp_if.data; - assign csr_io_rsp_if.ready = csr_rsp_ready; - /////////////////////////////////////////////////////////////////////////// VX_cmt_to_csr_if cmt_to_csr_if(); @@ -226,9 +194,6 @@ module VX_pipeline #( .dcache_req_if (dcache_core_req_if), .dcache_rsp_if (dcache_core_rsp_if), - - .csr_io_req_if (csr_io_req_if), - .csr_io_rsp_if (csr_io_rsp_if), .cmt_to_csr_if (cmt_to_csr_if), diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index c59ba86d..bb56e9b9 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -22,19 +22,6 @@ module Vortex ( input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag, output wire mem_rsp_ready, - // CSR Request - input wire csr_req_valid, - input wire [`VX_CSR_ID_WIDTH-1:0] csr_req_coreid, - input wire [11:0] csr_req_addr, - input wire csr_req_rw, - input wire [31:0] csr_req_data, - output wire csr_req_ready, - - // CSR Response - output wire csr_rsp_valid, - output wire [31:0] csr_rsp_data, - input wire csr_rsp_ready, - // Status output wire busy ); @@ -53,21 +40,8 @@ module Vortex ( wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag; wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_valid; - wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_req_addr; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_rw; - wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_req_data; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_ready; - - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_rsp_valid; - wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_rsp_data; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_rsp_ready; - wire [`NUM_CLUSTERS-1:0] per_cluster_busy; - wire [`LOG2UP(`NUM_CLUSTERS)-1:0] csr_cluster_id = `LOG2UP(`NUM_CLUSTERS)'(csr_req_coreid >> `CLOG2(`NUM_CORES)); - wire [`NC_BITS-1:0] csr_core_id = `NC_BITS'(csr_req_coreid); - for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin wire cluster_reset; @@ -100,58 +74,10 @@ module Vortex ( .mem_rsp_tag (per_cluster_mem_rsp_tag [i]), .mem_rsp_ready (per_cluster_mem_rsp_ready [i]), - .csr_req_valid (per_cluster_csr_req_valid [i]), - .csr_req_coreid (csr_core_id), - .csr_req_rw (per_cluster_csr_req_rw [i]), - .csr_req_addr (per_cluster_csr_req_addr [i]), - .csr_req_data (per_cluster_csr_req_data [i]), - .csr_req_ready (per_cluster_csr_req_ready [i]), - - .csr_rsp_valid (per_cluster_csr_rsp_valid [i]), - .csr_rsp_data (per_cluster_csr_rsp_data [i]), - .csr_rsp_ready (per_cluster_csr_rsp_ready [i]), - .busy (per_cluster_busy [i]) ); end - VX_csr_arb #( - .NUM_REQS (`NUM_CLUSTERS), - .DATA_WIDTH (32), - .ADDR_WIDTH (12), - .BUFFERED_REQ (1), - .BUFFERED_RSP (1) - ) csr_arb ( - .clk (clk), - .reset (reset), - - .request_id (csr_cluster_id), - - // input requests - .req_valid_in (csr_req_valid), - .req_addr_in (csr_req_addr), - .req_rw_in (csr_req_rw), - .req_data_in (csr_req_data), - .req_ready_in (csr_req_ready), - - // output request - .req_valid_out (per_cluster_csr_req_valid), - .req_addr_out (per_cluster_csr_req_addr), - .req_rw_out (per_cluster_csr_req_rw), - .req_data_out (per_cluster_csr_req_data), - .req_ready_out (per_cluster_csr_req_ready), - - // input responses - .rsp_valid_in (per_cluster_csr_rsp_valid), - .rsp_data_in (per_cluster_csr_rsp_data), - .rsp_ready_in (per_cluster_csr_rsp_ready), - - // output response - .rsp_valid_out (csr_rsp_valid), - .rsp_data_out (csr_rsp_data), - .rsp_ready_out (csr_rsp_ready) - ); - assign busy = (| per_cluster_busy); if (`L3_ENABLE) begin diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index 55ef6712..4184aa9c 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -63,8 +63,6 @@ localparam AFU_ID_H = 16'h0004; // AFU ID Higher localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ; localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE; localparam CMD_RUN = `AFU_IMAGE_CMD_RUN; -localparam CMD_CSR_READ = `AFU_IMAGE_CMD_CSR_READ; -localparam CMD_CSR_WRITE = `AFU_IMAGE_CMD_CSR_WRITE; localparam MMIO_CMD_TYPE = `AFU_IMAGE_MMIO_CMD_TYPE; localparam MMIO_IO_ADDR = `AFU_IMAGE_MMIO_IO_ADDR; @@ -75,10 +73,7 @@ localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS; localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ; localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE; -localparam MMIO_CSR_CORE = `AFU_IMAGE_MMIO_CSR_CORE; -localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR; -localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA; -localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ; +localparam MMIO_DEV_CAPS = `AFU_IMAGE_MMIO_DEV_CAPS; localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE); localparam CCI_RD_RQ_DATAW = CCI_LINE_WIDTH + CCI_RD_RQ_TAGW; @@ -88,9 +83,7 @@ localparam STATE_READ = 1; localparam STATE_WRITE = 2; localparam STATE_START = 3; localparam STATE_RUN = 4; -localparam STATE_CSR_READ = 5; -localparam STATE_CSR_WRITE = 6; -localparam STATE_MAX_VALUE = 7; +localparam STATE_MAX_VALUE = 5; localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE); `ifdef SCOPE @@ -99,6 +92,8 @@ localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE); wire [127:0] afu_id = `AFU_ACCEL_UUID; +wire [63:0] dev_caps = {16'(`NUM_THREADS), 16'(`NUM_WARPS), 16'(`NUM_CORES), 16'(`IMPLEMENTATION_ID)}; + reg [STATE_WIDTH-1:0] state; // Vortex ports /////////////////////////////////////////////////////////////// @@ -116,18 +111,7 @@ wire [`VX_MEM_LINE_WIDTH-1:0] vx_mem_rsp_data; wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_rsp_tag; wire vx_mem_rsp_ready; -wire vx_csr_io_req_valid; -wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; -wire [11:0] vx_csr_io_req_addr; -wire vx_csr_io_req_rw; -wire [31:0] vx_csr_io_req_data; -wire vx_csr_io_req_ready; - -wire vx_csr_io_rsp_valid; -wire [31:0] vx_csr_io_rsp_data; -wire vx_csr_io_rsp_ready; - -wire vx_busy; +wire vx_busy; reg vx_reset; reg vx_mem_en; @@ -145,11 +129,6 @@ wire cmd_scope_read; wire cmd_scope_write; `endif -reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; -reg [11:0] cmd_csr_addr; -reg [31:0] cmd_csr_rdata; -reg [31:0] cmd_csr_wdata; - // MMIO controller //////////////////////////////////////////////////////////// `IGNORE_WARNINGS_BEGIN @@ -246,27 +225,9 @@ always @(posedge clk) begin `endif end `endif - MMIO_CSR_CORE: begin - cmd_csr_core <= $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_CORE: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_CSR_ADDR: begin - cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_ADDR: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_CSR_DATA: begin - cmd_csr_wdata <= $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_DATA: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); - `endif - end default: begin `ifdef DBG_PRINT_OPAE - $display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); + $display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data)); `endif end endcase @@ -298,12 +259,6 @@ always @(posedge clk) begin end `endif end - MMIO_CSR_READ: begin - mmio_tx.data <= 64'(cmd_csr_rdata); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_csr_rdata); - `endif - end `ifdef SCOPE MMIO_SCOPE_READ: begin mmio_tx.data <= cmd_scope_rdata; @@ -312,6 +267,12 @@ always @(posedge clk) begin `endif end `endif + MMIO_DEV_CAPS: begin + mmio_tx.data <= dev_caps; + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_DEV_CAPS: addr=%0h, data=%0h", $time, mmio_hdr.address, dev_caps); + `endif + end default: begin mmio_tx.data <= 64'h0; `ifdef DBG_PRINT_OPAE @@ -326,7 +287,6 @@ end wire cmd_read_done; wire cmd_write_done; -wire cmd_csr_done; wire cmd_run_done; reg [$clog2(RESET_DELAY+1)-1:0] vx_reset_ctr; @@ -366,18 +326,6 @@ always @(posedge clk) begin vx_reset <= 1; state <= STATE_START; end - CMD_CSR_READ: begin - `ifdef DBG_PRINT_OPAE - $display("%t: STATE CSR_READ: addr=%0h", $time, cmd_csr_addr); - `endif - state <= STATE_CSR_READ; - end - CMD_CSR_WRITE: begin - `ifdef DBG_PRINT_OPAE - $display("%t: STATE CSR_WRITE: addr=%0h data=%0d", $time, cmd_csr_addr, cmd_csr_wdata); - `endif - state <= STATE_CSR_WRITE; - end default: begin state <= state; end @@ -421,24 +369,6 @@ always @(posedge clk) begin end end - STATE_CSR_READ: begin - if (cmd_csr_done) begin - state <= STATE_IDLE; - `ifdef DBG_PRINT_OPAE - $display("%t: STATE IDLE", $time); - `endif - end - end - - STATE_CSR_WRITE: begin - if (cmd_csr_done) begin - state <= STATE_IDLE; - `ifdef DBG_PRINT_OPAE - $display("%t: STATE IDLE", $time); - `endif - end - end - default: begin state <= state; end @@ -926,40 +856,6 @@ assign cci_mem_req_valid = cci_mem_req_rw ? cci_mem_wr_req_valid : cci_mem_rd_re assign cci_mem_req_addr = cci_mem_req_rw ? cci_mem_wr_req_addr : cci_mem_rd_req_addr; assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_ctr; -// CSRs /////////////////////////////////////////////////////////////////////// - -reg csr_io_req_sent; - -assign vx_csr_io_req_valid = !csr_io_req_sent - && ((STATE_CSR_READ == state || STATE_CSR_WRITE == state)); -assign vx_csr_io_req_coreid = cmd_csr_core; -assign vx_csr_io_req_rw = (STATE_CSR_WRITE == state); -assign vx_csr_io_req_addr = cmd_csr_addr; -assign vx_csr_io_req_data = cmd_csr_wdata; - -assign vx_csr_io_rsp_ready = 1; - -assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid; - -always @(posedge clk) begin - if (reset) begin - csr_io_req_sent <= 0; - end else begin - if (vx_csr_io_req_valid && vx_csr_io_req_ready) begin - csr_io_req_sent <= 1; - end - if (cmd_csr_done) begin - csr_io_req_sent <= 0; - end - end - - if ((STATE_CSR_READ == state) - && vx_csr_io_rsp_ready - && vx_csr_io_rsp_valid) begin - cmd_csr_rdata <= vx_csr_io_rsp_data; - end -end - // Vortex ///////////////////////////////////////////////////////////////////// assign cmd_run_done = !vx_busy; @@ -984,19 +880,6 @@ Vortex #() vortex ( .mem_rsp_data (vx_mem_rsp_data), .mem_rsp_tag (vx_mem_rsp_tag), .mem_rsp_ready (vx_mem_rsp_ready), - - // CSR Request - .csr_req_valid (vx_csr_io_req_valid), - .csr_req_coreid (vx_csr_io_req_coreid), - .csr_req_addr (vx_csr_io_req_addr), - .csr_req_rw (vx_csr_io_req_rw), - .csr_req_data (vx_csr_io_req_data), - .csr_req_ready (vx_csr_io_req_ready), - - // CSR Response - .csr_rsp_valid (vx_csr_io_rsp_valid), - .csr_rsp_data (vx_csr_io_rsp_data), - .csr_rsp_ready (vx_csr_io_rsp_ready), // status .busy (vx_busy) diff --git a/hw/rtl/afu/vortex_afu.vh b/hw/rtl/afu/vortex_afu.vh index c92a3a32..386710e5 100644 --- a/hw/rtl/afu/vortex_afu.vh +++ b/hw/rtl/afu/vortex_afu.vh @@ -26,21 +26,16 @@ `define AFU_ACCEL_NAME "vortex_afu" `define AFU_ACCEL_UUID 128'h35f9452b_25c2_434c_93d5_6f8c60db361c -`define AFU_IMAGE_CMD_CSR_READ 4 -`define AFU_IMAGE_CMD_CSR_WRITE 5 `define AFU_IMAGE_CMD_MEM_READ 1 `define AFU_IMAGE_CMD_MEM_WRITE 2 `define AFU_IMAGE_CMD_RUN 3 `define AFU_IMAGE_MMIO_CMD_TYPE 10 -`define AFU_IMAGE_MMIO_CSR_CORE 24 -`define AFU_IMAGE_MMIO_CSR_ADDR 26 -`define AFU_IMAGE_MMIO_CSR_DATA 28 -`define AFU_IMAGE_MMIO_CSR_READ 30 `define AFU_IMAGE_MMIO_DATA_SIZE 16 `define AFU_IMAGE_MMIO_IO_ADDR 12 `define AFU_IMAGE_MMIO_MEM_ADDR 14 `define AFU_IMAGE_MMIO_SCOPE_READ 20 `define AFU_IMAGE_MMIO_SCOPE_WRITE 22 +`define AFU_IMAGE_MMIO_DEV_CAPS 24 `define AFU_IMAGE_MMIO_STATUS 18 `define AFU_IMAGE_POWER 0 diff --git a/hw/rtl/interfaces/VX_csr_io_req_if.v b/hw/rtl/interfaces/VX_csr_io_req_if.v deleted file mode 100644 index 225fcd1f..00000000 --- a/hw/rtl/interfaces/VX_csr_io_req_if.v +++ /dev/null @@ -1,16 +0,0 @@ -`ifndef VX_CSR_IO_REQ_IF -`define VX_CSR_IO_REQ_IF - -`include "VX_define.vh" - -interface VX_csr_io_req_if (); - - wire valid; - wire [`CSR_ADDR_BITS-1:0] addr; - wire rw; - wire [31:0] data; - wire ready; - -endinterface - -`endif diff --git a/hw/rtl/interfaces/VX_csr_io_rsp_if.v b/hw/rtl/interfaces/VX_csr_io_rsp_if.v deleted file mode 100644 index 333894e3..00000000 --- a/hw/rtl/interfaces/VX_csr_io_rsp_if.v +++ /dev/null @@ -1,14 +0,0 @@ -`ifndef VX_CSR_IO_RSP_IF -`define VX_CSR_IO_RSP_IF - -`include "VX_define.vh" - -interface VX_csr_io_rsp_if (); - - wire valid; - wire [31:0] data; - wire ready; - -endinterface - -`endif diff --git a/hw/rtl/interfaces/VX_csr_pipe_req_if.v b/hw/rtl/interfaces/VX_csr_pipe_req_if.v deleted file mode 100644 index e87cd3cd..00000000 --- a/hw/rtl/interfaces/VX_csr_pipe_req_if.v +++ /dev/null @@ -1,22 +0,0 @@ -`ifndef VX_CSR_PIPE_REQ_IF -`define VX_CSR_PIPE_REQ_IF - -`include "VX_define.vh" - -interface VX_csr_pipe_req_if (); - - wire valid; - wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] tmask; - wire [31:0] PC; - wire [`CSR_BITS-1:0] op_type; - wire [`CSR_ADDR_BITS-1:0] addr; - wire [31:0] data; - wire [`NR_BITS-1:0] rd; - wire wb; - wire is_io; - wire ready; - -endinterface - -`endif \ No newline at end of file diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 7dc57c43..8f85ce9e 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -77,15 +77,9 @@ void Simulator::reset() { mem_rsp_vec_.clear(); mem_rsp_active_ = false; - csr_req_active_ = false; - csr_rsp_value_ = nullptr; vortex_->mem_rsp_valid = 0; vortex_->mem_req_ready = 0; - //vortex_->io_req_ready = 0; - //vortex_->io_rsp_valid = 0; - vortex_->csr_req_valid = 0; - vortex_->csr_rsp_ready = 0; vortex_->reset = 1; @@ -108,14 +102,11 @@ void Simulator::step() { this->eval(); mem_rsp_ready_ = vortex_->mem_rsp_ready; - csr_req_ready_ = vortex_->csr_req_ready; vortex_->clk = 1; this->eval(); this->eval_mem_bus(); - this->eval_io_bus(); - this->eval_csr_bus(); #ifndef NDEBUG fflush(stdout); @@ -209,53 +200,6 @@ void Simulator::eval_mem_bus() { vortex_->mem_req_ready = !mem_stalled; } -void Simulator::eval_io_bus() { - /*for (int i = 0; i < NUM_THREADS; ++i) { - if (((vortex_->io_req_valid >> i) & 0x1) - && ((VL_WDATA_GETW(vortex_->io_req_addr, i, NUM_THREADS, 30) << 2) == IO_BUS_ADDR_COUT)) { - assert(vortex_->io_req_rw); - int data = vortex_->io_req_data[i]; - int tid = data >> 16; - char c = data & 0xff; - auto& ss_buf = print_bufs_[tid]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << tid << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - vortex_->io_req_ready = 1; - vortex_->io_rsp_valid = 0;*/ -} - -void Simulator::eval_csr_bus() { - if (csr_req_active_) { - if (vortex_->csr_req_valid && csr_req_ready_) { - #ifndef NDEBUG - if (vortex_->csr_req_rw) - std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_req_coreid << ", addr=" << std::hex << vortex_->csr_req_addr << ", value=" << vortex_->csr_req_data << std::endl; - else - std::cout << std::dec << timestamp << ": [sim] CSR Rd Req: core=" << (int)vortex_->csr_req_coreid << ", addr=" << std::hex << vortex_->csr_req_addr << std::endl; - #endif - vortex_->csr_req_valid = 0; - if (vortex_->csr_req_rw) - csr_req_active_ = false; - } - if (vortex_->csr_rsp_valid && vortex_->csr_rsp_ready) { - *csr_rsp_value_ = vortex_->csr_rsp_data; - vortex_->csr_rsp_ready = 0; - csr_req_active_ = false; - #ifndef NDEBUG - std::cout << std::dec << timestamp << ": [sim] CSR Rsp: value=" << vortex_->csr_rsp_data << std::endl; - #endif - } - } else { - vortex_->csr_req_valid = 0; - vortex_->csr_rsp_ready = 0; - } -} - void Simulator::wait(uint32_t cycles) { for (int i = 0; i < cycles; ++i) { this->step(); @@ -266,33 +210,6 @@ bool Simulator::is_busy() const { return vortex_->busy; } -bool Simulator::csr_req_active() const { - return csr_req_active_; -} - -void Simulator::set_csr(int core_id, int addr, unsigned value) { - vortex_->csr_req_valid = 1; - vortex_->csr_req_coreid = core_id; - vortex_->csr_req_addr = addr; - vortex_->csr_req_rw = 1; - vortex_->csr_req_data = value; - vortex_->csr_rsp_ready = 0; - - csr_req_active_ = true; -} - -void Simulator::get_csr(int core_id, int addr, unsigned *value) { - vortex_->csr_req_valid = 1; - vortex_->csr_req_coreid = core_id; - vortex_->csr_req_addr = addr; - vortex_->csr_req_rw = 0; - vortex_->csr_rsp_ready = 1; - - csr_rsp_value_ = value; - - csr_req_active_ = true; -} - void Simulator::run() { #ifndef NDEBUG std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index cc131a4b..5163270b 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -30,14 +30,9 @@ public: bool is_busy() const; - bool csr_req_active() const; - void reset(); void step(); void wait(uint32_t cycles); - - void set_csr(int core_id, int addr, unsigned value); - void get_csr(int core_id, int addr, unsigned *value); void run(); @@ -61,16 +56,11 @@ private: void eval(); void eval_mem_bus(); - void eval_io_bus(); - void eval_csr_bus(); std::list mem_rsp_vec_; bool mem_rsp_active_; - bool mem_rsp_ready_; - bool csr_req_ready_; - bool csr_req_active_; - uint32_t* csr_rsp_value_; + bool mem_rsp_ready_; RAM *ram_; VVortex *vortex_; diff --git a/hw/syn/opae/vortex_afu.json b/hw/syn/opae/vortex_afu.json index dc163056..1d49bf51 100644 --- a/hw/syn/opae/vortex_afu.json +++ b/hw/syn/opae/vortex_afu.json @@ -8,8 +8,6 @@ "cmd-mem-read": 1, "cmd-mem-write": 2, "cmd-run": 3, - "cmd-csr-read": 4, - "cmd-csr-write": 5, "mmio-cmd-type": 10, "mmio-io-addr": 12, @@ -18,10 +16,7 @@ "mmio-status": 18, "mmio-scope-read": 20, "mmio-scope-write": 22, - "mmio-csr-core": 24, - "mmio-csr-addr": 26, - "mmio-csr-data": 28, - "mmio-csr-read": 30, + "mmio-dev-caps": 24, "afu-top-interface": { diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index 9fa3df14..73da51ac 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -41,21 +41,29 @@ set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON -#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED -#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" -#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS -#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0 -#set_global_assignment -name FITTER_EFFORT "STANDARD FIT" -#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" -#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON -#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM -#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON -#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON -#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON -#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON -#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 -#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 -#set_global_assignment -name SEED 1 +set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED +set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" +set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS +set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0 +set_global_assignment -name FITTER_EFFORT "STANDARD FIT" +set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" +set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON +set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM +set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON +set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON +set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON +set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON + +set_global_assignment -name USE_HIGH_SPEED_ADDER ON +set_global_assignment -name MUX_RESTRUCTURE ON +set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON +set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED" +set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON +set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON + +set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 +set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 +set_global_assignment -name SEED 1 switch $opts(family) { "Arria 10" { diff --git a/runtime/Makefile b/runtime/Makefile index ba9d1366..ab9d150a 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -10,7 +10,7 @@ CFLAGS += -I./include -I../hw PROJECT = libvortexrt -SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c +SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_perf.c OBJS := $(addsuffix .o, $(notdir $(SRCS))) diff --git a/runtime/include/vx_intrinsics.h b/runtime/include/vx_intrinsics.h index 962a28fd..402e167f 100644 --- a/runtime/include/vx_intrinsics.h +++ b/runtime/include/vx_intrinsics.h @@ -7,6 +7,51 @@ extern "C" { #endif +#ifdef __ASSEMBLY__ +#define __ASM_STR(x) x +#else +#define __ASM_STR(x) #x +#endif + +#define vx_csr_swap(csr, val) ({ \ + unsigned __v = (unsigned )(val); \ + __asm__ __volatile__ ("csrrw %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \ + __v; \ +}) + +#define vx_csr_read(csr) ({ \ + register unsigned __v; \ + __asm__ __volatile__ ("csrr %0, " __ASM_STR(csr) : "=r" (__v) :: "memory"); \ + __v; \ +}) + +#define vx_csr_write(csr, val) ({ \ + unsigned __v = (unsigned )(val); \ + __asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \ +}) + +#define vx_csr_read_set(csr, val) ({ \ + unsigned __v = (unsigned )(val); \ + __asm__ __volatile__ ("csrrs %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \ + __v; \ +}) + +#define vx_csr_set(csr, val) ({ \ + unsigned __v = (unsigned )(val); \ + __asm__ __volatile__ ("csrs " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \ +}) + +#define vx_csr_read_clear(csr, val) ({ \ + unsigned __v = (unsigned )(val); \ + __asm__ __volatile__ ("csrrc %0, " __ASM_STR(csr) ", %1" : "=r" (__v) : "rK" (__v) : "memory"); \ + __v; \ +}) + +#define vx_csr_clear(csr, val) ({ \ + unsigned __v = (unsigned )(val); \ + __asm__ __volatile__ ("csrc " __ASM_STR(csr) ", %0" :: "rK" (__v) : "memory"); \ +}) + // Set thread mask inline void vx_tmc(unsigned num_threads) { asm volatile (".insn s 0x6b, 0, x0, 0(%0)" :: "r"(num_threads)); @@ -95,20 +140,6 @@ inline int vx_num_cores() { return result; } -// Return the number of cycles -inline int vx_num_cycles() { - int result; - asm volatile ("csrr %0, %1" : "=r"(result) : "i"(CSR_CYCLE)); - return result; -} - -// Return the number of instructions -inline int vx_num_instrs() { - int result; - asm volatile ("csrr %0, %1" : "=r"(result) : "i"(CSR_INSTRET)); - return result; -} - #define __if(b) vx_split(b); \ if (b) diff --git a/runtime/src/vx_perf.c b/runtime/src/vx_perf.c new file mode 100644 index 00000000..175ffb3d --- /dev/null +++ b/runtime/src/vx_perf.c @@ -0,0 +1,27 @@ + +#include +#include +#include + +#define DUMP_CSR_4(d, s) \ + csr_mem[d + 0] = vx_csr_read(s + 0); \ + csr_mem[d + 1] = vx_csr_read(s + 1); \ + csr_mem[d + 2] = vx_csr_read(s + 2); \ + csr_mem[d + 3] = vx_csr_read(s + 3); + +#define DUMP_CSR_32(d, s) \ + DUMP_CSR_4(d + 0, s + 0) \ + DUMP_CSR_4(d + 4, s + 4) \ + DUMP_CSR_4(d + 8, s + 8) \ + DUMP_CSR_4(d + 12, s + 12) \ + DUMP_CSR_4(d + 16, s + 16) \ + DUMP_CSR_4(d + 20, s + 20) \ + DUMP_CSR_4(d + 24, s + 24) \ + DUMP_CSR_4(d + 28, s + 28) + +void vx_perf_dump() { + int core_id = vx_core_id(); + uint32_t* const csr_mem = (uint32_t*)(IO_ADDR_CSR + 64 * sizeof(uint32_t) * core_id); + DUMP_CSR_32(0, CSR_MPM_BASE) + DUMP_CSR_32(32, CSR_MPM_BASE_H) +} \ No newline at end of file diff --git a/runtime/src/vx_start.S b/runtime/src/vx_start.S index c3bf2800..8b953668 100644 --- a/runtime/src/vx_start.S +++ b/runtime/src/vx_start.S @@ -42,6 +42,9 @@ _start: .type _exit, @function .global _exit _exit: + # dump performance CSRs + call vx_perf_dump + # disable all threads in current warp li a0, 0 .insn s 0x6b, 0, x0, 0(a0) # tmc a0 diff --git a/simX/core.cpp b/simX/core.cpp index a1133196..6cd14c03 100644 --- a/simX/core.cpp +++ b/simX/core.cpp @@ -269,16 +269,16 @@ Word Core::get_csr(Addr addr, int tid, int wid) { } else if (addr == CSR_NC) { // Number of cores return arch_.num_cores(); - } else if (addr == CSR_INSTRET) { + } else if (addr == CSR_MINSTRET) { // NumInsts return insts_; - } else if (addr == CSR_INSTRET_H) { + } else if (addr == CSR_MINSTRET_H) { // NumInsts return (Word)(insts_ >> 32); - } else if (addr == CSR_CYCLE) { + } else if (addr == CSR_MCYCLE) { // NumCycles return (Word)steps_; - } else if (addr == CSR_CYCLE_H) { + } else if (addr == CSR_MCYCLE_H) { // NumCycles return (Word)(steps_ >> 32); } else {