diff --git a/.travis.yml b/.travis.yml index 60fdf7d3..a753da01 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,14 +17,10 @@ install: - ci/toolchain_install.sh -all - export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain - export VERILATOR_ROOT=/opt/verilator - - export PATH=$VERILATOR_ROOT/bin:$PATH - - # VORTEX - - git clone --recursive https://github.com/vortexgpgpu/vortex.git - - cd vortex - - make -j`nproc` + - export PATH=$VERILATOR_ROOT/bin:$PATH script: + - make -j`nproc` - ci/test_runtime.sh - ci/test_driver.sh - ci/test_riscv_isa.sh diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index d0f17edc..a177b9db 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -23,8 +23,8 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -#DEBUG=1 -#SCOPE=1 +DEBUG=1 +SCOPE=1 CFLAGS += -fPIC diff --git a/driver/tests/Makefile b/driver/tests/Makefile index 989b3690..4bbd1a6e 100644 --- a/driver/tests/Makefile +++ b/driver/tests/Makefile @@ -2,14 +2,17 @@ all: $(MAKE) -C basic $(MAKE) -C demo $(MAKE) -C dogfood + $(MAKE) -C graphics run: $(MAKE) -C basic run-rtlsim $(MAKE) -C demo run-rtlsim $(MAKE) -C dogfood run-rtlsim + $(MAKE) -C graphics run-rtlsim clean: $(MAKE) -C basic clean $(MAKE) -C demo clean $(MAKE) -C dogfood clean + $(MAKE) -C graphics clean diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 3b60b636..88557ae1 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -2,6 +2,7 @@ #include #include #include +#include <chrono> #include "common.h" #define RT_CHECK(_expr) \ @@ -68,6 +69,9 @@ uint64_t shuffle(int i, uint64_t value) { int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) { int errors = 0; + + auto time_start = std::chrono::high_resolution_clock::now(); + int num_blocks_8 = (64 * num_blocks) / 8; // update source buffer @@ -85,7 +89,9 @@ int run_memcopy_test(uint32_t
dev_addr, uint64_t value, int num_blocks) { // write buffer to local memory std::cout << "write buffer to local memory" << std::endl; + auto t0 = std::chrono::high_resolution_clock::now(); RT_CHECK(vx_copy_to_dev(buffer, dev_addr, 64 * num_blocks, 0)); + auto t1 = std::chrono::high_resolution_clock::now(); // clear destination buffer for (int i = 0; i < num_blocks_8; ++i) { @@ -94,7 +100,9 @@ int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) { // read buffer from local memory std::cout << "read buffer from local memory" << std::endl; + auto t2 = std::chrono::high_resolution_clock::now(); RT_CHECK(vx_copy_from_dev(buffer, dev_addr, 64 * num_blocks, 0)); + auto t3 = std::chrono::high_resolution_clock::now(); // verify result std::cout << "verify result" << std::endl; @@ -114,6 +122,16 @@ int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) { return 1; } + auto time_end = std::chrono::high_resolution_clock::now(); + + double elapsed; + elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count(); + printf("upload time: %lg ms\n", elapsed); + elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count(); + printf("download time: %lg ms\n", elapsed); + elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count(); + printf("Total elapsed time: %lg ms\n", elapsed); + return 0; } @@ -121,6 +139,8 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t num_points) { int errors = 0; + + auto time_start = std::chrono::high_resolution_clock::now(); // update source buffer { @@ -130,7 +150,9 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, } } std::cout << "upload source buffer" << std::endl; + auto t0 = std::chrono::high_resolution_clock::now(); RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, buf_size, 0)); + auto t1 = std::chrono::high_resolution_clock::now(); // clear destination buffer { @@ -143,21 +165,25 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, RT_CHECK(vx_copy_to_dev(buffer,
kernel_arg.dst_ptr, buf_size, 0)); // start device - std::cout << "start device" << std::endl; + std::cout << "start execution" << std::endl; + auto t2 = std::chrono::high_resolution_clock::now(); RT_CHECK(vx_start(device)); - - // wait for completion - std::cout << "wait for completion" << std::endl; RT_CHECK(vx_ready_wait(device, -1)); + auto t3 = std::chrono::high_resolution_clock::now(); // flush the caches std::cout << "flush the caches" << std::endl; + auto t4 = std::chrono::high_resolution_clock::now(); RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size)); + auto t5 = std::chrono::high_resolution_clock::now(); // read buffer from local memory std::cout << "read buffer from local memory" << std::endl; + auto t6 = std::chrono::high_resolution_clock::now(); RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); + auto t7 = std::chrono::high_resolution_clock::now(); + // verify result std::cout << "verify result" << std::endl; for (uint32_t i = 0; i < num_points; ++i) { @@ -176,6 +202,20 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, return 1; } + auto time_end = std::chrono::high_resolution_clock::now(); + + double elapsed; + elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count(); + printf("upload time: %lg ms\n", elapsed); + elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count(); + printf("execute time: %lg ms\n", elapsed); + elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t5 - t4).count(); + printf("flush time: %lg ms\n", elapsed); + elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t7 - t6).count(); + printf("download time: %lg ms\n", elapsed); + elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count(); + printf("Total elapsed time: %lg ms\n", elapsed); + return 0; } @@ -196,7 +236,7 @@ int main(int argc, char *argv[]) { unsigned max_cores; RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores)); - uint32_t num_points = max_cores * count; + uint32_t num_points = 1 * count; uint32_t num_blocks = (num_points * sizeof(uint32_t) + 63) /
64; uint32_t buf_size = num_blocks * 64; @@ -222,9 +262,7 @@ int main(int argc, char *argv[]) { // run tests if (0 == test || -1 == test) { std::cout << "run memcopy test" << std::endl; - RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, 1)); - if (num_blocks >= 4) RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, num_blocks/2)); - if (num_blocks >= 2) RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks)); + RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks)); } if (1 == test || -1 == test) { @@ -251,4 +289,4 @@ int main(int argc, char *argv[]) { std::cout << "Test PASSED" << std::endl; return 0; -} +} \ No newline at end of file diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 41110da1..3591b8c2 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -2,7 +2,6 @@ `include "platform_if.vh" import local_mem_cfg_pkg::*; `include "afu_json_info.vh" -`include "VX_define.vh" `else `include "vortex_afu.vh" /* verilator lint_off IMPORTSTAR */ @@ -992,7 +991,7 @@ end assign cmd_run_done = !vx_busy; Vortex #() vortex ( - `SCOPE_BIND_top_vortex + `SCOPE_BIND_afu_vortex .clk (clk), .reset (reset | vx_reset), @@ -1068,36 +1067,19 @@ end `ifdef SCOPE -`SCOPE_ASSIGN (scope_reset, vx_reset); - -`SCOPE_ASSIGN (scope_dram_req_valid, vx_dram_req_valid); -`SCOPE_ASSIGN (scope_dram_req_addr, {vx_dram_req_addr, 4'b0}); -`SCOPE_ASSIGN (scope_dram_req_rw, vx_dram_req_rw); -`SCOPE_ASSIGN (scope_dram_req_byteen,vx_dram_req_byteen); -`SCOPE_ASSIGN (scope_dram_req_data, vx_dram_req_data); -`SCOPE_ASSIGN (scope_dram_req_tag, vx_dram_req_tag); -`SCOPE_ASSIGN (scope_dram_req_ready, vx_dram_req_ready); - -`SCOPE_ASSIGN (scope_dram_rsp_valid, vx_dram_rsp_valid); -`SCOPE_ASSIGN (scope_dram_rsp_data, vx_dram_rsp_data); -`SCOPE_ASSIGN (scope_dram_rsp_tag, vx_dram_rsp_tag); -`SCOPE_ASSIGN (scope_dram_rsp_ready, vx_dram_rsp_ready); - -`SCOPE_ASSIGN (scope_snp_req_valid, vx_snp_req_valid); 
-`SCOPE_ASSIGN (scope_snp_req_addr, {vx_snp_req_addr, 4'b0}); -`SCOPE_ASSIGN (scope_snp_req_invalidate, vx_snp_req_invalidate); -`SCOPE_ASSIGN (scope_snp_req_tag, vx_snp_req_tag); -`SCOPE_ASSIGN (scope_snp_req_ready, vx_snp_req_ready); - -`SCOPE_ASSIGN (scope_snp_rsp_valid, vx_snp_rsp_valid); -`SCOPE_ASSIGN (scope_snp_rsp_tag, vx_snp_rsp_tag); -`SCOPE_ASSIGN (scope_snp_rsp_ready, vx_snp_rsp_ready); - -`SCOPE_ASSIGN (scope_snp_rsp_valid, vx_snp_rsp_valid); -`SCOPE_ASSIGN (scope_snp_rsp_tag, vx_snp_rsp_tag); -`SCOPE_ASSIGN (scope_snp_rsp_ready, vx_snp_rsp_ready); - -`SCOPE_ASSIGN (scope_busy, vx_busy); +`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioRdValid, cp2af_sRxPort.c0.mmioRdValid); +`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioWrValid, cp2af_sRxPort.c0.mmioWrValid); +`SCOPE_ASSIGN (mmio_hdr_address, mmio_hdr.address); +`SCOPE_ASSIGN (mmio_hdr_length, mmio_hdr.length); +`SCOPE_ASSIGN (ccip_sRxPort_c0_hdr_mdata, cp2af_sRxPort.c0.hdr.mdata); +`SCOPE_ASSIGN (ccip_sRxPort_c0_rspValid, cp2af_sRxPort.c0.rspValid); +`SCOPE_ASSIGN (ccip_sRxPort_c1_rspValid, cp2af_sRxPort.c1.rspValid); +`SCOPE_ASSIGN (ccip_sTxPort_c0_fire, af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull); +`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_address, af2cp_sTxPort.c0.hdr.address); +`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_mdata, af2cp_sTxPort.c0.hdr.mdata); +`SCOPE_ASSIGN (ccip_sTxPort_c1_fire, af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull); +`SCOPE_ASSIGN (ccip_sTxPort_c1_hdr_address, af2cp_sTxPort.c1.hdr.address); +`SCOPE_ASSIGN (ccip_sTxPort_c2_mmioRdValid, af2cp_sTxPort.c2.mmioRdValid); wire scope_changed = `SCOPE_TRIGGER; diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 9f4ff5f7..6f3492c7 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -36,13 +36,27 @@ `endif `ifndef IO_BUS_BASE_ADDR -`define IO_BUS_BASE_ADDR 32'hFFFFFF00 +`define IO_BUS_BASE_ADDR 32'hFF000000 `endif `ifndef IO_BUS_ADDR_COUT `define IO_BUS_ADDR_COUT 32'hFFFFFFFC `endif +`ifndef FRAME_BUFFER_BASE_ADDR +`define 
FRAME_BUFFER_BASE_ADDR 32'hFF000000 +`endif + +`ifndef FRAME_BUFFER_WIDTH +`define FRAME_BUFFER_WIDTH 16'd1920 +`endif + +`ifndef FRAME_BUFFER_HEIGHT +`define FRAME_BUFFER_HEIGHT 16'd1080 +`endif + +`define FRAME_BUFFER_SIZE (`FRAME_BUFFER_WIDTH * `FRAME_BUFFER_HEIGHT) + `ifndef L2_ENABLE `define L2_ENABLE 0 `endif diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index ffad1717..e732e973 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -90,18 +90,18 @@ module VX_gpu_unit #( // can accept new request? assign gpu_req_if.ready = gpu_commit_if.ready; - `SCOPE_ASSIGN (scope_gpu_req_valid, gpu_req_if.valid); - `SCOPE_ASSIGN (scope_gpu_req_wid, gpu_req_if.wid); - `SCOPE_ASSIGN (scope_gpu_req_tmask, gpu_req_if.tmask); - `SCOPE_ASSIGN (scope_gpu_req_op_type, gpu_req_if.op_type); - `SCOPE_ASSIGN (scope_gpu_req_rs1, gpu_req_if.rs1_data[0]); - `SCOPE_ASSIGN (scope_gpu_req_rs2, gpu_req_if.rs2_data); - `SCOPE_ASSIGN (scope_gpu_req_ready, gpu_req_if.ready); - `SCOPE_ASSIGN (scope_gpu_rsp_valid, warp_ctl_if.valid); - `SCOPE_ASSIGN (scope_gpu_rsp_wid, warp_ctl_if.wid); - `SCOPE_ASSIGN (scope_gpu_rsp_tmc, warp_ctl_if.tmc); - `SCOPE_ASSIGN (scope_gpu_rsp_wspawn, warp_ctl_if.wspawn); - `SCOPE_ASSIGN (scope_gpu_rsp_split, warp_ctl_if.split); - `SCOPE_ASSIGN (scope_gpu_rsp_barrier, warp_ctl_if.barrier); + `SCOPE_ASSIGN (gpu_req_fire, gpu_req_if.valid && gpu_req_if.ready); + `SCOPE_ASSIGN (gpu_req_wid, gpu_req_if.wid); + `SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask); + `SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type); + `SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]); + `SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data); + + `SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid); + `SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid); + `SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc); + `SCOPE_ASSIGN (gpu_rsp_wspawn, warp_ctl_if.wspawn); + `SCOPE_ASSIGN (gpu_rsp_split, warp_ctl_if.split); + `SCOPE_ASSIGN (gpu_rsp_barrier, warp_ctl_if.barrier); endmodule \ No newline at end of
file diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 6bb52123..13776c55 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -60,16 +60,14 @@ module VX_icache_stage #( // Can accept new response? assign icache_rsp_if.ready = ifetch_rsp_if.ready; - `SCOPE_ASSIGN (scope_icache_req_valid, icache_req_if.valid); - `SCOPE_ASSIGN (scope_icache_req_wid, ifetch_req_if.wid); - `SCOPE_ASSIGN (scope_icache_req_addr, {icache_req_if.addr, 2'b0}); - `SCOPE_ASSIGN (scope_icache_req_tag, req_tag); - `SCOPE_ASSIGN (scope_icache_req_ready, icache_req_if.ready); + `SCOPE_ASSIGN (icache_req_fire, icache_req_fire); + `SCOPE_ASSIGN (icache_req_wid, ifetch_req_if.wid); + `SCOPE_ASSIGN (icache_req_addr, {icache_req_if.addr, 2'b0}); + `SCOPE_ASSIGN (icache_req_tag, req_tag); - `SCOPE_ASSIGN (scope_icache_rsp_valid, icache_rsp_if.valid); - `SCOPE_ASSIGN (scope_icache_rsp_data, icache_rsp_if.data); - `SCOPE_ASSIGN (scope_icache_rsp_tag, rsp_tag); - `SCOPE_ASSIGN (scope_icache_rsp_ready, icache_rsp_if.ready); + `SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready); + `SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data); + `SCOPE_ASSIGN (icache_rsp_tag, rsp_tag); `ifdef DBG_PRINT_CORE_ICACHE always @(posedge clk) begin diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 46e0388f..91070f3e 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -100,38 +100,38 @@ module VX_issue #( .gpu_req_if (gpu_req_if) ); - `SCOPE_ASSIGN (scope_issue_valid, ibuf_deq_if.valid); - `SCOPE_ASSIGN (scope_issue_wid, ibuf_deq_if.wid); - `SCOPE_ASSIGN (scope_issue_tmask, ibuf_deq_if.tmask); - `SCOPE_ASSIGN (scope_issue_pc, ibuf_deq_if.PC); - `SCOPE_ASSIGN (scope_issue_ex_type, ibuf_deq_if.ex_type); - `SCOPE_ASSIGN (scope_issue_op_type, ibuf_deq_if.op_type); - `SCOPE_ASSIGN (scope_issue_op_mod, ibuf_deq_if.op_mod); - `SCOPE_ASSIGN (scope_issue_wb, ibuf_deq_if.wb); - `SCOPE_ASSIGN (scope_issue_rd, ibuf_deq_if.rd); - `SCOPE_ASSIGN (scope_issue_rs1, 
ibuf_deq_if.rs1); - `SCOPE_ASSIGN (scope_issue_rs2, ibuf_deq_if.rs2); - `SCOPE_ASSIGN (scope_issue_rs3, ibuf_deq_if.rs3); - `SCOPE_ASSIGN (scope_issue_imm, ibuf_deq_if.imm); - `SCOPE_ASSIGN (scope_issue_rs1_is_pc, ibuf_deq_if.rs1_is_PC); - `SCOPE_ASSIGN (scope_issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm); - `SCOPE_ASSIGN (scope_issue_ready, ibuf_deq_if.ready); - `SCOPE_ASSIGN (scope_scoreboard_delay, scoreboard_delay); - `SCOPE_ASSIGN (scope_gpr_delay, ~gpr_req_if.ready); - `SCOPE_ASSIGN (scope_execute_delay, ~execute_if.ready); + `SCOPE_ASSIGN (issue_fire, ibuf_deq_if.valid && ibuf_deq_if.ready); + `SCOPE_ASSIGN (issue_wid, ibuf_deq_if.wid); + `SCOPE_ASSIGN (issue_tmask, ibuf_deq_if.tmask); + `SCOPE_ASSIGN (issue_pc, ibuf_deq_if.PC); + `SCOPE_ASSIGN (issue_ex_type, ibuf_deq_if.ex_type); + `SCOPE_ASSIGN (issue_op_type, ibuf_deq_if.op_type); + `SCOPE_ASSIGN (issue_op_mod, ibuf_deq_if.op_mod); + `SCOPE_ASSIGN (issue_wb, ibuf_deq_if.wb); + `SCOPE_ASSIGN (issue_rd, ibuf_deq_if.rd); + `SCOPE_ASSIGN (issue_rs1, ibuf_deq_if.rs1); + `SCOPE_ASSIGN (issue_rs2, ibuf_deq_if.rs2); + `SCOPE_ASSIGN (issue_rs3, ibuf_deq_if.rs3); + `SCOPE_ASSIGN (issue_imm, ibuf_deq_if.imm); + `SCOPE_ASSIGN (issue_rs1_is_pc, ibuf_deq_if.rs1_is_PC); + `SCOPE_ASSIGN (issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm); + + `SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay); + `SCOPE_ASSIGN (gpr_delay, ~gpr_req_if.ready); + `SCOPE_ASSIGN (execute_delay, ~execute_if.ready); - `SCOPE_ASSIGN (scope_gpr_rsp_valid, gpr_rsp_if.valid); - `SCOPE_ASSIGN (scope_gpr_rsp_wid, gpr_rsp_if.wid); - `SCOPE_ASSIGN (scope_gpr_rsp_pc, gpr_rsp_if.PC); - `SCOPE_ASSIGN (scope_gpr_rsp_a, gpr_rsp_if.rs1_data); - `SCOPE_ASSIGN (scope_gpr_rsp_b, gpr_rsp_if.rs2_data); - `SCOPE_ASSIGN (scope_gpr_rsp_c, gpr_rsp_if.rs3_data); + `SCOPE_ASSIGN (gpr_rsp_valid, gpr_rsp_if.valid); + `SCOPE_ASSIGN (gpr_rsp_wid, gpr_rsp_if.wid); + `SCOPE_ASSIGN (gpr_rsp_pc, gpr_rsp_if.PC); + `SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data); + `SCOPE_ASSIGN (gpr_rsp_b, 
gpr_rsp_if.rs2_data); + `SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data); - `SCOPE_ASSIGN (scope_writeback_valid, writeback_if.valid); - `SCOPE_ASSIGN (scope_writeback_wid, writeback_if.wid); - `SCOPE_ASSIGN (scope_writeback_pc, writeback_if.PC); - `SCOPE_ASSIGN (scope_writeback_rd, writeback_if.rd); - `SCOPE_ASSIGN (scope_writeback_data, writeback_if.data); + `SCOPE_ASSIGN (writeback_valid, writeback_if.valid); + `SCOPE_ASSIGN (writeback_wid, writeback_if.wid); + `SCOPE_ASSIGN (writeback_pc, writeback_if.PC); + `SCOPE_ASSIGN (writeback_rd, writeback_if.rd); + `SCOPE_ASSIGN (writeback_data, writeback_if.data); `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 52646138..f41cfc22 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -192,20 +192,18 @@ module VX_lsu_unit #( assign dcache_rsp_if.ready = ~(stall_out || mem_rsp_stall); // scope registration - `SCOPE_ASSIGN (scope_dcache_req_valid, dcache_req_if.valid); - `SCOPE_ASSIGN (scope_dcache_req_addr, req_address); - `SCOPE_ASSIGN (scope_dcache_req_rw, req_rw); - `SCOPE_ASSIGN (scope_dcache_req_byteen,dcache_req_if.byteen); - `SCOPE_ASSIGN (scope_dcache_req_data, dcache_req_if.data); - `SCOPE_ASSIGN (scope_dcache_req_tag, req_tag); - `SCOPE_ASSIGN (scope_dcache_req_ready, dcache_req_if.ready); - `SCOPE_ASSIGN (scope_dcache_req_wid, req_wid); - `SCOPE_ASSIGN (scope_dcache_req_pc, req_pc); + `SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}}); + `SCOPE_ASSIGN (dcache_req_wid, req_wid); + `SCOPE_ASSIGN (dcache_req_pc, req_pc); + `SCOPE_ASSIGN (dcache_req_addr, req_address); + `SCOPE_ASSIGN (dcache_req_rw, req_rw); + `SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen); + `SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data); + `SCOPE_ASSIGN (dcache_req_tag, req_tag); - `SCOPE_ASSIGN (scope_dcache_rsp_valid, dcache_rsp_if.valid); - `SCOPE_ASSIGN (scope_dcache_rsp_data, dcache_rsp_if.data); - `SCOPE_ASSIGN 
(scope_dcache_rsp_tag, rsp_tag); - `SCOPE_ASSIGN (scope_dcache_rsp_ready, dcache_rsp_if.ready); + `SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.valid & {`NUM_THREADS{dcache_rsp_if.ready}}); + `SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data); + `SCOPE_ASSIGN (dcache_rsp_tag, rsp_tag); `ifdef DBG_PRINT_CORE_DCACHE always @(posedge clk) begin diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index c4552d21..92fee56b 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -5,7 +5,7 @@ `include "scope-defs.vh" -`define SCOPE_ASSIGN(d,s) assign d = s +`define SCOPE_ASSIGN(d,s) assign scope_``d = s `else @@ -35,7 +35,7 @@ `define SCOPE_BIND_Vortex_cluster(__i__) -`define SCOPE_BIND_top_vortex +`define SCOPE_BIND_afu_vortex `define SCOPE_IO_VX_lsu_unit diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 2938c60b..dc620ab2 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -250,11 +250,11 @@ module VX_warp_sched #( assign busy = (active_warps != 0); - `SCOPE_ASSIGN (scope_wsched_scheduled_warp, scheduled_warp); - `SCOPE_ASSIGN (scope_wsched_active_warps, active_warps); - `SCOPE_ASSIGN (scope_wsched_schedule_table, schedule_table); - `SCOPE_ASSIGN (scope_wsched_schedule_ready, schedule_ready); - `SCOPE_ASSIGN (scope_wsched_warp_to_schedule, warp_to_schedule); - `SCOPE_ASSIGN (scope_wsched_warp_pc, warp_pc); + `SCOPE_ASSIGN (wsched_scheduled_warp, scheduled_warp); + `SCOPE_ASSIGN (wsched_active_warps, active_warps); + `SCOPE_ASSIGN (wsched_schedule_table, schedule_table); + `SCOPE_ASSIGN (wsched_schedule_ready, schedule_ready); + `SCOPE_ASSIGN (wsched_warp_to_schedule, warp_to_schedule); + `SCOPE_ASSIGN (wsched_warp_pc, warp_pc); endmodule \ No newline at end of file diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 914a9232..4a458a01 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -456,6 +456,32 @@ module Vortex ( ); end + `SCOPE_ASSIGN (reset, reset); + + `SCOPE_ASSIGN (dram_req_fire, dram_req_valid && dram_req_ready); + 
`SCOPE_ASSIGN (dram_req_addr, {dram_req_addr, 4'b0}); + `SCOPE_ASSIGN (dram_req_rw, dram_req_rw); + `SCOPE_ASSIGN (dram_req_byteen,dram_req_byteen); + `SCOPE_ASSIGN (dram_req_data, dram_req_data); + `SCOPE_ASSIGN (dram_req_tag, dram_req_tag); + + `SCOPE_ASSIGN (dram_rsp_fire, dram_rsp_valid && dram_rsp_ready); + `SCOPE_ASSIGN (dram_rsp_data, dram_rsp_data); + `SCOPE_ASSIGN (dram_rsp_tag, dram_rsp_tag); + + `SCOPE_ASSIGN (snp_req_fire, snp_req_valid && snp_req_ready); + `SCOPE_ASSIGN (snp_req_addr, {snp_req_addr, 4'b0}); + `SCOPE_ASSIGN (snp_req_invalidate, snp_req_invalidate); + `SCOPE_ASSIGN (snp_req_tag, snp_req_tag); + + `SCOPE_ASSIGN (snp_rsp_fire, snp_rsp_valid && snp_rsp_ready); + `SCOPE_ASSIGN (snp_rsp_tag, snp_rsp_tag); + + `SCOPE_ASSIGN (snp_rsp_fire, snp_rsp_valid && snp_rsp_ready); + `SCOPE_ASSIGN (snp_rsp_tag, snp_rsp_tag); + + `SCOPE_ASSIGN (busy, busy); + `ifdef DBG_PRINT_DRAM always @(posedge clk) begin if (dram_req_valid && dram_req_ready) begin diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index d3e31162..4ff264cb 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -737,18 +737,18 @@ module VX_bank #( end `endif -`SCOPE_ASSIGN (scope_valid_st0, qual_valid_st0); -`SCOPE_ASSIGN (scope_valid_st1, valid_st1); -`SCOPE_ASSIGN (scope_valid_st2, valid_st2); +`SCOPE_ASSIGN (valid_st0, qual_valid_st0); +`SCOPE_ASSIGN (valid_st1, valid_st1); +`SCOPE_ASSIGN (valid_st2, valid_st2); -`SCOPE_ASSIGN (scope_is_mrvq_st1, is_mrvq_st1); -`SCOPE_ASSIGN (scope_miss_st1, miss_st1); -`SCOPE_ASSIGN (scope_dirty_st1, dirty_st1); -`SCOPE_ASSIGN (scope_force_miss_st1, force_request_miss_st1); -`SCOPE_ASSIGN (scope_stall_pipe, stall_bank_pipe); +`SCOPE_ASSIGN (is_mrvq_st1, is_mrvq_st1); +`SCOPE_ASSIGN (miss_st1, miss_st1); +`SCOPE_ASSIGN (dirty_st1, dirty_st1); +`SCOPE_ASSIGN (force_miss_st1, force_request_miss_st1); +`SCOPE_ASSIGN (stall_pipe, stall_bank_pipe); -`SCOPE_ASSIGN (scope_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID)); 
-`SCOPE_ASSIGN (scope_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); -`SCOPE_ASSIGN (scope_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); +`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID)); +`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); +`SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); endmodule diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index da7cfe73..7c32180b 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -7,9 +7,9 @@ "../rtl/cache/VX_cache_config.vh" ], "modules": { - "top": { + "afu": { "submodules": { - "vortex": {"type":"Vortex"} + "vortex": {"type":"Vortex", "enabled":false} } }, "Vortex": { @@ -73,41 +73,50 @@ "VX_bank": {} }, "taps": { - "top": { + "afu": { + "?ccip_sRxPort_c0_mmioRdValid":1, + "?ccip_sRxPort_c0_mmioWrValid":1, + "mmio_hdr_address":16, + "mmio_hdr_length":2, + "ccip_sRxPort_c0_hdr_mdata":16, + "?ccip_sRxPort_c0_rspValid":1, + "?ccip_sRxPort_c1_rspValid":1, + "?ccip_sTxPort_c0_fire":1, + "ccip_sTxPort_c0_hdr_address":42, + "ccip_sTxPort_c0_hdr_mdata":16, + "?ccip_sTxPort_c1_fire":1, + "ccip_sTxPort_c1_hdr_address":42, + "ccip_sTxPort_c2_mmioRdValid":1 + }, + "afu/vortex": { "!reset": 1, - "?dram_req_valid": 1, + "?dram_req_fire": 1, "dram_req_addr": 32, "dram_req_rw": 1, "dram_req_byteen":"`VX_DRAM_BYTEEN_WIDTH", "dram_req_data":"`VX_DRAM_LINE_WIDTH", "dram_req_tag":"`VX_DRAM_TAG_WIDTH", - "?dram_req_ready": 1, - "?dram_rsp_valid": 1, + "?dram_rsp_fire": 1, "dram_rsp_data":"`VX_DRAM_LINE_WIDTH", "dram_rsp_tag":"`VX_DRAM_TAG_WIDTH", - "?dram_rsp_ready": 1, - "?snp_req_valid": 1, + "?snp_req_fire": 1, "snp_req_addr": 32, "snp_req_invalidate": 1, "snp_req_tag":"`VX_SNP_TAG_WIDTH", - "?snp_req_ready": 1, - "?snp_rsp_valid": 1, - "snp_rsp_tag":"`VX_SNP_TAG_WIDTH", - "?snp_rsp_ready": 1, + "?snp_rsp_fire": 1, + "snp_rsp_tag":"`VX_SNP_TAG_WIDTH", "busy": 1 }, - "top/vortex/cluster/core/pipeline/fetch/icache_stage": { - "?icache_req_valid": 1, + 
"afu/vortex/cluster/core/pipeline/fetch/icache_stage": { + "?icache_req_fire": 1, "icache_req_wid":"`NW_BITS", "icache_req_addr": 32, "icache_req_tag":"`ICORE_TAG_ID_BITS", - "?icache_req_ready": 1, - "?icache_rsp_valid": 1, + "?icache_rsp_fire": 1, "icache_rsp_data": 32, - "icache_rsp_tag":"`ICORE_TAG_ID_BITS", - "?icache_rsp_ready": 1 + "icache_rsp_tag":"`ICORE_TAG_ID_BITS" }, - "top/vortex/cluster/core/pipeline/fetch/warp_sched": { + "afu/vortex/cluster/core/pipeline/fetch/warp_sched": { "?wsched_scheduled_warp": 1, "wsched_active_warps": "`NUM_WARPS", "wsched_schedule_table": "`NUM_WARPS", @@ -115,14 +124,13 @@ "wsched_warp_to_schedule": "`NW_BITS", "wsched_warp_pc": "32" }, - "top/vortex/cluster/core/pipeline/execute/gpu_unit": { - "?gpu_req_valid": 1, + "afu/vortex/cluster/core/pipeline/execute/gpu_unit": { + "?gpu_req_fire": 1, "gpu_req_wid": "`NW_BITS", "gpu_req_tmask": "`NUM_THREADS", "gpu_req_op_type": "`GPU_BITS", "gpu_req_rs1": "32", - "gpu_req_rs2": "32", - "?gpu_req_ready": 1, + "gpu_req_rs2": "32", "?gpu_rsp_valid": 1, "gpu_rsp_wid": "`NW_BITS", "gpu_rsp_tmc": "`GPU_TMC_SIZE", @@ -130,8 +138,8 @@ "gpu_rsp_split": "`GPU_SPLIT_SIZE", "gpu_rsp_barrier": "`GPU_BARRIER_SIZE" }, - "top/vortex/cluster/core/pipeline/execute/lsu_unit": { - "?dcache_req_valid":"`NUM_THREADS", + "afu/vortex/cluster/core/pipeline/execute/lsu_unit": { + "?dcache_req_fire":"`NUM_THREADS", "dcache_req_wid":"`NW_BITS", "dcache_req_pc": 32, "dcache_req_addr":"`NUM_THREADS * 32", @@ -139,14 +147,12 @@ "dcache_req_byteen":"`NUM_THREADS * 4", "dcache_req_data": "`NUM_THREADS * 32", "dcache_req_tag":"`DCORE_TAG_ID_BITS", - "?dcache_req_ready": 1, - "?dcache_rsp_valid":"`NUM_THREADS", + "?dcache_rsp_fire":"`NUM_THREADS", "dcache_rsp_data":"`NUM_THREADS * 32", - "dcache_rsp_tag":"`DCORE_TAG_ID_BITS", - "?dcache_rsp_ready": 1 + "dcache_rsp_tag":"`DCORE_TAG_ID_BITS" }, - "top/vortex/cluster/core/pipeline/issue": { - "?issue_valid": 1, + "afu/vortex/cluster/core/pipeline/issue": { + 
"?issue_fire": 1, "issue_wid":"`NW_BITS", "issue_tmask":"`NUM_THREADS", "issue_pc": 32, @@ -161,7 +167,6 @@ "issue_imm": 32, "issue_rs1_is_pc": 1, "issue_rs2_is_imm": 1, - "?issue_ready": 1, "?gpr_rsp_valid": 1, "gpr_rsp_wid":"`NW_BITS", "gpr_rsp_pc": 32, @@ -177,7 +182,7 @@ "!scoreboard_delay": 1, "!execute_delay": 1 }, - "top/vortex/l3cache/bank, top/vortex/cluster/l2cache/bank, top/vortex/cluster/core/mem_unit/dcache/bank, top/vortex/cluster/core/mem_unit/icache/bank, top/vortex/cluster/core/mem_unit/smem/bank": { + "afu/vortex/l3cache/bank, afu/vortex/cluster/l2cache/bank, afu/vortex/cluster/core/mem_unit/dcache/bank, afu/vortex/cluster/core/mem_unit/icache/bank, afu/vortex/cluster/core/mem_unit/smem/bank": { "?valid_st0": 1, "?valid_st1": 1, "?valid_st2": 1, diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 8be8381f..7a6c61b5 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -638,10 +638,7 @@ def gen_vl_header(file, modules, taps): print("`define SCOPE_TRIGGER \\", file=f) i = 0 - excluded_list = [] for key in toptaps: - if key in excluded_list: - continue tap = toptaps[key] if tap[2] != 2: continue @@ -653,16 +650,7 @@ def gen_vl_header(file, modules, taps): print(" | \\", file=f) print("\t(", file=f, end='') name = trigger_name("scope_" + key.replace('/', '_') + su, size) - if key.endswith("_valid"): - ready_signal = key[:-6] + "_ready" - if ready_signal in toptaps: - rname = trigger_name("scope_" + ready_signal.replace('/', '_') + su, size) - print(name + " && " + rname, file=f, end='') - excluded_list.append(ready_signal) - else: - print(name, file=f, end='') - else: - print(name, file=f, end='') + print(name, file=f, end='') print(")", file=f, end='') i += 1 print("", file=f)