diff --git a/driver/opae/Makefile b/driver/opae/Makefile index 71d66858..b13b897d 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -18,7 +18,7 @@ CXXFLAGS +=-fstack-protector CXXFLAGS += -fPIC # Enable scope analyzer -CXXFLAGS += -DSCOPE +#CXXFLAGS += -DSCOPE LDFLAGS += -luuid diff --git a/driver/opae/scope.cpp b/driver/opae/scope.cpp index 068155cc..d3051d7e 100644 --- a/driver/opae/scope.cpp +++ b/driver/opae/scope.cpp @@ -26,32 +26,49 @@ struct scope_signal_t { }; static const scope_signal_t scope_signals[] = { - { 32, "icache_req_addr" }, { 2, "icache_req_warp_num" }, + { 32, "icache_req_addr" }, { 2, "icache_req_tag" }, + { 32, "icache_rsp_data" }, { 2, "icache_rsp_tag" }, - { 32, "dcache_req_addr" }, + { 2, "dcache_req_warp_num" }, + { 32, "dcache_req_curr_PC" }, + { 32, "dcache_req_addr" }, + { 1, "dcache_req_rw" }, + { 4, "dcache_req_byteen" }, + { 32, "dcache_req_data" }, { 2, "dcache_req_tag" }, + { 32, "dcache_rsp_data" }, - { 2 , "dcache_rsp_tag" }, + { 2 , "dcache_rsp_tag" }, + { 32, "dram_req_addr" }, + { 1, "dram_req_rw" }, + { 16, "dram_req_byteen" }, + { 32, "dram_req_data" }, { 29, "dram_req_tag" }, + + { 32, "dram_rsp_data" }, { 29, "dram_rsp_tag" }, + { 32, "snp_req_addr" }, { 1, "snp_req_invalidate" }, { 16, "snp_req_tag" }, { 16, "snp_rsp_tag" }, + { 2, "decode_warp_num" }, { 32, "decode_curr_PC" }, { 1, "decode_is_jal" }, { 5, "decode_rs1" }, { 5, "decode_rs2" }, + { 2, "execute_warp_num" }, { 5, "execute_rd" }, { 32, "execute_a" }, { 32, "execute_b" }, + { 2, "writeback_warp_num" }, { 2, "writeback_wb" }, { 5, "writeback_rd" }, @@ -61,18 +78,22 @@ static const scope_signal_t scope_signals[] = { { 1, "icache_req_ready" }, { 1, "icache_rsp_valid" }, { 1, "icache_rsp_ready" }, + { 4, "dcache_req_valid" }, { 1, "dcache_req_ready" }, { 4, "dcache_rsp_valid" }, { 1, "dcache_rsp_ready" }, + { 1, "dram_req_valid" }, { 1, "dram_req_ready" }, { 1, "dram_rsp_valid" }, { 1, "dram_rsp_ready" }, + { 1, "snp_req_valid" }, { 1, "snp_req_ready" }, { 1, "snp_rsp_valid" }, { 1, "snp_rsp_ready" }, + { 4, "decode_valid" }, { 4, "execute_valid" }, { 4, "writeback_valid" }, @@ -134,16 +155,15 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &frame_width)); std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl; - assert(fwidth == (int)frame_width); - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 3)); CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_CSR_SCOPE_DATA, &max_frames)); std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl; CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1)); + assert(fwidth == (int)frame_width); std::vector signal_data(frame_width+1); - + uint64_t frame_offset = 0; uint64_t frame_no = 0; uint64_t timestamp = 0; diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 6e51a568..2f7282fe 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -120,7 +120,6 @@ extern int vx_dev_open(vx_device_h* hdevice) { #ifdef SCOPE { - int ret = vx_scope_start(device->fpga, 0); if (ret != 0) return ret; diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 197c2808..118edcc2 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -94,7 +94,7 @@ int run_memcopy_test(vx_buffer_h sbuf, } if (errors != 0) { - std::cout << "Found " << errors << " errors!" << std::endl; + std::cout << "Found " << std::dec << errors << " errors!" << std::endl; std::cout << "FAILED!" << std::endl; return 1; } @@ -161,7 +161,7 @@ int run_kernel_test(vx_device_h device, } if (errors != 0) { - std::cout << "Found " << errors << " errors!" << std::endl; + std::cout << "Found " << std::dec << errors << " errors!" << std::endl; std::cout << "FAILED!" << std::endl; return 1; } diff --git a/driver/tests/basic/common.h b/driver/tests/basic/common.h index 6daf89e7..0ac862d3 100644 --- a/driver/tests/basic/common.h +++ b/driver/tests/basic/common.h @@ -3,6 +3,6 @@ #define DEV_MEM_SRC_ADDR 0x10000040 #define DEV_MEM_DST_ADDR 0x20000080 -#define NUM_BLOCKS 2 +#define NUM_BLOCKS 16 #endif \ No newline at end of file diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin index 38de9c81..3845199a 100644 Binary files a/driver/tests/basic/kernel.bin and b/driver/tests/basic/kernel.bin differ diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 08f76375..8af084f5 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -1,5 +1,7 @@ vortex_afu.json +QI:vortex_afu.qsf + +define+NDEBUG #+define+SCOPE @@ -98,7 +100,7 @@ vortex_afu.json ../rtl/VX_decode.v ../rtl/VX_inst_multiplex.v ../rtl/VX_lsu_addr_gen.v -../rtl/VX_dcache_io_arb.v +../rtl/VX_dcache_arb.v ../rtl/VX_mem_arb.v ../rtl/pipe_regs/VX_f_d_reg.v diff --git a/hw/opae/vortex_afu.qsf b/hw/opae/vortex_afu.qsf new file mode 100644 index 00000000..cf05f28c --- /dev/null +++ b/hw/opae/vortex_afu.qsf @@ -0,0 +1,4 @@ + +# Analysis & Synthesis Assignments +set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 +set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON \ No newline at end of file diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 21cf923c..c9e68b05 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -800,9 +800,14 @@ end `SCOPE_ASSIGN(scope_dram_req_valid, vx_dram_req_valid); `SCOPE_ASSIGN(scope_dram_req_addr, {vx_dram_req_addr, 4'b0}); +`SCOPE_ASSIGN(scope_dram_req_rw, vx_dram_req_rw); +`SCOPE_ASSIGN(scope_dram_req_byteen,vx_dram_req_byteen); +`SCOPE_ASSIGN(scope_dram_req_data, vx_dram_req_data[31:0]); `SCOPE_ASSIGN(scope_dram_req_tag, vx_dram_req_tag); `SCOPE_ASSIGN(scope_dram_req_ready, vx_dram_req_ready); + `SCOPE_ASSIGN(scope_dram_rsp_valid, vx_dram_rsp_valid); +`SCOPE_ASSIGN(scope_dram_rsp_data, vx_dram_rsp_data[31:0]); `SCOPE_ASSIGN(scope_dram_rsp_tag, vx_dram_rsp_tag); `SCOPE_ASSIGN(scope_dram_rsp_ready, vx_dram_rsp_ready); @@ -811,11 +816,12 @@ end `SCOPE_ASSIGN(scope_snp_req_invalidate, vx_snp_req_invalidate); `SCOPE_ASSIGN(scope_snp_req_tag, vx_snp_req_tag); `SCOPE_ASSIGN(scope_snp_req_ready, vx_snp_req_ready); + `SCOPE_ASSIGN(scope_snp_rsp_valid, vx_snp_rsp_valid); `SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag); `SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready); -`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 491, "oops!") +`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 641, "oops!") wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_icache_rsp_valid && scope_icache_rsp_ready) diff --git a/hw/rtl/VX_dcache_arb.v b/hw/rtl/VX_dcache_arb.v new file mode 100644 index 00000000..ae734994 --- /dev/null +++ b/hw/rtl/VX_dcache_arb.v @@ -0,0 +1,48 @@ +`include "VX_define.vh" + +module VX_dcache_arb ( + input wire io_select, + + // Core request + VX_cache_core_req_if core_req_if, + + // Dcache request + VX_cache_core_req_if core_dcache_req_if, + + // I/O request + VX_cache_core_req_if core_io_req_if, + + // Dcache response + VX_cache_core_rsp_if core_dcache_rsp_if, + + // I/O response + VX_cache_core_rsp_if core_io_rsp_if, + + // Core response + VX_cache_core_rsp_if core_rsp_if +); + assign core_dcache_req_if.core_req_valid = core_req_if.core_req_valid & {`NUM_THREADS{~io_select}}; + assign core_dcache_req_if.core_req_rw = core_req_if.core_req_rw; + assign core_dcache_req_if.core_req_byteen = core_req_if.core_req_byteen; + assign core_dcache_req_if.core_req_addr = core_req_if.core_req_addr; + assign core_dcache_req_if.core_req_data = core_req_if.core_req_data; + assign core_dcache_req_if.core_req_tag = core_req_if.core_req_tag; + + assign core_io_req_if.core_req_valid = core_req_if.core_req_valid & {`NUM_THREADS{io_select}}; + assign core_io_req_if.core_req_rw = core_req_if.core_req_rw; + assign core_io_req_if.core_req_byteen = core_req_if.core_req_byteen; + assign core_io_req_if.core_req_addr = core_req_if.core_req_addr; + assign core_io_req_if.core_req_data = core_req_if.core_req_data; + assign core_io_req_if.core_req_tag = core_req_if.core_req_tag; + + assign core_req_if.core_req_ready = io_select ? core_io_req_if.core_req_ready : core_dcache_req_if.core_req_ready; + + wire dcache_rsp_valid = (| core_dcache_rsp_if.core_rsp_valid); + + assign core_rsp_if.core_rsp_valid = dcache_rsp_valid ? core_dcache_rsp_if.core_rsp_valid : core_io_rsp_if.core_rsp_valid; + assign core_rsp_if.core_rsp_data = dcache_rsp_valid ? core_dcache_rsp_if.core_rsp_data : core_io_rsp_if.core_rsp_data; + assign core_rsp_if.core_rsp_tag = dcache_rsp_valid ? core_dcache_rsp_if.core_rsp_tag : core_io_rsp_if.core_rsp_tag; + assign core_dcache_rsp_if.core_rsp_ready = core_rsp_if.core_rsp_ready; + assign core_io_rsp_if.core_rsp_ready = core_rsp_if.core_rsp_ready && ~dcache_rsp_valid; + +endmodule \ No newline at end of file diff --git a/hw/rtl/VX_dcache_io_arb.v b/hw/rtl/VX_dcache_io_arb.v deleted file mode 100644 index ea68545a..00000000 --- a/hw/rtl/VX_dcache_io_arb.v +++ /dev/null @@ -1,48 +0,0 @@ -`include "VX_define.vh" - -module VX_dcache_io_arb ( - input wire io_select, - - // Core request - VX_cache_core_req_if core_req_if, - - // Dcache request - VX_cache_core_req_if core_dcache_req_if, - - // I/O request - VX_cache_core_req_if core_io_req_if, - - // Dcache response - VX_cache_core_rsp_if core_dcache_rsp_if, - - // I/O response - VX_cache_core_rsp_if core_io_rsp_if, - - // Core response - VX_cache_core_rsp_if core_rsp_if -); - assign core_dcache_req_if.core_req_valid = core_req_if.core_req_valid & {`NUM_THREADS{~io_select}}; - assign core_dcache_req_if.core_req_rw = core_req_if.core_req_rw; - assign core_dcache_req_if.core_req_byteen= core_req_if.core_req_byteen; - assign core_dcache_req_if.core_req_addr = core_req_if.core_req_addr; - assign core_dcache_req_if.core_req_data = core_req_if.core_req_data; - assign core_dcache_req_if.core_req_tag = core_req_if.core_req_tag; - - assign core_io_req_if.core_req_valid = core_req_if.core_req_valid & {`NUM_THREADS{io_select}}; - assign core_io_req_if.core_req_rw = core_req_if.core_req_rw; - assign core_io_req_if.core_req_byteen= core_req_if.core_req_byteen; - assign core_io_req_if.core_req_addr = core_req_if.core_req_addr; - assign core_io_req_if.core_req_data = core_req_if.core_req_data; - assign core_io_req_if.core_req_tag = core_req_if.core_req_tag; - - assign core_req_if.core_req_ready = io_select ? core_io_req_if.core_req_ready : core_dcache_req_if.core_req_ready; - - wire dcache_rsp_valid = (| core_dcache_rsp_if.core_rsp_valid); - - assign core_rsp_if.core_rsp_valid = dcache_rsp_valid ? core_dcache_rsp_if.core_rsp_valid : core_io_rsp_if.core_rsp_valid; - assign core_rsp_if.core_rsp_data = dcache_rsp_valid ? core_dcache_rsp_if.core_rsp_data : core_io_rsp_if.core_rsp_data; - assign core_rsp_if.core_rsp_tag = dcache_rsp_valid ? core_dcache_rsp_if.core_rsp_tag : core_io_rsp_if.core_rsp_tag; - assign core_dcache_rsp_if.core_rsp_ready = core_rsp_if.core_rsp_ready; - assign core_io_rsp_if.core_rsp_ready = core_rsp_if.core_rsp_ready && ~dcache_rsp_valid; - -endmodule \ No newline at end of file diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index f4185a46..7e34d146 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -290,18 +290,26 @@ `ifdef SCOPE `define SCOPE_SIGNALS_DATA_LIST \ - scope_icache_req_addr, \ scope_icache_req_warp_num, \ + scope_icache_req_addr, \ scope_icache_req_tag, \ scope_icache_rsp_data, \ scope_icache_rsp_tag, \ - scope_dcache_req_addr, \ scope_dcache_req_warp_num, \ + scope_dcache_req_curr_PC, \ + scope_dcache_req_addr, \ + scope_dcache_req_rw, \ + scope_dcache_req_byteen, \ + scope_dcache_req_data, \ scope_dcache_req_tag, \ scope_dcache_rsp_data, \ scope_dcache_rsp_tag, \ scope_dram_req_addr, \ + scope_dram_req_rw, \ + scope_dram_req_byteen, \ + scope_dram_req_data, \ scope_dram_req_tag, \ + scope_dram_rsp_data, \ scope_dram_rsp_tag, \ scope_snp_req_addr, \ scope_snp_req_invalidate, \ @@ -350,8 +358,8 @@ `define SCOPE_SIGNALS_DECL \ wire scope_icache_req_valid; \ - wire [31:0] scope_icache_req_addr; \ wire [1:0] scope_icache_req_warp_num; \ + wire [31:0] scope_icache_req_addr; \ wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \ wire scope_icache_req_ready; \ wire scope_icache_rsp_valid; \ @@ -359,8 +367,12 @@ wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \ wire scope_icache_rsp_ready; \ wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid; \ - wire [31:0] scope_dcache_req_addr; \ wire [1:0] scope_dcache_req_warp_num; \ + wire [31:0] scope_dcache_req_curr_PC; \ + wire [31:0] scope_dcache_req_addr; \ + wire scope_dcache_req_rw; \ + wire [3:0] scope_dcache_req_byteen; \ + wire [31:0] scope_dcache_req_data; \ wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \ wire scope_dcache_req_ready; \ wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid; \ @@ -369,9 +381,13 @@ wire scope_dcache_rsp_ready; \ wire scope_dram_req_valid; \ wire [31:0] scope_dram_req_addr; \ + wire scope_dram_req_rw; \ + wire [15:0] scope_dram_req_byteen; \ + wire [31:0] scope_dram_req_data; \ wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \ wire scope_dram_req_ready; \ wire scope_dram_rsp_valid; \ + wire [31:0] scope_dram_rsp_data; \ wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \ wire scope_dram_rsp_ready; \ wire scope_snp_req_valid; \ @@ -407,8 +423,8 @@ `define SCOPE_SIGNALS_ICACHE_IO \ /* verilator lint_off UNDRIVEN */ \ output wire scope_icache_req_valid, \ - output wire [31:0] scope_icache_req_addr, \ output wire [1:0] scope_icache_req_warp_num, \ + output wire [31:0] scope_icache_req_addr, \ output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \ output wire scope_icache_req_ready, \ output wire scope_icache_rsp_valid, \ @@ -420,8 +436,12 @@ `define SCOPE_SIGNALS_DCACHE_IO \ /* verilator lint_off UNDRIVEN */ \ output wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid, \ - output wire [31:0] scope_dcache_req_addr, \ output wire [1:0] scope_dcache_req_warp_num, \ + output wire [31:0] scope_dcache_req_curr_PC, \ + output wire [31:0] scope_dcache_req_addr, \ + output wire scope_dcache_req_rw, \ + output wire [3:0] scope_dcache_req_byteen, \ + output wire [31:0] scope_dcache_req_data, \ output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \ output wire scope_dcache_req_ready, \ output wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid, \ @@ -434,9 +454,13 @@ /* verilator lint_off UNDRIVEN */ \ output wire scope_dram_req_valid, \ output wire [31:0] scope_dram_req_addr, \ + output wire scope_dram_req_rw, \ + output wire [15:0] scope_dram_req_byteen, \ + output wire [31:0] scope_dram_req_data, \ output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag, \ output wire scope_dram_req_ready, \ output wire scope_dram_rsp_valid, \ + output wire [31:0] scope_dram_rsp_data, \ output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag, \ output wire scope_dram_rsp_ready, \ /* verilator lint_on UNDRIVEN */ @@ -484,8 +508,8 @@ `define SCOPE_SIGNALS_ICACHE_ATTACH \ .scope_icache_req_valid (scope_icache_req_valid), \ - .scope_icache_req_addr (scope_icache_req_addr), \ .scope_icache_req_warp_num (scope_icache_req_warp_num), \ + .scope_icache_req_addr (scope_icache_req_addr), \ .scope_icache_req_tag (scope_icache_req_tag), \ .scope_icache_req_ready (scope_icache_req_ready), \ .scope_icache_rsp_valid (scope_icache_rsp_valid), \ @@ -495,8 +519,12 @@ `define SCOPE_SIGNALS_DCACHE_ATTACH \ .scope_dcache_req_valid (scope_dcache_req_valid), \ - .scope_dcache_req_addr (scope_dcache_req_addr), \ .scope_dcache_req_warp_num (scope_dcache_req_warp_num), \ + .scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \ + .scope_dcache_req_addr (scope_dcache_req_addr), \ + .scope_dcache_req_rw (scope_dcache_req_rw), \ + .scope_dcache_req_byteen(scope_dcache_req_byteen), \ + .scope_dcache_req_data (scope_dcache_req_data), \ .scope_dcache_req_tag (scope_dcache_req_tag), \ .scope_dcache_req_ready (scope_dcache_req_ready), \ .scope_dcache_rsp_valid (scope_dcache_rsp_valid), \ @@ -507,9 +535,13 @@ `define SCOPE_SIGNALS_DRAM_ATTACH \ .scope_dram_req_valid (scope_dram_req_valid), \ .scope_dram_req_addr (scope_dram_req_addr), \ + .scope_dram_req_rw (scope_dram_req_rw), \ + .scope_dram_req_byteen (scope_dram_req_byteen), \ + .scope_dram_req_data (scope_dram_req_data), \ .scope_dram_req_tag (scope_dram_req_tag), \ .scope_dram_req_ready (scope_dram_req_ready), \ .scope_dram_rsp_valid (scope_dram_rsp_valid), \ + .scope_dram_rsp_data (scope_dram_rsp_data), \ .scope_dram_rsp_tag (scope_dram_rsp_tag), \ .scope_dram_rsp_ready (scope_dram_rsp_ready), diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 936d3354..a51bc206 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -84,10 +84,11 @@ module VX_icache_stage #( assign icache_rsp_if.core_rsp_ready = ~total_freeze; `SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.core_req_valid); - `SCOPE_ASSIGN(scope_icache_req_addr, {icache_req_if.core_req_addr, 2'b0}); `SCOPE_ASSIGN(scope_icache_req_warp_num, fe_inst_meta_fi.warp_num); + `SCOPE_ASSIGN(scope_icache_req_addr, {icache_req_if.core_req_addr, 2'b0}); `SCOPE_ASSIGN(scope_icache_req_tag, icache_req_if.core_req_tag); `SCOPE_ASSIGN(scope_icache_req_ready, icache_req_if.core_req_ready); + `SCOPE_ASSIGN(scope_icache_rsp_valid, icache_rsp_if.core_rsp_valid); `SCOPE_ASSIGN(scope_icache_rsp_data, icache_rsp_if.core_rsp_data); `SCOPE_ASSIGN(scope_icache_rsp_tag, icache_rsp_if.core_rsp_tag); diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index fc0ef23f..a0df418d 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -164,10 +164,15 @@ module VX_lsu_unit #( assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem; `SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.core_req_valid); - `SCOPE_ASSIGN(scope_dcache_req_addr, {dcache_req_if.core_req_addr[0], 2'b0}); `SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num); + `SCOPE_ASSIGN(scope_dcache_req_curr_PC, use_pc); + `SCOPE_ASSIGN(scope_dcache_req_addr, {dcache_req_if.core_req_addr[0], 2'b0}); + `SCOPE_ASSIGN(scope_dcache_req_rw, core_req_rw); + `SCOPE_ASSIGN(scope_dcache_req_byteen,dcache_req_if.core_req_byteen[0]); + `SCOPE_ASSIGN(scope_dcache_req_data, dcache_req_if.core_req_data[0]); `SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_if.core_req_tag); `SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.core_req_ready); + `SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_if.core_rsp_valid); `SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.core_rsp_data[0]); `SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_if.core_rsp_tag); @@ -176,10 +181,12 @@ module VX_lsu_unit #( `ifdef DBG_PRINT_CORE_DCACHE always_ff @(posedge clk) begin if ((| dcache_req_if.core_req_valid) && dcache_req_if.core_req_ready) begin - $display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h", $time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data); + $display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h", + $time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data); end if ((| dcache_rsp_if.core_rsp_valid) && dcache_rsp_if.core_rsp_ready) begin - $display("%t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, mrq_read_addr, mem_wb_if.curr_PC, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data); + $display("%t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", + $time, CORE_ID, mem_wb_if.valid, mrq_read_addr, mem_wb_if.curr_PC, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data); end end `endif diff --git a/hw/rtl/VX_mem_ctrl.v b/hw/rtl/VX_mem_ctrl.v index 8b0db7b8..c8e27436 100644 --- a/hw/rtl/VX_mem_ctrl.v +++ b/hw/rtl/VX_mem_ctrl.v @@ -41,7 +41,7 @@ module VX_mem_ctrl # ( // use "case equality" to handle uninitialized entry wire smem_select = (({core_dcache_req_if.core_req_addr[0], 2'b0} >= `SHARED_MEM_BASE_ADDR) === 1'b1); - VX_dcache_io_arb dcache_io_arb ( + VX_dcache_arb dcache_smem_arb ( .io_select (smem_select), .core_req_if (core_dcache_req_if), .core_dcache_req_if (core_dcache_req_qual_if), diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index f036f293..e54a527a 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -6,39 +6,38 @@ module VX_scheduler ( input wire memory_delay, input wire exec_delay, input wire gpr_stage_delay, + VX_frE_to_bckE_req_if bckE_req_if, VX_wb_if writeback_if, - output wire schedule_delay, - output wire is_empty + output wire schedule_delay, + output wire is_empty ); - reg[31:0] count_valid; + reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; + reg [31:0] count_valid; - assign is_empty = (count_valid == 0); + wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0); + wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0); + + wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO); + wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO); + wire is_mem = (is_store || is_load); + wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split); + wire is_csr = bckE_req_if.is_csr; + wire is_exec = !is_mem && !is_gpu && !is_csr; - reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; - - wire valid_wb = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0); - wire wb_inc = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0); + wire using_rs2 = is_store + || (bckE_req_if.rs2_src == `RS2_REG) + || bckE_req_if.is_barrier + || bckE_req_if.is_wspawn; wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0); wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0); wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0); - wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO); - wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO); - - // classify our next instruction. - wire is_mem = is_store || is_load; - wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split); - wire is_csr = bckE_req_if.is_csr; - wire is_exec = !is_mem && !is_gpu && !is_csr; - - wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn; - - wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0)); - wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2)); - wire rd_rename_qual = ((rd_rename ) && (bckE_req_if.rd != 0)); + wire rs1_rename_qual = (rs1_rename) && (bckE_req_if.rs1 != 0); + wire rs2_rename_qual = (rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2); + wire rd_rename_qual = (rd_rename) && (bckE_req_if.rd != 0); wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual; @@ -48,20 +47,26 @@ module VX_scheduler ( || (gpr_stage_delay && (is_mem || is_exec)) || (exec_delay && is_exec)); + assign is_empty = (count_valid == 0); + integer i, w; wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid; always @(posedge clk) begin if (reset) begin - for (w = 0; w < `NUM_WARPS; w=w+1) begin + for (w = 0; w < `NUM_WARPS; w++) begin for (i = 0; i < 32; i++) begin rename_table[w][i] <= 0; end end count_valid <= 0; end else begin - if (valid_wb) begin + if (acquire_rd && !schedule_delay) begin + rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid; + count_valid <= count_valid + 1; + end + if (release_rd) begin assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0); rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask; if (0 == valid_wb_new_mask) begin @@ -69,12 +74,8 @@ module VX_scheduler ( count_valid <= count_valid - 1; end end - - if (!schedule_delay && wb_inc) begin - rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid; - count_valid <= count_valid + 1; - end + end - end + end endmodule \ No newline at end of file diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 93c5dd5b..a1864c7e 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -260,7 +260,7 @@ module Vortex #( // use "case equality" to handle uninitialized address value wire io_select = (({core_dcache_req_if.core_req_addr[0], 2'b0} >= `IO_BUS_BASE_ADDR) === 1'b1); - VX_dcache_io_arb dcache_io_arb ( + VX_dcache_arb dcache_io_arb ( .io_select (io_select), .core_req_if (core_dcache_req_if), .core_dcache_req_if (arb_dcache_req_if), diff --git a/hw/syn/quartus/cache/project.tcl b/hw/syn/quartus/cache/project.tcl index afe69d48..0b591385 100644 --- a/hw/syn/quartus/cache/project.tcl +++ b/hw/syn/quartus/cache/project.tcl @@ -1,30 +1,56 @@ load_package flow package require cmdline -set options { \ - { "project.arg" "" "Project name" } \ - { "family.arg" "" "Device family name" } \ - { "device.arg" "" "Device name" } \ - { "top.arg" "" "Top level module" } \ - { "sdc.arg" "" "Timing Design Constraints file" } \ - { "src.arg" "" "Verilog source file" } \ - { "inc.arg" "." "Include path" } \ +set options { + { "project.arg" "" "Project name" } + { "family.arg" "" "Device family name" } + { "device.arg" "" "Device name" } + { "top.arg" "" "Top level module" } + { "src.arg" "" "Verilog source file" } + { "inc.arg" "" "Include path (optional)" } + { "sdc.arg" "" "Timing Design Constraints file (optional)" } + { "set.arg" "" "Macro value (optional)" } } +set q_args_orig $quartus(args) + array set opts [::cmdline::getoptions quartus(args) $options] +# Verify required parameters +set requiredParameters {project family device top src} +foreach p $requiredParameters { + if {$opts($p) == ""} { + puts stderr "Missing required parameter: -$p" + exit 1 + } +} + project_new $opts(project) -overwrite set_global_assignment -name FAMILY $opts(family) set_global_assignment -name DEVICE $opts(device) set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) -set_global_assignment -name VERILOG_FILE $opts(src) -set_global_assignment -name SEARCH_PATH $opts(inc) -set_global_assignment -name SDC_FILE $opts(sdc) set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 +set idx 0 +foreach arg $q_args_orig { + incr idx + if [string match "-src" $arg] { + set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx] + } + if [string match "-inc" $arg] { + set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx] + } + if [string match "-sdc" $arg] { + set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx] + } + if [string match "-set" $arg] { + set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx] + } +} + proc make_all_pins_virtual {} { execute_module -tool map set name_ids [get_names -filter * -node_type pin] diff --git a/hw/syn/quartus/pipeline/project.tcl b/hw/syn/quartus/pipeline/project.tcl index afe69d48..0b591385 100644 --- a/hw/syn/quartus/pipeline/project.tcl +++ b/hw/syn/quartus/pipeline/project.tcl @@ -1,30 +1,56 @@ load_package flow package require cmdline -set options { \ - { "project.arg" "" "Project name" } \ - { "family.arg" "" "Device family name" } \ - { "device.arg" "" "Device name" } \ - { "top.arg" "" "Top level module" } \ - { "sdc.arg" "" "Timing Design Constraints file" } \ - { "src.arg" "" "Verilog source file" } \ - { "inc.arg" "." "Include path" } \ +set options { + { "project.arg" "" "Project name" } + { "family.arg" "" "Device family name" } + { "device.arg" "" "Device name" } + { "top.arg" "" "Top level module" } + { "src.arg" "" "Verilog source file" } + { "inc.arg" "" "Include path (optional)" } + { "sdc.arg" "" "Timing Design Constraints file (optional)" } + { "set.arg" "" "Macro value (optional)" } } +set q_args_orig $quartus(args) + array set opts [::cmdline::getoptions quartus(args) $options] +# Verify required parameters +set requiredParameters {project family device top src} +foreach p $requiredParameters { + if {$opts($p) == ""} { + puts stderr "Missing required parameter: -$p" + exit 1 + } +} + project_new $opts(project) -overwrite set_global_assignment -name FAMILY $opts(family) set_global_assignment -name DEVICE $opts(device) set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) -set_global_assignment -name VERILOG_FILE $opts(src) -set_global_assignment -name SEARCH_PATH $opts(inc) -set_global_assignment -name SDC_FILE $opts(sdc) set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 +set idx 0 +foreach arg $q_args_orig { + incr idx + if [string match "-src" $arg] { + set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx] + } + if [string match "-inc" $arg] { + set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx] + } + if [string match "-sdc" $arg] { + set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx] + } + if [string match "-set" $arg] { + set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx] + } +} + proc make_all_pins_virtual {} { execute_module -tool map set name_ids [get_names -filter * -node_type pin] diff --git a/hw/syn/quartus/top/Makefile b/hw/syn/quartus/top/Makefile index 8feaf127..4e760887 100644 --- a/hw/syn/quartus/top/Makefile +++ b/hw/syn/quartus/top/Makefile @@ -8,7 +8,7 @@ FAMILY = "Arria 10" DEVICE = 10AX115N3F40E2SG # Executable Configuration -SYN_ARGS = --parallel --read_settings_files=on +SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1 FIT_ARGS = --part=$(DEVICE) --read_settings_files=on ASM_ARGS = STA_ARGS = --do_report_timing @@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae" + quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae;../../../opae/ccip" -macro "NOPAE" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/top/project.tcl b/hw/syn/quartus/top/project.tcl index afe69d48..0b591385 100644 --- a/hw/syn/quartus/top/project.tcl +++ b/hw/syn/quartus/top/project.tcl @@ -1,30 +1,56 @@ load_package flow package require cmdline -set options { \ - { "project.arg" "" "Project name" } \ - { "family.arg" "" "Device family name" } \ - { "device.arg" "" "Device name" } \ - { "top.arg" "" "Top level module" } \ - { "sdc.arg" "" "Timing Design Constraints file" } \ - { "src.arg" "" "Verilog source file" } \ - { "inc.arg" "." "Include path" } \ +set options { + { "project.arg" "" "Project name" } + { "family.arg" "" "Device family name" } + { "device.arg" "" "Device name" } + { "top.arg" "" "Top level module" } + { "src.arg" "" "Verilog source file" } + { "inc.arg" "" "Include path (optional)" } + { "sdc.arg" "" "Timing Design Constraints file (optional)" } + { "set.arg" "" "Macro value (optional)" } } +set q_args_orig $quartus(args) + array set opts [::cmdline::getoptions quartus(args) $options] +# Verify required parameters +set requiredParameters {project family device top src} +foreach p $requiredParameters { + if {$opts($p) == ""} { + puts stderr "Missing required parameter: -$p" + exit 1 + } +} + project_new $opts(project) -overwrite set_global_assignment -name FAMILY $opts(family) set_global_assignment -name DEVICE $opts(device) set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) -set_global_assignment -name VERILOG_FILE $opts(src) -set_global_assignment -name SEARCH_PATH $opts(inc) -set_global_assignment -name SDC_FILE $opts(sdc) set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 +set idx 0 +foreach arg $q_args_orig { + incr idx + if [string match "-src" $arg] { + set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx] + } + if [string match "-inc" $arg] { + set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx] + } + if [string match "-sdc" $arg] { + set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx] + } + if [string match "-set" $arg] { + set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx] + } +} + proc make_all_pins_virtual {} { execute_module -tool map set name_ids [get_names -filter * -node_type pin] diff --git a/hw/syn/quartus/vortex/Makefile b/hw/syn/quartus/vortex/Makefile index 62b5cdd9..0b591385 100644 --- a/hw/syn/quartus/vortex/Makefile +++ b/hw/syn/quartus/vortex/Makefile @@ -1,70 +1,67 @@ -PROJECT = Vortex_Socket -TOP_LEVEL_ENTITY = Vortex_Socket -SRC_FILE = Vortex_Socket.v -PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf +load_package flow +package require cmdline -# Part, Family -FAMILY = "Arria 10" -DEVICE = 10AX115N3F40E2SG +set options { + { "project.arg" "" "Project name" } + { "family.arg" "" "Device family name" } + { "device.arg" "" "Device name" } + { "top.arg" "" "Top level module" } + { "src.arg" "" "Verilog source file" } + { "inc.arg" "" "Include path (optional)" } + { "sdc.arg" "" "Timing Design Constraints file (optional)" } + { "set.arg" "" "Macro value (optional)" } +} -# Executable Configuration -SYN_ARGS = --parallel --read_settings_files=on -FIT_ARGS = --part=$(DEVICE) --read_settings_files=on -ASM_ARGS = -STA_ARGS = --do_report_timing +set q_args_orig $quartus(args) -# Build targets -all: $(PROJECT).sta.rpt +array set opts [::cmdline::getoptions quartus(args) $options] -syn: $(PROJECT).syn.rpt +# Verify required parameters +set requiredParameters {project family device top src} +foreach p $requiredParameters { + if {$opts($p) == ""} { + puts stderr "Missing required parameter: -$p" + exit 1 + } +} -fit: $(PROJECT).fit.rpt +project_new $opts(project) -overwrite -asm: $(PROJECT).asm.rpt +set_global_assignment -name FAMILY $opts(family) +set_global_assignment -name DEVICE $opts(device) +set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) +set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin +set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL +set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 -sta: $(PROJECT).sta.rpt +set idx 0 +foreach arg $q_args_orig { + incr idx + if [string match "-src" $arg] { + set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx] + } + if [string match "-inc" $arg] { + set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx] + } + if [string match "-sdc" $arg] { + set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx] + } + if [string match "-set" $arg] { + set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx] + } +} -smart: smart.log +proc make_all_pins_virtual {} { + execute_module -tool map + set name_ids [get_names -filter * -node_type pin] + foreach_in_collection name_id $name_ids { + set pin_name [get_name_info -info full_path $name_id] + post_message "Making VIRTUAL_PIN assignment to $pin_name" + set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON + } + export_assignments +} -# Target implementations -STAMP = echo done > +make_all_pins_virtual -$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES) - quartus_syn $(PROJECT) $(SYN_ARGS) - $(STAMP) fit.chg - -$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt - quartus_fit $(PROJECT) $(FIT_ARGS) - $(STAMP) asm.chg - $(STAMP) sta.chg - -$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt - quartus_asm $(PROJECT) $(ASM_ARGS) - -$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt - quartus_sta $(PROJECT) $(STA_ARGS) - -smart.log: $(PROJECT_FILES) - quartus_sh --determine_smart_action $(PROJECT) > smart.log - -# Project initialization -$(PROJECT_FILES): - quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache" - -syn.chg: - $(STAMP) syn.chg - -fit.chg: - $(STAMP) fit.chg - -sta.chg: - $(STAMP) sta.chg - -asm.chg: - $(STAMP) asm.chg - -program: $(PROJECT).sof - quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof" - -clean: - rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox +project_close \ No newline at end of file