diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index d0f17edc..8eade186 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -9,6 +9,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ +DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE @@ -58,7 +59,7 @@ VL_FLAGS += verilator.vlt # Debugigng ifdef DEBUG - VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS) + VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs --trace-threads 1 $(DBG_FLAGS) CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) else VL_FLAGS += -DNDEBUG diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index dfddb482..1883126a 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -31,9 +31,9 @@ opae_sim::opae_sim() { #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedFstC(); + trace_ = new VerilatedVcdC(); vortex_afu_->trace(trace_, 99); - trace_->open("trace.fst"); + trace_->open("trace.vcd"); #endif this->reset(); diff --git a/driver/opae/vlsim/opae_sim.h b/driver/opae/vlsim/opae_sim.h index 58b57757..54421d26 100644 --- a/driver/opae/vlsim/opae_sim.h +++ b/driver/opae/vlsim/opae_sim.h @@ -5,7 +5,7 @@ #include "verilated.h" #ifdef VCD_OUTPUT -#include +#include #endif #include @@ -88,6 +88,6 @@ private: RAM ram_; Vvortex_afu_shim *vortex_afu_; #ifdef VCD_OUTPUT - VerilatedFstC *trace_; + VerilatedVcdC *trace_; #endif }; \ No newline at end of file diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 1a66b335..cd0975bf 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -9,6 +9,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ +DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE @@ -55,7 +56,7 @@ VL_FLAGS += verilator.vlt # Debugigng ifdef DEBUG - VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS) + VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs --trace-threads 1 $(DBG_FLAGS) CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) else VL_FLAGS += -DNDEBUG diff --git a/driver/tests/dogfood/dogfood.cpp b/driver/tests/dogfood/dogfood.cpp index 5d2e6016..87985c88 100644 --- a/driver/tests/dogfood/dogfood.cpp +++ b/driver/tests/dogfood/dogfood.cpp @@ -177,7 +177,7 @@ int main(int argc, char *argv[]) { size_t buf_size = num_points * sizeof(uint32_t); std::cout << "number of points: " << num_points << std::endl; - std::cout << "buffer size: " << std::hex << buf_size << std::dec << " bytes" << std::endl; + std::cout << "buffer size: " << buf_size << " bytes" << std::endl; // upload program std::cout << "upload kernel" << std::endl; diff --git a/hw/opae/README b/hw/opae/README index ca141072..785dfa5c 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -83,6 +83,8 @@ tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd tar -zcvf run.log.tar.gz build_ase_1c/work/run.log tar -zcvf vx_scope.vcd.tar.gz vx_scope.vcd tar -cvjf vx_scope.vcd.tar.bz2 vx_scope.vcd +tar -cvjf trace.fst.tar.bz2 trace.fst run.log +tar -cvjf trace.vcd.tar.bz2 trace.vcd run.log # decompress VCD trace tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz @@ -113,4 +115,7 @@ if slack = +1.664 -> minimal period = 5-1.664 = 3.336 -> fmax = 1/3.336 = 300 Mh make -C ../../rtlsim clean && reset && make -C ../../rtlsim # split tar into multiple parts -split -b 50M home.tar.bz2 "home.tar.bz2.part" \ No newline at end of file +split -b 50M home.tar.bz2 "home.tar.bz2.part" + + +cat run.log | grep -c "cache[0-9]*:[0-9]* dram_req" \ No newline at end of file diff --git a/hw/opae/sources_1c.txt b/hw/opae/sources_1c.txt index 897468c2..d36f5e7c 100644 --- a/hw/opae/sources_1c.txt +++ b/hw/opae/sources_1c.txt @@ -10,6 +10,7 @@ #+define+DBG_PRINT_CACHE_BANK #+define+DBG_PRINT_CACHE_SNP #+define+DBG_PRINT_CACHE_MSRQ +#+define+DBG_PRINT_CACHE_DATA #+define+DBG_PRINT_DRAM #+define+DBG_PRINT_PIPELINE #+define+DBG_PRINT_OPAE diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index a8843bc1..724ff8dd 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -1105,7 +1105,7 @@ wire scope_changed = `SCOPE_TRIGGER; VX_scope #( .DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})), .BUSW (64), - .SIZE (4096), + .SIZE (`SCOPE_SIZE), .UPDW ($bits({`SCOPE_UPDATE_LIST})) ) scope ( .clk (clk), diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 6c899096..d2e7e5c0 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -211,34 +211,34 @@ module VX_cluster #( .reset (reset), // input requests - .in_io_req_valid (per_core_io_req_valid), - .in_io_req_rw (per_core_io_req_rw), - .in_io_req_byteen (per_core_io_req_byteen), - .in_io_req_addr (per_core_io_req_addr), - .in_io_req_data (per_core_io_req_data), - .in_io_req_tag (per_core_io_req_tag), - .in_io_req_ready (per_core_io_req_ready), + .io_req_valid_in (per_core_io_req_valid), + .io_req_rw_in (per_core_io_req_rw), + .io_req_byteen_in (per_core_io_req_byteen), + .io_req_addr_in (per_core_io_req_addr), + .io_req_data_in (per_core_io_req_data), + .io_req_tag_in (per_core_io_req_tag), + .io_req_ready_in (per_core_io_req_ready), // input responses - .in_io_rsp_valid (per_core_io_rsp_valid), - .in_io_rsp_data (per_core_io_rsp_data), - .in_io_rsp_tag (per_core_io_rsp_tag), - .in_io_rsp_ready (per_core_io_rsp_ready), + .io_rsp_valid_in (per_core_io_rsp_valid), + .io_rsp_data_in (per_core_io_rsp_data), + .io_rsp_tag_in (per_core_io_rsp_tag), + .io_rsp_ready_in (per_core_io_rsp_ready), // output request - .out_io_req_valid (io_req_valid), - .out_io_req_rw (io_req_rw), - .out_io_req_byteen (io_req_byteen), - .out_io_req_addr (io_req_addr), - .out_io_req_data (io_req_data), - .out_io_req_tag (io_req_tag), - .out_io_req_ready (io_req_ready), + .io_req_valid_out (io_req_valid), + .io_req_rw_out (io_req_rw), + .io_req_byteen_out (io_req_byteen), + .io_req_addr_out (io_req_addr), + .io_req_data_out (io_req_data), + .io_req_tag_out (io_req_tag), + .io_req_ready_out (io_req_ready), // output response - .out_io_rsp_valid (io_rsp_valid), - .out_io_rsp_tag (io_rsp_tag), - .out_io_rsp_data (io_rsp_data), - .out_io_rsp_ready (io_rsp_ready) + .io_rsp_valid_out (io_rsp_valid), + .io_rsp_tag_out (io_rsp_tag), + .io_rsp_data_out (io_rsp_data), + .io_rsp_ready_out (io_rsp_ready) ); VX_csr_io_arb #( @@ -250,28 +250,28 @@ module VX_cluster #( .request_id (csr_io_req_coreid), // input requests - .in_csr_io_req_valid (csr_io_req_valid), - .in_csr_io_req_addr (csr_io_req_addr), - .in_csr_io_req_rw (csr_io_req_rw), - .in_csr_io_req_data (csr_io_req_data), - .in_csr_io_req_ready (csr_io_req_ready), + .csr_io_req_valid_in (csr_io_req_valid), + .csr_io_req_addr_in (csr_io_req_addr), + .csr_io_req_rw_in (csr_io_req_rw), + .csr_io_req_data_in (csr_io_req_data), + .csr_io_req_ready_in (csr_io_req_ready), // input responses - .in_csr_io_rsp_valid (per_core_csr_io_rsp_valid), - .in_csr_io_rsp_data (per_core_csr_io_rsp_data), - .in_csr_io_rsp_ready (per_core_csr_io_rsp_ready), + .csr_io_rsp_valid_in (per_core_csr_io_rsp_valid), + .csr_io_rsp_data_in (per_core_csr_io_rsp_data), + .csr_io_rsp_ready_in (per_core_csr_io_rsp_ready), // output request - .out_csr_io_req_valid (per_core_csr_io_req_valid), - .out_csr_io_req_addr (per_core_csr_io_req_addr), - .out_csr_io_req_rw (per_core_csr_io_req_rw), - .out_csr_io_req_data (per_core_csr_io_req_data), - .out_csr_io_req_ready (per_core_csr_io_req_ready), + .csr_io_req_valid_out (per_core_csr_io_req_valid), + .csr_io_req_addr_out (per_core_csr_io_req_addr), + .csr_io_req_rw_out (per_core_csr_io_req_rw), + .csr_io_req_data_out (per_core_csr_io_req_data), + .csr_io_req_ready_out (per_core_csr_io_req_ready), // output response - .out_csr_io_rsp_valid (csr_io_rsp_valid), - .out_csr_io_rsp_data (csr_io_rsp_data), - .out_csr_io_rsp_ready (csr_io_rsp_ready) + .csr_io_rsp_valid_out (csr_io_rsp_valid), + .csr_io_rsp_data_out (csr_io_rsp_data), + .csr_io_rsp_ready_out (csr_io_rsp_ready) ); assign busy = (| per_core_busy); @@ -281,72 +281,72 @@ module VX_cluster #( // L2 Cache /////////////////////////////////////////////////////////// - wire[`L2NUM_REQUESTS-1:0] l2_core_req_valid; - wire[`L2NUM_REQUESTS-1:0] l2_core_req_rw; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] l2_core_req_byteen; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] l2_core_req_addr; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_req_tag; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_req_data; - wire l2_core_req_ready; + wire[`L2NUM_REQUESTS-1:0] core_dram_req_valid; + wire[`L2NUM_REQUESTS-1:0] core_dram_req_rw; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] core_dram_req_tag; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data; + wire core_dram_req_ready; - wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_valid; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_rsp_data; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag; - wire l2_core_rsp_ready; + wire[`L2NUM_REQUESTS-1:0] core_dram_rsp_valid; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag; + wire core_dram_rsp_ready; - wire[`NUM_CORES-1:0] l2_snp_fwdout_valid; - wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] l2_snp_fwdout_addr; - wire[`NUM_CORES-1:0] l2_snp_fwdout_invalidate; - wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdout_tag; - wire[`NUM_CORES-1:0] l2_snp_fwdout_ready; + wire[`NUM_CORES-1:0] core_snp_fwdout_valid; + wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr; + wire[`NUM_CORES-1:0] core_snp_fwdout_invalidate; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag; + wire[`NUM_CORES-1:0] core_snp_fwdout_ready; - wire[`NUM_CORES-1:0] l2_snp_fwdin_valid; - wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdin_tag; - wire[`NUM_CORES-1:0] l2_snp_fwdin_ready; + wire[`NUM_CORES-1:0] core_snp_fwdin_valid; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag; + wire[`NUM_CORES-1:0] core_snp_fwdin_ready; for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin - assign l2_core_req_valid [i] = per_core_D_dram_req_valid[(i/2)]; - assign l2_core_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)]; + assign core_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)]; + assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)]; - assign l2_core_req_rw [i] = per_core_D_dram_req_rw[(i/2)]; - assign l2_core_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)]; + assign core_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)]; + assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)]; - assign l2_core_req_byteen [i] = per_core_D_dram_req_byteen[(i/2)]; - assign l2_core_req_byteen [i+1] = per_core_I_dram_req_byteen[(i/2)]; + assign core_dram_req_byteen [i] = per_core_D_dram_req_byteen[(i/2)]; + assign core_dram_req_byteen [i+1] = per_core_I_dram_req_byteen[(i/2)]; - assign l2_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)]; - assign l2_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)]; + assign core_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)]; + assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)]; - assign l2_core_req_data [i] = per_core_D_dram_req_data[(i/2)]; - assign l2_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)]; + assign core_dram_req_data [i] = per_core_D_dram_req_data[(i/2)]; + assign core_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)]; - assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; - assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; + assign core_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; + assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; - assign per_core_D_dram_req_ready [(i/2)] = l2_core_req_ready; - assign per_core_I_dram_req_ready [(i/2)] = l2_core_req_ready; + assign per_core_D_dram_req_ready [(i/2)] = core_dram_req_ready; + assign per_core_I_dram_req_ready [(i/2)] = core_dram_req_ready; - assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i] && l2_core_rsp_ready; - assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1] && l2_core_rsp_ready; + assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i] && core_dram_rsp_ready; + assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1] && core_dram_rsp_ready; - assign per_core_D_dram_rsp_data [(i/2)] = l2_core_rsp_data[i]; - assign per_core_I_dram_rsp_data [(i/2)] = l2_core_rsp_data[i+1]; + assign per_core_D_dram_rsp_data [(i/2)] = core_dram_rsp_data[i]; + assign per_core_I_dram_rsp_data [(i/2)] = core_dram_rsp_data[i+1]; - assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i]; - assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1]; + assign per_core_D_dram_rsp_tag [(i/2)] = core_dram_rsp_tag[i]; + assign per_core_I_dram_rsp_tag [(i/2)] = core_dram_rsp_tag[i+1]; - assign per_core_snp_req_valid [(i/2)] = l2_snp_fwdout_valid [(i/2)]; - assign per_core_snp_req_addr [(i/2)] = l2_snp_fwdout_addr [(i/2)]; - assign per_core_snp_req_invalidate [(i/2)] = l2_snp_fwdout_invalidate [(i/2)]; - assign per_core_snp_req_tag [(i/2)] = l2_snp_fwdout_tag [(i/2)]; - assign l2_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; + assign per_core_snp_req_valid [(i/2)] = core_snp_fwdout_valid [(i/2)]; + assign per_core_snp_req_addr [(i/2)] = core_snp_fwdout_addr [(i/2)]; + assign per_core_snp_req_invalidate [(i/2)] = core_snp_fwdout_invalidate [(i/2)]; + assign per_core_snp_req_tag [(i/2)] = core_snp_fwdout_tag [(i/2)]; + assign core_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; - assign l2_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)]; - assign l2_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)]; - assign per_core_snp_rsp_ready [(i/2)] = l2_snp_fwdin_ready [(i/2)]; + assign core_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)]; + assign core_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)]; + assign per_core_snp_rsp_ready [(i/2)] = core_snp_fwdin_ready [(i/2)]; end - assign l2_core_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready); + assign core_dram_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready); VX_cache #( .CACHE_ID (`L2CACHE_ID), @@ -357,11 +357,10 @@ module VX_cluster #( .NUM_REQUESTS (`L2NUM_REQUESTS), .CREQ_SIZE (`L2CREQ_SIZE), .MRVQ_SIZE (`L2MRVQ_SIZE), - .DFPQ_SIZE (`L2DFPQ_SIZE), + .DRPQ_SIZE (`L2DRPQ_SIZE), .SNRQ_SIZE (`L2SNRQ_SIZE), .CWBQ_SIZE (`L2CWBQ_SIZE), - .DWBQ_SIZE (`L2DWBQ_SIZE), - .DFQQ_SIZE (`L2DFQQ_SIZE), + .DREQ_SIZE (`L2DREQ_SIZE), .DRAM_ENABLE (1), .WRITE_ENABLE (1), .SNOOP_FORWARDING (1), @@ -378,19 +377,19 @@ module VX_cluster #( .reset (reset), // Core request - .core_req_valid (l2_core_req_valid), - .core_req_rw (l2_core_req_rw), - .core_req_byteen (l2_core_req_byteen), - .core_req_addr (l2_core_req_addr), - .core_req_data (l2_core_req_data), - .core_req_tag (l2_core_req_tag), - .core_req_ready (l2_core_req_ready), + .core_req_valid (core_dram_req_valid), + .core_req_rw (core_dram_req_rw), + .core_req_byteen (core_dram_req_byteen), + .core_req_addr (core_dram_req_addr), + .core_req_data (core_dram_req_data), + .core_req_tag (core_dram_req_tag), + .core_req_ready (core_dram_req_ready), // Core response - .core_rsp_valid (l2_core_rsp_valid), - .core_rsp_data (l2_core_rsp_data), - .core_rsp_tag (l2_core_rsp_tag), - .core_rsp_ready (l2_core_rsp_ready), + .core_rsp_valid (core_dram_rsp_valid), + .core_rsp_data (core_dram_rsp_data), + .core_rsp_tag (core_dram_rsp_tag), + .core_rsp_ready (core_dram_rsp_ready), // DRAM request .dram_req_valid (dram_req_valid), @@ -420,86 +419,86 @@ module VX_cluster #( .snp_rsp_ready (snp_rsp_ready), // Snoop forwarding out - .snp_fwdout_valid (l2_snp_fwdout_valid), - .snp_fwdout_addr (l2_snp_fwdout_addr), - .snp_fwdout_invalidate(l2_snp_fwdout_invalidate), - .snp_fwdout_tag (l2_snp_fwdout_tag), - .snp_fwdout_ready (l2_snp_fwdout_ready), + .snp_fwdout_valid (core_snp_fwdout_valid), + .snp_fwdout_addr (core_snp_fwdout_addr), + .snp_fwdout_invalidate(core_snp_fwdout_invalidate), + .snp_fwdout_tag (core_snp_fwdout_tag), + .snp_fwdout_ready (core_snp_fwdout_ready), // Snoop forwarding in - .snp_fwdin_valid (l2_snp_fwdin_valid), - .snp_fwdin_tag (l2_snp_fwdin_tag), - .snp_fwdin_ready (l2_snp_fwdin_ready) + .snp_fwdin_valid (core_snp_fwdin_valid), + .snp_fwdin_tag (core_snp_fwdin_tag), + .snp_fwdin_ready (core_snp_fwdin_ready) ); end else begin - wire[`L2NUM_REQUESTS-1:0] arb_dram_req_valid; - wire[`L2NUM_REQUESTS-1:0] arb_dram_req_rw; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] arb_dram_req_byteen; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_dram_req_addr; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_dram_req_tag; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_dram_req_data; - wire[`L2NUM_REQUESTS-1:0] arb_dram_req_ready; + wire[`L2NUM_REQUESTS-1:0] core_dram_req_valid; + wire[`L2NUM_REQUESTS-1:0] core_dram_req_rw; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] core_dram_req_tag; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data; + wire[`L2NUM_REQUESTS-1:0] core_dram_req_ready; - wire[`L2NUM_REQUESTS-1:0] arb_dram_rsp_valid; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_dram_rsp_data; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_dram_rsp_tag; - wire[`L2NUM_REQUESTS-1:0] arb_dram_rsp_ready; + wire[`L2NUM_REQUESTS-1:0] core_dram_rsp_valid; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag; + wire[`L2NUM_REQUESTS-1:0] core_dram_rsp_ready; - wire[`NUM_CORES-1:0] arb_snp_fwdout_valid; - wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_snp_fwdout_addr; - wire[`NUM_CORES-1:0] arb_snp_fwdout_invalidate; - wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdout_tag; - wire[`NUM_CORES-1:0] arb_snp_fwdout_ready; + wire[`NUM_CORES-1:0] core_snp_fwdout_valid; + wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr; + wire[`NUM_CORES-1:0] core_snp_fwdout_invalidate; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag; + wire[`NUM_CORES-1:0] core_snp_fwdout_ready; - wire[`NUM_CORES-1:0] arb_snp_fwdin_valid; - wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdin_tag; - wire[`NUM_CORES-1:0] arb_snp_fwdin_ready; + wire[`NUM_CORES-1:0] core_snp_fwdin_valid; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag; + wire[`NUM_CORES-1:0] core_snp_fwdin_ready; for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin - assign arb_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)]; - assign arb_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)]; + assign core_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)]; + assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)]; - assign arb_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)]; - assign arb_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)]; + assign core_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)]; + assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)]; - assign arb_dram_req_byteen[i] = per_core_D_dram_req_byteen[(i/2)]; - assign arb_dram_req_byteen[i+1] = per_core_I_dram_req_byteen[(i/2)]; + assign core_dram_req_byteen[i] = per_core_D_dram_req_byteen[(i/2)]; + assign core_dram_req_byteen[i+1] = per_core_I_dram_req_byteen[(i/2)]; - assign arb_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)]; - assign arb_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)]; + assign core_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)]; + assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)]; - assign arb_dram_req_data [i] = per_core_D_dram_req_data[(i/2)]; - assign arb_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)]; + assign core_dram_req_data [i] = per_core_D_dram_req_data[(i/2)]; + assign core_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)]; - assign arb_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; - assign arb_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; + assign core_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; + assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; - assign per_core_D_dram_req_ready [(i/2)] = arb_dram_req_ready[i]; - assign per_core_I_dram_req_ready [(i/2)] = arb_dram_req_ready[i+1]; + assign per_core_D_dram_req_ready [(i/2)] = core_dram_req_ready[i]; + assign per_core_I_dram_req_ready [(i/2)] = core_dram_req_ready[i+1]; - assign per_core_D_dram_rsp_valid [(i/2)] = arb_dram_rsp_valid[i]; - assign per_core_I_dram_rsp_valid [(i/2)] = arb_dram_rsp_valid[i+1]; + assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i]; + assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1]; - assign per_core_D_dram_rsp_data [(i/2)] = arb_dram_rsp_data[i]; - assign per_core_I_dram_rsp_data [(i/2)] = arb_dram_rsp_data[i+1]; + assign per_core_D_dram_rsp_data [(i/2)] = core_dram_rsp_data[i]; + assign per_core_I_dram_rsp_data [(i/2)] = core_dram_rsp_data[i+1]; - assign per_core_D_dram_rsp_tag [(i/2)] = arb_dram_rsp_tag[i]; - assign per_core_I_dram_rsp_tag [(i/2)] = arb_dram_rsp_tag[i+1]; + assign per_core_D_dram_rsp_tag [(i/2)] = core_dram_rsp_tag[i]; + assign per_core_I_dram_rsp_tag [(i/2)] = core_dram_rsp_tag[i+1]; - assign arb_dram_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)]; - assign arb_dram_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; + assign core_dram_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)]; + assign core_dram_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; - assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)]; - assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)]; - assign per_core_snp_req_invalidate [(i/2)] = arb_snp_fwdout_invalidate [(i/2)]; - assign per_core_snp_req_tag [(i/2)] = arb_snp_fwdout_tag [(i/2)]; - assign arb_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; + assign per_core_snp_req_valid [(i/2)] = core_snp_fwdout_valid [(i/2)]; + assign per_core_snp_req_addr [(i/2)] = core_snp_fwdout_addr [(i/2)]; + assign per_core_snp_req_invalidate [(i/2)] = core_snp_fwdout_invalidate [(i/2)]; + assign per_core_snp_req_tag [(i/2)] = core_snp_fwdout_tag [(i/2)]; + assign core_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; - assign arb_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)]; - assign arb_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)]; - assign per_core_snp_rsp_ready [(i/2)] = arb_snp_fwdin_ready [(i/2)]; + assign core_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)]; + assign core_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)]; + assign per_core_snp_rsp_ready [(i/2)] = core_snp_fwdin_ready [(i/2)]; end if (`NUM_CORES > 1) begin @@ -525,26 +524,26 @@ module VX_cluster #( .snp_rsp_tag (snp_rsp_tag), .snp_rsp_ready (snp_rsp_ready), - .snp_fwdout_valid (arb_snp_fwdout_valid), - .snp_fwdout_addr (arb_snp_fwdout_addr), - .snp_fwdout_invalidate(arb_snp_fwdout_invalidate), - .snp_fwdout_tag (arb_snp_fwdout_tag), - .snp_fwdout_ready (arb_snp_fwdout_ready), + .snp_fwdout_valid (core_snp_fwdout_valid), + .snp_fwdout_addr (core_snp_fwdout_addr), + .snp_fwdout_invalidate(core_snp_fwdout_invalidate), + .snp_fwdout_tag (core_snp_fwdout_tag), + .snp_fwdout_ready (core_snp_fwdout_ready), - .snp_fwdin_valid (arb_snp_fwdin_valid), - .snp_fwdin_tag (arb_snp_fwdin_tag), - .snp_fwdin_ready (arb_snp_fwdin_ready) + .snp_fwdin_valid (core_snp_fwdin_valid), + .snp_fwdin_tag (core_snp_fwdin_tag), + .snp_fwdin_ready (core_snp_fwdin_ready) ); end else begin - assign arb_snp_fwdout_valid = snp_req_valid; - assign arb_snp_fwdout_addr = snp_req_addr; - assign arb_snp_fwdout_invalidate = snp_req_invalidate; - assign arb_snp_fwdout_tag = snp_req_tag; - assign snp_req_ready = arb_snp_fwdout_ready; + assign core_snp_fwdout_valid = snp_req_valid; + assign core_snp_fwdout_addr = snp_req_addr; + assign core_snp_fwdout_invalidate = snp_req_invalidate; + assign core_snp_fwdout_tag = snp_req_tag; + assign snp_req_ready = core_snp_fwdout_ready; - assign snp_rsp_valid = arb_snp_fwdin_valid; - assign snp_rsp_tag = arb_snp_fwdin_tag; - assign arb_snp_fwdin_ready = snp_rsp_ready; + assign snp_rsp_valid = core_snp_fwdin_valid; + assign snp_rsp_tag = core_snp_fwdin_tag; + assign core_snp_fwdin_ready = snp_rsp_ready; end VX_mem_arb #( @@ -557,34 +556,34 @@ module VX_cluster #( .reset (reset), // Core request - .in_mem_req_valid (arb_dram_req_valid), - .in_mem_req_rw (arb_dram_req_rw), - .in_mem_req_byteen (arb_dram_req_byteen), - .in_mem_req_addr (arb_dram_req_addr), - .in_mem_req_data (arb_dram_req_data), - .in_mem_req_tag (arb_dram_req_tag), - .in_mem_req_ready (arb_dram_req_ready), + .mem_req_valid_in (core_dram_req_valid), + .mem_req_rw_in (core_dram_req_rw), + .mem_req_byteen_in (core_dram_req_byteen), + .mem_req_addr_in (core_dram_req_addr), + .mem_req_data_in (core_dram_req_data), + .mem_req_tag_in (core_dram_req_tag), + .mem_req_ready_in (core_dram_req_ready), // Core response - .in_mem_rsp_valid (arb_dram_rsp_valid), - .in_mem_rsp_data (arb_dram_rsp_data), - .in_mem_rsp_tag (arb_dram_rsp_tag), - .in_mem_rsp_ready (arb_dram_rsp_ready), + .mem_rsp_valid_in (core_dram_rsp_valid), + .mem_rsp_data_in (core_dram_rsp_data), + .mem_rsp_tag_in (core_dram_rsp_tag), + .mem_rsp_ready_in (core_dram_rsp_ready), // DRAM request - .out_mem_req_valid (dram_req_valid), - .out_mem_req_rw (dram_req_rw), - .out_mem_req_byteen (dram_req_byteen), - .out_mem_req_addr (dram_req_addr), - .out_mem_req_data (dram_req_data), - .out_mem_req_tag (dram_req_tag), - .out_mem_req_ready (dram_req_ready), + .mem_req_valid_out (dram_req_valid), + .mem_req_rw_out (dram_req_rw), + .mem_req_byteen_out (dram_req_byteen), + .mem_req_addr_out (dram_req_addr), + .mem_req_data_out (dram_req_data), + .mem_req_tag_out (dram_req_tag), + .mem_req_ready_out (dram_req_ready), // DRAM response - .out_mem_rsp_valid (dram_rsp_valid), - .out_mem_rsp_tag (dram_rsp_tag), - .out_mem_rsp_data (dram_rsp_data), - .out_mem_rsp_ready (dram_rsp_ready) + .mem_rsp_valid_out (dram_rsp_valid), + .mem_rsp_tag_out (dram_rsp_tag), + .mem_rsp_data_out (dram_rsp_data), + .mem_rsp_ready_out (dram_rsp_ready) ); end diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 6f3492c7..aafb86d1 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -213,29 +213,24 @@ `define DMRVQ_SIZE `MAX(`NUM_WARPS*`NUM_THREADS, 8) `endif -// Dram Fill Rsp Queue Size -`ifndef DDFPQ_SIZE -`define DDFPQ_SIZE 8 -`endif - -// Snoop Req Queue Size -`ifndef DSNRQ_SIZE -`define DSNRQ_SIZE 8 -`endif - // Core Writeback Queue Size `ifndef DCWBQ_SIZE `define DCWBQ_SIZE `DCREQ_SIZE `endif -// Dram Writeback Queue Size -`ifndef DDWBQ_SIZE -`define DDWBQ_SIZE 4 +// DRAM Request Queue Size +`ifndef DDREQ_SIZE +`define DDREQ_SIZE 8 `endif -// Dram Fill Req Queue Size -`ifndef DDFQQ_SIZE -`define DDFQQ_SIZE `DCREQ_SIZE +// DRAM Response Queue Size +`ifndef DDRPQ_SIZE +`define DDRPQ_SIZE 8 +`endif + +// Snoop Req Queue Size +`ifndef DSNRQ_SIZE +`define DSNRQ_SIZE 8 `endif // Icache Configurable Knobs ================================================== @@ -270,24 +265,19 @@ `define IMRVQ_SIZE `MAX(`ICREQ_SIZE, 8) `endif -// Dram Fill Rsp Queue Size -`ifndef IDFPQ_SIZE -`define IDFPQ_SIZE 8 -`endif - // Core Writeback Queue Size `ifndef ICWBQ_SIZE `define ICWBQ_SIZE `ICREQ_SIZE `endif -// Dram Writeback Queue Size -`ifndef IDWBQ_SIZE -`define IDWBQ_SIZE 8 +// DRAM Request Queue Size +`ifndef IDREQ_SIZE +`define IDREQ_SIZE 8 `endif -// Dram Fill Req Queue Size -`ifndef IDFQQ_SIZE -`define IDFQQ_SIZE `ICREQ_SIZE +// DRAM Response Queue Size +`ifndef IDRPQ_SIZE +`define IDRPQ_SIZE 8 `endif // SM Configurable Knobs ====================================================== @@ -354,29 +344,24 @@ `define L2MRVQ_SIZE `MAX(`L2CREQ_SIZE, 8) `endif -// Dram Fill Rsp Queue Size -`ifndef L2DFPQ_SIZE -`define L2DFPQ_SIZE 8 -`endif - -// Snoop Req Queue Size -`ifndef L2SNRQ_SIZE -`define L2SNRQ_SIZE 8 -`endif - // Core Writeback Queue Size `ifndef L2CWBQ_SIZE `define L2CWBQ_SIZE `L2CREQ_SIZE `endif -// Dram Writeback Queue Size -`ifndef L2DWBQ_SIZE -`define L2DWBQ_SIZE 8 +// DRAM Request Queue Size +`ifndef L2DREQ_SIZE +`define L2DREQ_SIZE 8 `endif -// Dram Fill Req Queue Size -`ifndef L2DFQQ_SIZE -`define L2DFQQ_SIZE `L2CREQ_SIZE +// DRAM Response Queue Size +`ifndef L2DRPQ_SIZE +`define L2DRPQ_SIZE 8 +`endif + +// Snoop Req Queue Size +`ifndef L2SNRQ_SIZE +`define L2SNRQ_SIZE 8 `endif // L3cache Configurable Knobs ================================================= @@ -411,9 +396,19 @@ `define L3MRVQ_SIZE `MAX(`L3CREQ_SIZE, 8) `endif -// Dram Fill Rsp Queue Size -`ifndef L3DFPQ_SIZE -`define L3DFPQ_SIZE 8 +// Core Writeback Queue Size +`ifndef L3CWBQ_SIZE +`define L3CWBQ_SIZE `L3CREQ_SIZE +`endif + +// DRAM Request Queue Size +`ifndef L3DREQ_SIZE +`define L3DREQ_SIZE 8 +`endif + +// DRAM Response Queue Size +`ifndef L3DRPQ_SIZE +`define L3DRPQ_SIZE 8 `endif // Snoop Req Queue Size @@ -421,19 +416,4 @@ `define L3SNRQ_SIZE 8 `endif -// Core Writeback Queue Size -`ifndef L3CWBQ_SIZE -`define L3CWBQ_SIZE `L3CREQ_SIZE -`endif - -// Dram Writeback Queue Size -`ifndef L3DWBQ_SIZE -`define L3DWBQ_SIZE 8 -`endif - -// Dram Fill Req Queue Size -`ifndef L3DFQQ_SIZE -`define L3DFQQ_SIZE `L3CREQ_SIZE -`endif - `endif diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index aa1032a8..689b21c2 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -260,7 +260,7 @@ module VX_core #( .core_dcache_req_if (arb_dcache_req_if), .core_dcache_rsp_if (arb_dcache_rsp_if), - // Dram <-> Dcache + // DRAM <-> Dcache .dcache_dram_req_if (dcache_dram_req_if), .dcache_dram_rsp_if (dcache_dram_rsp_if), .dcache_snp_req_if (dcache_snp_req_if), @@ -270,7 +270,7 @@ module VX_core #( .core_icache_req_if (core_icache_req_if), .core_icache_rsp_if (core_icache_rsp_if), - // Dram <-> Icache + // DRAM <-> Icache .icache_dram_req_if (icache_dram_req_if), .icache_dram_rsp_if (icache_dram_rsp_if) ); diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index 5ee0831e..73fdc38c 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -17,7 +17,9 @@ module VX_csr_data #( input wire write_enable, input wire[`CSR_ADDR_BITS-1:0] write_addr, input wire[`NW_BITS-1:0] write_wid, - input wire[`CSR_WIDTH-1:0] write_data + input wire[`CSR_WIDTH-1:0] write_data, + + input wire busy ); reg [`CSR_WIDTH-1:0] csr_satp; reg [`CSR_WIDTH-1:0] csr_mstatus; @@ -86,7 +88,9 @@ module VX_csr_data #( csr_cycle <= 0; csr_instret <= 0; end else begin - csr_cycle <= csr_cycle + 1; + if (busy) begin + csr_cycle <= csr_cycle + 1; + end if (cmt_to_csr_if.valid) begin csr_instret <= csr_instret + 64'(cmt_to_csr_if.num_commits); end diff --git a/hw/rtl/VX_csr_io_arb.v b/hw/rtl/VX_csr_io_arb.v index 04097c55..99028363 100644 --- a/hw/rtl/VX_csr_io_arb.v +++ b/hw/rtl/VX_csr_io_arb.v @@ -10,28 +10,28 @@ module VX_csr_io_arb #( input wire [REQS_BITS-1:0] request_id, // input requests - input wire in_csr_io_req_valid, - input wire [11:0] in_csr_io_req_addr, - input wire in_csr_io_req_rw, - input wire [31:0] in_csr_io_req_data, - output wire in_csr_io_req_ready, + input wire csr_io_req_valid_in, + input wire [11:0] csr_io_req_addr_in, + input wire csr_io_req_rw_in, + input wire [31:0] csr_io_req_data_in, + output wire csr_io_req_ready_in, // input response - input wire [NUM_REQUESTS-1:0] in_csr_io_rsp_valid, - input wire [NUM_REQUESTS-1:0][31:0] in_csr_io_rsp_data, - output wire [NUM_REQUESTS-1:0] in_csr_io_rsp_ready, + input wire [NUM_REQUESTS-1:0] csr_io_rsp_valid_in, + input wire [NUM_REQUESTS-1:0][31:0] csr_io_rsp_data_in, + output wire [NUM_REQUESTS-1:0] csr_io_rsp_ready_in, // output request - output wire [NUM_REQUESTS-1:0] out_csr_io_req_valid, - output wire [NUM_REQUESTS-1:0][11:0] out_csr_io_req_addr, - output wire [NUM_REQUESTS-1:0] out_csr_io_req_rw, - output wire [NUM_REQUESTS-1:0][31:0] out_csr_io_req_data, - input wire [NUM_REQUESTS-1:0] out_csr_io_req_ready, + output wire [NUM_REQUESTS-1:0] csr_io_req_valid_out, + output wire [NUM_REQUESTS-1:0][11:0] csr_io_req_addr_out, + output wire [NUM_REQUESTS-1:0] csr_io_req_rw_out, + output wire [NUM_REQUESTS-1:0][31:0] csr_io_req_data_out, + input wire [NUM_REQUESTS-1:0] csr_io_req_ready_out, // output response - output wire out_csr_io_rsp_valid, - output wire [31:0] out_csr_io_rsp_data, - input wire out_csr_io_rsp_ready + output wire csr_io_rsp_valid_out, + output wire [31:0] csr_io_rsp_data_out, + input wire csr_io_rsp_ready_out ); if (NUM_REQUESTS == 1) begin @@ -39,26 +39,26 @@ module VX_csr_io_arb #( `UNUSED_VAR (reset) `UNUSED_VAR (request_id) - assign out_csr_io_req_valid = in_csr_io_req_valid; - assign out_csr_io_req_rw = in_csr_io_req_rw; - assign out_csr_io_req_addr = in_csr_io_req_addr; - assign out_csr_io_req_data = in_csr_io_req_data; - assign in_csr_io_req_ready = out_csr_io_req_ready; + assign csr_io_req_valid_out = csr_io_req_valid_in; + assign csr_io_req_rw_out = csr_io_req_rw_in; + assign csr_io_req_addr_out = csr_io_req_addr_in; + assign csr_io_req_data_out = csr_io_req_data_in; + assign csr_io_req_ready_in = csr_io_req_ready_out; - assign out_csr_io_rsp_valid = in_csr_io_rsp_valid; - assign out_csr_io_rsp_data = in_csr_io_rsp_data; - assign in_csr_io_rsp_ready = out_csr_io_rsp_ready; + assign csr_io_rsp_valid_out = csr_io_rsp_valid_in; + assign csr_io_rsp_data_out = csr_io_rsp_data_in; + assign csr_io_rsp_ready_in = csr_io_rsp_ready_out; end else begin for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign out_csr_io_req_valid[i] = in_csr_io_req_valid && (request_id == `REQS_BITS'(i)); - assign out_csr_io_req_rw[i] = in_csr_io_req_rw; - assign out_csr_io_req_addr[i] = in_csr_io_req_addr; - assign out_csr_io_req_data[i] = in_csr_io_req_data; + assign csr_io_req_valid_out[i] = csr_io_req_valid_in && (request_id == `REQS_BITS'(i)); + assign csr_io_req_rw_out[i] = csr_io_req_rw_in; + assign csr_io_req_addr_out[i] = csr_io_req_addr_in; + assign csr_io_req_data_out[i] = csr_io_req_data_in; end - assign in_csr_io_req_ready = out_csr_io_req_ready[request_id]; + assign csr_io_req_ready_in = csr_io_req_ready_out[request_id]; reg [REQS_BITS-1:0] bus_rsp_sel; @@ -67,17 +67,17 @@ module VX_csr_io_arb #( ) arbiter ( .clk (clk), .reset (reset), - .requests (in_csr_io_rsp_valid), + .requests (csr_io_rsp_valid_in), .grant_index (bus_rsp_sel), `UNUSED_PIN (grant_valid), `UNUSED_PIN (grant_onehot) ); - assign out_csr_io_rsp_valid = in_csr_io_rsp_valid [bus_rsp_sel]; - assign out_csr_io_rsp_data = in_csr_io_rsp_data [bus_rsp_sel]; + assign csr_io_rsp_valid_out = csr_io_rsp_valid_in [bus_rsp_sel]; + assign csr_io_rsp_data_out = csr_io_rsp_data_in [bus_rsp_sel]; for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign in_csr_io_rsp_ready[i] = out_csr_io_rsp_ready && (bus_rsp_sel == `REQS_BITS'(i)); + assign csr_io_rsp_ready_in[i] = csr_io_rsp_ready_out && (bus_rsp_sel == `REQS_BITS'(i)); end end diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 40b76d83..c249b7fb 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -13,7 +13,9 @@ module VX_csr_unit #( VX_csr_io_rsp_if csr_io_rsp_if, VX_csr_req_if csr_req_if, - VX_exu_to_cmt_if csr_commit_if + VX_exu_to_cmt_if csr_commit_if, + + input wire busy ); VX_csr_req_if csr_pipe_req_if(); VX_exu_to_cmt_if csr_pipe_rsp_if(); @@ -53,7 +55,8 @@ module VX_csr_unit #( .write_enable (csr_we_s1), .write_addr (csr_addr_s1), .write_wid (csr_pipe_rsp_if.wid), - .write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]) + .write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]), + .busy (busy) ); wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr) diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 9586bcd6..6cf8c263 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -38,6 +38,7 @@ module VX_execute #( VX_fpu_to_cmt_if fpu_commit_if, VX_exu_to_cmt_if gpu_commit_if, + input wire busy, output wire ebreak ); @@ -69,11 +70,12 @@ module VX_execute #( .clk (clk), .reset (reset), .cmt_to_csr_if (cmt_to_csr_if), - .csr_to_issue_if (csr_to_issue_if), + .csr_to_issue_if(csr_to_issue_if), .csr_io_req_if (csr_io_req_if), .csr_io_rsp_if (csr_io_rsp_if), .csr_req_if (csr_req_if), - .csr_commit_if (csr_commit_if) + .csr_commit_if (csr_commit_if), + .busy (busy) ); `ifdef EXT_M_ENABLE diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index e732e973..87a6606d 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -72,23 +72,25 @@ module VX_gpu_unit #( assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data - 1); // output - - assign warp_ctl_if.valid = gpu_req_if.valid && gpu_commit_if.ready; - assign warp_ctl_if.wid = gpu_commit_if.wid; - assign warp_ctl_if.tmc = tmc; - assign warp_ctl_if.wspawn = wspawn; - assign warp_ctl_if.split = split; - assign warp_ctl_if.barrier = barrier; - - assign gpu_commit_if.valid = gpu_req_if.valid; - assign gpu_commit_if.wid = gpu_req_if.wid; - assign gpu_commit_if.tmask = gpu_req_if.tmask; - assign gpu_commit_if.PC = gpu_req_if.PC; - assign gpu_commit_if.rd = gpu_req_if.rd; - assign gpu_commit_if.wb = gpu_req_if.wb; + wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid; + + VX_generic_register #( + .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE) + ) csr_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}), + .out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier}) + ); + + assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready; + assign warp_ctl_if.wid = gpu_commit_if.wid; + // can accept new request? - assign gpu_req_if.ready = gpu_commit_if.ready; + assign gpu_req_if.ready = ~stall; `SCOPE_ASSIGN (gpu_req_fire, gpu_req_if.valid && gpu_req_if.ready); `SCOPE_ASSIGN (gpu_req_wid, gpu_req_if.wid); diff --git a/hw/rtl/VX_io_arb.v b/hw/rtl/VX_io_arb.v index 43b40c41..eeb78488 100644 --- a/hw/rtl/VX_io_arb.v +++ b/hw/rtl/VX_io_arb.v @@ -14,52 +14,52 @@ module VX_io_arb #( input wire reset, // input requests - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] in_io_req_valid, - input wire [NUM_REQUESTS-1:0] in_io_req_rw, - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] in_io_req_byteen, - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] in_io_req_addr, - input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] in_io_req_data, - input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_io_req_tag, - output wire [NUM_REQUESTS-1:0] in_io_req_ready, + input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] io_req_valid_in, + input wire [NUM_REQUESTS-1:0] io_req_rw_in, + input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_in, + input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in, + input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in, + input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in, + output wire [NUM_REQUESTS-1:0] io_req_ready_in, // input response - output wire [NUM_REQUESTS-1:0] in_io_rsp_valid, - output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] in_io_rsp_data, - output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_io_rsp_tag, - input wire [NUM_REQUESTS-1:0] in_io_rsp_ready, + output wire [NUM_REQUESTS-1:0] io_rsp_valid_in, + output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] io_rsp_data_in, + output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_rsp_tag_in, + input wire [NUM_REQUESTS-1:0] io_rsp_ready_in, // output request - output wire [`NUM_THREADS-1:0] out_io_req_valid, - output wire out_io_req_rw, - output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] out_io_req_byteen, - output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] out_io_req_addr, - output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] out_io_req_data, - output wire [TAG_OUT_WIDTH-1:0] out_io_req_tag, - input wire out_io_req_ready, + output wire [`NUM_THREADS-1:0] io_req_valid_out, + output wire io_req_rw_out, + output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out, + output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out, + output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out, + output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out, + input wire io_req_ready_out, // output response - input wire out_io_rsp_valid, - input wire [WORD_WIDTH-1:0] out_io_rsp_data, - input wire [TAG_OUT_WIDTH-1:0] out_io_rsp_tag, - output wire out_io_rsp_ready + input wire io_rsp_valid_out, + input wire [WORD_WIDTH-1:0] io_rsp_data_out, + input wire [TAG_OUT_WIDTH-1:0] io_rsp_tag_out, + output wire io_rsp_ready_out ); if (NUM_REQUESTS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) - assign out_io_req_valid = in_io_req_valid; - assign out_io_req_rw = in_io_req_rw; - assign out_io_req_byteen = in_io_req_byteen; - assign out_io_req_addr = in_io_req_addr; - assign out_io_req_data = in_io_req_data; - assign out_io_req_tag = in_io_req_tag; - assign in_io_req_ready = out_io_req_ready; + assign io_req_valid_out = io_req_valid_in; + assign io_req_rw_out = io_req_rw_in; + assign io_req_byteen_out = io_req_byteen_in; + assign io_req_addr_out = io_req_addr_in; + assign io_req_data_out = io_req_data_in; + assign io_req_tag_out = io_req_tag_in; + assign io_req_ready_in = io_req_ready_out; - assign in_io_rsp_valid = out_io_rsp_valid; - assign in_io_rsp_data = out_io_rsp_data; - assign in_io_rsp_tag = out_io_rsp_tag; - assign out_io_rsp_ready = in_io_rsp_ready; + assign io_rsp_valid_in = io_rsp_valid_out; + assign io_rsp_data_in = io_rsp_data_out; + assign io_rsp_tag_in = io_rsp_tag_out; + assign io_rsp_ready_out = io_rsp_ready_in; end else begin @@ -68,7 +68,7 @@ module VX_io_arb #( wire [NUM_REQUESTS-1:0] valid_requests; for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign valid_requests[i] = (| in_io_req_valid[i]); + assign valid_requests[i] = (| io_req_valid_in[i]); end VX_rr_arbiter #( @@ -82,25 +82,25 @@ module VX_io_arb #( `UNUSED_PIN (grant_onehot) ); - assign out_io_req_valid = in_io_req_valid [bus_req_sel]; - assign out_io_req_rw = in_io_req_rw [bus_req_sel]; - assign out_io_req_byteen = in_io_req_byteen [bus_req_sel]; - assign out_io_req_addr = in_io_req_addr [bus_req_sel]; - assign out_io_req_data = in_io_req_data [bus_req_sel]; - assign out_io_req_tag = {in_io_req_tag [bus_req_sel], REQS_BITS'(bus_req_sel)}; + assign io_req_valid_out = io_req_valid_in [bus_req_sel]; + assign io_req_rw_out = io_req_rw_in [bus_req_sel]; + assign io_req_byteen_out = io_req_byteen_in [bus_req_sel]; + assign io_req_addr_out = io_req_addr_in [bus_req_sel]; + assign io_req_data_out = io_req_data_in [bus_req_sel]; + assign io_req_tag_out = {io_req_tag_in [bus_req_sel], REQS_BITS'(bus_req_sel)}; for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign in_io_req_ready[i] = out_io_req_ready && (bus_req_sel == REQS_BITS'(i)); + assign io_req_ready_in[i] = io_req_ready_out && (bus_req_sel == REQS_BITS'(i)); end - wire [REQS_BITS-1:0] bus_rsp_sel = out_io_rsp_tag[REQS_BITS-1:0]; + wire [REQS_BITS-1:0] bus_rsp_sel = io_rsp_tag_out[REQS_BITS-1:0]; for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign in_io_rsp_valid[i] = out_io_rsp_valid && (bus_rsp_sel == REQS_BITS'(i)); - assign in_io_rsp_data[i] = out_io_rsp_data; - assign in_io_rsp_tag[i] = out_io_rsp_tag[REQS_BITS +: TAG_IN_WIDTH]; + assign io_rsp_valid_in[i] = io_rsp_valid_out && (bus_rsp_sel == REQS_BITS'(i)); + assign io_rsp_data_in[i] = io_rsp_data_out; + assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; end - assign out_io_rsp_ready = in_io_rsp_ready[bus_rsp_sel]; + assign io_rsp_ready_out = io_rsp_ready_in[bus_rsp_sel]; end diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index f41cfc22..00fabefd 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -110,7 +110,7 @@ module VX_lsu_unit #( VX_cam_buffer #( .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2), .SIZE (`LSUQ_SIZE) - ) lsu_cam ( + ) lsu_cam ( .clk (clk), .reset (reset), .write_addr (req_tag), diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index f8c5ac37..502b3bfc 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -14,52 +14,52 @@ module VX_mem_arb #( input wire reset, // input requests - input wire [NUM_REQUESTS-1:0] in_mem_req_valid, - input wire [NUM_REQUESTS-1:0] in_mem_req_rw, - input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] in_mem_req_byteen, - input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] in_mem_req_addr, - input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] in_mem_req_data, - input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_mem_req_tag, - output wire [NUM_REQUESTS-1:0] in_mem_req_ready, + input wire [NUM_REQUESTS-1:0] mem_req_valid_in, + input wire [NUM_REQUESTS-1:0] mem_req_rw_in, + input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] mem_req_byteen_in, + input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in, + input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in, + input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in, + output wire [NUM_REQUESTS-1:0] mem_req_ready_in, // input response - output wire [NUM_REQUESTS-1:0] in_mem_rsp_valid, - output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] in_mem_rsp_data, - output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_mem_rsp_tag, - input wire [NUM_REQUESTS-1:0] in_mem_rsp_ready, + output wire [NUM_REQUESTS-1:0] mem_rsp_valid_in, + output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in, + output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_rsp_tag_in, + input wire [NUM_REQUESTS-1:0] mem_rsp_ready_in, // output request - output wire out_mem_req_valid, - output wire out_mem_req_rw, - output wire [WORD_SIZE-1:0] out_mem_req_byteen, - output wire [ADDR_WIDTH-1:0] out_mem_req_addr, - output wire [WORD_WIDTH-1:0] out_mem_req_data, - output wire [TAG_OUT_WIDTH-1:0] out_mem_req_tag, - input wire out_mem_req_ready, + output wire mem_req_valid_out, + output wire mem_req_rw_out, + output wire [WORD_SIZE-1:0] mem_req_byteen_out, + output wire [ADDR_WIDTH-1:0] mem_req_addr_out, + output wire [WORD_WIDTH-1:0] mem_req_data_out, + output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out, + input wire mem_req_ready_out, // output response - input wire out_mem_rsp_valid, - input wire [WORD_WIDTH-1:0] out_mem_rsp_data, - input wire [TAG_OUT_WIDTH-1:0] out_mem_rsp_tag, - output wire out_mem_rsp_ready + input wire mem_rsp_valid_out, + input wire [WORD_WIDTH-1:0] mem_rsp_data_out, + input wire [TAG_OUT_WIDTH-1:0] mem_rsp_tag_out, + output wire mem_rsp_ready_out ); if (NUM_REQUESTS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) - assign out_mem_req_valid = in_mem_req_valid; - assign out_mem_req_rw = in_mem_req_rw; - assign out_mem_req_byteen = in_mem_req_byteen; - assign out_mem_req_addr = in_mem_req_addr; - assign out_mem_req_data = in_mem_req_data; - assign out_mem_req_tag = in_mem_req_tag; - assign in_mem_req_ready = out_mem_req_ready; + assign mem_req_valid_out = mem_req_valid_in; + assign mem_req_rw_out = mem_req_rw_in; + assign mem_req_byteen_out = mem_req_byteen_in; + assign mem_req_addr_out = mem_req_addr_in; + assign mem_req_data_out = mem_req_data_in; + assign mem_req_tag_out = mem_req_tag_in; + assign mem_req_ready_in = mem_req_ready_out; - assign in_mem_rsp_valid = out_mem_rsp_valid; - assign in_mem_rsp_data = out_mem_rsp_data; - assign in_mem_rsp_tag = out_mem_rsp_tag; - assign out_mem_rsp_ready = in_mem_rsp_ready; + assign mem_rsp_valid_in = mem_rsp_valid_out; + assign mem_rsp_data_in = mem_rsp_data_out; + assign mem_rsp_tag_in = mem_rsp_tag_out; + assign mem_rsp_ready_out = mem_rsp_ready_in; end else begin @@ -70,31 +70,31 @@ module VX_mem_arb #( ) arbiter ( .clk (clk), .reset (reset), - .requests (in_mem_req_valid), + .requests (mem_req_valid_in), .grant_index (bus_req_sel), `UNUSED_PIN (grant_valid), `UNUSED_PIN (grant_onehot) ); - assign out_mem_req_valid = in_mem_req_valid [bus_req_sel]; - assign out_mem_req_rw = in_mem_req_rw [bus_req_sel]; - assign out_mem_req_byteen = in_mem_req_byteen [bus_req_sel]; - assign out_mem_req_addr = in_mem_req_addr [bus_req_sel]; - assign out_mem_req_data = in_mem_req_data [bus_req_sel]; - assign out_mem_req_tag = {in_mem_req_tag [bus_req_sel], REQS_BITS'(bus_req_sel)}; + assign mem_req_valid_out = mem_req_valid_in [bus_req_sel]; + assign mem_req_rw_out = mem_req_rw_in [bus_req_sel]; + assign mem_req_byteen_out = mem_req_byteen_in [bus_req_sel]; + assign mem_req_addr_out = mem_req_addr_in [bus_req_sel]; + assign mem_req_data_out = mem_req_data_in [bus_req_sel]; + assign mem_req_tag_out = {mem_req_tag_in [bus_req_sel], REQS_BITS'(bus_req_sel)}; for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign in_mem_req_ready[i] = out_mem_req_ready && (bus_req_sel == REQS_BITS'(i)); + assign mem_req_ready_in[i] = mem_req_ready_out && (bus_req_sel == REQS_BITS'(i)); end - wire [REQS_BITS-1:0] bus_rsp_sel = out_mem_rsp_tag[REQS_BITS-1:0]; + wire [REQS_BITS-1:0] bus_rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0]; for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign in_mem_rsp_valid[i] = out_mem_rsp_valid && (bus_rsp_sel == REQS_BITS'(i)); - assign in_mem_rsp_data[i] = out_mem_rsp_data; - assign in_mem_rsp_tag[i] = out_mem_rsp_tag[REQS_BITS +: TAG_IN_WIDTH]; + assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (bus_rsp_sel == REQS_BITS'(i)); + assign mem_rsp_data_in[i] = mem_rsp_data_out; + assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; end - assign out_mem_rsp_ready = in_mem_rsp_ready[bus_rsp_sel]; + assign mem_rsp_ready_out = mem_rsp_ready_in[bus_rsp_sel]; end diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index d7a4ffd7..40fec965 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -12,7 +12,7 @@ module VX_mem_unit # ( VX_cache_core_req_if core_dcache_req_if, VX_cache_core_rsp_if core_dcache_rsp_if, - // Dram <-> Dcache + // DRAM <-> Dcache VX_cache_dram_req_if dcache_dram_req_if, VX_cache_dram_rsp_if dcache_dram_rsp_if, VX_cache_snp_req_if dcache_snp_req_if, @@ -22,7 +22,7 @@ module VX_mem_unit # ( VX_cache_core_req_if core_icache_req_if, VX_cache_core_rsp_if core_icache_rsp_if, - // Dram <-> Icache + // DRAM <-> Icache VX_cache_dram_req_if icache_dram_req_if, VX_cache_dram_rsp_if icache_dram_rsp_if ); @@ -65,11 +65,10 @@ module VX_mem_unit # ( .NUM_REQUESTS (`SNUM_REQUESTS), .CREQ_SIZE (`SCREQ_SIZE), .MRVQ_SIZE (8), - .DFPQ_SIZE (1), + .DRPQ_SIZE (1), .SNRQ_SIZE (1), .CWBQ_SIZE (`SCWBQ_SIZE), - .DWBQ_SIZE (1), - .DFQQ_SIZE (1), + .DREQ_SIZE (1), .SNOOP_FORWARDING (0), .DRAM_ENABLE (0), .WRITE_ENABLE (1), @@ -146,11 +145,10 @@ module VX_mem_unit # ( .NUM_REQUESTS (`DNUM_REQUESTS), .CREQ_SIZE (`DCREQ_SIZE), .MRVQ_SIZE (`DMRVQ_SIZE), - .DFPQ_SIZE (`DDFPQ_SIZE), + .DRPQ_SIZE (`DDRPQ_SIZE), .SNRQ_SIZE (`DSNRQ_SIZE), .CWBQ_SIZE (`DCWBQ_SIZE), - .DWBQ_SIZE (`DDWBQ_SIZE), - .DFQQ_SIZE (`DDFQQ_SIZE), + .DREQ_SIZE (`DDREQ_SIZE), .SNOOP_FORWARDING (0), .DRAM_ENABLE (1), .WRITE_ENABLE (1), @@ -228,11 +226,10 @@ module VX_mem_unit # ( .NUM_REQUESTS (`INUM_REQUESTS), .CREQ_SIZE (`ICREQ_SIZE), .MRVQ_SIZE (`IMRVQ_SIZE), - .DFPQ_SIZE (`IDFPQ_SIZE), + .DRPQ_SIZE (`IDRPQ_SIZE), .SNRQ_SIZE (1), .CWBQ_SIZE (`ICWBQ_SIZE), - .DWBQ_SIZE (`IDWBQ_SIZE), - .DFQQ_SIZE (`IDFQQ_SIZE), + .DREQ_SIZE (`IDREQ_SIZE), .SNOOP_FORWARDING (0), .DRAM_ENABLE (1), .WRITE_ENABLE (0), diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index b6a5444c..efb75497 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -200,6 +200,7 @@ module VX_pipeline #( .fpu_commit_if (fpu_commit_if), .gpu_commit_if (gpu_commit_if), + .busy (busy), .ebreak (ebreak) ); diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 92fee56b..27344d77 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -7,6 +7,8 @@ `define SCOPE_ASSIGN(d,s) assign scope_``d = s +`define SCOPE_SIZE 4096 + `else `define SCOPE_IO_VX_icache_stage diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 4a458a01..8dacb37a 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -135,7 +135,7 @@ module Vortex ( wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr; wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data; wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag; - wire l3_core_req_ready; + wire cluster_dram_req_ready; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid; wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data; @@ -196,7 +196,7 @@ module Vortex ( .dram_req_addr (per_cluster_dram_req_addr [i]), .dram_req_data (per_cluster_dram_req_data [i]), .dram_req_tag (per_cluster_dram_req_tag [i]), - .dram_req_ready (l3_core_req_ready), + .dram_req_ready (cluster_dram_req_ready), .dram_rsp_valid (per_cluster_dram_rsp_valid [i]), .dram_rsp_data (per_cluster_dram_rsp_data [i]), @@ -252,34 +252,34 @@ module Vortex ( .reset (reset), // input requests - .in_io_req_valid (per_cluster_io_req_valid), - .in_io_req_rw (per_cluster_io_req_rw), - .in_io_req_byteen (per_cluster_io_req_byteen), - .in_io_req_addr (per_cluster_io_req_addr), - .in_io_req_data (per_cluster_io_req_data), - .in_io_req_tag (per_cluster_io_req_tag), - .in_io_req_ready (per_cluster_io_req_ready), + .io_req_valid_in (per_cluster_io_req_valid), + .io_req_rw_in (per_cluster_io_req_rw), + .io_req_byteen_in (per_cluster_io_req_byteen), + .io_req_addr_in (per_cluster_io_req_addr), + .io_req_data_in (per_cluster_io_req_data), + .io_req_tag_in (per_cluster_io_req_tag), + .io_req_ready_in (per_cluster_io_req_ready), // input responses - .in_io_rsp_valid (per_cluster_io_rsp_valid), - .in_io_rsp_data (per_cluster_io_rsp_data), - .in_io_rsp_tag (per_cluster_io_rsp_tag), - .in_io_rsp_ready (per_cluster_io_rsp_ready), + .io_rsp_valid_in (per_cluster_io_rsp_valid), + .io_rsp_data_in (per_cluster_io_rsp_data), + .io_rsp_tag_in (per_cluster_io_rsp_tag), + .io_rsp_ready_in (per_cluster_io_rsp_ready), // output request - .out_io_req_valid (io_req_valid), - .out_io_req_rw (io_req_rw), - .out_io_req_byteen (io_req_byteen), - .out_io_req_addr (io_req_addr), - .out_io_req_data (io_req_data), - .out_io_req_tag (io_req_tag), - .out_io_req_ready (io_req_ready), + .io_req_valid_out (io_req_valid), + .io_req_rw_out (io_req_rw), + .io_req_byteen_out (io_req_byteen), + .io_req_addr_out (io_req_addr), + .io_req_data_out (io_req_data), + .io_req_tag_out (io_req_tag), + .io_req_ready_out (io_req_ready), // output response - .out_io_rsp_valid (io_rsp_valid), - .out_io_rsp_tag (io_rsp_tag), - .out_io_rsp_data (io_rsp_data), - .out_io_rsp_ready (io_rsp_ready) + .io_rsp_valid_out (io_rsp_valid), + .io_rsp_tag_out (io_rsp_tag), + .io_rsp_data_out (io_rsp_data), + .io_rsp_ready_out (io_rsp_ready) ); VX_csr_io_arb #( @@ -291,28 +291,28 @@ module Vortex ( .request_id (csr_io_request_id), // input requests - .in_csr_io_req_valid (csr_io_req_valid), - .in_csr_io_req_addr (csr_io_req_addr), - .in_csr_io_req_rw (csr_io_req_rw), - .in_csr_io_req_data (csr_io_req_data), - .in_csr_io_req_ready (csr_io_req_ready), + .csr_io_req_valid_in (csr_io_req_valid), + .csr_io_req_addr_in (csr_io_req_addr), + .csr_io_req_rw_in (csr_io_req_rw), + .csr_io_req_data_in (csr_io_req_data), + .csr_io_req_ready_in (csr_io_req_ready), // input responses - .in_csr_io_rsp_valid (per_cluster_csr_io_rsp_valid), - .in_csr_io_rsp_data (per_cluster_csr_io_rsp_data), - .in_csr_io_rsp_ready (per_cluster_csr_io_rsp_ready), + .csr_io_rsp_valid_in (per_cluster_csr_io_rsp_valid), + .csr_io_rsp_data_in (per_cluster_csr_io_rsp_data), + .csr_io_rsp_ready_in (per_cluster_csr_io_rsp_ready), // output request - .out_csr_io_req_valid (per_cluster_csr_io_req_valid), - .out_csr_io_req_addr (per_cluster_csr_io_req_addr), - .out_csr_io_req_rw (per_cluster_csr_io_req_rw), - .out_csr_io_req_data (per_cluster_csr_io_req_data), - .out_csr_io_req_ready (per_cluster_csr_io_req_ready), + .csr_io_req_valid_out (per_cluster_csr_io_req_valid), + .csr_io_req_addr_out (per_cluster_csr_io_req_addr), + .csr_io_req_rw_out (per_cluster_csr_io_req_rw), + .csr_io_req_data_out (per_cluster_csr_io_req_data), + .csr_io_req_ready_out (per_cluster_csr_io_req_ready), // output response - .out_csr_io_rsp_valid (csr_io_rsp_valid), - .out_csr_io_rsp_data (csr_io_rsp_data), - .out_csr_io_rsp_ready (csr_io_rsp_ready) + .csr_io_rsp_valid_out (csr_io_rsp_valid), + .csr_io_rsp_data_out (csr_io_rsp_data), + .csr_io_rsp_ready_out (csr_io_rsp_ready) ); assign busy = (| per_cluster_busy); @@ -320,56 +320,56 @@ module Vortex ( // L3 Cache /////////////////////////////////////////////////////////// - wire [`L3NUM_REQUESTS-1:0] l3_core_req_valid; - wire [`L3NUM_REQUESTS-1:0] l3_core_req_rw; - wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen; - wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr; - wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data; - wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag; + wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_valid; + wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_rw; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] cluster_dram_req_byteen; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_dram_req_addr; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_req_data; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_req_tag; - wire [`L3NUM_REQUESTS-1:0] l3_core_rsp_valid; - wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data; - wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag; - wire l3_core_rsp_ready; + wire [`L3NUM_REQUESTS-1:0] cluster_dram_rsp_valid; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag; + wire cluster_dram_rsp_ready; - wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr; - wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate; - wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag; - wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready; + wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_valid; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_snp_fwdout_addr; + wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_invalidate; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdout_tag; + wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_ready; - wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid; - wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag; - wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready; + wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_valid; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdin_tag; + wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_ready; for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin // Core Request - assign l3_core_req_valid [i] = per_cluster_dram_req_valid [i]; - assign l3_core_req_rw [i] = per_cluster_dram_req_rw [i]; - assign l3_core_req_byteen [i] = per_cluster_dram_req_byteen[i]; - assign l3_core_req_addr [i] = per_cluster_dram_req_addr [i]; - assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i]; - assign l3_core_req_data [i] = per_cluster_dram_req_data [i]; + assign cluster_dram_req_valid [i] = per_cluster_dram_req_valid [i]; + assign cluster_dram_req_rw [i] = per_cluster_dram_req_rw [i]; + assign cluster_dram_req_byteen [i] = per_cluster_dram_req_byteen[i]; + assign cluster_dram_req_addr [i] = per_cluster_dram_req_addr [i]; + assign cluster_dram_req_tag [i] = per_cluster_dram_req_tag [i]; + assign cluster_dram_req_data [i] = per_cluster_dram_req_data [i]; // Core Response - assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i] && l3_core_rsp_ready; - assign per_cluster_dram_rsp_data [i] = l3_core_rsp_data [i]; - assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i]; + assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] && cluster_dram_rsp_ready; + assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i]; + assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i]; // Snoop Forwarding out - assign per_cluster_snp_req_valid [i] = l3_snp_fwdout_valid[i]; - assign per_cluster_snp_req_addr [i] = l3_snp_fwdout_addr[i]; - assign per_cluster_snp_req_invalidate [i] = l3_snp_fwdout_invalidate[i]; - assign per_cluster_snp_req_tag [i] = l3_snp_fwdout_tag[i]; - assign l3_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i]; + assign per_cluster_snp_req_valid [i] = cluster_snp_fwdout_valid[i]; + assign per_cluster_snp_req_addr [i] = cluster_snp_fwdout_addr[i]; + assign per_cluster_snp_req_invalidate [i] = cluster_snp_fwdout_invalidate[i]; + assign per_cluster_snp_req_tag [i] = cluster_snp_fwdout_tag[i]; + assign cluster_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i]; // Snoop Forwarding in - assign l3_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i]; - assign l3_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i]; - assign per_cluster_snp_rsp_ready [i] = l3_snp_fwdin_ready [i]; + assign cluster_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i]; + assign cluster_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i]; + assign per_cluster_snp_rsp_ready [i] = cluster_snp_fwdin_ready [i]; end - assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready); + assign cluster_dram_rsp_ready = (& per_cluster_dram_rsp_ready); VX_cache #( .CACHE_ID (`L3CACHE_ID), @@ -380,11 +380,10 @@ module Vortex ( .NUM_REQUESTS (`L3NUM_REQUESTS), .CREQ_SIZE (`L3CREQ_SIZE), .MRVQ_SIZE (`L3MRVQ_SIZE), - .DFPQ_SIZE (`L3DFPQ_SIZE), + .DRPQ_SIZE (`L3DRPQ_SIZE), .SNRQ_SIZE (`L3SNRQ_SIZE), .CWBQ_SIZE (`L3CWBQ_SIZE), - .DWBQ_SIZE (`L3DWBQ_SIZE), - .DFQQ_SIZE (`L3DFQQ_SIZE), + .DREQ_SIZE (`L3DREQ_SIZE), .DRAM_ENABLE (1), .WRITE_ENABLE (1), .SNOOP_FORWARDING (1), @@ -401,19 +400,19 @@ module Vortex ( .reset (reset), // Core request - .core_req_valid (l3_core_req_valid), - .core_req_rw (l3_core_req_rw), - .core_req_byteen (l3_core_req_byteen), - .core_req_addr (l3_core_req_addr), - .core_req_data (l3_core_req_data), - .core_req_tag (l3_core_req_tag), - .core_req_ready (l3_core_req_ready), + .core_req_valid (cluster_dram_req_valid), + .core_req_rw (cluster_dram_req_rw), + .core_req_byteen (cluster_dram_req_byteen), + .core_req_addr (cluster_dram_req_addr), + .core_req_data (cluster_dram_req_data), + .core_req_tag (cluster_dram_req_tag), + .core_req_ready (cluster_dram_req_ready), // Core response - .core_rsp_valid (l3_core_rsp_valid), - .core_rsp_data (l3_core_rsp_data), - .core_rsp_tag (l3_core_rsp_tag), - .core_rsp_ready (l3_core_rsp_ready), + .core_rsp_valid (cluster_dram_rsp_valid), + .core_rsp_data (cluster_dram_rsp_data), + .core_rsp_tag (cluster_dram_rsp_tag), + .core_rsp_ready (cluster_dram_rsp_ready), // DRAM request .dram_req_valid (dram_req_valid), @@ -443,16 +442,16 @@ module Vortex ( .snp_rsp_ready (snp_rsp_ready), // Snoop forwarding out - .snp_fwdout_valid (l3_snp_fwdout_valid), - .snp_fwdout_addr (l3_snp_fwdout_addr), - .snp_fwdout_invalidate(l3_snp_fwdout_invalidate), - .snp_fwdout_tag (l3_snp_fwdout_tag), - .snp_fwdout_ready (l3_snp_fwdout_ready), + .snp_fwdout_valid (cluster_snp_fwdout_valid), + .snp_fwdout_addr (cluster_snp_fwdout_addr), + .snp_fwdout_invalidate(cluster_snp_fwdout_invalidate), + .snp_fwdout_tag (cluster_snp_fwdout_tag), + .snp_fwdout_ready (cluster_snp_fwdout_ready), // Snoop forwarding in - .snp_fwdin_valid (l3_snp_fwdin_valid), - .snp_fwdin_tag (l3_snp_fwdin_tag), - .snp_fwdin_ready (l3_snp_fwdin_ready) + .snp_fwdin_valid (cluster_snp_fwdin_valid), + .snp_fwdin_tag (cluster_snp_fwdin_tag), + .snp_fwdin_ready (cluster_snp_fwdin_ready) ); end diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 4ff264cb..23ea49cd 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -19,18 +19,16 @@ module VX_bank #( parameter CREQ_SIZE = 0, // Miss Reserv Queue Knob parameter MRVQ_SIZE = 0, - // Dram Fill Rsp Queue Size - parameter DFPQ_SIZE = 0, + // DRAM Response Queue Size + parameter DRPQ_SIZE = 0, // Snoop Req Queue Size parameter SNRQ_SIZE = 0, // Queues for writebacks Knobs {1, 2, 4, 8, ...} // Core Writeback Queue Size parameter CWBQ_SIZE = 0, - // Dram Writeback Queue Size - parameter DWBQ_SIZE = 0, - // Dram Fill Req Queue Size - parameter DFQQ_SIZE = 0, + // DRAM Request Queue Size + parameter DREQ_SIZE = 0, // Enable cache writeable parameter WRITE_ENABLE = 0, @@ -65,40 +63,37 @@ module VX_bank #( output wire core_req_ready, // Core Response - output wire core_rsp_valid, - output wire [`REQS_BITS-1:0] core_rsp_tid, - output wire [`WORD_WIDTH-1:0] core_rsp_data, - output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag, - input wire core_rsp_ready, + output wire core_rsp_valid, + output wire [`REQS_BITS-1:0] core_rsp_tid, + output wire [`WORD_WIDTH-1:0] core_rsp_data, + output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag, + input wire core_rsp_ready, - // Dram Fill Requests - output wire dram_fill_req_valid, - output wire[`LINE_ADDR_WIDTH-1:0] dram_fill_req_addr, - input wire dram_fill_req_ready, + // DRAM request + output wire dram_req_valid, + output wire dram_req_rw, + output wire [BANK_LINE_SIZE-1:0] dram_req_byteen, + output wire [`LINE_ADDR_WIDTH-1:0] dram_req_addr, + output wire [`BANK_LINE_WIDTH-1:0] dram_req_data, + input wire dram_req_ready, + + // DRAM response + input wire dram_rsp_valid, + input wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr, + input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data, + output wire dram_rsp_ready, - // Dram Fill Response - input wire dram_fill_rsp_valid, - input wire [`BANK_LINE_WIDTH-1:0] dram_fill_rsp_data, - input wire [`LINE_ADDR_WIDTH-1:0] dram_fill_rsp_addr, - output wire dram_fill_rsp_ready, + // Snoop Request + input wire snp_req_valid, + input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr, + input wire snp_req_invalidate, + input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, + output wire snp_req_ready, - // Dram WB Requests - output wire dram_wb_req_valid, - output wire [BANK_LINE_SIZE-1:0] dram_wb_req_byteen, - output wire [`LINE_ADDR_WIDTH-1:0] dram_wb_req_addr, - output wire [`BANK_LINE_WIDTH-1:0] dram_wb_req_data, - input wire dram_wb_req_ready, - - // Snp Request - input wire snp_req_valid, - input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_invalidate, - input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, - output wire snp_req_ready, - - output wire snp_rsp_valid, - output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, - input wire snp_rsp_ready + // Snoop Response + output wire snp_rsp_valid, + output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, + input wire snp_rsp_ready ); `ifdef DBG_CORE_REQ_INFO @@ -137,51 +132,51 @@ module VX_bank #( wire snrq_invalidate_st0; wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0; + wire snp_req_fire = snp_req_valid && snp_req_ready; + assign snp_req_ready = !snrq_full; + VX_generic_queue #( .DATAW(`LINE_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH), .SIZE(SNRQ_SIZE) ) snp_req_queue ( .clk (clk), .reset (reset), - .push (snp_req_valid && snp_req_ready), - .data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}), + .push (snp_req_fire), .pop (snrq_pop), + .data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}), .data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}), .empty (snrq_empty), .full (snrq_full), `UNUSED_PIN (size) ); - assign snp_req_ready = !snrq_full; - wire dfpq_pop; wire dfpq_empty; wire dfpq_full; wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0; wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0; + wire dram_rsp_fire = dram_rsp_valid && dram_rsp_ready; + assign dram_rsp_ready = !dfpq_full; + VX_generic_queue #( - .DATAW(`LINE_ADDR_WIDTH + $bits(dram_fill_rsp_data)), - .SIZE(DFPQ_SIZE) + .DATAW(`LINE_ADDR_WIDTH + $bits(dram_rsp_data)), + .SIZE(DRPQ_SIZE) ) dfp_queue ( .clk (clk), .reset (reset), - .push (dram_fill_rsp_valid && dram_fill_rsp_ready), - .data_in ({dram_fill_rsp_addr, dram_fill_rsp_data}), + .push (dram_rsp_fire), .pop (dfpq_pop), + .data_in ({dram_rsp_addr, dram_rsp_data}), .data_out({dfpq_addr_st0, dfpq_filldata_st0}), .empty (dfpq_empty), .full (dfpq_full), `UNUSED_PIN (size) ); - assign dram_fill_rsp_ready = !dfpq_full; - wire reqq_pop; - wire reqq_push; wire reqq_empty; wire reqq_full; - wire reqq_req_st0; wire [`REQS_BITS-1:0] reqq_req_tid_st0; wire reqq_req_rw_st0; wire [WORD_SIZE-1:0] reqq_req_byteen_st0; @@ -191,6 +186,9 @@ module VX_bank #( wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0; wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0; + wire core_req_fire = (| core_req_valid) && core_req_ready; + assign core_req_ready = !reqq_full; + VX_bank_core_req_arb #( .WORD_SIZE (WORD_SIZE), .NUM_REQUESTS (NUM_REQUESTS), @@ -201,7 +199,7 @@ module VX_bank #( .clk (clk), .reset (reset), // Enqueue - .reqq_push (reqq_push), + .reqq_push (core_req_fire), .bank_valids (core_req_valid), .bank_rw (core_req_rw), .bank_byteen (core_req_byteen), @@ -211,7 +209,6 @@ module VX_bank #( // Dequeue .reqq_pop (reqq_pop), - .reqq_req_st0 (reqq_req_st0), .reqq_req_tid_st0 (reqq_req_tid_st0), .reqq_req_rw_st0 (reqq_req_rw_st0), .reqq_req_byteen_st0 (reqq_req_byteen_st0), @@ -222,26 +219,21 @@ module VX_bank #( .reqq_full (reqq_full) ); - assign core_req_ready = !reqq_full; - assign reqq_push = (| core_req_valid) && core_req_ready; - - wire mrvq_pop; - wire mrvq_full; - wire mrvq_stop; - wire mrvq_valid_st0; - wire[`REQS_BITS-1:0] mrvq_tid_st0; - wire [`LINE_ADDR_WIDTH-1:0] mrvq_addr_st0; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] mrvq_wsel_st0; - wire [`WORD_WIDTH-1:0] mrvq_writeword_st0; - wire [`REQ_TAG_WIDTH-1:0] mrvq_tag_st0; - wire mrvq_rw_st0; - wire [WORD_SIZE-1:0] mrvq_byteen_st0; - wire mrvq_is_snp_st0; - wire mrvq_snp_invalidate_st0; - - wire mrvq_pending_hazard_st1; - wire st2_pending_hazard_st1; - wire force_request_miss_st1; + wire msrq_pop; + wire msrq_full; + wire msrq_almfull; + wire msrq_valid_st0; + wire[`REQS_BITS-1:0] msrq_tid_st0; + wire [`LINE_ADDR_WIDTH-1:0] msrq_addr_st0; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] msrq_wsel_st0; + wire [`WORD_WIDTH-1:0] msrq_writeword_st0; + wire [`REQ_TAG_WIDTH-1:0] msrq_tag_st0; + wire msrq_rw_st0; + wire [WORD_SIZE-1:0] msrq_byteen_st0; + wire msrq_is_snp_st0; + wire msrq_snp_invalidate_st0; + wire msrq_pending_hazard_st0; + wire msrq_pending_hazard_st1; wire[`REQS_BITS-1:0] miss_add_tid; wire[`REQ_TAG_WIDTH-1:0] miss_add_tag; @@ -249,48 +241,41 @@ module VX_bank #( wire[WORD_SIZE-1:0] miss_add_byteen; wire[`LINE_ADDR_WIDTH-1:0] addr_st2; - wire is_fill_st2; - wire recover_mrvq_state_st2; + wire is_msrq_miss_st2; - wire mrvq_push_stall; + wire msrq_push_stall; wire cwbq_push_stall; wire dwbq_push_stall; - wire dram_fill_req_stall; wire stall_bank_pipe; wire is_fill_st1; -`DEBUG_BEGIN - wire going_to_write_st1; -`DEBUG_END - //determines if the if it is time to pop a req from the queues - //unqual - the req does NOT qualify for execution in the bank. - wire mrvq_pop_unqual = mrvq_valid_st0; - wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty; - wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1 && !is_fill_st1; - wire snrq_pop_unqual = !mrvq_stop && !reqq_pop_unqual && !reqq_pop_unqual && !mrvq_pop_unqual && !dfpq_pop_unqual && !snrq_empty && !reqq_req_st0; // if there's any reqq_req, don't schedule snrq. + // determine which queue to pop next in piority order + wire msrq_pop_unqual = msrq_valid_st0; + wire dfpq_pop_unqual = !msrq_pop_unqual && !dfpq_empty; + wire reqq_pop_unqual = !msrq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && !msrq_almfull; + wire snrq_pop_unqual = !msrq_pop_unqual && !dfpq_pop_unqual && !reqq_pop_unqual && !snrq_empty && !msrq_almfull; - assign mrvq_pop = mrvq_pop_unqual && !stall_bank_pipe && !recover_mrvq_state_st2; + assign msrq_pop = msrq_pop_unqual && !stall_bank_pipe + && !is_msrq_miss_st2; // stop if previous request was a miss assign dfpq_pop = dfpq_pop_unqual && !stall_bank_pipe; assign reqq_pop = reqq_pop_unqual && !stall_bank_pipe; assign snrq_pop = snrq_pop_unqual && !stall_bank_pipe; - //signals to progress to the next stage - wire qual_is_fill_st0; - wire qual_valid_st0; - wire [`LINE_ADDR_WIDTH-1:0] qual_addr_st0; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] qual_wsel_st0; - wire qual_is_mrvq_st0; + wire is_fill_st0; + wire valid_st0; + wire [`LINE_ADDR_WIDTH-1:0] addr_st0; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st0; + wire is_msrq_st0; - wire [`WORD_WIDTH-1:0] qual_writeword_st0; - wire [`BANK_LINE_WIDTH-1:0] qual_writedata_st0; - wire [`REQ_INST_META_WIDTH-1:0] qual_inst_meta_st0; - wire qual_going_to_write_st0; - wire qual_is_snp_st0; - wire qual_snp_invalidate_st0; + wire [`WORD_WIDTH-1:0] writeword_st0; + wire [`BANK_LINE_WIDTH-1:0] writedata_st0; + wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st0; + wire is_snp_st0; + wire snp_invalidate_st0; + wire msrq_pending_hazard_unqual_st0; - //signals to be *used* in the next stage wire valid_st1; wire [`LINE_ADDR_WIDTH-1:0] addr_st1; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1; @@ -299,64 +284,56 @@ module VX_bank #( wire [`BANK_LINE_WIDTH-1:0] writedata_st1; wire is_snp_st1; wire snp_invalidate_st1; - wire is_mrvq_st1; + wire is_msrq_st1; + wire msrq_pending_hazard_st1; - //Determine which req will progress to the next stage - assign qual_is_fill_st0 = dfpq_pop_unqual; //dram is filling a request + assign is_msrq_st0 = msrq_pop_unqual; - assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped + assign is_fill_st0 = dfpq_pop_unqual; - //Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req - assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 : - dfpq_pop_unqual ? dfpq_addr_st0 : - reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : - snrq_pop_unqual ? snrq_addr_st0 : - 0; + assign valid_st0 = dfpq_pop || msrq_pop || reqq_pop || snrq_pop; + + assign addr_st0 = msrq_pop_unqual ? msrq_addr_st0 : + dfpq_pop_unqual ? dfpq_addr_st0 : + reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : + snrq_pop_unqual ? snrq_addr_st0 : + 0; - //Word select does ? Does this just pick a specific word from the line instead of the whole line? if (`WORD_SELECT_WIDTH != 0) begin - assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] : - mrvq_pop_unqual ? mrvq_wsel_st0 : - 0; + assign wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] : + msrq_pop_unqual ? msrq_wsel_st0 : + 0; end else begin - `UNUSED_VAR(mrvq_wsel_st0) - assign qual_wsel_st0 = 0; + `UNUSED_VAR(msrq_wsel_st0) + assign wsel_st0 = 0; end - //if you are filling from dram then that is the write data? What about core? What is 57? - assign qual_writedata_st0 = dfpq_pop_unqual ? dfpq_filldata_st0 : 57; + assign writedata_st0 = dfpq_filldata_st0; - //note that this is stored even if a DRAM fill is processed - assign qual_inst_meta_st0 = mrvq_pop_unqual ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_rw_st0, mrvq_byteen_st0, mrvq_tid_st0} : - reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_rw_st0, reqq_req_byteen_st0, reqq_req_tid_st0} : - snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} : - 0; - - - assign qual_going_to_write_st0 = dfpq_pop_unqual ? 1 : - (mrvq_pop_unqual && mrvq_rw_st0) ? 1 : - (reqq_pop_unqual && reqq_req_rw_st0) ? 1 : - 0; + assign inst_meta_st0 = msrq_pop_unqual ? {`REQ_TAG_WIDTH'(msrq_tag_st0) , msrq_rw_st0, msrq_byteen_st0, msrq_tid_st0} : + reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_rw_st0, reqq_req_byteen_st0, reqq_req_tid_st0} : + snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} : + 0; - //snp signals check to see if the miss reserve as a snp in it first. - assign qual_is_snp_st0 = mrvq_pop_unqual ? mrvq_is_snp_st0 : - snrq_pop_unqual ? 1 : - 0; - //if we are popping from the miss reserve then assign to the mrvq invalidate. If not and popping from the snoop queue use the snoop invalidate. Else this is 0 - assign qual_snp_invalidate_st0 = mrvq_pop_unqual ? mrvq_snp_invalidate_st0 : - snrq_pop_unqual ? snrq_invalidate_st0 : - 0; - //choose which word of the lien is being written to - assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 : + assign is_snp_st0 = msrq_pop_unqual ? msrq_is_snp_st0 : + snrq_pop_unqual ? 1 : + 0; + + assign snp_invalidate_st0 = msrq_pop_unqual ? msrq_snp_invalidate_st0 : + snrq_pop_unqual ? snrq_invalidate_st0 : + 0; + + assign writeword_st0 = msrq_pop_unqual ? msrq_writeword_st0 : reqq_pop_unqual ? reqq_req_writeword_st0 : - 0; + 0; - - assign qual_is_mrvq_st0 = mrvq_pop_unqual; + // we have a miss in msrq or going into it for the current address + wire msrq_pending_hazard_st0 = msrq_pending_hazard_unqual_st0 + || (miss_add_unqual && (addr_st2 == addr_st0)); `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin - assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0; + assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0; end `endif @@ -367,10 +344,16 @@ module VX_bank #( .reset (reset), .stall (stall_bank_pipe), .flush (1'b0), - .in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), - .out ({is_mrvq_st1 , is_snp_st1, snp_invalidate_st1, going_to_write_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) + .in ({is_msrq_st0, is_snp_st0, snp_invalidate_st0, msrq_pending_hazard_st0, valid_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}), + .out ({is_msrq_st1, is_snp_st1, snp_invalidate_st1, msrq_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) ); +`ifdef DBG_CORE_REQ_INFO + if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin + assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1; + end +`endif + wire[`WORD_WIDTH-1:0] readword_st1; wire[`BANK_LINE_WIDTH-1:0] readdata_st1; wire[`TAG_SELECT_BITS-1:0] readtag_st1; @@ -382,25 +365,21 @@ module VX_bank #( wire [`REQS_BITS-1:0] tid_st1; `DEBUG_END wire mem_rw_st1; - wire [WORD_SIZE-1:0] mem_byteen_st1; - wire fill_saw_dirty_st1; - wire snp_to_mrvq_st1; - wire mrvq_init_ready_state_st1; - wire miss_add_because_miss; - wire mrvq_recover_ready_state_st1; + wire [WORD_SIZE-1:0] mem_byteen_st1; + wire miss_add_unqual; assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; - assign st2_pending_hazard_st1 = (miss_add_because_miss) - && ((addr_st2 == addr_st1) && !is_fill_st2); + // we have a miss in st2 for the current address + wire st2_pending_hazard_st1 = miss_add_unqual && (addr_st2 == addr_st1); - assign force_request_miss_st1 = (valid_st1 && !is_mrvq_st1 && (mrvq_pending_hazard_st1 || st2_pending_hazard_st1)) - || (valid_st1 && is_mrvq_st1 && recover_mrvq_state_st2); + // force miss to ensure commit order when a new request has pending previous requests to same block + // also force a miss for msrq requests when previous request in st2 got a miss + wire force_miss_st1 = (valid_st1 && !is_msrq_st1 && ~is_fill_st1 && (msrq_pending_hazard_st1 || st2_pending_hazard_st1)) + || (valid_st1 && is_msrq_st1 && is_msrq_miss_st2); - assign mrvq_recover_ready_state_st1 = valid_st1 - && is_mrvq_st1 - && recover_mrvq_state_st2 - && (addr_st2 == addr_st1); + // access the tag data store + wire tag_data_fire = valid_st1 && !stall_bank_pipe; VX_tag_data_access #( .BANK_ID (BANK_ID), @@ -423,27 +402,19 @@ module VX_bank #( .debug_tagid_st1(debug_tagid_st1), `endif - .stall (stall_bank_pipe), - .stall_bank_pipe(stall_bank_pipe), - - .force_request_miss_st1(force_request_miss_st1), - - // Initial Read - .readaddr_st1(addr_st1[`LINE_SELECT_BITS-1:0]), - // Actual Read/Write - .valid_req_st1 (valid_st1), + .valid_req_st1 (tag_data_fire), .writefill_st1 (is_fill_st1), - .writeaddr_st1 (addr_st1), + .addr_st1 (addr_st1), .wordsel_st1 (wsel_st1), .writeword_st1 (writeword_st1), .writedata_st1 (writedata_st1), .mem_rw_st1 (mem_rw_st1), .mem_byteen_st1 (mem_byteen_st1), - .is_snp_st1 (is_snp_st1), .snp_invalidate_st1(snp_invalidate_st1), + .force_miss_st1 (force_miss_st1), // Read Data .readword_st1 (readword_st1), @@ -451,23 +422,9 @@ module VX_bank #( .readtag_st1 (readtag_st1), .miss_st1 (miss_st1), .dirty_st1 (dirty_st1), - .dirtyb_st1 (dirtyb_st1), - .fill_saw_dirty_st1(fill_saw_dirty_st1), - .snp_to_mrvq_st1(snp_to_mrvq_st1), - .mrvq_init_ready_state_st1(mrvq_init_ready_state_st1) + .dirtyb_st1 (dirtyb_st1) ); - -`ifdef DBG_CORE_REQ_INFO - if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin - assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1; - end else begin - assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0; - end -`endif - wire qual_valid_st1_2 = valid_st1 && !is_fill_st1; - wire is_mrvq_st1_st2 = is_mrvq_st1; - wire valid_st2; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2; wire [`WORD_WIDTH-1:0] writeword_st2; @@ -478,26 +435,21 @@ module VX_bank #( wire [BANK_LINE_SIZE-1:0] dirtyb_st2; wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2; wire [`TAG_SELECT_BITS-1:0] readtag_st2; - wire fill_saw_dirty_st2; + wire is_fill_st2; wire is_snp_st2; wire snp_invalidate_st2; - wire snp_to_mrvq_st2; - wire is_mrvq_st2; - wire mrvq_init_ready_state_st2; - wire mrvq_recover_ready_state_st2; - wire mrvq_init_ready_state_unqual_st2; - wire mrvq_init_ready_state_hazard_st0_st1; - wire mrvq_init_ready_state_hazard_st1_st1; + wire force_miss_st2; + wire is_msrq_st2; VX_generic_register #( - .N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH) + .N(1+ 1+ 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH) ) pipe_reg1 ( .clk (clk), .reset (reset), .stall (stall_bank_pipe), .flush (1'b0), - .in ({mrvq_recover_ready_state_st1, is_mrvq_st1_st2, mrvq_init_ready_state_st1, snp_to_mrvq_st1, is_snp_st1, snp_invalidate_st1, fill_saw_dirty_st1, is_fill_st1, qual_valid_st1_2, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}), - .out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2}) + .in ({is_msrq_st1, force_miss_st1, is_snp_st1, snp_invalidate_st1, is_fill_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}), + .out ({is_msrq_st2, force_miss_st2, is_snp_st2, snp_invalidate_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2}) ); `ifdef DBG_CORE_REQ_INFO @@ -507,39 +459,50 @@ module VX_bank #( `endif // Enqueue to miss reserv if it's a valid miss - assign miss_add_because_miss = valid_st2 && !is_snp_st2 && miss_st2; - wire miss_add_because_pending = snp_to_mrvq_st2; - wire miss_add_unqual = (miss_add_because_miss || miss_add_because_pending); - assign mrvq_push_stall = miss_add_unqual && mrvq_full; + assign miss_add_unqual = miss_st2 || force_miss_st2; + assign msrq_push_stall = miss_add_unqual && msrq_full; wire miss_add = miss_add_unqual - && !mrvq_full - && !(cwbq_push_stall - || dwbq_push_stall - || dram_fill_req_stall); + && !msrq_full + && !cwbq_push_stall + && !dwbq_push_stall; - assign recover_mrvq_state_st2 = miss_add_unqual && is_mrvq_st2; // Doesn't need to include the stalls + // we have a recurrent msrq miss + assign is_msrq_miss_st2 = miss_add_unqual && is_msrq_st2; - wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2; + wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2; wire [`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel = wsel_st2; - wire [`WORD_WIDTH-1:0] miss_add_data = writeword_st2; + wire [`WORD_WIDTH-1:0] miss_add_data = writeword_st2; assign {miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_tid} = inst_meta_st2; - wire miss_add_is_snp = is_snp_st2; + wire miss_add_is_snp = is_snp_st2; wire miss_add_snp_invalidate = snp_invalidate_st2; - wire miss_add_is_mrvq = valid_st2 && is_mrvq_st2 && !stall_bank_pipe; + wire msrq_real_pop_st2 = valid_st2 && is_msrq_st2 && !miss_add_unqual && !stall_bank_pipe; - assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == dfpq_addr_st0); // Doesn't need to be muxed to qual, only care about fills - assign mrvq_init_ready_state_hazard_st1_st1 = miss_add_unqual && is_fill_st1 && (miss_add_addr == addr_st1); + // mark msrq entry that match DRAM fill as 'ready' + wire update_ready_st0 = dfpq_pop; - assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 // When req was in st1e, either matched with an mrvq entery OR mrvq recovering state - || mrvq_init_ready_state_hazard_st0_st1 // If there's a fill in st0 that has the same address as miss_add_addr - || mrvq_init_ready_state_hazard_st1_st1; // If there's a fill in st1 that has the same address as miss_add_addr + // push missed requests as 'ready' + // if it didn't actually missed but had to abort because of pending requets in msrq + // if matching fill request to the block is in stage 0 + // if matching fill request to the block is in stage 1 + wire match_st0_fill_st2 = is_fill_st0 && (miss_add_addr == addr_st0); + wire match_st1_fill_st2 = is_fill_st1 && (miss_add_addr == addr_st1); + wire msrq_init_ready_state_st2 = !miss_st2 + || match_st0_fill_st2 + || match_st1_fill_st2; + + always @(*) begin + if (miss_st2 && (match_st0_fill_st2 || match_st1_fill_st2)) begin + $display("%t: incoming fill - addr=%0h, st0=%b, st1=%b", $time, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), match_st0_fill_st2, match_st1_fill_st2); + end + end VX_cache_miss_resrv #( .BANK_ID (BANK_ID), - .CACHE_ID (CACHE_ID), + .CACHE_ID (CACHE_ID), + .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), @@ -548,12 +511,22 @@ module VX_bank #( .CORE_TAG_WIDTH (CORE_TAG_WIDTH), .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) ) cache_miss_resrv ( - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), - // Enqueue - .miss_add (miss_add), - .is_mrvq (miss_add_is_mrvq), + `ifdef DBG_CORE_REQ_INFO + .debug_pc_st0 (debug_pc_st0), + .debug_rd_st0 (debug_rd_st0), + .debug_wid_st0 (debug_wid_st0), + .debug_tagid_st0(debug_tagid_st0), + .debug_pc_st2 (debug_pc_st2), + .debug_rd_st2 (debug_rd_st2), + .debug_wid_st2 (debug_wid_st2), + .debug_tagid_st2(debug_tagid_st2), + `endif + + // enqueue + .miss_add (miss_add), .miss_add_addr (miss_add_addr), .miss_add_wsel (miss_add_wsel), .miss_add_data (miss_add_data), @@ -563,27 +536,30 @@ module VX_bank #( .miss_add_byteen (miss_add_byteen), .miss_add_is_snp (miss_add_is_snp), .miss_add_snp_invalidate (miss_add_snp_invalidate), - .miss_resrv_full (mrvq_full), - .miss_resrv_stop (mrvq_stop), - .mrvq_init_ready_state (mrvq_init_ready_state_st2), + .is_msrq_st2 (is_msrq_st2), + .init_ready_state_st2 (msrq_init_ready_state_st2), - // Broadcast - .is_fill_st1 (is_fill_st1), - .fill_addr_st1 (addr_st1), - .pending_hazard_st1 (mrvq_pending_hazard_st1), + .miss_resrv_full (msrq_full), + .miss_resrv_almfull (msrq_almfull), - // Dequeue - .miss_resrv_pop (mrvq_pop), - .miss_resrv_valid_st0 (mrvq_valid_st0), - .miss_resrv_addr_st0 (mrvq_addr_st0), - .miss_resrv_wsel_st0 (mrvq_wsel_st0), - .miss_resrv_data_st0 (mrvq_writeword_st0), - .miss_resrv_tid_st0 (mrvq_tid_st0), - .miss_resrv_tag_st0 (mrvq_tag_st0), - .miss_resrv_rw_st0 (mrvq_rw_st0), - .miss_resrv_byteen_st0 (mrvq_byteen_st0), - .miss_resrv_is_snp_st0 (mrvq_is_snp_st0), - .miss_resrv_snp_invalidate_st0 (mrvq_snp_invalidate_st0) + // fill + .update_ready_st0 (update_ready_st0), + .fill_addr_st0 (addr_st0), + .pending_hazard_st0 (msrq_pending_hazard_unqual_st0), + + // dequeue + .miss_resrv_schedule_st0 (msrq_pop), + .miss_resrv_valid_st0 (msrq_valid_st0), + .miss_resrv_addr_st0 (msrq_addr_st0), + .miss_resrv_wsel_st0 (msrq_wsel_st0), + .miss_resrv_data_st0 (msrq_writeword_st0), + .miss_resrv_tid_st0 (msrq_tid_st0), + .miss_resrv_tag_st0 (msrq_tag_st0), + .miss_resrv_rw_st0 (msrq_rw_st0), + .miss_resrv_byteen_st0 (msrq_byteen_st0), + .miss_resrv_is_snp_st0 (msrq_is_snp_st0), + .miss_resrv_snp_invalidate_st0 (msrq_snp_invalidate_st0), + .miss_resrv_pop_st2 (msrq_real_pop_st2) ); // Enqueue core response @@ -591,15 +567,13 @@ module VX_bank #( wire cwbq_push, cwbq_pop; wire cwbq_empty, cwbq_full; - wire cwbq_push_unqual = valid_st2 && !miss_st2 && !is_fill_st2 && !is_snp_st2; + wire cwbq_push_unqual = valid_st2 && !is_fill_st2 && !is_snp_st2 && !miss_st2 && !force_miss_st2 && !miss_add_rw; assign cwbq_push_stall = cwbq_push_unqual && cwbq_full; assign cwbq_push = cwbq_push_unqual - && !cwbq_full - && (miss_add_rw == 0) - && !(dwbq_push_stall - || mrvq_push_stall - || dram_fill_req_stall); + && !cwbq_full + && !dwbq_push_stall + && !msrq_push_stall; assign cwbq_pop = core_rsp_valid && core_rsp_ready; @@ -613,11 +587,9 @@ module VX_bank #( ) cwb_queue ( .clk (clk), .reset (reset), - .push (cwbq_push), - .data_in ({cwbq_tid, cwbq_tag, cwbq_data}), - .pop (cwbq_pop), + .data_in ({cwbq_tid, cwbq_tag, cwbq_data}), .data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}), .empty (cwbq_empty), .full (cwbq_full), @@ -626,90 +598,92 @@ module VX_bank #( assign core_rsp_valid = !cwbq_empty; - // Enqueue DRAM fill request - - wire dram_fill_req_fast = miss_add_unqual; // Completely unqualified hint that we might send a dram_fill_req - wire dram_fill_req_unqual = dram_fill_req_fast - && (!mrvq_init_ready_state_st2 - || (is_mrvq_st2 && !mrvq_recover_ready_state_st2)); // If this is set, then we are sure we will be sending a dram_fill_req - - assign dram_fill_req_valid = dram_fill_req_unqual - && !(dwbq_push_stall - || mrvq_push_stall - || cwbq_push_stall); - - assign dram_fill_req_addr = addr_st2; - assign dram_fill_req_stall = dram_fill_req_fast && !dram_fill_req_ready; // Uses dram_fill_req_fast for critical path - - // Enqueue DRAM writeback request + // Enqueue DRAM / Snoop request wire dwbq_push, dwbq_pop; wire dwbq_empty, dwbq_full; - wire dwbq_is_dwb_in, dwbq_is_snp_in; - wire dwbq_is_dwb_out, dwbq_is_snp_out; + wire dwbq_is_dram_out, dwbq_is_snp_out; - assign dwbq_is_snp_in = is_snp_st2 && valid_st2 && !snp_to_mrvq_st2; - assign dwbq_is_dwb_in = (valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2; - wire dwbq_push_unqual = dwbq_is_dwb_in || dwbq_is_snp_in; + wire [`LINE_ADDR_WIDTH-1:0] dwbq_req_addr; + wire [SNP_REQ_TAG_WIDTH-1:0] dwbq_snp_tag; + + wire dwbq_is_dfl_in = miss_st2 && !msrq_init_ready_state_st2 && (!force_miss_st2 || is_msrq_st2); + wire dwbq_is_dwb_in = dirty_st2 && !force_miss_st2 && (is_fill_st2 || is_snp_st2); + wire dwbq_is_snp_in = valid_st2 && !force_miss_st2 && is_snp_st2; + + wire dwbq_is_dram_in = dwbq_is_dfl_in || dwbq_is_dwb_in; + + always @(posedge clk) begin + assert(!is_msrq_st2 || !is_fill_st2); + assert(!dwbq_is_dfl_in || !dwbq_is_dwb_in); + end + + wire dwbq_push_unqual = dwbq_is_dram_in || dwbq_is_snp_in; assign dwbq_push_stall = dwbq_push_unqual && dwbq_full; assign dwbq_push = dwbq_push_unqual - && !dwbq_full - && !(cwbq_push_stall - || mrvq_push_stall - || dram_fill_req_stall); + && !dwbq_full + && !cwbq_push_stall + && !msrq_push_stall; - wire [`LINE_ADDR_WIDTH-1:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]}; - - wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st2 = SNP_REQ_TAG_WIDTH'(miss_add_tag); + assign dwbq_req_addr = dwbq_is_dwb_in ? {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]} : addr_st2; + assign dwbq_snp_tag = SNP_REQ_TAG_WIDTH'(miss_add_tag); VX_generic_queue #( - .DATAW(1 + 1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH), - .SIZE(DWBQ_SIZE) + .DATAW(1 + 1 + 1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH), + .SIZE(DREQ_SIZE) ) dwb_queue ( .clk (clk), .reset (reset), - .push (dwbq_push), - .data_in ({dwbq_is_dwb_in, dwbq_is_snp_in, dirtyb_st2, dwbq_req_addr, readdata_st2, snrq_tag_st2}), - .pop (dwbq_pop), - .data_out({dwbq_is_dwb_out, dwbq_is_snp_out, dram_wb_req_byteen, dram_wb_req_addr, dram_wb_req_data, snp_rsp_tag}), + .data_in ({dwbq_is_dram_in, dwbq_is_snp_in, dwbq_is_dwb_in, dirtyb_st2, dwbq_req_addr, readdata_st2, dwbq_snp_tag}), + .data_out({dwbq_is_dram_out, dwbq_is_snp_out, dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data, snp_rsp_tag}), .empty (dwbq_empty), .full (dwbq_full), `UNUSED_PIN (size) ); - wire dram_wb_req_fire = dram_wb_req_valid && dram_wb_req_ready; - wire snp_rsp_fire = snp_rsp_valid && snp_rsp_ready; + wire dram_req_fire = dram_req_valid && dram_req_ready; + wire snp_rsp_fire = snp_rsp_valid && snp_rsp_ready; - reg dwbq_dual_valid_sel; + reg dwbq_out_sel_snp; always @(posedge clk) begin if (reset) begin - dwbq_dual_valid_sel <= 0; - end else if (dwbq_is_dwb_out + dwbq_out_sel_snp <= 0; + end else if (dwbq_is_dram_out && dwbq_is_snp_out - && (dram_wb_req_fire || snp_rsp_fire)) begin - dwbq_dual_valid_sel <= ~dwbq_dual_valid_sel; + && (dram_req_fire || snp_rsp_fire)) begin + dwbq_out_sel_snp <= ~dwbq_out_sel_snp; end end // when both dwb and snp are asserted, first release the cwb, then release the snp. - assign dram_wb_req_valid = !dwbq_empty && dwbq_is_dwb_out && (~dwbq_is_snp_out || dwbq_dual_valid_sel == 0); - assign snp_rsp_valid = !dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dwb_out || dwbq_dual_valid_sel == 1); + assign dram_req_valid = !dwbq_empty && dwbq_is_dram_out && (~dwbq_is_snp_out || !dwbq_out_sel_snp); + assign snp_rsp_valid = !dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dram_out || dwbq_out_sel_snp); - assign dwbq_pop = (dwbq_is_dwb_out && !dwbq_is_snp_out && dram_wb_req_fire) - || (dwbq_is_snp_out && !dwbq_is_dwb_out && snp_rsp_fire) - || (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire); + assign dwbq_pop = (dwbq_is_dram_out && !dwbq_is_snp_out && dram_req_fire) + || (dwbq_is_snp_out && snp_rsp_fire); // bank pipeline stall - assign stall_bank_pipe = cwbq_push_stall - || dwbq_push_stall - || mrvq_push_stall - || dram_fill_req_stall; + assign stall_bank_pipe = (cwbq_push_stall || dwbq_push_stall || msrq_push_stall); + + `SCOPE_ASSIGN (valid_st0, valid_st0); + `SCOPE_ASSIGN (valid_st1, valid_st1); + `SCOPE_ASSIGN (valid_st2, valid_st2); + + `SCOPE_ASSIGN (is_msrq_st1, is_msrq_st1); + `SCOPE_ASSIGN (miss_st1, miss_st1); + `SCOPE_ASSIGN (dirty_st1, dirty_st1); + `SCOPE_ASSIGN (force_miss_st1, force_miss_st1); + `SCOPE_ASSIGN (stall_pipe, stall_bank_pipe); + + `SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID)); + `SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); + `SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); `ifdef DBG_PRINT_CACHE_BANK always @(posedge clk) begin @@ -719,14 +693,11 @@ module VX_bank #( if (core_rsp_valid && core_rsp_ready) begin $display("%t: cache%0d:%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); end - if (dram_fill_req_valid && dram_fill_req_ready) begin - $display("%t: cache%0d:%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID)); + if (dram_req_valid && dram_req_ready) begin + $display("%t: cache%0d:%0d dram req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_req_addr, BANK_ID), dram_req_data); end - if (dram_wb_req_valid && dram_wb_req_ready) begin - $display("%t: cache%0d:%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data); - end - if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin - $display("%t: cache%0d:%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data); + if (dram_rsp_valid && dram_rsp_ready) begin + $display("%t: cache%0d:%0d dram rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_rsp_addr, BANK_ID), dram_rsp_data); end if (snp_req_valid && snp_req_ready) begin $display("%t: cache%0d:%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag); @@ -734,21 +705,25 @@ module VX_bank #( if (snp_rsp_valid && snp_rsp_ready) begin $display("%t: cache%0d:%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); end + if (msrq_pop) begin + $display("%t: cache%0d:%0d msrq_pop: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); + end + if (dfpq_pop) begin + $display("%t: cache%0d:%0d dfpq_pop: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID)); + end + if (reqq_pop) begin + $display("%t: cache%0d:%0d reqq_pop: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); + end + if (snrq_pop) begin + $display("%t: cache%0d:%0d snrq_pop: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); + end + if (cwbq_push) begin + $display("%t: cache%0d:%0d cwbq_push: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2); + end + if (dwbq_push) begin + $display("%t: cache%0d:%0d dwbq_push: addr=%0h wid=%0d, PC=%0h, fill=%b, wb=%b, snp=%b", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2, dwbq_is_dfl_in, dwbq_is_dwb_in, dwbq_is_snp_in); + end end `endif -`SCOPE_ASSIGN (valid_st0, qual_valid_st0); -`SCOPE_ASSIGN (valid_st1, valid_st1); -`SCOPE_ASSIGN (valid_st2, valid_st2); - -`SCOPE_ASSIGN (is_mrvq_st1, is_mrvq_st1); -`SCOPE_ASSIGN (miss_st1, miss_st1); -`SCOPE_ASSIGN (dirty_st1, dirty_st1); -`SCOPE_ASSIGN (force_miss_st1, force_request_miss_st1); -`SCOPE_ASSIGN (stall_pipe, stall_bank_pipe); - -`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID)); -`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); -`SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); - endmodule diff --git a/hw/rtl/cache/VX_bank_core_req_arb.v b/hw/rtl/cache/VX_bank_core_req_arb.v index fb700a33..311fb9ff 100644 --- a/hw/rtl/cache/VX_bank_core_req_arb.v +++ b/hw/rtl/cache/VX_bank_core_req_arb.v @@ -26,7 +26,6 @@ module VX_bank_core_req_arb #( // Dequeue Data input wire reqq_pop, - output wire reqq_req_st0, output wire [`REQS_BITS-1:0] reqq_req_tid_st0, output wire reqq_req_rw_st0, output wire [WORD_SIZE-1:0] reqq_req_byteen_st0, @@ -107,7 +106,6 @@ module VX_bank_core_req_arb #( ); assign reqq_empty = !qual_has_request; - assign reqq_req_st0 = qual_has_request; assign reqq_req_tid_st0 = qual_request_index; assign reqq_req_byteen_st0 = qual_byteen[qual_request_index]; assign reqq_req_addr_st0 = qual_addr[qual_request_index]; diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 869c32bf..b8ffaafc 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -19,18 +19,16 @@ module VX_cache #( parameter CREQ_SIZE = 8, // Miss Reserv Queue Knob parameter MRVQ_SIZE = 16, - // Dram Fill Rsp Queue Size - parameter DFPQ_SIZE = 16, + // DRAM Response Queue Size + parameter DRPQ_SIZE = 16, // Snoop Req Queue Size parameter SNRQ_SIZE = 16, // Queues for writebacks Knobs {1, 2, 4, 8, ...} // Core Writeback Queue Size parameter CWBQ_SIZE = 8, - // Dram Writeback Queue Size - parameter DWBQ_SIZE = 4, - // Dram Fill Req Queue Size - parameter DFQQ_SIZE = 8, + // DRAM Request Queue Size + parameter DREQ_SIZE = 4, // Enable cache writeable parameter WRITE_ENABLE = 1, @@ -144,17 +142,14 @@ module VX_cache #( wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag; wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; - wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid; - wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr; - wire dram_fill_req_ready; + wire [NUM_BANKS-1:0] per_bank_dram_req_valid; + wire [NUM_BANKS-1:0] per_bank_dram_req_rw; + wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_req_byteen; + wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr; + wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_req_data; + wire [NUM_BANKS-1:0] per_bank_dram_req_ready; - wire [NUM_BANKS-1:0] per_bank_dram_fill_rsp_ready; - - wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready; - wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid; - wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_wb_req_byteen; - wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr; - wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data; + wire [NUM_BANKS-1:0] per_bank_dram_rsp_ready; wire [NUM_BANKS-1:0] per_bank_snp_req_ready; @@ -236,7 +231,7 @@ module VX_cache #( ); assign dram_req_tag = dram_req_addr; - assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready); + assign dram_rsp_ready = (| per_bank_dram_rsp_ready); for (genvar i = 0; i < NUM_BANKS; i++) begin wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid; @@ -245,6 +240,7 @@ module VX_cache #( wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data; + wire curr_bank_core_req_ready; wire curr_bank_core_rsp_valid; wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid; @@ -252,20 +248,17 @@ module VX_cache #( wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag; wire curr_bank_core_rsp_ready; - wire curr_bank_dram_fill_rsp_valid; - wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data; - wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr; - wire curr_bank_dram_fill_rsp_ready; + wire curr_bank_dram_req_valid; + wire curr_bank_dram_req_rw; + wire [BANK_LINE_SIZE-1:0] curr_bank_dram_req_byteen; + wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_req_addr; + wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_req_data; + wire curr_bank_dram_req_ready; - wire curr_bank_dram_fill_req_valid; - wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr; - wire curr_bank_dram_fill_req_ready; - - wire curr_bank_dram_wb_req_valid; - wire [BANK_LINE_SIZE-1:0] curr_bank_dram_wb_req_byteen; - wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr; - wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data; - wire curr_bank_dram_wb_req_ready; + wire curr_bank_dram_rsp_valid; + wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_rsp_data; + wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr; + wire curr_bank_dram_rsp_ready; wire curr_bank_snp_req_valid; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr; @@ -277,8 +270,6 @@ module VX_cache #( wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag; wire curr_bank_snp_rsp_ready; - wire curr_bank_core_req_ready; - // Core Req assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQUESTS{core_req_ready}}); assign curr_bank_core_req_addr = core_req_addr; @@ -295,36 +286,28 @@ module VX_cache #( assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag; assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data; - // Dram fill request - assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid; - if (NUM_BANKS == 1) begin - assign per_bank_dram_fill_req_addr[i] = curr_bank_dram_fill_req_addr; - end else begin - assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i); - end - assign curr_bank_dram_fill_req_ready = dram_fill_req_ready; - - // Dram fill response - if (NUM_BANKS == 1) begin - assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid; - assign curr_bank_dram_fill_rsp_addr = dram_rsp_tag; - end else begin - assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i); - assign curr_bank_dram_fill_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag); - end - assign curr_bank_dram_fill_rsp_data = dram_rsp_data; - assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready; - - // Dram writeback request - assign per_bank_dram_wb_req_valid[i] = curr_bank_dram_wb_req_valid; - assign per_bank_dram_wb_req_byteen[i] = curr_bank_dram_wb_req_byteen; + // DRAM request + assign per_bank_dram_req_valid[i] = curr_bank_dram_req_valid; + assign per_bank_dram_req_rw[i] = curr_bank_dram_req_rw; + assign per_bank_dram_req_byteen[i] = curr_bank_dram_req_byteen; if (NUM_BANKS == 1) begin - assign per_bank_dram_wb_req_addr[i] = curr_bank_dram_wb_req_addr; + assign per_bank_dram_req_addr[i] = curr_bank_dram_req_addr; end else begin - assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i); + assign per_bank_dram_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_req_addr, i); end - assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data; - assign curr_bank_dram_wb_req_ready = per_bank_dram_wb_req_ready[i]; + assign per_bank_dram_req_data[i] = curr_bank_dram_req_data; + assign curr_bank_dram_req_ready = per_bank_dram_req_ready[i]; + + // DRAM response + if (NUM_BANKS == 1) begin + assign curr_bank_dram_rsp_valid = dram_rsp_valid; + assign curr_bank_dram_rsp_addr = dram_rsp_tag; + end else begin + assign curr_bank_dram_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i); + assign curr_bank_dram_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag); + end + assign curr_bank_dram_rsp_data = dram_rsp_data; + assign per_bank_dram_rsp_ready[i] = curr_bank_dram_rsp_ready; // Snoop request if (NUM_BANKS == 1) begin @@ -353,11 +336,10 @@ module VX_cache #( .NUM_REQUESTS (NUM_REQUESTS), .CREQ_SIZE (CREQ_SIZE), .MRVQ_SIZE (MRVQ_SIZE), - .DFPQ_SIZE (DFPQ_SIZE), + .DRPQ_SIZE (DRPQ_SIZE), .SNRQ_SIZE (SNRQ_SIZE), .CWBQ_SIZE (CWBQ_SIZE), - .DWBQ_SIZE (DWBQ_SIZE), - .DFQQ_SIZE (DFQQ_SIZE), + .DREQ_SIZE (DREQ_SIZE), .DRAM_ENABLE (DRAM_ENABLE), .WRITE_ENABLE (WRITE_ENABLE), .SNOOP_FORWARDING (SNOOP_FORWARDING), @@ -367,80 +349,52 @@ module VX_cache #( ) bank ( `SCOPE_BIND_VX_cache_bank(i) - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), // Core request - .core_req_valid (curr_bank_core_req_valid), - .core_req_rw (curr_bank_core_req_rw), - .core_req_byteen (curr_bank_core_req_byteen), - .core_req_addr (curr_bank_core_req_addr), - .core_req_data (curr_bank_core_req_data), - .core_req_tag (curr_bank_core_req_tag), - .core_req_ready (curr_bank_core_req_ready), + .core_req_valid (curr_bank_core_req_valid), + .core_req_rw (curr_bank_core_req_rw), + .core_req_byteen (curr_bank_core_req_byteen), + .core_req_addr (curr_bank_core_req_addr), + .core_req_data (curr_bank_core_req_data), + .core_req_tag (curr_bank_core_req_tag), + .core_req_ready (curr_bank_core_req_ready), // Core response - .core_rsp_valid (curr_bank_core_rsp_valid), - .core_rsp_tid (curr_bank_core_rsp_tid), - .core_rsp_data (curr_bank_core_rsp_data), - .core_rsp_tag (curr_bank_core_rsp_tag), - .core_rsp_ready (curr_bank_core_rsp_ready), + .core_rsp_valid (curr_bank_core_rsp_valid), + .core_rsp_tid (curr_bank_core_rsp_tid), + .core_rsp_data (curr_bank_core_rsp_data), + .core_rsp_tag (curr_bank_core_rsp_tag), + .core_rsp_ready (curr_bank_core_rsp_ready), - // Dram fill request - .dram_fill_req_valid (curr_bank_dram_fill_req_valid), - .dram_fill_req_addr (curr_bank_dram_fill_req_addr), - .dram_fill_req_ready (curr_bank_dram_fill_req_ready), + // DRAM request + .dram_req_valid (curr_bank_dram_req_valid), + .dram_req_rw (curr_bank_dram_req_rw), + .dram_req_byteen (curr_bank_dram_req_byteen), + .dram_req_addr (curr_bank_dram_req_addr), + .dram_req_data (curr_bank_dram_req_data), + .dram_req_ready (curr_bank_dram_req_ready), - // Dram fill response - .dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid), - .dram_fill_rsp_data (curr_bank_dram_fill_rsp_data), - .dram_fill_rsp_addr (curr_bank_dram_fill_rsp_addr), - .dram_fill_rsp_ready (curr_bank_dram_fill_rsp_ready), - - // Dram writeback request - .dram_wb_req_valid (curr_bank_dram_wb_req_valid), - .dram_wb_req_byteen (curr_bank_dram_wb_req_byteen), - .dram_wb_req_addr (curr_bank_dram_wb_req_addr), - .dram_wb_req_data (curr_bank_dram_wb_req_data), - .dram_wb_req_ready (curr_bank_dram_wb_req_ready), + // DRAM response + .dram_rsp_valid (curr_bank_dram_rsp_valid), + .dram_rsp_data (curr_bank_dram_rsp_data), + .dram_rsp_addr (curr_bank_dram_rsp_addr), + .dram_rsp_ready (curr_bank_dram_rsp_ready), // Snoop request - .snp_req_valid (curr_bank_snp_req_valid), - .snp_req_addr (curr_bank_snp_req_addr), - .snp_req_invalidate (curr_bank_snp_req_invalidate), - .snp_req_tag (curr_bank_snp_req_tag), - .snp_req_ready (curr_bank_snp_req_ready), + .snp_req_valid (curr_bank_snp_req_valid), + .snp_req_addr (curr_bank_snp_req_addr), + .snp_req_invalidate (curr_bank_snp_req_invalidate), + .snp_req_tag (curr_bank_snp_req_tag), + .snp_req_ready (curr_bank_snp_req_ready), // Snoop response - .snp_rsp_valid (curr_bank_snp_rsp_valid), - .snp_rsp_tag (curr_bank_snp_rsp_tag), - .snp_rsp_ready (curr_bank_snp_rsp_ready) + .snp_rsp_valid (curr_bank_snp_rsp_valid), + .snp_rsp_tag (curr_bank_snp_rsp_tag), + .snp_rsp_ready (curr_bank_snp_rsp_ready) ); end - VX_cache_dram_req_arb #( - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE), - .DFQQ_SIZE (DFQQ_SIZE) - ) cache_dram_req_arb ( - .clk (clk), - .reset (reset), - .per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid), - .per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr), - .dram_fill_req_ready (dram_fill_req_ready), - .per_bank_dram_wb_req_valid (per_bank_dram_wb_req_valid), - .per_bank_dram_wb_req_byteen (per_bank_dram_wb_req_byteen), - .per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr), - .per_bank_dram_wb_req_data (per_bank_dram_wb_req_data), - .per_bank_dram_wb_req_ready (per_bank_dram_wb_req_ready), - .dram_req_valid (dram_req_valid), - .dram_req_rw (dram_req_rw), - .dram_req_byteen (dram_req_byteen), - .dram_req_addr (dram_req_addr), - .dram_req_data (dram_req_data), - .dram_req_ready (dram_req_ready) - ); - VX_cache_core_rsp_merge #( .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), @@ -461,6 +415,27 @@ module VX_cache #( .core_rsp_ready (core_rsp_ready) ); + VX_cache_dram_req_arb #( + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE) + ) cache_dram_req_arb ( + .clk (clk), + .reset (reset), + .per_bank_dram_req_valid (per_bank_dram_req_valid), + .per_bank_dram_req_rw (per_bank_dram_req_rw), + .per_bank_dram_req_byteen (per_bank_dram_req_byteen), + .per_bank_dram_req_addr (per_bank_dram_req_addr), + .per_bank_dram_req_data (per_bank_dram_req_data), + .per_bank_dram_req_ready (per_bank_dram_req_ready), + .dram_req_valid (dram_req_valid), + .dram_req_rw (dram_req_rw), + .dram_req_byteen (dram_req_byteen), + .dram_req_addr (dram_req_addr), + .dram_req_data (dram_req_data), + .dram_req_ready (dram_req_ready) + ); + VX_snp_rsp_arb #( .NUM_BANKS (NUM_BANKS), .BANK_LINE_SIZE (BANK_LINE_SIZE), diff --git a/hw/rtl/cache/VX_cache_dram_fill_arb.v b/hw/rtl/cache/VX_cache_dram_fill_arb.v deleted file mode 100644 index 8b579f7e..00000000 --- a/hw/rtl/cache/VX_cache_dram_fill_arb.v +++ /dev/null @@ -1,92 +0,0 @@ -`include "VX_cache_config.vh" - -module VX_cache_dram_fill_arb #( - // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 0, - // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 0, - // Dram Fill Req Queue Size - parameter DFQQ_SIZE = 0 -) ( - input wire clk, - input wire reset, - input wire dfqq_push, - input wire[NUM_BANKS-1:0] per_bank_dram_fill_req_valid, - input wire[NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr, - - input wire dfqq_pop, - output wire dfqq_req, - output wire[`DRAM_ADDR_WIDTH-1:0] dfqq_req_addr, - output wire dfqq_empty, - output wire dfqq_full -); - reg [NUM_BANKS-1:0] use_per_bank_dram_fill_req_valid; - reg [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] use_per_bank_dram_fill_req_addr; - - wire [NUM_BANKS-1:0] out_per_bank_dram_fill_req_valid; - wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] out_per_bank_dram_fill_req_addr; - - wire [NUM_BANKS-1:0] use_per_bqual_bank_dram_fill_req_valid; - wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] qual_bank_dram_fill_req_addr; - - wire [NUM_BANKS-1:0] updated_bank_dram_fill_req_valid; - - wire o_empty; - - wire use_empty = !(| use_per_bank_dram_fill_req_valid); - wire out_empty = !(| out_per_bank_dram_fill_req_valid) || o_empty; - - wire push_qual = dfqq_push && !dfqq_full; - wire pop_qual = dfqq_pop && use_empty && !out_empty; - - VX_generic_queue #( - .DATAW(NUM_BANKS * (1+`DRAM_ADDR_WIDTH)), - .SIZE(DFQQ_SIZE) - ) dfqq_queue ( - .clk (clk), - .reset (reset), - .push (push_qual), - .data_in ({per_bank_dram_fill_req_valid, per_bank_dram_fill_req_addr}), - .pop (pop_qual), - .data_out({out_per_bank_dram_fill_req_valid, out_per_bank_dram_fill_req_addr}), - .empty (o_empty), - .full (dfqq_full), - `UNUSED_PIN (size) - ); - - assign use_per_bqual_bank_dram_fill_req_valid = use_empty ? (out_per_bank_dram_fill_req_valid & {NUM_BANKS{!o_empty}}) : (use_per_bank_dram_fill_req_valid & {NUM_BANKS{!use_empty}}); - assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr; - - wire[`BANK_BITS-1:0] qual_request_index; - wire qual_has_request; - - VX_fixed_arbiter #( - .N(NUM_BANKS) - ) sel_bank ( - .clk (clk), - .reset (reset), - .requests (use_per_bqual_bank_dram_fill_req_valid), - .grant_index (qual_request_index), - .grant_valid (qual_has_request), - `UNUSED_PIN (grant_onehot) - ); - - assign dfqq_empty = !qual_has_request; - assign dfqq_req = use_per_bqual_bank_dram_fill_req_valid [qual_request_index]; - assign dfqq_req_addr = qual_bank_dram_fill_req_addr[qual_request_index]; - - assign updated_bank_dram_fill_req_valid = use_per_bqual_bank_dram_fill_req_valid & (~(1 << qual_request_index)); - - always @(posedge clk) begin - if (reset) begin - use_per_bank_dram_fill_req_valid <= 0; - use_per_bank_dram_fill_req_addr <= 0; - end else begin - if (dfqq_pop && qual_has_request) begin - use_per_bank_dram_fill_req_valid <= updated_bank_dram_fill_req_valid; - use_per_bank_dram_fill_req_addr <= qual_bank_dram_fill_req_addr; - end - end - end - -endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v index 340a85ed..3251b4d3 100644 --- a/hw/rtl/cache/VX_cache_dram_req_arb.v +++ b/hw/rtl/cache/VX_cache_dram_req_arb.v @@ -6,88 +6,50 @@ module VX_cache_dram_req_arb #( // Number of banks {1, 2, 4, 8,...} parameter NUM_BANKS = 0, // Size of a word in bytes - parameter WORD_SIZE = 0, - // Dram Fill Req Queue Size - parameter DFQQ_SIZE = 0 + parameter WORD_SIZE = 0 ) ( input wire clk, input wire reset, - - // Fill Request - input wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid, - input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr, - output wire dram_fill_req_ready, - // Writeback Request - input wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid, - input wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_wb_req_byteen, - input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr, - input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data, - output wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready, + // Inputs + input wire [NUM_BANKS-1:0] per_bank_dram_req_valid, + input wire [NUM_BANKS-1:0] per_bank_dram_req_rw, + input wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_req_byteen, + input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr, + input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_req_data, + output wire [NUM_BANKS-1:0] per_bank_dram_req_ready, - // Merged Request + // Output output wire dram_req_valid, output wire dram_req_rw, output wire [BANK_LINE_SIZE-1:0] dram_req_byteen, output wire [`DRAM_ADDR_WIDTH-1:0] dram_req_addr, output wire [`BANK_LINE_WIDTH-1:0] dram_req_data, - input wire dram_req_ready ); - wire dwb_valid; - wire dfqq_req; - - wire[`DRAM_ADDR_WIDTH-1:0] dfqq_req_addr; - -`DEBUG_BEGIN - wire dfqq_empty; -`DEBUG_END - - wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop - wire dfqq_push = (| per_bank_dram_fill_req_valid); - wire dfqq_full; - - VX_cache_dram_fill_arb #( - .BANK_LINE_SIZE(BANK_LINE_SIZE), - .NUM_BANKS(NUM_BANKS), - .DFQQ_SIZE(DFQQ_SIZE) - ) dram_fill_arb ( - .clk (clk), - .reset (reset), - .dfqq_push (dfqq_push), - .per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid), - .per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr), - .dfqq_pop (dfqq_pop), - .dfqq_req (dfqq_req), - .dfqq_req_addr (dfqq_req_addr), - .dfqq_empty (dfqq_empty), - .dfqq_full (dfqq_full) - ); - - assign dram_fill_req_ready = !dfqq_full; - - wire [`BANK_BITS-1:0] dwb_bank; + wire [`BANK_BITS-1:0] sel_bank; + wire sel_valid; VX_fixed_arbiter #( .N(NUM_BANKS) - ) sel_dwb ( + ) sel_arb ( .clk (clk), .reset (reset), - .requests (per_bank_dram_wb_req_valid), - .grant_index (dwb_bank), - .grant_valid (dwb_valid), + .requests (per_bank_dram_req_valid), + .grant_index (sel_bank), + .grant_valid (sel_valid), `UNUSED_PIN (grant_onehot) ); + assign dram_req_valid = sel_valid; + assign dram_req_rw = per_bank_dram_req_rw[sel_bank]; + assign dram_req_byteen = per_bank_dram_req_byteen[sel_bank]; + assign dram_req_addr = per_bank_dram_req_addr[sel_bank]; + assign dram_req_data = per_bank_dram_req_data[sel_bank]; + for (genvar i = 0; i < NUM_BANKS; i++) begin - assign per_bank_dram_wb_req_ready[i] = dram_req_ready && (dwb_bank == `BANK_BITS'(i)); + assign per_bank_dram_req_ready[i] = dram_req_ready && (sel_bank == `BANK_BITS'(i)); end - assign dram_req_valid = dwb_valid || dfqq_req; - assign dram_req_rw = dwb_valid; - assign dram_req_byteen = dwb_valid ? per_bank_dram_wb_req_byteen[dwb_bank] : {BANK_LINE_SIZE{1'b1}}; - assign dram_req_addr = dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : dfqq_req_addr; - assign {dram_req_data} = dwb_valid ? per_bank_dram_wb_req_data[dwb_bank] : 0; - endmodule diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index f56d638e..67fb728b 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -2,7 +2,8 @@ module VX_cache_miss_resrv #( parameter CACHE_ID = 0, - parameter BANK_ID = 0, + parameter BANK_ID = 0, + parameter CORE_TAG_ID_BITS = 0, // Size of line inside a bank in bytes parameter BANK_LINE_SIZE = 0, // Number of banks {1, 2, 4, 8,...} @@ -21,30 +22,43 @@ module VX_cache_miss_resrv #( input wire clk, input wire reset, - // Miss enqueue - input wire miss_add, - input wire is_mrvq, +`ifdef DBG_CORE_REQ_INFO +`IGNORE_WARNINGS_BEGIN + input wire[31:0] debug_pc_st0, + input wire[`NR_BITS-1:0] debug_rd_st0, + input wire[`NW_BITS-1:0] debug_wid_st0, + input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0, + input wire[31:0] debug_pc_st2, + input wire[`NR_BITS-1:0] debug_rd_st2, + input wire[`NW_BITS-1:0] debug_wid_st2, + input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2, +`IGNORE_WARNINGS_END +`endif + + // enqueue + input wire miss_add, input wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr, input wire[`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel, input wire[`WORD_WIDTH-1:0] miss_add_data, input wire[`REQS_BITS-1:0] miss_add_tid, input wire[`REQ_TAG_WIDTH-1:0] miss_add_tag, input wire miss_add_rw, - input wire[WORD_SIZE-1:0] miss_add_byteen, - input wire mrvq_init_ready_state, + input wire[WORD_SIZE-1:0] miss_add_byteen, input wire miss_add_is_snp, input wire miss_add_snp_invalidate, + input wire is_msrq_st2, + input wire init_ready_state_st2, + output wire miss_resrv_full, - output wire miss_resrv_stop, + output wire miss_resrv_almfull, - // Broadcast Address - input wire is_fill_st1, - input wire[`LINE_ADDR_WIDTH-1:0] fill_addr_st1, + // fill + input wire update_ready_st0, + input wire[`LINE_ADDR_WIDTH-1:0] fill_addr_st0, + output wire pending_hazard_st0, - output wire pending_hazard_st1, - - // Miss dequeue - input wire miss_resrv_pop, + // dequeue + input wire miss_resrv_schedule_st0, output wire miss_resrv_valid_st0, output wire[`LINE_ADDR_WIDTH-1:0] miss_resrv_addr_st0, output wire[`UP(`WORD_SELECT_WIDTH)-1:0] miss_resrv_wsel_st0, @@ -54,43 +68,38 @@ module VX_cache_miss_resrv #( output wire miss_resrv_rw_st0, output wire[WORD_SIZE-1:0] miss_resrv_byteen_st0, output wire miss_resrv_is_snp_st0, - output wire miss_resrv_snp_invalidate_st0 + output wire miss_resrv_snp_invalidate_st0, + input wire miss_resrv_pop_st2 ); + localparam FULL_DISTANCE = 2; // need 2 cycles window to prevent pipeline lock + wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table; - reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; + `NO_RW_RAM_CHECK reg [`LINE_ADDR_WIDTH-1:0] addr_table [MRVQ_SIZE-1:0]; reg [MRVQ_SIZE-1:0] valid_table; reg [MRVQ_SIZE-1:0] ready_table; - reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr; + reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr, restore_ptr; reg [`LOG2UP(MRVQ_SIZE)-1:0] head_ptr; reg [`LOG2UP(MRVQ_SIZE)-1:0] tail_ptr; reg [`LOG2UP(MRVQ_SIZE+1)-1:0] size; - `STATIC_ASSERT(MRVQ_SIZE > 5, ("invalid size")) + `STATIC_ASSERT(MRVQ_SIZE > FULL_DISTANCE, ("invalid size")) assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE)); - assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock + assign miss_resrv_almfull = (size >= $bits(size)'(MRVQ_SIZE-FULL_DISTANCE)); - wire enqueue_possible = !miss_resrv_full; - wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; - - reg [MRVQ_SIZE-1:0] make_ready; - reg [MRVQ_SIZE-1:0] make_ready_push; - reg [MRVQ_SIZE-1:0] valid_address_match; - + wire [MRVQ_SIZE-1:0] valid_address_match; for (genvar i = 0; i < MRVQ_SIZE; i++) begin - assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0; - assign make_ready[i] = is_fill_st1 && valid_address_match[i]; + assign valid_address_match[i] = valid_table[i] && (addr_table[i] == fill_addr_st0); end - assign pending_hazard_st1 = |(valid_address_match); + assign pending_hazard_st0 = (| valid_address_match); - wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr]; - wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr; + wire dequeue_ready = valid_table[schedule_ptr] && ready_table[schedule_ptr]; - assign miss_resrv_valid_st0 = dequeue_possible; - assign miss_resrv_addr_st0 = addr_table[dequeue_index]; + assign miss_resrv_valid_st0 = dequeue_ready; + assign miss_resrv_addr_st0 = addr_table[schedule_ptr]; assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, @@ -100,56 +109,51 @@ module VX_cache_miss_resrv #( miss_resrv_is_snp_st0, miss_resrv_snp_invalidate_st0} = metadata_table; - wire mrvq_push = miss_add && enqueue_possible && !is_mrvq; - wire mrvq_pop = miss_resrv_pop && dequeue_possible; + wire msrq_push = miss_add && !is_msrq_st2; - wire recover_state = miss_add && is_mrvq; - wire increment_head = !miss_add && is_mrvq; - - wire update_ready = (|make_ready); - - wire qual_mrvq_init = mrvq_push && mrvq_init_ready_state; - - assign make_ready_push = (MRVQ_SIZE'(qual_mrvq_init)) << enqueue_index; + wire [`LOG2UP(MRVQ_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1); always @(posedge clk) begin if (reset) begin valid_table <= 0; ready_table <= 0; - size <= 0; - schedule_ptr <= 0; + schedule_ptr <= 0; + restore_ptr <= 0; head_ptr <= 0; tail_ptr <= 0; - end else begin - if (mrvq_push) begin - valid_table[enqueue_index] <= 1; - ready_table[enqueue_index] <= mrvq_init_ready_state; - addr_table[enqueue_index] <= miss_add_addr; - tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); - end else if (increment_head) begin - valid_table[head_ptr] <= 0; - head_ptr <= head_ptr + $bits(head_ptr)'(1); - end else if (recover_state) begin - schedule_ptr <= schedule_ptr - $bits(schedule_ptr)'(1); + size <= 0; + end else begin + + if (update_ready_st0) begin + ready_table <= ready_table | valid_address_match; end - // update entry as 'ready' during DRAM fill response - if (update_ready) begin - ready_table <= ready_table | make_ready | make_ready_push; - end - - if (mrvq_pop) begin - ready_table[dequeue_index] <= 0; - schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1); - end - - if (!(mrvq_push && increment_head)) begin - if (mrvq_push) begin + if (miss_add) begin + assert(!miss_resrv_full); + if (is_msrq_st2) begin + // returning missed msrq entry, restore schedule + valid_table[restore_ptr] <= 1; + ready_table[restore_ptr] <= init_ready_state_st2; + restore_ptr <= restore_ptr + $bits(restore_ptr)'(1); + schedule_ptr <= head_ptr; + end else begin + valid_table[tail_ptr] <= 1; + ready_table[tail_ptr] <= init_ready_state_st2; + addr_table[tail_ptr] <= miss_add_addr; + tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); size <= size + $bits(size)'(1); end - if (increment_head) begin - size <= size - $bits(size)'(1); - end + end else if (miss_resrv_pop_st2) begin + head_ptr <= head_ptr_n; + restore_ptr <= head_ptr_n; + valid_table[head_ptr] <= 0; + size <= size - $bits(size)'(1); + end + + if (miss_resrv_schedule_st0) begin + assert(miss_resrv_valid_st0); + valid_table[schedule_ptr] <= 0; + schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1); end end end @@ -160,11 +164,11 @@ module VX_cache_miss_resrv #( .BYTEENW(1), .BUFFERED(0), .RWCHECK(1) - ) metadata_ram ( + ) metadata ( .clk(clk), - .waddr(enqueue_index), - .raddr(dequeue_index), - .wren(mrvq_push), + .waddr(tail_ptr), + .raddr(schedule_ptr), + .wren(msrq_push), .rden(1'b1), .din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}), .dout(metadata_table) @@ -172,8 +176,16 @@ module VX_cache_miss_resrv #( `ifdef DBG_PRINT_CACHE_MSRQ always @(posedge clk) begin - if (mrvq_push || mrvq_pop || increment_head || recover_state) begin - $write("%t: cache%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state); + if (miss_add || miss_resrv_schedule_st0 || miss_resrv_pop_st2) begin + if (miss_add) + if (is_msrq_st2) + $write("%t: cache%0d:%0d msrq-restore addr%0d=%0h ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), init_ready_state_st2); + else + $write("%t: cache%0d:%0d msrq-push addr%0d=%0h ready=%b wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), init_ready_state_st2, debug_wid_st2, debug_pc_st2); + else if (miss_resrv_schedule_st0) + $write("%t: cache%0d:%0d msrq-schedule wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st0, debug_pc_st0); + else if (miss_resrv_pop_st2) + $write("%t: cache%0d:%0d msrq-pop addr%0d wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, debug_wid_st2, debug_pc_st2); for (integer j = 0; j < MRVQ_SIZE; j++) begin if (valid_table[j]) begin $write(" "); @@ -181,6 +193,11 @@ module VX_cache_miss_resrv #( if (~ready_table[j]) $write("!"); $write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID)); end + else if (schedule_ptr == $bits(schedule_ptr)'(j)) begin + $write(" *"); + if (~ready_table[j]) $write("!"); + $write("[addr%0d=%0h]", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID)); + end end $write("\n"); end diff --git a/hw/rtl/cache/VX_snp_rsp_arb.v b/hw/rtl/cache/VX_snp_rsp_arb.v index 331b73a1..50c98d0d 100644 --- a/hw/rtl/cache/VX_snp_rsp_arb.v +++ b/hw/rtl/cache/VX_snp_rsp_arb.v @@ -17,25 +17,25 @@ module VX_snp_rsp_arb #( input wire snp_rsp_ready ); - wire [`BANK_BITS-1:0] fsq_bank; - wire fsq_valid; + wire [`BANK_BITS-1:0] sel_bank; + wire sel_valid; VX_fixed_arbiter #( .N(NUM_BANKS) - ) sel_ffsq ( + ) sel_arb ( .clk (clk), .reset (reset), .requests (per_bank_snp_rsp_valid), - .grant_index (fsq_bank), - .grant_valid (fsq_valid), + .grant_index (sel_bank), + .grant_valid (sel_valid), `UNUSED_PIN (grant_onehot) ); - assign snp_rsp_valid = fsq_valid; - assign snp_rsp_tag = per_bank_snp_rsp_tag[fsq_bank]; + assign snp_rsp_valid = sel_valid; + assign snp_rsp_tag = per_bank_snp_rsp_tag[sel_bank]; for (genvar i = 0; i < NUM_BANKS; i++) begin - assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (fsq_bank == `BANK_BITS'(i)); + assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (sel_bank == `BANK_BITS'(i)); end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index 6d6d8572..c9bdc81e 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -31,15 +31,10 @@ module VX_tag_data_access #( `IGNORE_WARNINGS_END `endif - input wire stall, input wire is_snp_st1, input wire snp_invalidate_st1, - input wire stall_bank_pipe, - input wire force_request_miss_st1, - - input wire[`LINE_SELECT_BITS-1:0] readaddr_st1, - input wire[`LINE_ADDR_WIDTH-1:0] writeaddr_st1, + input wire[`LINE_ADDR_WIDTH-1:0] addr_st1, input wire valid_req_st1, input wire writefill_st1, @@ -52,18 +47,16 @@ module VX_tag_data_access #( input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_st1, `IGNORE_WARNINGS_END + input wire force_miss_st1, + output wire[`WORD_WIDTH-1:0] readword_st1, output wire[`BANK_LINE_WIDTH-1:0] readdata_st1, output wire[`TAG_SELECT_BITS-1:0] readtag_st1, output wire miss_st1, output wire dirty_st1, - output wire[BANK_LINE_SIZE-1:0] dirtyb_st1, - output wire fill_saw_dirty_st1, - output wire snp_to_mrvq_st1, - output wire mrvq_init_ready_state_st1 + output wire[BANK_LINE_SIZE-1:0] dirtyb_st1 ); - `UNUSED_VAR (stall) - + wire qual_read_valid_st1; wire qual_read_dirty_st1; wire[BANK_LINE_SIZE-1:0] qual_read_dirtyb_st1; @@ -78,15 +71,11 @@ module VX_tag_data_access #( wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] use_write_enable; wire[`BANK_LINE_WIDTH-1:0] use_write_data; - wire fill_sent; - wire invalidate_line; + wire use_invalidate; wire tags_match; - wire real_writefill = valid_req_st1 && writefill_st1 - && ((~use_read_valid_st1) || (use_read_valid_st1 && ~tags_match)); - - wire[`TAG_SELECT_BITS-1:0] writetag_st1 = writeaddr_st1[`TAG_LINE_ADDR_RNG]; - wire[`LINE_SELECT_BITS-1:0] writeladdr_st1 = writeaddr_st1[`LINE_SELECT_BITS-1:0]; + wire[`TAG_SELECT_BITS-1:0] addrtag_st1 = addr_st1[`TAG_LINE_ADDR_RNG]; + wire[`LINE_SELECT_BITS-1:0] addrline_st1 = addr_st1[`LINE_SELECT_BITS-1:0]; VX_tag_data_store #( .CACHE_SIZE (CACHE_SIZE), @@ -96,27 +85,25 @@ module VX_tag_data_access #( ) tag_data_store ( .clk (clk), .reset (reset), - .stall_bank_pipe(stall_bank_pipe), - .read_addr (readaddr_st1), + .read_addr (addrline_st1), .read_valid (qual_read_valid_st1), .read_dirty (qual_read_dirty_st1), .read_dirtyb (qual_read_dirtyb_st1), .read_tag (qual_read_tag_st1), .read_data (qual_read_data_st1), - .invalidate (invalidate_line), + .invalidate (use_invalidate), .write_enable(use_write_enable), - .write_fill (real_writefill), - .write_addr (writeladdr_st1), - .tag_index (writetag_st1), - .write_data (use_write_data), - .fill_sent (fill_sent) + .write_fill (writefill_st1), + .write_addr (addrline_st1), + .tag_index (addrtag_st1), + .write_data (use_write_data) ); assign use_read_valid_st1 = qual_read_valid_st1 || !DRAM_ENABLE; // If shared memory, always valid assign use_read_dirty_st1 = qual_read_dirty_st1 && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache - assign use_read_tag_st1 = DRAM_ENABLE ? qual_read_tag_st1 : writetag_st1; // Tag is always the same in SM + assign use_read_tag_st1 = DRAM_ENABLE ? qual_read_tag_st1 : addrtag_st1; // Tag is always the same in SM assign use_read_dirtyb_st1= qual_read_dirtyb_st1; assign use_read_data_st1 = qual_read_data_st1; @@ -131,67 +118,69 @@ module VX_tag_data_access #( end end - wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] we; + wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] write_enable; wire [`BANK_LINE_WIDTH-1:0] data_write; - wire should_write = mem_rw_st1 - && valid_req_st1 - && use_read_valid_st1 - && ~miss_st1 - && ~is_snp_st1 - && ~real_writefill; + wire normal_write = valid_req_st1 + && !writefill_st1 + && !is_snp_st1 + && !miss_st1 + && !force_miss_st1 + && mem_rw_st1 + && use_read_valid_st1; + + wire fill_write = valid_req_st1 && writefill_st1 && !force_miss_st1; for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin - wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1 == `UP(`WORD_SELECT_WIDTH)'(i))) - && should_write; + wire normal_write_w = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1 == `UP(`WORD_SELECT_WIDTH)'(i))) + && normal_write; - assign we[i] = real_writefill ? {WORD_SIZE{1'b1}} : - normal_write ? mem_byteen_st1 : + assign write_enable[i] = fill_write ? {WORD_SIZE{1'b1}} : + normal_write_w ? mem_byteen_st1 : {WORD_SIZE{1'b0}}; - assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1; + assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = writefill_st1 ? writedata_st1[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1; end - assign use_write_enable = (writefill_st1 && ~real_writefill) ? 0 : we; - assign use_write_data = data_write; - // use "case equality" to handle uninitialized tag when block entry is not valid - assign tags_match = (writetag_st1 === use_read_tag_st1); + assign tags_match = (addrtag_st1 === use_read_tag_st1); - wire snoop_hit_no_pending = valid_req_st1 && is_snp_st1 && use_read_valid_st1 && tags_match && (use_read_dirty_st1 || snp_invalidate_st1) && ~force_request_miss_st1; - wire req_invalid = valid_req_st1 && ~is_snp_st1 && ~use_read_valid_st1 && ~writefill_st1; - wire req_miss = valid_req_st1 && ~is_snp_st1 && use_read_valid_st1 && ~writefill_st1 && ~tags_match; - wire real_miss = req_invalid || req_miss; - wire force_core_miss = (force_request_miss_st1 && ~is_snp_st1 && ~writefill_st1 && valid_req_st1 && ~real_miss); - assign snp_to_mrvq_st1 = valid_req_st1 && is_snp_st1 && force_request_miss_st1; + assign use_write_enable = write_enable; + assign use_write_data = data_write; + assign use_invalidate = valid_req_st1 && is_snp_st1 && use_read_valid_st1 && tags_match + && (use_read_dirty_st1 || snp_invalidate_st1) // block is dirty or need to force invalidation + && !force_miss_st1; - // The second term is basically saying always make an entry ready if there's already antoher entry waiting, even if you yourself see a miss - assign mrvq_init_ready_state_st1 = snp_to_mrvq_st1 - || (force_request_miss_st1 && ~is_snp_st1 && ~writefill_st1 && valid_req_st1); + wire core_req_miss = valid_req_st1 && !is_snp_st1 && !writefill_st1 // is core request + && (!use_read_valid_st1 || !tags_match); // block missing or has wrong tag - assign miss_st1 = real_miss || snoop_hit_no_pending || force_core_miss; - assign dirty_st1 = valid_req_st1 && use_read_valid_st1 && use_read_dirty_st1; - assign dirtyb_st1 = use_read_dirtyb_st1; - assign readdata_st1 = use_read_data_st1; - assign readtag_st1 = use_read_tag_st1; - assign fill_sent = miss_st1; - assign fill_saw_dirty_st1 = real_writefill && dirty_st1; - assign invalidate_line = snoop_hit_no_pending; + assign miss_st1 = core_req_miss; + assign dirty_st1 = valid_req_st1 && use_read_valid_st1 && use_read_dirty_st1; + assign dirtyb_st1 = use_read_dirtyb_st1; + assign readdata_st1 = use_read_data_st1; + assign readtag_st1 = use_read_tag_st1; -`ifdef DBG_PRINT_CACHE_BANK + always @(*) begin + if (valid_req_st1 && writefill_st1) begin + if (!(!use_read_valid_st1 || !tags_match)) begin + $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); + end + end + end + +`ifdef DBG_PRINT_CACHE_DATA always @(posedge clk) begin - if (valid_req_st1) begin - if ((| use_write_enable)) begin - if (writefill_st1) begin - $display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data); - end else begin - $display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1); - end - end else + if (valid_req_st1) begin if (miss_st1) begin - $display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1); + $display("%t: cache%0d:%0d data-miss: addr=%0h, wid=%0d, PC=%0h, valid=%b, tagmatch=%b, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, use_read_dirty_st1, tags_match, addrline_st1, addrtag_st1); + end else if ((| use_write_enable)) begin + if (writefill_st1) begin + $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), dirty_st1, addrline_st1, addrtag_st1, use_write_data); + end else begin + $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, dirty_st1, addrline_st1, addrtag_st1, wordsel_st1, writeword_st1); + end end else begin - $display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); + $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, dirty_st1, addrline_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); end end end diff --git a/hw/rtl/cache/VX_tag_data_store.v b/hw/rtl/cache/VX_tag_data_store.v index d3a022b2..e351bf20 100644 --- a/hw/rtl/cache/VX_tag_data_store.v +++ b/hw/rtl/cache/VX_tag_data_store.v @@ -12,7 +12,6 @@ module VX_tag_data_store #( ) ( input wire clk, input wire reset, - input wire stall_bank_pipe, input wire[`LINE_SELECT_BITS-1:0] read_addr, output wire read_valid, @@ -26,20 +25,14 @@ module VX_tag_data_store #( input wire write_fill, input wire[`LINE_SELECT_BITS-1:0] write_addr, input wire[`TAG_SELECT_BITS-1:0] tag_index, - input wire[`BANK_LINE_WIDTH-1:0] write_data, - input wire fill_sent + input wire[`BANK_LINE_WIDTH-1:0] write_data ); - - reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0]; - reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0]; - reg [`BANK_LINE_COUNT-1:0] dirty; - reg [`BANK_LINE_COUNT-1:0] valid; - - assign read_valid = valid [read_addr]; - assign read_dirty = dirty [read_addr]; - assign read_dirtyb = dirtyb [read_addr]; - assign read_tag = tag [read_addr]; + reg [`BANK_LINE_COUNT-1:0] dirty; + reg [`BANK_LINE_COUNT-1:0] valid; + assign read_valid = valid[read_addr]; + assign read_dirty = dirty[read_addr]; + wire do_write = (| write_enable); always @(posedge clk) begin @@ -48,30 +41,40 @@ module VX_tag_data_store #( valid[i] <= 0; dirty[i] <= 0; end - end else if (!stall_bank_pipe) begin - if (do_write) begin + end else begin + if (do_write) begin + assert(!invalidate); + dirty[write_addr] <= !write_fill; valid[write_addr] <= 1; - tag [write_addr] <= tag_index; - if (write_fill) begin - dirty[write_addr] <= 0; - dirtyb[write_addr] <= 0; - end else begin - dirty[write_addr] <= 1; - dirtyb[write_addr] <= dirtyb[write_addr] | write_enable; - end - end else if (fill_sent) begin - dirty[write_addr] <= 0; - dirtyb[write_addr] <= 0; - end - - if (invalidate) begin + end else if (invalidate) begin valid[write_addr] <= 0; end end end - wire [(`BANK_LINE_WORDS * WORD_SIZE)-1:0] ram_wren; - assign ram_wren = write_enable & {(`BANK_LINE_WORDS * WORD_SIZE){!stall_bank_pipe}}; + reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0]; + always @(posedge clk) begin + if (do_write) begin + dirtyb[write_addr] <= write_fill ? 0 : (dirtyb[write_addr] | write_enable); + end + end + assign read_dirtyb = dirtyb [read_addr]; + + VX_dp_ram #( + .DATAW(`TAG_SELECT_BITS), + .SIZE(`BANK_LINE_COUNT), + .BYTEENW(1), + .BUFFERED(0), + .RWCHECK(1) + ) tags ( + .clk(clk), + .waddr(write_addr), + .raddr(read_addr), + .wren(do_write), + .rden(1'b1), + .din(tag_index), + .dout(read_tag) + ); VX_dp_ram #( .DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8), @@ -79,11 +82,11 @@ module VX_tag_data_store #( .BYTEENW(`BANK_LINE_WORDS * WORD_SIZE), .BUFFERED(0), .RWCHECK(1) - ) dp_ram ( + ) data ( .clk(clk), .waddr(write_addr), .raddr(read_addr), - .wren(ram_wren), + .wren(write_enable), .rden(1'b1), .din(write_data), .dout(read_data) diff --git a/hw/rtl/libs/VX_fair_arbiter.v b/hw/rtl/libs/VX_fair_arbiter.v index 8f9cbbff..1656e830 100644 --- a/hw/rtl/libs/VX_fair_arbiter.v +++ b/hw/rtl/libs/VX_fair_arbiter.v @@ -52,8 +52,8 @@ module VX_fair_arbiter #( .N(N) ) priority_encoder ( .data_in (requests_use), - .data_out (grant_index ), - .valid_out (grant_valid ) + .data_out (grant_index), + .valid_out (grant_valid) ); always @(*) begin diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index bb5010b7..3c5c9a78 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -3,7 +3,7 @@ module VX_generic_queue #( parameter DATAW = 1, parameter SIZE = 2, - parameter BUFFERED = 1, + parameter BUFFERED = 0, parameter ADDRW = $clog2(SIZE), parameter SIZEW = $clog2(SIZE+1) ) ( diff --git a/hw/rtl/libs/VX_priority_encoder.v b/hw/rtl/libs/VX_priority_encoder.v index 6d8361ae..f83aeb5b 100644 --- a/hw/rtl/libs/VX_priority_encoder.v +++ b/hw/rtl/libs/VX_priority_encoder.v @@ -8,21 +8,18 @@ module VX_priority_encoder #( output wire valid_out ); reg [`LOG2UP(N)-1:0] data_out_r; - reg valid_out_r; always @(*) begin - data_out_r = 0; - valid_out_r = 0; + data_out_r = 0; for (integer i = 0; i < N; i++) begin if (data_in[i]) begin data_out_r = `LOG2UP(N)'(i); - valid_out_r = 1; break; end end end assign data_out = data_out_r; - assign valid_out = valid_out_r; + assign valid_out = (| data_in); endmodule \ No newline at end of file diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index 88ac722c..dfa3891d 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -10,6 +10,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ +DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE @@ -44,7 +45,7 @@ gen-s: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' gen-sd: - verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace-fst --trace-threads 1 $(DBG) + verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs --trace-threads 1 $(DBG) gen-st: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS) @@ -53,7 +54,7 @@ gen-m: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' gen-md: - verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace-fst --trace-threads 1 $(DBG) + verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs --trace-threads 1 $(DBG) gen-mt: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 4f6403e7..ffa49e82 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -30,9 +30,9 @@ Simulator::Simulator() { #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedFstC(); + trace_ = new VerilatedVcdC(); vortex_->trace(trace_, 99); - trace_->open("trace.fst"); + trace_->open("trace.vcd"); #endif // reset the device diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index 0dcf8a3b..ba9fee7c 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -5,7 +5,7 @@ #include "verilated.h" #ifdef VCD_OUTPUT -#include +#include #endif #include @@ -76,6 +76,6 @@ private: RAM *ram_; VVortex *vortex_; #ifdef VCD_OUTPUT - VerilatedFstC *trace_; + VerilatedVcdC *trace_; #endif }; \ No newline at end of file diff --git a/hw/unit_tests/cache/Makefile b/hw/unit_tests/cache/Makefile index 9aba4010..93f1fe64 100644 --- a/hw/unit_tests/cache/Makefile +++ b/hw/unit_tests/cache/Makefile @@ -8,6 +8,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ -DDBG_PRINT_CACHE_BANK \ -DDBG_PRINT_CACHE_SNP \ -DDBG_PRINT_CACHE_MSRQ \ + -DDBG_PRINT_CACHE_DATA \ -DDBG_PRINT_DRAM \ -DDBG_PRINT_OPAE