From 7560202f8bd9e95ae09456d10655e997d6e606fa Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 21 Feb 2021 21:47:46 -0800 Subject: [PATCH] cache bank refactoring - removing unnecessary core response fifo & restoring single port data access --- hw/rtl/VX_cluster.v | 1 - hw/rtl/VX_config.vh | 25 ------ hw/rtl/VX_mem_unit.v | 3 - hw/rtl/VX_scope.vh | 2 +- hw/rtl/Vortex.v | 1 - hw/rtl/cache/VX_bank.v | 158 +++++++++++++--------------------- hw/rtl/cache/VX_cache.v | 4 - hw/rtl/cache/VX_data_access.v | 44 ++++------ hw/rtl/cache/VX_shared_mem.v | 68 +++++++-------- hw/scripts/scope.json | 2 +- hw/syn/quartus/project.tcl | 36 ++++---- hw/syn/quartus/top/Makefile | 79 ----------------- 12 files changed, 129 insertions(+), 294 deletions(-) delete mode 100644 hw/syn/quartus/top/Makefile diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index e87e4dd3..466512e7 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -170,7 +170,6 @@ module VX_cluster #( .CREQ_SIZE (`L2CREQ_SIZE), .MSHR_SIZE (`L2MSHR_SIZE), .DRSQ_SIZE (`L2DRSQ_SIZE), - .CRSQ_SIZE (`L2CRSQ_SIZE), .DREQ_SIZE (`L2DREQ_SIZE), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`XDRAM_TAG_WIDTH), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 5b98ab49..5157f33e 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -264,11 +264,6 @@ `define ICREQ_SIZE 4 `endif -// Core Response Queue Size -`ifndef ICRSQ_SIZE -`define ICRSQ_SIZE 4 -`endif - // Miss Handling Register Size `ifndef IMSHR_SIZE `define IMSHR_SIZE `NUM_WARPS @@ -306,11 +301,6 @@ `define DCREQ_SIZE 4 `endif -// Core Response Queue Size -`ifndef DCRSQ_SIZE -`define DCRSQ_SIZE 4 -`endif - // Miss Handling Register Size `ifndef DMSHR_SIZE `define DMSHR_SIZE `LSUQ_SIZE @@ -348,11 +338,6 @@ `define SCREQ_SIZE 4 `endif -// Core Response Queue Size -`ifndef SCRSQ_SIZE -`define SCRSQ_SIZE 4 -`endif - // L2cache Configurable Knobs ///////////////////////////////////////////////// // Size of cache in bytes @@ -370,11 +355,6 @@ `define
L2CREQ_SIZE 4 `endif -// Core Response Queue Size -`ifndef L2CRSQ_SIZE -`define L2CRSQ_SIZE 4 -`endif - // Miss Handling Register Size `ifndef L2MSHR_SIZE `define L2MSHR_SIZE 16 @@ -407,11 +387,6 @@ `define L3CREQ_SIZE 4 `endif -// Core Response Queue Size -`ifndef L3CRSQ_SIZE -`define L3CRSQ_SIZE 4 -`endif - // Miss Handling Register Size `ifndef L3MSHR_SIZE `define L3MSHR_SIZE 16 diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 5f499b3f..a8201d6e 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -101,7 +101,6 @@ module VX_mem_unit # ( .CREQ_SIZE (`ICREQ_SIZE), .MSHR_SIZE (`IMSHR_SIZE), .DRSQ_SIZE (`IDRSQ_SIZE), - .CRSQ_SIZE (`ICRSQ_SIZE), .DREQ_SIZE (`IDREQ_SIZE), .WRITE_ENABLE (0), .CORE_TAG_WIDTH (`ICORE_TAG_WIDTH), @@ -161,7 +160,6 @@ module VX_mem_unit # ( .CREQ_SIZE (`DCREQ_SIZE), .MSHR_SIZE (`DMSHR_SIZE), .DRSQ_SIZE (`DDRSQ_SIZE), - .CRSQ_SIZE (`DCRSQ_SIZE), .DREQ_SIZE (`DDREQ_SIZE), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), @@ -227,7 +225,6 @@ module VX_mem_unit # ( .WORD_SIZE (`SWORD_SIZE), .NUM_REQS (`SNUM_REQUESTS), .CREQ_SIZE (`SCREQ_SIZE), - .CRSQ_SIZE (`SCRSQ_SIZE), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET) diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 609b6ccb..27344d77 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -7,7 +7,7 @@ `define SCOPE_ASSIGN(d,s) assign scope_``d = s -`define SCOPE_SIZE 16384 +`define SCOPE_SIZE 4096 `else diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 18e619fc..17c67db5 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -172,7 +172,6 @@ module Vortex ( .CREQ_SIZE (`L3CREQ_SIZE), .MSHR_SIZE (`L3MSHR_SIZE), .DRSQ_SIZE (`L3DRSQ_SIZE), - .CRSQ_SIZE (`L3CRSQ_SIZE), .DREQ_SIZE (`L3DREQ_SIZE), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 23fee003..0d1792a7 100644 --- a/hw/rtl/cache/VX_bank.v +++ 
b/hw/rtl/cache/VX_bank.v @@ -24,9 +24,6 @@ module VX_bank #( parameter MSHR_SIZE = 1, // DRAM Response Queue Size parameter DRSQ_SIZE = 1, - - // Core Response Queue Size - parameter CRSQ_SIZE = 1, // DRAM Request Queue Size parameter DREQ_SIZE = 1, @@ -154,7 +151,7 @@ module VX_bank #( wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1; wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1; wire [NUM_PORTS-1:0] pmask_st0, pmask_st1; - wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st0, rdata_st1; + wire [`CACHE_LINE_WIDTH-1:0] rdata_st1; wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1; wire [CORE_TAG_WIDTH-1:0] tag_st0, tag_st1; wire valid_st0, valid_st1; @@ -169,9 +166,11 @@ module VX_bank #( wire mshr_pending_st0; wire is_flush_st0; - wire crsq_alm_full, crsq_push, crsq_pop; - wire dreq_alm_full, dreq_push, dreq_pop; + wire crsq_in_valid, crsq_in_ready, crsq_in_stall; + wire dreq_alm_full; wire drsq_pop; + + wire crsq_in_fire = crsq_in_valid && crsq_in_ready; VX_pending_size #( .SIZE (MSHR_SIZE) @@ -179,7 +178,7 @@ module VX_bank #( .clk (clk), .reset (reset), .push (creq_pop && !creq_rw), - .pop (crsq_push), + .pop (crsq_in_fire), .full (mshr_alm_full), `UNUSED_PIN (empty), `UNUSED_PIN (size) @@ -193,15 +192,16 @@ module VX_bank #( wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1); assign mshr_pop = mshr_pop_unqual - && !crsq_alm_full // ensure core response ready - && !(!IN_ORDER_DRAM && is_miss_st1 && is_mshr_st1); // do not schedule another mshr request if the previous one missed + && !(!IN_ORDER_DRAM && is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed + && !crsq_in_stall; // ensure core response ready - assign drsq_pop = drsq_pop_unqual; + assign drsq_pop = drsq_pop_unqual + && !crsq_in_stall; // ensure core response ready assign creq_pop = creq_pop_unqual - && !dreq_alm_full // ensure dram request ready - && !crsq_alm_full // ensure core response ready - && !mshr_alm_full; // ensure 
mshr enqueue ready + && !dreq_alm_full // ensure dram request ready + && !mshr_alm_full // ensure mshr enqueue ready + && !crsq_in_stall; // ensure core response ready assign dram_rsp_ready = drsq_pop; @@ -252,7 +252,7 @@ module VX_bank #( ) pipe_reg0 ( .clk (clk), .reset (reset), - .enable (1'b1), + .enable (!crsq_in_stall), .data_in ({ flush_enable || mshr_pop || drsq_pop || creq_pop, flush_enable, @@ -326,52 +326,15 @@ module VX_bank #( assign fill_req_unqual_st0 = !mem_rw_st0 && (!force_miss_st0 || (!IN_ORDER_DRAM && is_mshr_st0 && !prev_miss_dep_st0)); - wire [`CACHE_LINE_WIDTH-1:0] rdata_unqual; - - wire writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1); - - wire rw_hazard = valid_st1 && writeen_st1 && (addr_st0 == addr_st1) - && ((`WORDS_PER_LINE == 1) || (is_fill_st1 || (wsel_st0 == wsel_st1))); - - if (`WORDS_PER_LINE > 1) begin - for (genvar p = 0; p < NUM_PORTS; p++) begin - reg [`WORD_WIDTH-1:0] read_data_r; - wire [`WORD_WIDTH-1:0] write_data = wdata_st1[wsel_st0 * `WORD_WIDTH +: `WORD_WIDTH]; - always @(*) begin - read_data_r = rdata_unqual[wsel_st0[p] * `WORD_WIDTH +: `WORD_WIDTH]; - for (integer i = 0; i < WORD_SIZE; i++) begin - if (rw_hazard - && (is_fill_st1 || (WRITE_ENABLE && byteen_st1[p][i])) - && ((NUM_PORTS == 1) || pmask_st1[p])) begin - read_data_r[i * 8 +: 8] = write_data[i * 8 +: 8]; - end - end - end - assign rdata_st0[p] = read_data_r; - end - end else begin - reg [`WORD_WIDTH-1:0] read_data_r; - always @(*) begin - read_data_r = rdata_unqual; - for (integer i = 0; i < WORD_SIZE; i++) begin - if (rw_hazard - && (is_fill_st1 || (WRITE_ENABLE && byteen_st1[0][i]))) begin - read_data_r[i * 8 +: 8] = wdata_st1[i * 8 +: 8]; - end - end - end - assign rdata_st0[0] = read_data_r; - end - VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`WORD_WIDTH + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 
+ 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), - .enable (1'b1), - .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, fill_req_unqual_st0, incoming_fill_st0, miss_st0, force_miss_st0, mem_rw_st0, addr_st0, rdata_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}), - .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, fill_req_unqual_st1, incoming_fill_st1, miss_st1, force_miss_st1, mem_rw_st1, addr_st1, rdata_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1}) + .enable (!crsq_in_stall), + .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, fill_req_unqual_st0, incoming_fill_st0, miss_st0, force_miss_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}), + .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, fill_req_unqual_st1, incoming_fill_st1, miss_st1, force_miss_st1, mem_rw_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1}) ); `ifdef DBG_CACHE_REQ_INFO @@ -382,6 +345,8 @@ module VX_bank #( end `endif + wire writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1); + wire crsq_push_st1 = !is_fill_st1 && !mem_rw_st1 && !miss_st1 && !force_miss_st1; wire mshr_push_st1 = !is_fill_st1 && !mem_rw_st1 && (miss_st1 || force_miss_st1); @@ -424,27 +389,25 @@ module VX_bank #( .reset (reset), `ifdef DBG_CACHE_REQ_INFO - .debug_pc_r (debug_pc_st0), - .debug_wid_r (debug_wid_st0), - .debug_pc_w (debug_pc_st1), - .debug_wid_w (debug_wid_st1), + .debug_pc (debug_pc_st1), + .debug_wid (debug_wid_st1), `endif + .addr (addr_st1), + // reading - .readen (valid_st0 && !is_fill_st0 && !mem_rw_st0), - .raddr (addr_st0), - .rdata (rdata_unqual), + .readen (valid_st1 && !is_fill_st1 && !mem_rw_st1), + .rdata (rdata_st1), // writing .writeen (valid_st1 && 
writeen_st1), .is_fill (is_fill_st1), - .byteen (line_byteen_st1), - .waddr (addr_st1), + .byteen (line_byteen_st1), .wdata (wdata_st1) ); assign mshr_push = valid_st1 && mshr_push_st1; - wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1; + wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1 && crsq_in_ready; wire mshr_restore = !IN_ORDER_DRAM && is_mshr_st1; `RUNTIME_ASSERT(!IN_ORDER_DRAM || !(mshr_push && mshr_restore), ("Oops!")) @@ -508,50 +471,49 @@ module VX_bank #( wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data; wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid; wire [CORE_TAG_WIDTH-1:0] crsq_tag; - wire crsq_empty; - assign crsq_push = valid_st1 && crsq_push_st1; - assign crsq_pop = core_rsp_valid && core_rsp_ready; - assign crsq_data = rdata_st1; + assign crsq_in_valid = valid_st1 && crsq_push_st1; + assign crsq_in_stall = crsq_in_valid && !crsq_in_ready; + assign crsq_pmask = pmask_st1; assign crsq_tid = req_tid_st1; assign crsq_tag = tag_st1; - VX_fifo_queue #( - .DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), - .SIZE (CRSQ_SIZE), - .ALM_FULL (CRSQ_SIZE-2), - .BUFFERED (1) - ) core_rsp_queue ( - .clk (clk), - .reset (reset), - .push (crsq_push), - .pop (crsq_pop), - .data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}), - .data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}), - .empty (crsq_empty), - .alm_full (crsq_alm_full), - `UNUSED_PIN (full), - `UNUSED_PIN (alm_empty), - `UNUSED_PIN (size) - ); + if (`WORDS_PER_LINE > 1) begin + for (genvar p = 0; p < NUM_PORTS; ++p) begin + assign crsq_data[p] = rdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH]; + end + end else begin + assign crsq_data = rdata_st1; + end - assign core_rsp_valid = !crsq_empty; + VX_skid_buffer #( + .DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), + .BUFFERED (1) + ) core_rsp_req ( + .clk (clk), + .reset (reset), + .valid_in (crsq_in_valid), + .data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}), 
+ .ready_in (crsq_in_ready), + .valid_out (core_rsp_valid), + .data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}), + .ready_out (core_rsp_ready) + ); // Enqueue DRAM request wire [CACHE_LINE_SIZE-1:0] dreq_byteen; wire [`LINE_ADDR_WIDTH-1:0] dreq_addr; wire [`CACHE_LINE_WIDTH-1:0] dreq_data; - wire dreq_empty, writeback; + wire dreq_push, dreq_pop, dreq_empty, dreq_rw; assign dreq_push = valid_st1 && dreq_push_st1; assign dreq_pop = dram_req_valid && dram_req_ready; - assign writeback = WRITE_ENABLE && do_writeback_st1; - - assign dreq_byteen = writeback ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}}; + assign dreq_rw = WRITE_ENABLE && do_writeback_st1; + assign dreq_byteen = dreq_rw ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}}; assign dreq_addr = addr_st1; assign dreq_data = wdata_st1; @@ -564,7 +526,7 @@ module VX_bank #( .reset (reset), .push (dreq_push), .pop (dreq_pop), - .data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}), + .data_in ({dreq_rw, dreq_byteen, dreq_addr, dreq_data}), .data_out ({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), .empty (dreq_empty), .alm_full (dreq_alm_full), @@ -582,7 +544,7 @@ module VX_bank #( `SCOPE_ASSIGN (miss_st0, miss_st0); `SCOPE_ASSIGN (force_miss_st0, force_miss_st0); `SCOPE_ASSIGN (mshr_push, mshr_push); - `SCOPE_ASSIGN (crsq_alm_full, crsq_alm_full); + `SCOPE_ASSIGN (crsq_in_stall, crsq_in_stall); `SCOPE_ASSIGN (dreq_alm_full, dreq_alm_full); `SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full); `SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID)); @@ -591,7 +553,7 @@ module VX_bank #( `ifdef PERF_ENABLE assign perf_read_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && !mem_rw_st1; assign perf_write_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && mem_rw_st1; - assign perf_pipe_stalls = crsq_alm_full || dreq_alm_full || mshr_alm_full; + assign perf_pipe_stalls = crsq_in_stall || dreq_alm_full || mshr_alm_full; assign perf_mshr_stalls = 
mshr_alm_full; `endif @@ -604,8 +566,8 @@ module VX_bank #( $display("%t: cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); assert(!is_mshr_st1); end - if (crsq_alm_full || dreq_alm_full || mshr_alm_full) begin - $display("%t: cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_alm_full, dreq_alm_full, mshr_alm_full); + if (crsq_in_stall || dreq_alm_full || mshr_alm_full) begin + $display("%t: cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_in_stall, dreq_alm_full, mshr_alm_full); end if (flush_enable) begin $display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID)); @@ -622,7 +584,7 @@ module VX_bank #( else $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel); end - if (crsq_push) begin + if (crsq_in_fire) begin $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1); end if (dreq_push) begin diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index f3f3bf11..f0b1bbff 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -23,9 +23,6 @@ module VX_cache #( parameter MSHR_SIZE = 16, // DRAM Response Queue Size parameter DRSQ_SIZE = 4, - - // Core Response Queue Size - parameter CRSQ_SIZE = 4, // DRAM Request Queue Size parameter DREQ_SIZE = 4, @@ -298,7 +295,6 @@ module VX_cache #( .CREQ_SIZE (CREQ_SIZE), .MSHR_SIZE (MSHR_SIZE), .DRSQ_SIZE (DRSQ_SIZE), - .CRSQ_SIZE (CRSQ_SIZE), .DREQ_SIZE (DREQ_SIZE), .WRITE_ENABLE (WRITE_ENABLE), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), diff --git 
a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 9783ef1c..01d600a1 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -19,49 +19,43 @@ module VX_data_access #( `ifdef DBG_CACHE_REQ_INFO `IGNORE_WARNINGS_BEGIN - input wire[31:0] debug_pc_r, - input wire[`NW_BITS-1:0] debug_wid_r, - input wire[31:0] debug_pc_w, - input wire[`NW_BITS-1:0] debug_wid_w, + input wire[31:0] debug_pc, + input wire[`NW_BITS-1:0] debug_wid, `IGNORE_WARNINGS_END `endif +`IGNORE_WARNINGS_BEGIN + input wire[`LINE_ADDR_WIDTH-1:0] addr, +`IGNORE_WARNINGS_END + // reading input wire readen, -`IGNORE_WARNINGS_BEGIN - input wire[`LINE_ADDR_WIDTH-1:0] raddr, -`IGNORE_WARNINGS_END output wire [`CACHE_LINE_WIDTH-1:0] rdata, // writing input wire writeen, input wire is_fill, - input wire [CACHE_LINE_SIZE-1:0] byteen, -`IGNORE_WARNINGS_BEGIN - input wire[`LINE_ADDR_WIDTH-1:0] waddr, -`IGNORE_WARNINGS_END + input wire [CACHE_LINE_SIZE-1:0] byteen, input wire [`CACHE_LINE_WIDTH-1:0] wdata ); `UNUSED_VAR (reset) `UNUSED_VAR (readen) - wire [`LINE_SELECT_BITS-1:0] line_raddr, line_waddr; + wire [`LINE_SELECT_BITS-1:0] line_addr; wire [CACHE_LINE_SIZE-1:0] byte_enable; - assign line_raddr = raddr[`LINE_SELECT_BITS-1:0]; - assign line_waddr = waddr[`LINE_SELECT_BITS-1:0]; + assign line_addr = addr[`LINE_SELECT_BITS-1:0]; assign byte_enable = (WRITE_ENABLE && !is_fill) ? 
byteen : {CACHE_LINE_SIZE{1'b1}}; - VX_dp_ram #( - .DATAW(CACHE_LINE_SIZE * 8), - .SIZE(`LINES_PER_BANK), - .BYTEENW(CACHE_LINE_SIZE), - .RWCHECK(1) + VX_sp_ram #( + .DATAW (CACHE_LINE_SIZE * 8), + .SIZE (`LINES_PER_BANK), + .BYTEENW (CACHE_LINE_SIZE), + .RWCHECK (1) ) data_store ( .clk(clk), - .raddr(line_raddr), - .waddr(line_waddr), - .wren(writeen), + .addr(line_addr), + .wren(writeen), .byteen(byte_enable), .rden(1'b1), .din(wdata), @@ -72,13 +66,13 @@ module VX_data_access #( always @(posedge clk) begin if (writeen) begin if (is_fill) begin - $display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(waddr, BANK_ID), line_waddr, wdata); + $display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, wdata); end else begin - $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(waddr, BANK_ID), debug_wid_w, debug_pc_w, byte_enable, line_waddr, wdata); + $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wdata); end end if (readen) begin - $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr, BANK_ID), debug_wid_r, debug_pc_r, line_raddr, rdata); + $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, rdata); end end `endif diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index 544de776..b3d192c7 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -15,9 +15,6 @@ module VX_shared_mem #( // Core Request Queue Size 
parameter CREQ_SIZE = 4, - // Core Response Queue Size - parameter CRSQ_SIZE = 4, - // core request tag size parameter CORE_TAG_WIDTH = 1, @@ -113,10 +110,10 @@ module VX_shared_mem #( wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; wire creq_push, creq_pop, creq_empty, creq_full; - wire crsq_full; + wire crsq_in_ready; assign creq_push = (| core_req_valid) && !creq_full; - assign creq_pop = ~creq_empty && ~crsq_full; + assign creq_pop = ~creq_empty && crsq_in_ready; assign per_bank_core_req_ready_unqual = ~creq_full; @@ -167,7 +164,7 @@ module VX_shared_mem #( ) data ( .clk (clk), .addr (per_bank_core_req_addr[i]), - .wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && ~crsq_full), + .wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && crsq_in_ready), .byteen (per_bank_core_req_byteen[i]), .rden (1'b1), .din (per_bank_core_req_data[i]), @@ -175,58 +172,53 @@ module VX_shared_mem #( ); end - reg [NUM_REQS-1:0] core_rsp_valid_unqual; - reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; - reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; + reg [NUM_REQS-1:0] core_rsp_valids_in; + reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in; + reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_in; always @(*) begin - core_rsp_valid_unqual = 0; - core_rsp_data_unqual = 'x; - core_rsp_tag_unqual = 'x; + core_rsp_valids_in = 0; + core_rsp_data_in = 'x; + core_rsp_tag_in = 'x; for (integer i = 0; i < NUM_BANKS; i++) begin if (per_bank_core_req_valid[i]) begin - core_rsp_valid_unqual[per_bank_core_req_tid[i]] = 1; - core_rsp_data_unqual[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i]; - core_rsp_tag_unqual = per_bank_core_req_tag[i]; + core_rsp_valids_in[per_bank_core_req_tid[i]] = 1; + core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i]; + core_rsp_tag_in = per_bank_core_req_tag[i]; end end end `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st0, debug_wid_st0} = 
core_rsp_tag_unqual[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_in[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; end else begin assign {debug_pc_st0, debug_wid_st0} = 0; end `endif - wire [NUM_REQS-1:0] core_rsp_valid_tmask; - wire crsq_push, crsq_pop, crsq_empty; + wire [NUM_REQS-1:0] core_rsp_valids_out; + wire core_rsp_valid_out; wire core_rsp_rw = | (per_bank_core_req_valid & per_bank_core_req_rw); - assign crsq_push = ~creq_empty && ~core_rsp_rw && ~crsq_full; - assign crsq_pop = ~crsq_empty && core_rsp_ready; - - VX_fifo_queue #( - .DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH), - .SIZE (CRSQ_SIZE), + wire crsq_in_valid = ~creq_empty && ~core_rsp_rw; + + VX_skid_buffer #( + .DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH), .BUFFERED (1) - ) core_rsp_queue ( - .clk (clk), - .reset (reset), - .push (crsq_push), - .pop (crsq_pop), - .data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}), - .data_out ({core_rsp_valid_tmask, core_rsp_data, core_rsp_tag}), - .empty (crsq_empty), - .full (crsq_full), - `UNUSED_PIN (alm_empty), - `UNUSED_PIN (alm_full), - `UNUSED_PIN (size) + ) core_rsp_req ( + .clk (clk), + .reset (reset), + .valid_in (crsq_in_valid), + .data_in ({core_rsp_valids_in, core_rsp_data_in, core_rsp_tag_in}), + .ready_in (crsq_in_ready), + .valid_out (core_rsp_valid_out), + .data_out ({core_rsp_valids_out, core_rsp_data, core_rsp_tag}), + .ready_out (core_rsp_ready) ); - assign core_rsp_valid = core_rsp_valid_tmask & {NUM_REQS{~crsq_empty}}; + assign core_rsp_valid = core_rsp_valids_out & {NUM_REQS{core_rsp_valid_out}}; `ifdef DBG_PRINT_CACHE_BANK always @(posedge clk) begin @@ -280,4 +272,4 @@ module VX_shared_mem #( assign perf_cache_if.crsp_stalls = perf_crsp_stalls; `endif -endmodule +endmodule \ No newline at end of file diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index b39c71cf..945f9db6 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -212,7 
+212,7 @@ "miss_st0": 1, "force_miss_st0": 1, "mshr_push": 1, - "?crsq_alm_full": 1, + "?crsq_in_stall": 1, "?dreq_alm_full": 1, "?mshr_alm_full": 1 } diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index f581bf90..fccc5439 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -41,25 +41,25 @@ set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON -set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON -set_global_assignment -name USE_HIGH_SPEED_ADDER ON -set_global_assignment -name MUX_RESTRUCTURE ON +#set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON +#set_global_assignment -name USE_HIGH_SPEED_ADDER ON +#set_global_assignment -name MUX_RESTRUCTURE ON -set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED -set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" -set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS -set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0 -set_global_assignment -name FITTER_EFFORT "STANDARD FIT" -set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" -set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON -set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM -set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON -set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON -set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON -set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON -set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 -set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 -set_global_assignment -name SEED 1 +#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED +#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" +#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS +#set_global_assignment -name 
PLACEMENT_EFFORT_MULTIPLIER 2.0 +#set_global_assignment -name FITTER_EFFORT "STANDARD FIT" +#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" +#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON +#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM +#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON +#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON +#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON +#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON +#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 +#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 +#set_global_assignment -name SEED 1 switch $opts(family) { "Arria 10" { diff --git a/hw/syn/quartus/top/Makefile b/hw/syn/quartus/top/Makefile deleted file mode 100644 index 36d350b9..00000000 --- a/hw/syn/quartus/top/Makefile +++ /dev/null @@ -1,79 +0,0 @@ -FAMILY = "Arria 10" -DEVICE = 10AX115N3F40E2SG -FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 - -#FAMILY = "Stratix 10" -#DEVICE = 1SX280HN2F43E2VG -#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 - -PROJECT = vortex_afu -TOP_LEVEL_ENTITY = vortex_afu -SRC_FILE = vortex_afu.sv - -RTL_DIR=../../../rtl -FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE) - -PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf - -# Executable Configuration -SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1 -FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on -ASM_ARGS = -STA_ARGS = --parallel --do_report_timing - -# Build targets -all: $(PROJECT).sta.rpt - -syn: $(PROJECT).syn.rpt - -fit: $(PROJECT).fit.rpt - -asm: 
$(PROJECT).asm.rpt - -sta: $(PROJECT).sta.rpt - -smart: smart.log - -# Target implementations -STAMP = echo done > - -$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES) - quartus_syn $(PROJECT) $(SYN_ARGS) - $(STAMP) fit.chg - -$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt - quartus_fit $(PROJECT) $(FIT_ARGS) - $(STAMP) asm.chg - $(STAMP) sta.chg - -$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt - quartus_asm $(PROJECT) $(ASM_ARGS) - -$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt - quartus_sta $(PROJECT) $(STA_ARGS) - -smart.log: $(PROJECT_FILES) - quartus_sh --determine_smart_action $(PROJECT) > smart.log - -# Project initialization -$(PROJECT_FILES): - quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" - -syn.chg: - $(STAMP) syn.chg - -fit.chg: - $(STAMP) fit.chg - -sta.chg: - $(STAMP) sta.chg - -asm.chg: - $(STAMP) asm.chg - -program: $(PROJECT).sof - quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof" - -clean: - rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox