From 762b8e2e3e0fe8707e2c90d9e16f9e193d18182e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 4 Jan 2021 12:49:40 -0500 Subject: [PATCH] fixed cache mshr critical path --- driver/opae/vlsim/opae_sim.cpp | 2 +- hw/rtl/afu/vortex_afu.sv | 17 +- hw/rtl/afu/vortex_afu.vh | 1008 ++------------------------------ hw/rtl/cache/VX_bank.v | 108 ++-- hw/rtl/cache/VX_data_access.v | 50 +- hw/rtl/cache/VX_data_store.v | 3 +- hw/rtl/cache/VX_miss_resrv.v | 123 ++-- hw/simulate/simulator.cpp | 2 +- 8 files changed, 210 insertions(+), 1103 deletions(-) diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index b0391efd..4f713358 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -11,7 +11,7 @@ #define RESET_DELAY 2 #define ENABLE_DRAM_STALLS -#define DRAM_LATENCY 24 +#define DRAM_LATENCY 300 #define DRAM_RQ_SIZE 16 #define DRAM_STALLS_MODULO 16 diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index 933473f4..a39b8135 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -37,6 +37,8 @@ module vortex_afu #( output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select ); +localparam RESET_DELAY = 2; + localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr); localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data); localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH); @@ -324,6 +326,15 @@ wire cmd_write_done; wire cmd_csr_done; wire cmd_run_done; +reg [$clog2(RESET_DELAY+1)-1:0] vx_reset_ctr; +always @(posedge clk) begin + if (state == STATE_IDLE) begin + vx_reset_ctr <= 0; + end else if (state == STATE_START) begin + vx_reset_ctr <= vx_reset_ctr + 1; + end +end + always @(posedge clk) begin if (reset) begin state <= STATE_IDLE; @@ -392,8 +403,10 @@ always @(posedge clk) begin end end - STATE_START: begin // vortex reset cycle - state <= STATE_RUN; + STATE_START: begin + // vortex reset cycles + if (vx_reset_ctr == $bits(vx_reset_ctr)'(RESET_DELAY)) + state <= STATE_RUN; end STATE_RUN: begin diff --git a/hw/rtl/afu/vortex_afu.vh b/hw/rtl/afu/vortex_afu.vh index a39b8135..564fd96c 100644 --- a/hw/rtl/afu/vortex_afu.vh +++ b/hw/rtl/afu/vortex_afu.vh @@ -1,972 +1,36 @@ -`ifndef NOPAE -`include "platform_if.vh" -import local_mem_cfg_pkg::*; -`include "afu_json_info.vh" -`else -`include "vortex_afu.vh" -/* verilator lint_off IMPORTSTAR */ -import ccip_if_pkg::*; -import local_mem_cfg_pkg::*; -/* verilator lint_on IMPORTSTAR */ -`endif - -`include "VX_define.vh" - -module vortex_afu #( - parameter NUM_LOCAL_MEM_BANKS = 2 -) ( - // global signals - input clk, - input reset, - - // IF signals between CCI and AFU - input t_if_ccip_Rx cp2af_sRxPort, - output t_if_ccip_Tx af2cp_sTxPort, - - // Avalon signals for local memory access - output t_local_mem_data avs_writedata, - input t_local_mem_data avs_readdata, - output t_local_mem_addr avs_address, - input logic avs_waitrequest, - output logic avs_write, - output logic avs_read, - output t_local_mem_byte_mask avs_byteenable, - output t_local_mem_burst_cnt avs_burstcount, - input avs_readdatavalid, - - output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select -); - -localparam RESET_DELAY = 2; - -localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr); -localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data); -localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH); - -localparam VX_DRAM_LINE_LW = $clog2(`VX_DRAM_LINE_WIDTH); -localparam VX_DRAM_LINE_IDX = (DRAM_LINE_LW - VX_DRAM_LINE_LW); - -localparam AVS_RD_QUEUE_SIZE = 16; -localparam AVS_REQ_TAGW = `VX_DRAM_TAG_WIDTH + VX_DRAM_LINE_IDX; - -localparam CCI_RD_WINDOW_SIZE = 8; -localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE; -localparam CCI_RW_QUEUE_SIZE = 1024; - -localparam AFU_ID_L = 16'h0002; // AFU ID Lower -localparam AFU_ID_H = 16'h0004; // AFU ID Higher - -localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ; -localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE; -localparam CMD_RUN = `AFU_IMAGE_CMD_RUN; -localparam CMD_CSR_READ = `AFU_IMAGE_CMD_CSR_READ; -localparam CMD_CSR_WRITE = `AFU_IMAGE_CMD_CSR_WRITE; - -localparam MMIO_CMD_TYPE = `AFU_IMAGE_MMIO_CMD_TYPE; -localparam MMIO_IO_ADDR = `AFU_IMAGE_MMIO_IO_ADDR; -localparam MMIO_MEM_ADDR = `AFU_IMAGE_MMIO_MEM_ADDR; -localparam MMIO_DATA_SIZE = `AFU_IMAGE_MMIO_DATA_SIZE; -localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS; - -localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ; -localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE; - -localparam MMIO_CSR_CORE = `AFU_IMAGE_MMIO_CSR_CORE; -localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR; -localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA; -localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ; - -localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE); -localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW; - -localparam STATE_IDLE = 0; -localparam STATE_READ = 1; -localparam STATE_WRITE = 2; -localparam STATE_START = 3; -localparam STATE_RUN = 4; -localparam STATE_CSR_READ = 5; -localparam STATE_CSR_WRITE = 6; -localparam STATE_MAX_VALUE = 7; -localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE); - -`ifdef SCOPE -`SCOPE_DECL_SIGNALS -`endif - -wire [127:0] afu_id = `AFU_ACCEL_UUID; - -reg [STATE_WIDTH-1:0] state; - -// Vortex ports /////////////////////////////////////////////////////////////// - -wire vx_dram_req_valid; -wire vx_dram_req_rw; -wire [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen; -wire [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; -wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data; -wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; -wire vx_dram_req_ready; - -wire vx_dram_rsp_valid; -wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; -wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; -wire vx_dram_rsp_ready; - -wire vx_csr_io_req_valid; -wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; -wire [11:0] vx_csr_io_req_addr; -wire vx_csr_io_req_rw; -wire [31:0] vx_csr_io_req_data; -wire vx_csr_io_req_ready; - -wire vx_csr_io_rsp_valid; -wire [31:0] vx_csr_io_rsp_data; -wire vx_csr_io_rsp_ready; - -wire vx_busy; - -reg vx_reset; -reg vx_enabled; - -// CMD variables ////////////////////////////////////////////////////////////// - -t_ccip_clAddr cmd_io_addr; -reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; -reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size; - -`ifdef SCOPE -wire [63:0] cmd_scope_rdata; -wire [63:0] cmd_scope_wdata; -wire cmd_scope_read; -wire cmd_scope_write; -`endif - -reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; -reg [11:0] cmd_csr_addr; -reg [31:0] cmd_csr_rdata; -reg [31:0] cmd_csr_wdata; - -// MMIO controller //////////////////////////////////////////////////////////// - -`IGNORE_WARNINGS_BEGIN -t_ccip_c0_ReqMmioHdr mmio_hdr; -`IGNORE_WARNINGS_END -assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); - -`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!")) - -t_if_ccip_c2_Tx mmio_tx; -assign af2cp_sTxPort.c2 = mmio_tx; - -`ifdef SCOPE -assign cmd_scope_wdata = 64'(cp2af_sRxPort.c0.data); -assign cmd_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_SCOPE_READ == mmio_hdr.address); -assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address); -`endif - -/* -`DEBUG_BEGIN -wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid; -wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid; -wire cp2af_sRxPort_c0_rspValid = cp2af_sRxPort.c0.rspValid; -wire cp2af_sRxPort_c1_rspValid = cp2af_sRxPort.c1.rspValid; -wire cp2af_sRxPort_c0TxAlmFull = cp2af_sRxPort.c0TxAlmFull; -wire cp2af_sRxPort_c1TxAlmFull = cp2af_sRxPort.c1TxAlmFull; -wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address; -wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length; -wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid; -wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_sRxPort.c0.hdr.mdata; -`DEBUG_END -*/ - -wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 3'(cp2af_sRxPort.c0.data) : 3'h0; - -`ifdef SCOPE -reg scope_start; -`endif - -// disable assertions until reset -`ifndef VERILATOR -initial begin - $assertoff; -end -`endif - -always @(posedge clk) begin - if (reset) begin - `ifndef VERILATOR - $asserton; // enable assertions - `endif - mmio_tx.mmioRdValid <= 0; - mmio_tx.hdr <= 0; - `ifdef SCOPE - scope_start <= 0; - `endif - end else begin - mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; - mmio_tx.hdr.tid <= mmio_hdr.tid; - `ifdef SCOPE - scope_start <= cp2af_sRxPort.c0.mmioWrValid; - `endif - end - - // serve MMIO write request - if (cp2af_sRxPort.c0.mmioWrValid) begin - case (mmio_hdr.address) - MMIO_IO_ADDR: begin - cmd_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_IO_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_ccip_clAddr'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_MEM_ADDR: begin - cmd_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_local_mem_addr'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_DATA_SIZE: begin - cmd_data_size <= $bits(cmd_data_size)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_DATA_SIZE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_CMD_TYPE: begin - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CMD_TYPE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_type)'(cp2af_sRxPort.c0.data)); - `endif - end - `ifdef SCOPE - MMIO_SCOPE_WRITE: begin - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_SCOPE_WRITE: addr=%0h, data=%0h", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)); - `endif - end - `endif - MMIO_CSR_CORE: begin - cmd_csr_core <= $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_CORE: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_CSR_ADDR: begin - cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_ADDR: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_CSR_DATA: begin - cmd_csr_wdata <= $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_DATA: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); - `endif - end - default: begin - `ifdef DBG_PRINT_OPAE - $display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); - `endif - end - endcase - end - - // serve MMIO read requests - if (cp2af_sRxPort.c0.mmioRdValid) begin - case (mmio_hdr.address) - // AFU header - 16'h0000: mmio_tx.data <= { - 4'b0001, // Feature type = AFU - 8'b0, // reserved - 4'b0, // afu minor revision = 0 - 7'b0, // reserved - 1'b1, // end of DFH list = 1 - 24'b0, // next DFH offset = 0 - 4'b0, // afu major revision = 0 - 12'b0 // feature ID = 0 - }; - AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low - AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi - 16'h0006: mmio_tx.data <= 64'h0; // next AFU - 16'h0008: mmio_tx.data <= 64'h0; // reserved - MMIO_STATUS: begin - mmio_tx.data <= 64'(state); - `ifdef DBG_PRINT_OPAE - if (state != STATE_WIDTH'(mmio_tx.data)) begin - $display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state); - end - `endif - end - MMIO_CSR_READ: begin - mmio_tx.data <= 64'(cmd_csr_rdata); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_csr_rdata); - `endif - end - `ifdef SCOPE - MMIO_SCOPE_READ: begin - mmio_tx.data <= cmd_scope_rdata; - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_SCOPE_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_scope_rdata); - `endif - end - `endif - default: begin - mmio_tx.data <= 64'h0; - `ifdef DBG_PRINT_OPAE - $display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address); - `endif - end - endcase - end -end - -// COMMAND FSM //////////////////////////////////////////////////////////////// - -wire cmd_read_done; -wire cmd_write_done; -wire cmd_csr_done; -wire cmd_run_done; - -reg [$clog2(RESET_DELAY+1)-1:0] vx_reset_ctr; -always @(posedge clk) begin - if (state == STATE_IDLE) begin - vx_reset_ctr <= 0; - end else if (state == STATE_START) begin - vx_reset_ctr <= vx_reset_ctr + 1; - end -end - -always @(posedge clk) begin - if (reset) begin - state <= STATE_IDLE; - vx_reset <= 0; - vx_enabled <= 0; - end else begin - - vx_reset <= 0; - - case (state) - STATE_IDLE: begin - case (cmd_type) - CMD_MEM_READ: begin - `ifdef DBG_PRINT_OPAE - $display("%t: STATE READ: ia=%0h addr=%0h size=%0d", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size); - `endif - state <= STATE_READ; - end - CMD_MEM_WRITE: begin - `ifdef DBG_PRINT_OPAE - $display("%t: STATE WRITE: ia=%0h addr=%0h size=%0d", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size); - `endif - state <= STATE_WRITE; - end - CMD_RUN: begin - `ifdef DBG_PRINT_OPAE - $display("%t: STATE START", $time); - `endif - vx_reset <= 1; - vx_enabled <= 1; - state <= STATE_START; - end - CMD_CSR_READ: begin - `ifdef DBG_PRINT_OPAE - $display("%t: STATE CSR_READ: addr=%0h", $time, cmd_csr_addr); - `endif - state <= STATE_CSR_READ; - end - CMD_CSR_WRITE: begin - `ifdef DBG_PRINT_OPAE - $display("%t: STATE CSR_WRITE: addr=%0h data=%0d", $time, cmd_csr_addr, cmd_csr_wdata); - `endif - state <= STATE_CSR_WRITE; - end - default: begin - state <= state; - end - endcase - end - - STATE_READ: begin - if (cmd_read_done) begin - state <= STATE_IDLE; - `ifdef DBG_PRINT_OPAE - $display("%t: STATE IDLE", $time); - `endif - end - end - - STATE_WRITE: begin - if (cmd_write_done) begin - state <= STATE_IDLE; - `ifdef DBG_PRINT_OPAE - $display("%t: STATE IDLE", $time); - `endif - end - end - - STATE_START: begin - // vortex reset cycles - if (vx_reset_ctr == $bits(vx_reset_ctr)'(RESET_DELAY)) - state <= STATE_RUN; - end - - STATE_RUN: begin - if (cmd_run_done) begin - state <= STATE_IDLE; - `ifdef DBG_PRINT_OPAE - $display("%t: STATE IDLE", $time); - `endif - end - end - - STATE_CSR_READ: begin - if (cmd_csr_done) begin - state <= STATE_IDLE; - `ifdef DBG_PRINT_OPAE - $display("%t: STATE IDLE", $time); - `endif - end - end - - STATE_CSR_WRITE: begin - if (cmd_csr_done) begin - state <= STATE_IDLE; - `ifdef DBG_PRINT_OPAE - $display("%t: STATE IDLE", $time); - `endif - end - end - - default: begin - state <= state; - end - - endcase - end -end - -// AVS Controller ///////////////////////////////////////////////////////////// - -wire dram_req_valid; -wire dram_req_rw; -t_local_mem_byte_mask dram_req_byteen; -t_local_mem_addr dram_req_addr; -t_local_mem_data dram_req_data; -wire [AVS_REQ_TAGW:0] dram_req_tag; -wire dram_req_ready; - -wire dram_rsp_valid; -t_local_mem_data dram_rsp_data; -wire [AVS_REQ_TAGW:0] dram_rsp_tag; -wire dram_rsp_ready; - -wire cci_dram_req_valid; -wire cci_dram_req_rw; -t_local_mem_byte_mask cci_dram_req_byteen; -t_local_mem_addr cci_dram_req_addr; -t_local_mem_data cci_dram_req_data; -wire [AVS_REQ_TAGW-1:0] cci_dram_req_tag; -wire cci_dram_req_ready; - -wire cci_dram_rsp_valid; -t_local_mem_data cci_dram_rsp_data; -wire [AVS_REQ_TAGW-1:0] cci_dram_rsp_tag; -wire cci_dram_rsp_ready; - -wire vx_dram_req_valid_qual; -t_local_mem_addr vx_dram_req_addr_qual; -t_local_mem_byte_mask vx_dram_req_byteen_qual; -t_local_mem_data vx_dram_req_data_qual; -wire [AVS_REQ_TAGW-1:0] vx_dram_req_tag_qual; - -wire [(1 << VX_DRAM_LINE_IDX)-1:0][`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data_unqual; -wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual; - -wire cci_dram_rd_req_valid, cci_dram_wr_req_valid; -wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; -wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout; - -//-- - -assign cci_dram_req_valid = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid; - -assign cci_dram_req_addr = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_addr : cci_dram_rd_req_addr; - -assign cci_dram_req_rw = (CMD_MEM_WRITE == state); - -assign cci_dram_req_byteen = {64{1'b1}}; - -assign cci_dram_req_data = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; - -assign cci_dram_req_tag = AVS_REQ_TAGW'(0); - -`UNUSED_VAR (cci_dram_rsp_tag) - -//-- - -assign vx_dram_req_valid_qual = vx_dram_req_valid && vx_enabled; - -assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH]; - -if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin - wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]; - wire [VX_DRAM_LINE_IDX-1:0] vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0]; - assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3)); - assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW); - assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx}; - assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx]; -end else begin - assign vx_dram_req_byteen_qual = vx_dram_req_byteen; - assign vx_dram_req_tag_qual = vx_dram_req_tag; - assign vx_dram_req_data_qual = vx_dram_req_data; - assign vx_dram_rsp_data = vx_dram_rsp_data_unqual; -end - -assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX]; - -//-- - -VX_mem_arb #( - .NUM_REQS (2), - .DATA_WIDTH ($bits(t_local_mem_data)), - .ADDR_WIDTH ($bits(t_local_mem_addr)), - .TAG_IN_WIDTH (AVS_REQ_TAGW), - .TAG_OUT_WIDTH (AVS_REQ_TAGW+1) -) dram_arb ( - .clk (clk), - .reset (reset), - - // Source request - .req_valid_in ({cci_dram_req_valid, vx_dram_req_valid_qual}), - .req_rw_in ({cci_dram_req_rw, vx_dram_req_rw}), - .req_byteen_in ({cci_dram_req_byteen, vx_dram_req_byteen_qual}), - .req_addr_in ({cci_dram_req_addr, vx_dram_req_addr_qual}), - .req_data_in ({cci_dram_req_data, vx_dram_req_data_qual}), - .req_tag_in ({cci_dram_req_tag, vx_dram_req_tag_qual}), - .req_ready_in ({cci_dram_req_ready, vx_dram_req_ready}), - - // DRAM request - .req_valid_out (dram_req_valid), - .req_rw_out (dram_req_rw), - .req_byteen_out (dram_req_byteen), - .req_addr_out (dram_req_addr), - .req_data_out (dram_req_data), - .req_tag_out (dram_req_tag), - .req_ready_out (dram_req_ready), - - // Source response - .rsp_valid_out ({cci_dram_rsp_valid, vx_dram_rsp_valid}), - .rsp_data_out ({cci_dram_rsp_data, vx_dram_rsp_data_unqual}), - .rsp_tag_out ({cci_dram_rsp_tag, vx_dram_rsp_tag_unqual}), - .rsp_ready_out ({cci_dram_rsp_ready, vx_dram_rsp_ready}), - - // DRAM response - .rsp_valid_in (dram_rsp_valid), - .rsp_tag_in (dram_rsp_tag), - .rsp_data_in (dram_rsp_data), - .rsp_ready_in (dram_rsp_ready) -); - -//-- - -VX_avs_wrapper #( - .AVS_DATAW ($bits(t_local_mem_data)), - .AVS_ADDRW ($bits(t_local_mem_addr)), - .AVS_BURSTW ($bits(t_local_mem_burst_cnt)), - .AVS_BANKS (NUM_LOCAL_MEM_BANKS), - .REQ_TAGW (AVS_REQ_TAGW+1), - .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE) -) avs_wrapper ( - .clk (clk), - .reset (reset), - - // AVS bus - .avs_writedata (avs_writedata), - .avs_readdata (avs_readdata), - .avs_address (avs_address), - .avs_waitrequest (avs_waitrequest), - .avs_write (avs_write), - .avs_read (avs_read), - .avs_byteenable (avs_byteenable), - .avs_burstcount (avs_burstcount), - .avs_readdatavalid (avs_readdatavalid), - .avs_bankselect (mem_bank_select), - - // DRAM request - .dram_req_valid (dram_req_valid), - .dram_req_rw (dram_req_rw), - .dram_req_byteen (dram_req_byteen), - .dram_req_addr (dram_req_addr), - .dram_req_data (dram_req_data), - .dram_req_tag (dram_req_tag), - .dram_req_ready (dram_req_ready), - - // DRAM response - .dram_rsp_valid (dram_rsp_valid), - .dram_rsp_data (dram_rsp_data), - .dram_rsp_tag (dram_rsp_tag), - .dram_rsp_ready (dram_rsp_ready) -); - -// CCI-P Read Request /////////////////////////////////////////////////////////// - -reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads; -wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next; -reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_ctr; -reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr; -wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next; -reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_addr_unqual; -wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag; -reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr; -t_ccip_clAddr cci_rd_req_addr; - -reg cci_rd_req_enable, cci_rd_req_wait; - -wire cci_rdq_push, cci_rdq_pop; -wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din; -wire cci_rdq_empty; - -always @(*) begin - af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0); - af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr; - af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag); -end - -wire cci_dram_wr_req_fire = cci_dram_wr_req_valid && cci_dram_req_ready; - -wire cci_rd_req_fire = af2cp_sTxPort.c0.valid; -wire cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; - -assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr); -assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata); - -assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0); - -assign cci_rdq_pop = cci_dram_wr_req_fire; -assign cci_rdq_push = cci_rd_rsp_fire; -assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag}; - -assign cci_pending_reads_next = cci_pending_reads - + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 : - (!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); - -assign cci_dram_wr_req_valid = !cci_rdq_empty; - -assign cci_dram_wr_req_addr = cci_dram_wr_req_addr_unqual + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout))); - -assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait; - -assign cmd_write_done = (cci_dram_wr_req_ctr == cmd_data_size); - -// Send read requests to CCI -always @(posedge clk) begin - if (reset) begin - cci_rd_req_addr <= 0; - cci_rd_req_ctr <= 0; - cci_rd_rsp_ctr <= 0; - cci_pending_reads <= 0; - cci_rd_req_enable <= 0; - cci_rd_req_wait <= 0; - cci_dram_wr_req_ctr <= 0; - cci_dram_wr_req_addr_unqual <= 0; - end - else begin - if ((STATE_IDLE == state) - && (CMD_MEM_WRITE == cmd_type)) begin - cci_rd_req_addr <= cmd_io_addr; - cci_rd_req_ctr <= 0; - cci_rd_rsp_ctr <= 0; - cci_pending_reads <= 0; - cci_rd_req_enable <= (cmd_data_size != 0); - cci_rd_req_wait <= 0; - cci_dram_wr_req_ctr <= 0; - cci_dram_wr_req_addr_unqual <= cmd_mem_addr; - end - - cci_rd_req_enable <= (STATE_WRITE == state) - && (cci_rd_req_ctr_next != cmd_data_size) - && (cci_pending_reads_next != CCI_RD_QUEUE_SIZE) - && !cp2af_sRxPort.c0TxAlmFull; - - if (cci_rd_req_fire) begin - cci_rd_req_addr <= cci_rd_req_addr + 1; - cci_rd_req_ctr <= cci_rd_req_ctr_next; - if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin - cci_rd_req_wait <= 1; // end current request batch - end - `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next); - `endif - end - - if (cci_rd_rsp_fire) begin - cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1); - if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin - cci_rd_req_wait <= 0; // restart new request batch - end - `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data); - `endif - end - - /*if (cci_rdq_pop) begin - `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads_next); - `endif - end*/ - - if (cci_dram_wr_req_fire) begin - cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); - cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1); - end - - cci_pending_reads <= cci_pending_reads_next; - end -end - -VX_fifo_queue #( - .DATAW (CCI_RD_RQ_DATAW), - .SIZE (CCI_RD_QUEUE_SIZE), - .FASTRAM (1) -) cci_rd_req_queue ( - .clk (clk), - .reset (reset), - .push (cci_rdq_push), - .pop (cci_rdq_pop), - .data_in (cci_rdq_din), - .data_out (cci_rdq_dout), - .empty (cci_rdq_empty), - `UNUSED_PIN (full), - `UNUSED_PIN (size) -); - -`ifdef VERILATOR -`DEBUG_BLOCK( - reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask; - always @(posedge clk) begin - if (reset) begin - dbg_cci_rd_rsp_mask <= 0; - end else begin - if (cci_rd_rsp_fire) begin - if (cci_rd_rsp_ctr == 0) begin - dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag); - end else begin - assert(!dbg_cci_rd_rsp_mask[cci_rd_rsp_tag]); - dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1; - end - end - end - end -) -`endif - -// CCI-P Write Request ////////////////////////////////////////////////////////// - -reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes; -wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next; -reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr; -reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; -reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_unqual; -t_ccip_clAddr cci_wr_req_addr; - -always @(*) begin - af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0); - af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr; - af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode - af2cp_sTxPort.c1.data = t_ccip_clData'(cci_dram_rsp_data); -end - -wire cci_wr_req_fire = af2cp_sTxPort.c1.valid; -wire cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; - -wire cci_dram_rd_req_fire = cci_dram_rd_req_valid && cci_dram_req_ready; - -assign cci_pending_writes_next = cci_pending_writes - + $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 : - (!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0); - -assign cci_dram_rd_req_valid = (cci_dram_rd_req_ctr != 0); - -assign cci_dram_rd_req_addr = cci_dram_rd_req_addr_unqual; - -assign af2cp_sTxPort.c1.valid = cci_dram_rsp_valid; -assign cci_dram_rsp_ready = !cp2af_sRxPort.c1TxAlmFull; - -assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); - -// Send write requests to CCI -always @(posedge clk) -begin - if (reset) begin - cci_wr_req_addr <= 0; - cci_wr_req_ctr <= 0; - cci_pending_writes <= 0; - cci_dram_rd_req_ctr <= 0; - cci_dram_rd_req_addr_unqual <= 0; - end - else begin - if ((STATE_IDLE == state) - && (CMD_MEM_READ == cmd_type)) begin - cci_wr_req_addr <= cmd_io_addr; - cci_wr_req_ctr <= cmd_data_size; - cci_pending_writes <= 0; - cci_dram_rd_req_ctr <= cmd_data_size; - cci_dram_rd_req_addr_unqual <= cmd_mem_addr; - end - - if (cci_wr_req_fire) begin - assert(cci_wr_req_ctr != 0); - cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); - cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); - `ifdef DBG_PRINT_OPAE - $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, af2cp_sTxPort.c1.data); - `endif - end - - /*`ifdef DBG_PRINT_OPAE - if (cci_wr_rsp_fire) begin - $display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes_next); - end - `endif*/ - - if (cci_dram_rd_req_fire) begin - cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1); - cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1); - end - - cci_pending_writes <= cci_pending_writes_next; - end -end - -// CSRs /////////////////////////////////////////////////////////////////////// - -reg csr_io_req_sent; - -assign vx_csr_io_req_valid = !csr_io_req_sent - && ((STATE_CSR_READ == state || STATE_CSR_WRITE == state)); -assign vx_csr_io_req_coreid = cmd_csr_core; -assign vx_csr_io_req_rw = (STATE_CSR_WRITE == state); -assign vx_csr_io_req_addr = cmd_csr_addr; -assign vx_csr_io_req_data = cmd_csr_wdata; - -assign vx_csr_io_rsp_ready = 1; - -assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid; - -always @(posedge clk) begin - if (reset) begin - csr_io_req_sent <= 0; - end else begin - if (vx_csr_io_req_valid && vx_csr_io_req_ready) begin - csr_io_req_sent <= 1; - end - if (cmd_csr_done) begin - csr_io_req_sent <= 0; - end - end - - if ((STATE_CSR_READ == state) - && vx_csr_io_rsp_ready - && vx_csr_io_rsp_valid) begin - cmd_csr_rdata <= vx_csr_io_rsp_data; - end -end - -// Vortex ///////////////////////////////////////////////////////////////////// - -assign cmd_run_done = !vx_busy; - -Vortex #() vortex ( - `SCOPE_BIND_afu_vortex - - .clk (clk), - .reset (reset | vx_reset), - - // DRAM request - .dram_req_valid (vx_dram_req_valid), - .dram_req_rw (vx_dram_req_rw), - .dram_req_byteen(vx_dram_req_byteen), - .dram_req_addr (vx_dram_req_addr), - .dram_req_data (vx_dram_req_data), - .dram_req_tag (vx_dram_req_tag), - .dram_req_ready (vx_dram_req_ready), - - // DRAM response - .dram_rsp_valid (vx_dram_rsp_valid), - .dram_rsp_data (vx_dram_rsp_data), - .dram_rsp_tag (vx_dram_rsp_tag), - .dram_rsp_ready (vx_dram_rsp_ready), - - // CSR Request - .csr_req_valid (vx_csr_io_req_valid), - .csr_req_coreid (vx_csr_io_req_coreid), - .csr_req_addr (vx_csr_io_req_addr), - .csr_req_rw (vx_csr_io_req_rw), - .csr_req_data (vx_csr_io_req_data), - .csr_req_ready (vx_csr_io_req_ready), - - // CSR Response - .csr_rsp_valid (vx_csr_io_rsp_valid), - .csr_rsp_data (vx_csr_io_rsp_data), - .csr_rsp_ready (vx_csr_io_rsp_ready), - - // status - .busy (vx_busy), - `UNUSED_PIN (ebreak) -); - -// SCOPE ////////////////////////////////////////////////////////////////////// - -`ifdef SCOPE - -`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioRdValid, cp2af_sRxPort.c0.mmioRdValid); -`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioWrValid, cp2af_sRxPort.c0.mmioWrValid); -`SCOPE_ASSIGN (mmio_hdr_address, mmio_hdr.address); -`SCOPE_ASSIGN (mmio_hdr_length, mmio_hdr.length); -`SCOPE_ASSIGN (ccip_sRxPort_c0_hdr_mdata, cp2af_sRxPort.c0.hdr.mdata); -`SCOPE_ASSIGN (ccip_sRxPort_c0_rspValid, cp2af_sRxPort.c0.rspValid); -`SCOPE_ASSIGN (ccip_sRxPort_c1_rspValid, cp2af_sRxPort.c1.rspValid); -`SCOPE_ASSIGN (ccip_sTxPort_c0_valid, af2cp_sTxPort.c0.valid); -`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_address, af2cp_sTxPort.c0.hdr.address); -`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_mdata, af2cp_sTxPort.c0.hdr.mdata); -`SCOPE_ASSIGN (ccip_sTxPort_c1_valid, af2cp_sTxPort.c1.valid); -`SCOPE_ASSIGN (ccip_sTxPort_c1_hdr_address, af2cp_sTxPort.c1.hdr.address); -`SCOPE_ASSIGN (ccip_sTxPort_c2_mmioRdValid, af2cp_sTxPort.c2.mmioRdValid); -`SCOPE_ASSIGN (ccip_sRxPort_c0TxAlmFull, cp2af_sRxPort.c0TxAlmFull); -`SCOPE_ASSIGN (ccip_sRxPort_c1TxAlmFull, cp2af_sRxPort.c1TxAlmFull); -`SCOPE_ASSIGN (avs_address, avs_address); -`SCOPE_ASSIGN (avs_waitrequest, avs_waitrequest); -`SCOPE_ASSIGN (avs_write_fire, avs_write && !avs_waitrequest); -`SCOPE_ASSIGN (avs_read_fire, avs_read && !avs_waitrequest); -`SCOPE_ASSIGN (avs_byteenable, avs_byteenable); -`SCOPE_ASSIGN (avs_burstcount, avs_burstcount); -`SCOPE_ASSIGN (avs_readdatavalid, avs_readdatavalid); -`SCOPE_ASSIGN (mem_bank_select, mem_bank_select); -`SCOPE_ASSIGN (ccip_dram_rd_req_ctr, cci_dram_rd_req_ctr); -`SCOPE_ASSIGN (ccip_dram_wr_req_ctr, cci_dram_wr_req_ctr); -`SCOPE_ASSIGN (ccip_rd_req_ctr, cci_rd_req_ctr); -`SCOPE_ASSIGN (ccip_rd_rsp_ctr, cci_rd_rsp_ctr); -`SCOPE_ASSIGN (ccip_wr_req_ctr, cci_wr_req_ctr); - -wire scope_changed = `SCOPE_TRIGGER; - -VX_scope #( - .DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})), - .BUSW (64), - .SIZE (`SCOPE_SIZE), - .UPDW ($bits({`SCOPE_UPDATE_LIST})) -) scope ( - .clk (clk), - .reset (reset), - .start (scope_start), - .stop (1'b0), - .changed (scope_changed), - .data_in ({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST}), - .bus_in (cmd_scope_wdata), - .bus_out (cmd_scope_rdata), - .bus_read (cmd_scope_read), - .bus_write(cmd_scope_write) -); - -`endif - -endmodule \ No newline at end of file +`ifndef __VORTEX_AFU__ +`define __VORTEX_AFU__ + +`include "ccip_if_pkg.sv" + +`define PLATFORM_PROVIDES_LOCAL_MEMORY +`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH 26 +`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH 512 +`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4 + +`include "local_mem_cfg_pkg.sv" + +`define AFU_ACCEL_NAME "vortex_afu" +`define AFU_ACCEL_UUID 128'h35f9452b_25c2_434c_93d5_6f8c60db361c + +`define AFU_IMAGE_CMD_CSR_READ 4 +`define AFU_IMAGE_CMD_CSR_WRITE 5 +`define AFU_IMAGE_CMD_MEM_READ 1 +`define AFU_IMAGE_CMD_MEM_WRITE 2 +`define AFU_IMAGE_CMD_RUN 3 +`define AFU_IMAGE_MMIO_CMD_TYPE 10 +`define AFU_IMAGE_MMIO_CSR_CORE 24 +`define AFU_IMAGE_MMIO_CSR_ADDR 26 +`define AFU_IMAGE_MMIO_CSR_DATA 28 +`define AFU_IMAGE_MMIO_CSR_READ 30 +`define AFU_IMAGE_MMIO_DATA_SIZE 16 +`define AFU_IMAGE_MMIO_IO_ADDR 12 +`define AFU_IMAGE_MMIO_MEM_ADDR 14 +`define AFU_IMAGE_MMIO_SCOPE_READ 20 +`define AFU_IMAGE_MMIO_SCOPE_WRITE 22 +`define AFU_IMAGE_MMIO_STATUS 18 + +`define AFU_IMAGE_POWER 0 +`define AFU_TOP_IFC "ccip_std_afu_avalon_mm" + +`endif \ No newline at end of file diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 082f7f66..610935f7 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -99,8 +99,8 @@ module VX_bank #( wire drsq_pop; wire drsq_empty; - - wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata_st0; + + wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata; wire drsq_push = dram_rsp_valid && dram_rsp_ready; @@ -119,7 +119,7 @@ module VX_bank #( .push (drsq_push), .pop (drsq_pop), .data_in (dram_rsp_data), - .data_out(drsq_filldata_st0), + .data_out(drsq_filldata), .empty (drsq_empty), .full (drsq_full), `UNUSED_PIN (size) @@ -127,9 +127,9 @@ module VX_bank #( end else begin `UNUSED_VAR (dram_rsp_valid) `UNUSED_VAR (dram_rsp_data) - assign drsq_empty = 1; - assign drsq_filldata_st0 = 0; - assign dram_rsp_ready = 0; + assign drsq_empty = 1; + assign drsq_filldata = 0; + assign dram_rsp_ready = 0; end wire creq_pop; @@ -194,10 +194,9 @@ module VX_bank #( wire is_mshr_st0, is_mshr_st1; wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1; wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1; - wire [`WORD_WIDTH-1:0] readword_st0, readword_st1; wire [`CACHE_LINE_WIDTH-1:0] readdata_st0, readdata_st1; wire [`WORD_WIDTH-1:0] writeword_st0, writeword_st1; - wire [`CACHE_LINE_WIDTH-1:0] writedata_st0, writedata_st1; + wire [`CACHE_LINE_WIDTH-1:0] filldata_st0, filldata_st1; wire [`TAG_SELECT_BITS-1:0] readtag_st0, readtag_st1; wire miss_st0, miss_st1; wire force_miss_st0, force_miss_st1; @@ -259,14 +258,14 @@ module VX_bank #( assign is_mshr_st0 = mshr_pop_unqual; assign is_fill_st0 = drsq_pop_unqual; - assign valid_st0 = drsq_pop || mshr_pop || creq_pop; + assign valid_st0 = mshr_pop || drsq_pop || creq_pop; assign addr_st0 = creq_pop_unqual ? creq_addr_st0 : mshr_addr_st0; assign tag_st0 = creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) : `REQ_TAG_WIDTH'(mshr_tag_st0); assign mem_rw_st0 = creq_pop_unqual ? creq_rw_st0 : mshr_rw_st0; assign byteen_st0 = creq_pop_unqual ? creq_byteen_st0 : mshr_byteen_st0; assign req_tid_st0 = creq_pop_unqual ? creq_tid_st0 : mshr_tid_st0; assign writeword_st0 = creq_pop_unqual ? creq_writeword_st0 : mshr_writeword_st0; - assign writedata_st0 = drsq_filldata_st0; + assign filldata_st0 = drsq_filldata; if (`WORD_SELECT_BITS != 0) begin assign wsel_st0 = creq_pop_unqual ? creq_wsel_st0 : mshr_wsel_st0; @@ -307,9 +306,9 @@ if (DRAM_ENABLE) begin .stall (pipeline_stall), // read/Fill - .lookup_in (valid_st0 && !is_fill_st0), + .lookup_in (creq_pop || mshr_pop), .raddr_in (addr_st0), - .do_fill_in (valid_st0 && is_fill_st0), + .do_fill_in (drsq_pop), .miss_out (miss_st0), .readtag_out (readtag_st0), .dirty_out (dirty_st0), @@ -388,36 +387,16 @@ end else begin end VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .RESETW (1) ) pipe_reg2 ( .clk (clk), .reset (reset), .enable (!pipeline_stall), - .data_in ({valid_st0, mshr_push_st0, crsq_push_st0, dreq_push_st0, do_writeback_st0, core_req_hit_st0, is_mshr_st0, writeen_st0, force_miss_st0, is_fill_st0, addr_st0, wsel_st0, readword_st0, writeword_st0, readtag_st0, miss_st0, writedata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), - .data_out ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_fill_st1, addr_st1, wsel_st1, readword_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) + .data_in ({valid_st0, mshr_push_st0, crsq_push_st0, dreq_push_st0, do_writeback_st0, core_req_hit_st0, is_mshr_st0, writeen_st0, force_miss_st0, is_fill_st0, addr_st0, wsel_st0, dirtyb_st0, readdata_st0, writeword_st0, readtag_st0, miss_st0, filldata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), + .data_out ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_fill_st1, addr_st1, wsel_st1, dirtyb_st1, readdata_st1, writeword_st1, readtag_st1, miss_st1, filldata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) ); - if (WRITE_THROUGH) begin - - assign dirtyb_st1 = dirtyb_st0; - assign readdata_st1 = readdata_st0; - - end else begin - - VX_pipe_register #( - .DATAW (CACHE_LINE_SIZE + `CACHE_LINE_WIDTH), - .RESETW (0) - ) pipe_reg2b ( - .clk (clk), - .reset (reset), - .enable (!pipeline_stall), - .data_in ({dirtyb_st0, readdata_st0}), - .data_out ({dirtyb_st1, readdata_st1}) - ); - - end - `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin assign {debug_pc_st01, debug_wid_st01} = tag_st01[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; @@ -452,10 +431,7 @@ end // reading .readen_in (valid_st0 && !mem_rw_st0 && !is_fill_st0), - .raddr_in (addr_st0), - .rwsel_in (wsel_st0), - .rbyteen_in (byteen_st0), - .readword_out (readword_st0), + .raddr_in (addr_st0), .readdata_out (readdata_st0), .dirtyb_out (dirtyb_st0), @@ -466,7 +442,8 @@ end .wwsel_in (wsel_st01), .wbyteen_in (byteen_st01), .writeword_in (writeword_st01), - .writedata_in (writedata_st1) + .readdata_in (readdata_st1), + .filldata_in (filldata_st1) ); `ifdef DBG_CACHE_REQ_INFO @@ -490,7 +467,7 @@ end wire mshr_dequeue_st1 = valid_st1 && is_mshr_st1 && !mshr_push_unqual && !pipeline_stall; - // push missed requests as 'ready' if it was a forced miss that actually had a hit + // push a missed request as 'ready' if it was a forced miss that actually had a hit // or the fill request for this block is comming wire mshr_init_ready_state_st1 = !miss_st1 || incoming_fill_st1; @@ -521,7 +498,6 @@ end .enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}), .enqueue_is_mshr (is_mshr_st1), .enqueue_ready (mshr_init_ready_state_st1), - `UNUSED_PIN (enqueue_full), // lookup .lookup_ready (drsq_pop), @@ -570,9 +546,20 @@ end wire crsq_pop = core_rsp_valid && core_rsp_ready; - wire [`REQS_BITS-1:0] crsq_tid_st1 = req_tid_st1; - wire [CORE_TAG_WIDTH-1:0] crsq_tag_st1 = CORE_TAG_WIDTH'(tag_st1); - wire [`WORD_WIDTH-1:0] crsq_data_st1 = readword_st1; + wire [`REQS_BITS-1:0] crsq_tid_st1 = req_tid_st1; + wire [CORE_TAG_WIDTH-1:0] crsq_tag_st1 = CORE_TAG_WIDTH'(tag_st1); + wire [`WORD_WIDTH-1:0] crsq_data_st1; + + if (`WORD_SELECT_BITS != 0) begin + wire [`WORD_WIDTH-1:0] readword = readdata_st1[wsel_st1 * `WORD_WIDTH +: `WORD_WIDTH]; + for (genvar i = 0; i < WORD_SIZE; i++) begin + assign crsq_data_st1[i * 8 +: 8] = readword[i * 8 +: 8] & {8{byteen_st1[i]}}; + end + end else begin + for (genvar i = 0; i < WORD_SIZE; i++) begin + assign crsq_data_st1[i * 8 +: 8] = readdata_st1[i * 8 +: 8] & {8{byteen_st1[i]}}; + end + end VX_fifo_queue #( .DATAW (`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), @@ -612,13 +599,33 @@ end wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st1 : {readtag_st1, addr_st1[`LINE_SELECT_BITS-1:0]}; - wire [CACHE_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st1 : {CACHE_LINE_SIZE{1'b1}}; + wire [`CACHE_LINE_WIDTH-1:0] dreq_data; + wire [CACHE_LINE_SIZE-1:0] dreq_byteen, dreq_byteen_unqual; + + if (WRITE_THROUGH) begin + `UNUSED_VAR (dirtyb_st1) + if (`WORD_SELECT_BITS != 0) begin + for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin + assign dreq_byteen_unqual[i * WORD_SIZE +: WORD_SIZE] = (wsel_st1 == `WORD_SELECT_BITS'(i)) ? byteen_st1 : {WORD_SIZE{1'b0}}; + assign dreq_data[i * `WORD_WIDTH +: `WORD_WIDTH] = writeword_st1; + end + end else begin + assign dreq_byteen_unqual = byteen_st1; + assign dreq_data = writeword_st1; + end + end else begin + assign dreq_byteen_unqual = dirtyb_st1; + assign dreq_data = readdata_st1; + end + + assign dreq_byteen = writeback ? dreq_byteen_unqual : {CACHE_LINE_SIZE{1'b1}}; if (DRAM_ENABLE) begin always @(posedge clk) begin assert (!(dreq_push && !do_writeback_st1 && incoming_fill_st1)) else $error("%t: incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); - end + end + VX_fifo_queue #( .DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH), .SIZE (DREQ_SIZE), @@ -629,7 +636,7 @@ end .reset (reset), .push (dreq_push), .pop (dreq_pop), - .data_in ({writeback, dreq_byteen, dreq_addr, readdata_st1}), + .data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}), .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), .empty (dreq_empty), .full (dreq_full), @@ -639,6 +646,7 @@ end `UNUSED_VAR (dreq_push) `UNUSED_VAR (dreq_pop) `UNUSED_VAR (dreq_addr) + `UNUSED_VAR (dreq_data) `UNUSED_VAR (dreq_byteen) `UNUSED_VAR (readtag_st1) `UNUSED_VAR (dirtyb_st1) @@ -685,7 +693,7 @@ end $display("%t: cache%0d:%0d pipeline-stall: mshr=%b, cwbq=%b, dwbq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall); end if (drsq_pop) begin - $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drsq_filldata_st0); + $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drsq_filldata); end if (creq_pop) begin if (creq_rw_st0) @@ -698,7 +706,7 @@ end end if (dreq_push) begin if (do_writeback_st1) - $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st1, dreq_byteen, debug_wid_st1, debug_pc_st1); + $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), dreq_data, dreq_byteen, debug_wid_st1, debug_pc_st1); else $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st1, debug_pc_st1); end diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 389a66a6..1135495f 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -44,9 +44,6 @@ module VX_data_access #( `IGNORE_WARNINGS_BEGIN input wire[`LINE_ADDR_WIDTH-1:0] raddr_in, `IGNORE_WARNINGS_END - input wire [`UP(`WORD_SELECT_BITS)-1:0] rwsel_in, - input wire [WORD_SIZE-1:0] rbyteen_in, - output wire[`WORD_WIDTH-1:0] readword_out, output wire [`CACHE_LINE_WIDTH-1:0] readdata_out, output wire [CACHE_LINE_SIZE-1:0] dirtyb_out, @@ -59,11 +56,12 @@ module VX_data_access #( input wire [WORD_SIZE-1:0] wbyteen_in, input wire wfill_in, input wire [`WORD_WIDTH-1:0] writeword_in, - input wire [`CACHE_LINE_WIDTH-1:0] writedata_in + input wire [`CACHE_LINE_WIDTH-1:0] readdata_in, + input wire [`CACHE_LINE_WIDTH-1:0] filldata_in ); - wire [CACHE_LINE_SIZE-1:0] read_dirtyb, dirtyb_qual; - wire [`CACHE_LINE_WIDTH-1:0] read_data, readdata_qual; + wire [CACHE_LINE_SIZE-1:0] read_dirtyb; + wire [`CACHE_LINE_WIDTH-1:0] read_data; wire [CACHE_LINE_SIZE-1:0] byte_enable; wire [`CACHE_LINE_WIDTH-1:0] write_data; @@ -96,49 +94,29 @@ module VX_data_access #( ); wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wbyteen_qual; - wire [`CACHE_LINE_WIDTH-1:0] writeword_qual; + wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] writedata_qual; if (`WORD_SELECT_BITS != 0) begin for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin - assign wbyteen_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? wbyteen_in : {WORD_SIZE{1'b0}}; - assign writeword_qual[i * `WORD_WIDTH +: `WORD_WIDTH] = writeword_in; + assign wbyteen_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? wbyteen_in : {WORD_SIZE{1'b0}}; + assign writedata_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? writeword_in : readdata_in[i * `WORD_WIDTH +: `WORD_WIDTH]; end end else begin `UNUSED_VAR (wwsel_in) + `UNUSED_VAR (readdata_in) assign wbyteen_qual = wbyteen_in; - assign writeword_qual = writeword_in; + assign writedata_qual = writeword_in; end assign byte_enable = wfill_in ? {CACHE_LINE_SIZE{1'b1}} : wbyteen_qual; - assign write_data = wfill_in ? writedata_in : writeword_qual; + assign write_data = wfill_in ? filldata_in : writedata_qual; - assign write_enable = writeen_in && !stall; + assign write_enable = writeen_in && !stall; wire rw_hazard = DRAM_ENABLE && (raddr == waddr) && writeen_in; for (genvar i = 0; i < CACHE_LINE_SIZE; i++) begin - assign dirtyb_qual[i] = rw_hazard ? byte_enable[i] : read_dirtyb[i]; - assign readdata_qual[i * 8 +: 8] = (rw_hazard && byte_enable[i]) ? write_data[i * 8 +: 8] : read_data[i * 8 +: 8]; - end - - if (WRITE_THROUGH) begin - `UNUSED_VAR (dirtyb_qual) - assign dirtyb_out = wbyteen_qual; - assign readdata_out = writeword_qual; - end else begin - assign dirtyb_out = dirtyb_qual; - assign readdata_out = readdata_qual; - end - - if (`WORD_SELECT_BITS != 0) begin - wire [`WORD_WIDTH-1:0] readword = readdata_qual[rwsel_in * `WORD_WIDTH +: `WORD_WIDTH]; - for (genvar i = 0; i < WORD_SIZE; i++) begin - assign readword_out[i * 8 +: 8] = readword[i * 8 +: 8] & {8{rbyteen_in[i]}}; - end - end else begin - `UNUSED_VAR (rwsel_in) - for (genvar i = 0; i < WORD_SIZE; i++) begin - assign readword_out[i * 8 +: 8] = readdata_qual[i * 8 +: 8] & {8{rbyteen_in[i]}}; - end + assign dirtyb_out[i] = rw_hazard ? byte_enable[i] : read_dirtyb[i]; + assign readdata_out[i * 8 +: 8] = (rw_hazard && byte_enable[i]) ? write_data[i * 8 +: 8] : read_data[i * 8 +: 8]; end `ifdef DBG_PRINT_CACHE_DATA @@ -152,7 +130,7 @@ module VX_data_access #( end end if (readen_in) begin - $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), rdebug_wid, rdebug_pc, dirtyb_out, raddr, rwsel_in, read_data); + $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), rdebug_wid, rdebug_pc, dirtyb_out, raddr, read_data); end end end diff --git a/hw/rtl/cache/VX_data_store.v b/hw/rtl/cache/VX_data_store.v index 55d2f6be..4ab10bb0 100644 --- a/hw/rtl/cache/VX_data_store.v +++ b/hw/rtl/cache/VX_data_store.v @@ -45,14 +45,13 @@ module VX_data_store #( VX_dp_ram #( .DATAW(CACHE_LINE_SIZE * 8), .SIZE(`LINES_PER_BANK), - .BYTEENW(CACHE_LINE_SIZE), .RWCHECK(1) ) data ( .clk(clk), .waddr(write_addr), .raddr(read_addr), .wren(write_enable), - .byteen(byte_enable), + .byteen(1'b1), .rden(1'b1), .din(write_data), .dout(read_data) diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index ff6a358f..8c9c1057 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -37,7 +37,6 @@ module VX_miss_resrv #( input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data, input wire enqueue_is_mshr, input wire enqueue_ready, - output wire enqueue_full, // lookup input wire lookup_ready, @@ -55,80 +54,89 @@ module VX_miss_resrv #( ); `USE_FAST_BRAM reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; - reg [MSHR_SIZE-1:0] valid_table; - reg [MSHR_SIZE-1:0] ready_table; - reg [`LOG2UP(MSHR_SIZE)-1:0] schedule_ptr, restore_ptr; - reg [`LOG2UP(MSHR_SIZE)-1:0] head_ptr, tail_ptr; - reg [`LOG2UP(MSHR_SIZE+1)-1:0] size; - - assign enqueue_full = (size == $bits(size)'(MSHR_SIZE)); + reg [MSHR_SIZE-1:0] valid_table; + reg [MSHR_SIZE-1:0] ready_table; + reg [`LOG2UP(MSHR_SIZE)-1:0] schedule_ptr, schedule_n_ptr, restore_ptr; + reg [`LOG2UP(MSHR_SIZE)-1:0] head_ptr, tail_ptr; + reg [`LOG2UP(MSHR_SIZE)-1:0] used_r; + reg full_r; + reg [`MSHR_DATA_WIDTH-1:0] dout_r; + reg [`LINE_ADDR_WIDTH-1:0] schedule_addr_r; + reg schedule_valid_r; + wire [MSHR_SIZE-1:0] valid_address_match; for (genvar i = 0; i < MSHR_SIZE; i++) begin assign valid_address_match[i] = valid_table[i] && (addr_table[i] == lookup_addr); end assign lookup_match = (| valid_address_match); - - wire dequeue_ready = ready_table[schedule_ptr]; - - assign schedule_valid = dequeue_ready; - assign schedule_addr = addr_table[schedule_ptr]; - wire mshr_push = enqueue && !enqueue_is_mshr; + wire push_new = enqueue && !enqueue_is_mshr; + wire restore = enqueue && enqueue_is_mshr; wire [`LOG2UP(MSHR_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1); always @(posedge clk) begin if (reset) begin - valid_table <= 0; - ready_table <= 0; - schedule_ptr <= 0; - restore_ptr <= 0; - head_ptr <= 0; - tail_ptr <= 0; - size <= 0; + valid_table <= 0; + ready_table <= 0; + schedule_ptr <= 0; + schedule_n_ptr <= 1; + restore_ptr <= 0; + head_ptr <= 0; + tail_ptr <= 0; end else begin - if (lookup_ready) begin - ready_table <= ready_table | valid_address_match; - end - - if (enqueue) begin - assert(!enqueue_full); + if (enqueue) begin if (enqueue_is_mshr) begin - // returning missed msrq entry, restore schedule + // restore schedule, returning missed msrq entry valid_table[restore_ptr] <= 1; ready_table[restore_ptr] <= enqueue_ready; - restore_ptr <= restore_ptr + $bits(restore_ptr)'(1); - schedule_ptr <= head_ptr; + restore_ptr <= restore_ptr + $bits(restore_ptr)'(1); + schedule_ptr <= head_ptr; + schedule_n_ptr <= head_ptr_n; end else begin + // push new entry + assert(!full_r); valid_table[tail_ptr] <= 1; ready_table[tail_ptr] <= enqueue_ready; tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); - size <= size + $bits(size)'(1); end end else if (dequeue) begin - head_ptr <= head_ptr_n; + // remove scheduled entry from buffer + head_ptr <= head_ptr_n; restore_ptr <= head_ptr_n; valid_table[head_ptr] <= 0; - size <= size - $bits(size)'(1); + end + + if (lookup_ready) begin + ready_table <= ready_table | valid_address_match; end if (schedule) begin + // schedule next entry assert(schedule_valid); valid_table[schedule_ptr] <= 0; - ready_table[schedule_ptr] <= 0; - schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1); + ready_table[schedule_ptr] <= 0; + + schedule_ptr <= schedule_n_ptr; + if (MSHR_SIZE > 2) begin + schedule_n_ptr <= schedule_ptr + $bits(schedule_ptr)'(2); + end else begin // (SIZE == 2); + schedule_n_ptr <= ~schedule_n_ptr; + end end end end always @(posedge clk) begin - if (enqueue && !enqueue_is_mshr) begin + if (push_new) begin addr_table[tail_ptr] <= enqueue_addr; end end + wire [`MSHR_DATA_WIDTH-1:0] dout; + VX_dp_ram #( .DATAW(`MSHR_DATA_WIDTH), .SIZE(MSHR_SIZE), @@ -137,14 +145,51 @@ module VX_miss_resrv #( ) entries ( .clk(clk), .waddr(tail_ptr), - .raddr(schedule_ptr), - .wren(mshr_push), + .raddr(schedule_n_ptr), + .wren(push_new), .byteen(1'b1), .rden(1'b1), .din(enqueue_data), - .dout(schedule_data) + .dout(dout) ); + always @(posedge clk) begin + if (reset) begin + used_r <= 0; + full_r <= 0; + end else begin + used_r <= used_r + $bits(used_r)'($signed(2'(enqueue) - 2'(schedule))); + full_r <= (used_r == $bits(used_r)'(MSHR_SIZE-1)) && enqueue; + end + end + + always @(posedge clk) begin + if (reset) begin + schedule_valid_r <= 0; + end else begin + if (lookup_ready) begin + schedule_valid_r <= 1; + end else if (schedule) begin + schedule_valid_r <= ready_table[schedule_n_ptr]; + end + end + end + + always @(posedge clk) begin + if ((push_new && (used_r == 0 || (used_r == 1 && schedule))) + || restore) begin + schedule_addr_r <= enqueue_addr; + dout_r <= enqueue_data; + end else if (schedule) begin + schedule_addr_r <= addr_table[schedule_n_ptr]; + dout_r <= dout; + end + end + + assign schedule_valid = schedule_valid_r; + assign schedule_addr = schedule_addr_r; + assign schedule_data = dout_r; + `ifdef DBG_PRINT_CACHE_MSHR always @(posedge clk) begin if (lookup_ready || schedule || enqueue || dequeue) begin diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index d16aa406..21197e16 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -6,7 +6,7 @@ #define RESET_DELAY 2 #define ENABLE_DRAM_STALLS -#define DRAM_LATENCY 24 +#define DRAM_LATENCY 300 #define DRAM_RQ_SIZE 16 #define DRAM_STALLS_MODULO 16