diff --git a/ci/regression.sh b/ci/regression.sh index 9266a19c..a8196516 100755 --- a/ci/regression.sh +++ b/ci/regression.sh @@ -72,6 +72,9 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood # using FPNEW FPU core FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood +# using AXI bus +AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo + # adjust l1 block size to match l2 CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1" diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 65eb1ac0..75b77884 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -28,7 +28,12 @@ CFLAGS += -DDUMP_PERF_STATS LDFLAGS += -shared -pthread #LDFLAGS += -dynamiclib -pthread -TOP = Vortex +ifdef AXI_BUS + TOP = Vortex_axi + CFLAGS += -DAXI_BUS +else + TOP = Vortex +endif RTL_DIR = ../../hw/rtl DPI_DIR = ../../hw/dpi diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 358a6d2e..4bc65591 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -38,9 +38,9 @@ module VX_ibuffer #( wire going_empty = empty_r[i] || (alm_empty_r[i] && reading); VX_elastic_buffer #( - .DATAW (DATAW), - .SIZE (`IBUF_SIZE), - .OUTPUT_REG (1) + .DATAW (DATAW), + .SIZE (`IBUF_SIZE), + .OUT_REG (1) ) queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 1d48bf3e..59dc1970 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -22,7 +22,7 @@ module VX_icache_stage #( `UNUSED_PARAM (CORE_ID) `UNUSED_VAR (reset) - localparam OUTPUT_REG = 0; + localparam OUT_REG = 0; wire icache_req_fire = icache_req_if.valid && icache_req_if.ready; @@ -64,12 +64,12 @@ module VX_icache_stage #( wire [`NW_BITS-1:0] rsp_wid = rsp_tag; - wire stall_out = ~ifetch_rsp_if.ready && (0 == OUTPUT_REG && ifetch_rsp_if.valid); + wire stall_out = ~ifetch_rsp_if.ready && (0 == OUT_REG && 
ifetch_rsp_if.valid); VX_pipe_register #( .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32), .RESETW (1), - .DEPTH (OUTPUT_REG) + .DEPTH (OUT_REG) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index 19ff353b..3e59f28d 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -42,7 +42,8 @@ module VX_instr_demux ( wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(ibuffer_if.op_type); VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)), + .OUT_REG (1) ) alu_buffer ( .clk (clk), .reset (reset), @@ -61,7 +62,8 @@ module VX_instr_demux ( wire lsu_is_fence = `INST_LSU_IS_FENCE(ibuffer_if.op_mod); VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)), + .OUT_REG (1) ) lsu_buffer ( .clk (clk), .reset (reset), @@ -82,7 +84,8 @@ module VX_instr_demux ( wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid]; VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32), + .OUT_REG (1) ) csr_buffer ( .clk (clk), .reset (reset), @@ -101,7 +104,8 @@ module VX_instr_demux ( wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(ibuffer_if.op_type); VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)), + .OUT_REG (1) ) 
fpu_buffer ( .clk (clk), .reset (reset), @@ -123,7 +127,8 @@ module VX_instr_demux ( wire [31:0] gpu_rs2_data = gpr_rsp_if.rs2_data[tid]; VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)), + .OUT_REG (1) ) gpu_buffer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 06065d5d..a889216e 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -207,7 +207,7 @@ module VX_mem_unit # ( .DATA_SIZE (4), .TAG_IN_WIDTH (`DCORE_TAG_WIDTH), .TYPE ("P"), - .BUFFERED_REQ (1), + .BUFFERED_REQ (2), .BUFFERED_RSP (1) ) smem_arb ( .clk (clk), @@ -319,7 +319,7 @@ module VX_mem_unit # ( .TYPE ("R"), .TAG_SEL_IDX (1), // Skip 0 for NC flag .BUFFERED_REQ (1), - .BUFFERED_RSP (1) + .BUFFERED_RSP (2) ) mem_arb ( .clk (clk), .reset (mem_arb_reset), diff --git a/hw/rtl/Vortex_axi.v b/hw/rtl/Vortex_axi.v new file mode 100644 index 00000000..48432203 --- /dev/null +++ b/hw/rtl/Vortex_axi.v @@ -0,0 +1,124 @@ +`include "VX_define.vh" + +module Vortex_axi #( + parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH, + parameter AXI_ADDR_WIDTH = 32, + parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH, + localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8) +)( + // Clock + input wire clk, + input wire reset, + + // AXI write request + output wire m_axi_wvalid, + output wire m_axi_awvalid, + output wire [AXI_TID_WIDTH-1:0] m_axi_awid, + output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr, + output wire [7:0] m_axi_awlen, + output wire [2:0] m_axi_awsize, + output wire [1:0] m_axi_awburst, + output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata, + output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb, + input wire m_axi_wready, + input wire m_axi_awready, + + // AXI read request + output wire m_axi_arvalid, + output wire [AXI_TID_WIDTH-1:0] m_axi_arid, + output wire [AXI_ADDR_WIDTH-1:0] 
m_axi_araddr, + output wire [7:0] m_axi_arlen, + output wire [2:0] m_axi_arsize, + output wire [1:0] m_axi_arburst, + input wire m_axi_arready, + + // AXI read response + input wire m_axi_rvalid, + input wire [AXI_TID_WIDTH-1:0] m_axi_rid, + input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata, + output wire m_axi_rready, + + // Status + output wire busy +); + wire mem_req_valid; + wire mem_req_rw; + wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen; + wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr; + wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data; + wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag; + wire mem_req_ready; + + wire mem_rsp_valid; + wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data; + wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag; + wire mem_rsp_ready; + + VX_axi_adapter #( + .VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH), + .VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), + .VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_TID_WIDTH (AXI_TID_WIDTH) + ) axi_adapter ( + .mem_req_valid (mem_req_valid), + .mem_req_rw (mem_req_rw), + .mem_req_byteen (mem_req_byteen), + .mem_req_addr (mem_req_addr), + .mem_req_data (mem_req_data), + .mem_req_tag (mem_req_tag), + .mem_req_ready (mem_req_ready), + + .mem_rsp_valid (mem_rsp_valid), + .mem_rsp_data (mem_rsp_data), + .mem_rsp_tag (mem_rsp_tag), + .mem_rsp_ready (mem_rsp_ready), + + .m_axi_wvalid (m_axi_wvalid), + .m_axi_awvalid (m_axi_awvalid), + .m_axi_awid (m_axi_awid), + .m_axi_awaddr (m_axi_awaddr), + .m_axi_awlen (m_axi_awlen), + .m_axi_awsize (m_axi_awsize), + .m_axi_awburst (m_axi_awburst), + .m_axi_wdata (m_axi_wdata), + .m_axi_wstrb (m_axi_wstrb), + .m_axi_wready (m_axi_wready), + .m_axi_awready (m_axi_awready), + + .m_axi_arvalid (m_axi_arvalid), + .m_axi_arid (m_axi_arid), + .m_axi_araddr (m_axi_araddr), + .m_axi_arlen (m_axi_arlen), + .m_axi_arsize (m_axi_arsize), + .m_axi_arburst (m_axi_arburst), + .m_axi_arready (m_axi_arready), + + .m_axi_rvalid (m_axi_rvalid), + .m_axi_rid (m_axi_rid), + 
.m_axi_rdata (m_axi_rdata), + .m_axi_rready (m_axi_rready) + ); + + Vortex vortex ( + .clk (clk), + .reset (reset), + + .mem_req_valid (mem_req_valid), + .mem_req_rw (mem_req_rw), + .mem_req_byteen (mem_req_byteen), + .mem_req_addr (mem_req_addr), + .mem_req_data (mem_req_data), + .mem_req_tag (mem_req_tag), + .mem_req_ready (mem_req_ready), + + .mem_rsp_valid (mem_rsp_valid), + .mem_rsp_data (mem_rsp_data), + .mem_rsp_tag (mem_rsp_tag), + .mem_rsp_ready (mem_rsp_ready), + + .busy (busy) + ); + +endmodule \ No newline at end of file diff --git a/hw/rtl/afu/VX_avs_wrapper.v b/hw/rtl/afu/VX_avs_wrapper.v index 211aff00..34431b91 100644 --- a/hw/rtl/afu/VX_avs_wrapper.v +++ b/hw/rtl/afu/VX_avs_wrapper.v @@ -42,7 +42,7 @@ module VX_avs_wrapper #( ); localparam BANK_ADDRW = `LOG2UP(AVS_BANKS); - localparam OUTPUT_REG = (AVS_BANKS > 2); + localparam OUT_REG = (AVS_BANKS > 2); // Requests handling @@ -78,9 +78,9 @@ module VX_avs_wrapper #( `UNUSED_VAR (req_queue_size) VX_fifo_queue #( - .DATAW (REQ_TAG_WIDTH), - .SIZE (RD_QUEUE_SIZE), - .OUTPUT_REG (!OUTPUT_REG) + .DATAW (REQ_TAG_WIDTH), + .SIZE (RD_QUEUE_SIZE), + .OUT_REG (!OUT_REG) ) rd_req_queue ( .clk (clk), .reset (reset), @@ -122,9 +122,9 @@ module VX_avs_wrapper #( for (genvar i = 0; i < AVS_BANKS; i++) begin VX_fifo_queue #( - .DATAW (AVS_DATA_WIDTH), - .SIZE (RD_QUEUE_SIZE), - .OUTPUT_REG (!OUTPUT_REG) + .DATAW (AVS_DATA_WIDTH), + .SIZE (RD_QUEUE_SIZE), + .OUT_REG (!OUT_REG) ) rd_rsp_queue ( .clk (clk), .reset (reset), @@ -150,7 +150,7 @@ module VX_avs_wrapper #( .NUM_REQS (AVS_BANKS), .DATAW (AVS_DATA_WIDTH + REQ_TAG_WIDTH), .TYPE ("R"), - .BUFFERED (OUTPUT_REG ? 1 : 0) + .BUFFERED (OUT_REG ? 
1 : 0) ) rsp_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index b46a3804..1ef81393 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -520,8 +520,8 @@ VX_mem_arb #( .ADDR_WIDTH (LMEM_ADDR_WIDTH), .TAG_IN_WIDTH (AVS_REQ_TAGW), .TYPE ("P"), - .BUFFERED_REQ (0), - .BUFFERED_RSP (0) + .BUFFERED_REQ (1), + .BUFFERED_RSP (1) ) mem_arb ( .clk (clk), .reset (mem_arb_reset), @@ -731,9 +731,9 @@ end `RESET_RELAY (cci_rdq_reset); VX_fifo_queue #( - .DATAW (CCI_RD_QUEUE_DATAW), - .SIZE (CCI_RD_QUEUE_SIZE), - .OUTPUT_REG (1) + .DATAW (CCI_RD_QUEUE_DATAW), + .SIZE (CCI_RD_QUEUE_SIZE), + .OUT_REG (1) ) cci_rd_req_queue ( .clk (clk), .reset (cci_rdq_reset), @@ -880,7 +880,7 @@ assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_ assign cmd_run_done = !vx_busy; -Vortex #() vortex ( +Vortex vortex ( `SCOPE_BIND_afu_vortex .clk (clk), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 22db87f1..3577d3e6 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -148,7 +148,7 @@ module VX_bank #( wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1; wire [NUM_PORTS-1:0] pmask_st0, pmask_st1; wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] tag_st0, tag_st1; - wire [`CACHE_LINE_WIDTH-1:0] rdata_st1; + wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st1; wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; wire valid_st0, valid_st1; @@ -305,46 +305,15 @@ module VX_bank #( wire mreq_push_st1 = (read_st1 && miss_st1 && !mshr_pending_st1) || write_st1; - wire [`CACHE_LINE_WIDTH-1:0] line_wdata_st1; - wire [CACHE_LINE_SIZE-1:0] line_byteen_st1; - wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data_st1 = wdata_st1[0 +: NUM_PORTS * `WORD_WIDTH]; - if (`WORDS_PER_LINE > 1) begin - reg [`CACHE_LINE_WIDTH-1:0] line_wdata_r; - reg [CACHE_LINE_SIZE-1:0] line_byteen_r; - if (NUM_PORTS > 1) begin - always @(*) begin - line_wdata_r = 
'x; - line_byteen_r = 0; - for (integer i = 0; i < NUM_PORTS; ++i) begin - if (pmask_st1[i]) begin - line_wdata_r[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data_st1[i]; - line_byteen_r[wsel_st1[i] * WORD_SIZE +: WORD_SIZE] = byteen_st1[i]; - end - end - end - end else begin - always @(*) begin - line_wdata_r = {`WORDS_PER_LINE{creq_data_st1}}; - line_byteen_r = 0; - line_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1; - end - end - assign line_wdata_st1 = line_wdata_r; - assign line_byteen_st1 = line_byteen_r; - end else begin - `UNUSED_VAR (wsel_st1) - assign line_wdata_st1 = creq_data_st1; - assign line_byteen_st1 = byteen_st1; - end - VX_data_access #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), .CACHE_SIZE (CACHE_SIZE), .CACHE_LINE_SIZE(CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), + .NUM_PORTS (NUM_PORTS), .WORD_SIZE (WORD_SIZE), .WRITE_ENABLE (WRITE_ENABLE) ) data_access ( @@ -359,6 +328,8 @@ module VX_bank #( .stall (crsq_stall), .addr (addr_st1), + .wsel (wsel_st1), + .pmask (pmask_st1), // reading .readen (valid_st1 && read_st1), @@ -367,8 +338,8 @@ module VX_bank #( // writing .writeen (valid_st1 && writeen_st1), .is_fill (is_fill_st1), - .byteen (line_byteen_st1), - .write_data (line_wdata_st1), + .byteen (byteen_st1), + .write_data (creq_data_st1), .fill_data (wdata_st1) ); @@ -454,20 +425,13 @@ module VX_bank #( assign crsq_pmask = pmask_st1; assign crsq_tid = req_tid_st1; + assign crsq_data = rdata_st1; assign crsq_tag = tag_st1; - if (`WORDS_PER_LINE > 1) begin - for (genvar i = 0; i < NUM_PORTS; ++i) begin - assign crsq_data[i] = rdata_st1[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH]; - end - end else begin - assign crsq_data = rdata_st1; - end - VX_elastic_buffer #( - .DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)), - .SIZE (CRSQ_SIZE), - .OUTPUT_REG (1 == NUM_BANKS) + .DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)), + .SIZE (CRSQ_SIZE), + .OUT_REG (1 == NUM_BANKS) ) core_rsp_req ( .clk (clk), 
.reset (reset), diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index b8644b1e..138b452f 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -314,9 +314,9 @@ module VX_cache #( `RESET_RELAY (mrsq_reset); VX_elastic_buffer #( - .DATAW (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH), - .SIZE (MRSQ_SIZE), - .OUTPUT_REG (MRSQ_SIZE > 2) + .DATAW (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH), + .SIZE (MRSQ_SIZE), + .OUT_REG (MRSQ_SIZE > 2) ) mem_rsp_queue ( .clk (clk), .reset (mrsq_reset), diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 36f33938..a504078a 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -9,10 +9,14 @@ module VX_data_access #( parameter CACHE_LINE_SIZE = 1, // Number of banks parameter NUM_BANKS = 1, + // Number of ports per banks + parameter NUM_PORTS = 1, // Size of a word in bytes parameter WORD_SIZE = 1, // Enable cache writeable - parameter WRITE_ENABLE = 1 + parameter WRITE_ENABLE = 1, + + localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS) ) ( input wire clk, input wire reset, @@ -30,15 +34,18 @@ module VX_data_access #( input wire[`LINE_ADDR_WIDTH-1:0] addr, `IGNORE_UNUSED_END + input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel, + input wire [NUM_PORTS-1:0] pmask, + // reading input wire readen, - output wire [`CACHE_LINE_WIDTH-1:0] read_data, + output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] read_data, // writing input wire writeen, input wire is_fill, - input wire [CACHE_LINE_SIZE-1:0] byteen, - input wire [`CACHE_LINE_WIDTH-1:0] write_data, + input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen, /* one byte-enable mask per port: the line-assembly logic below selects byteen[i] for port i */ + input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] write_data, input wire [`CACHE_LINE_WIDTH-1:0] fill_data ); @@ -50,25 +57,58 @@ module VX_data_access #( localparam BYTEENW = WRITE_ENABLE ? 
CACHE_LINE_SIZE : 1; - wire [`LINE_SELECT_BITS-1:0] line_addr; + wire [`CACHE_LINE_WIDTH-1:0] rdata; wire [`CACHE_LINE_WIDTH-1:0] wdata; wire [BYTEENW-1:0] wren; - - assign line_addr = addr[`LINE_SELECT_BITS-1:0]; + + wire [`LINE_SELECT_BITS-1:0] line_addr = addr[`LINE_SELECT_BITS-1:0]; if (WRITE_ENABLE) begin - assign wren = is_fill ? {BYTEENW{writeen}} : (byteen & {BYTEENW{writeen}}); - assign wdata = is_fill ? fill_data : write_data; - end else begin + wire [`CACHE_LINE_WIDTH-1:0] line_wdata; + wire [CACHE_LINE_SIZE-1:0] line_byteen; + if (`WORDS_PER_LINE > 1) begin + reg [`CACHE_LINE_WIDTH-1:0] line_wdata_r; + reg [CACHE_LINE_SIZE-1:0] line_byteen_r; + if (NUM_PORTS > 1) begin + always @(*) begin + line_wdata_r = 'x; + line_byteen_r = 0; + for (integer i = 0; i < NUM_PORTS; ++i) begin + if (pmask[i]) begin + line_wdata_r[wsel[i] * `WORD_WIDTH +: `WORD_WIDTH] = write_data[i]; + line_byteen_r[wsel[i] * WORD_SIZE +: WORD_SIZE] = byteen[i]; + end + end + end + end else begin + `UNUSED_VAR (pmask) + always @(*) begin + line_wdata_r = {`WORDS_PER_LINE{write_data}}; + line_byteen_r = 0; + line_byteen_r[wsel * WORD_SIZE +: WORD_SIZE] = byteen; + end + end + assign line_wdata = line_wdata_r; + assign line_byteen = line_byteen_r; + end else begin + `UNUSED_VAR (wsel) + `UNUSED_VAR (pmask) + assign line_wdata = write_data; + assign line_byteen = byteen; + end + assign wren = is_fill ? {BYTEENW{writeen}} : ({BYTEENW{writeen}} & line_byteen); + assign wdata = is_fill ? 
fill_data : line_wdata; + end else begin `UNUSED_VAR (is_fill) - `UNUSED_VAR (byteen) + `UNUSED_VAR (byteen) + `UNUSED_VAR (pmask) `UNUSED_VAR (write_data) assign wren = writeen; assign wdata = fill_data; end VX_sp_ram #( - .DATAW (CACHE_LINE_SIZE * 8), + .DATAW (`CACHE_LINE_WIDTH), .SIZE (`LINES_PER_BANK), .BYTEENW (BYTEENW), .NO_RWCHECK (1) @@ -78,9 +118,17 @@ module VX_data_access #( .wren (wren), .wdata (wdata), .rden (1'b1), - .rdata (read_data) + .rdata (rdata) ); + if (`WORDS_PER_LINE > 1) begin + for (genvar i = 0; i < NUM_PORTS; ++i) begin + assign read_data[i] = rdata[wsel[i] * `WORD_WIDTH +: `WORD_WIDTH]; /* each port reads the word it selected; assigning the whole read_data here would give it one driver per loop iteration */ + end + end else begin + assign read_data = rdata; + end + `UNUSED_VAR (stall) `ifdef DBG_PRINT_CACHE_DATA diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index 23f7d8bd..8bcdfda2 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -127,9 +127,9 @@ module VX_shared_mem #( assign core_req_writeonly_unqual = ~(| core_req_read_mask_unqual); VX_elastic_buffer #( - .DATAW (NUM_BANKS * (1 + 1 + `LINE_ADDR_WIDTH + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS) + NUM_BANKS + 1), - .SIZE (CREQ_SIZE), - .OUTPUT_REG (1) // output should be registered for the data_store addr port + .DATAW (NUM_BANKS * (1 + 1 + `LINE_ADDR_WIDTH + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS) + NUM_BANKS + 1), + .SIZE (CREQ_SIZE), + .OUT_REG (1) // output should be registered for the data_store addr port ) core_req_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fp_cores/VX_fp_ncomp.v b/hw/rtl/fp_cores/VX_fp_ncomp.v index 49f0da77..df6c6b38 100644 --- a/hw/rtl/fp_cores/VX_fp_ncomp.v +++ b/hw/rtl/fp_cores/VX_fp_ncomp.v @@ -100,7 +100,7 @@ module VX_fp_ncomp #( VX_pipe_register #( .DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_class_t) + 1 + 1)), .RESETW (1), - .DEPTH (1) + .DEPTH (0) ) pipe_reg0 ( .clk (clk), .reset (reset), diff --git 
a/hw/rtl/libs/VX_axi_adapter.v b/hw/rtl/libs/VX_axi_adapter.v new file mode 100644 index 00000000..6652401d --- /dev/null +++ b/hw/rtl/libs/VX_axi_adapter.v @@ -0,0 +1,88 @@ +`include "VX_define.vh" + +module VX_axi_adapter #( + parameter VX_DATA_WIDTH = 512, + parameter VX_ADDR_WIDTH = (32 - $clog2(VX_DATA_WIDTH/8)), + parameter VX_TAG_WIDTH = 8, + parameter AXI_DATA_WIDTH = VX_DATA_WIDTH, + parameter AXI_ADDR_WIDTH = 32, + parameter AXI_TID_WIDTH = VX_TAG_WIDTH, + + localparam VX_BYTEEN_WIDTH = (VX_DATA_WIDTH / 8), + localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8) +) ( + // Vortex request + input wire mem_req_valid, + input wire mem_req_rw, + input wire [VX_BYTEEN_WIDTH-1:0] mem_req_byteen, + input wire [VX_ADDR_WIDTH-1:0] mem_req_addr, + input wire [VX_DATA_WIDTH-1:0] mem_req_data, + input wire [VX_TAG_WIDTH-1:0] mem_req_tag, + + // Vortex response + input wire mem_rsp_ready, + output wire mem_rsp_valid, + output wire [VX_DATA_WIDTH-1:0] mem_rsp_data, + output wire [VX_TAG_WIDTH-1:0] mem_rsp_tag, + output wire mem_req_ready, + + // AXI write request + output wire m_axi_wvalid, + output wire m_axi_awvalid, + output wire [AXI_TID_WIDTH-1:0] m_axi_awid, + output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr, + output wire [7:0] m_axi_awlen, + output wire [2:0] m_axi_awsize, + output wire [1:0] m_axi_awburst, + output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata, + output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb, + input wire m_axi_wready, + input wire m_axi_awready, + + // AXI read request + output wire m_axi_arvalid, + output wire [AXI_TID_WIDTH-1:0] m_axi_arid, + output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr, + output wire [7:0] m_axi_arlen, + output wire [2:0] m_axi_arsize, + output wire [1:0] m_axi_arburst, + input wire m_axi_arready, + + // AXI read response + input wire m_axi_rvalid, + input wire [AXI_TID_WIDTH-1:0] m_axi_rid, + input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata, + output wire m_axi_rready +); + localparam AXSIZE = $clog2(VX_DATA_WIDTH/8); + + 
`STATIC_ASSERT((AXI_DATA_WIDTH == VX_DATA_WIDTH), ("invalid parameter")) + `STATIC_ASSERT((AXI_TID_WIDTH == VX_TAG_WIDTH), ("invalid parameter")) /* the adapter is a direct wire mapping: data and tag are forwarded unchanged, so the widths must match */ + + // AXI write channel + assign m_axi_wvalid = mem_req_valid & mem_req_rw; + assign m_axi_awvalid = mem_req_valid & mem_req_rw; /* NOTE(review): AW and W are raised together and the request is only retired when awready and wready are both high in the same cycle; a slave that accepts one channel before the other would see that channel presented again - confirm the target tolerates this, otherwise per-channel ack state is needed */ + assign m_axi_awid = mem_req_tag; + assign m_axi_awaddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE; /* block address to byte address */ + assign m_axi_awlen = 8'b00000000; /* AxLEN encodes beats-1: one beat per request */ + assign m_axi_awsize = 3'(AXSIZE); /* log2 of bytes per beat */ + assign m_axi_awburst = 2'b00; /* AxBURST 2'b00 is the FIXED encoding */ + assign m_axi_wdata = mem_req_data; + assign m_axi_wstrb = mem_req_byteen; + + // AXI read channel + assign m_axi_arvalid = mem_req_valid & ~mem_req_rw; + assign m_axi_arid = mem_req_tag; /* returned on m_axi_rid and forwarded as mem_rsp_tag */ + assign m_axi_araddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE; + assign m_axi_arlen = 8'b00000000; + assign m_axi_arsize = 3'(AXSIZE); + assign m_axi_arburst = 2'b00; + assign m_axi_rready = mem_rsp_ready; + + // Vortex inputs + assign mem_rsp_valid = m_axi_rvalid; + assign mem_rsp_tag = m_axi_rid; + assign mem_rsp_data = m_axi_rdata; + assign mem_req_ready = mem_req_rw ? 
(m_axi_awready && m_axi_wready) : m_axi_arready; + +endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index db8e99b8..0e14fa54 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -5,7 +5,7 @@ module VX_dp_ram #( parameter DATAW = 1, parameter SIZE = 1, parameter BYTEENW = 1, - parameter OUTPUT_REG = 0, + parameter OUT_REG = 0, parameter NO_RWCHECK = 0, parameter ADDRW = $clog2(SIZE), parameter LUTRAM = 0, @@ -35,7 +35,7 @@ module VX_dp_ram #( `ifdef SYNTHESIS if (LUTRAM) begin - if (OUTPUT_REG) begin + if (OUT_REG) begin reg [DATAW-1:0] rdata_r; if (BYTEENW > 1) begin `USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0]; @@ -90,7 +90,7 @@ module VX_dp_ram #( end end end else begin - if (OUTPUT_REG) begin + if (OUT_REG) begin reg [DATAW-1:0] rdata_r; if (BYTEENW > 1) begin @@ -173,7 +173,7 @@ module VX_dp_ram #( end end `else - if (OUTPUT_REG) begin + if (OUT_REG) begin reg [DATAW-1:0] rdata_r; if (BYTEENW > 1) begin reg [BYTEENW-1:0][7:0] ram [SIZE-1:0]; diff --git a/hw/rtl/libs/VX_elastic_buffer.v b/hw/rtl/libs/VX_elastic_buffer.v index 66e8f7ef..ac36fc62 100644 --- a/hw/rtl/libs/VX_elastic_buffer.v +++ b/hw/rtl/libs/VX_elastic_buffer.v @@ -4,7 +4,7 @@ module VX_elastic_buffer #( parameter DATAW = 1, parameter SIZE = 2, - parameter OUTPUT_REG = 0, + parameter OUT_REG = 0, parameter LUTRAM = 0 ) ( input wire clk, @@ -32,8 +32,8 @@ module VX_elastic_buffer #( end else if (SIZE == 2) begin VX_skid_buffer #( - .DATAW (DATAW), - .OUTPUT_REG (OUTPUT_REG) + .DATAW (DATAW), + .OUT_REG (OUT_REG) ) queue ( .clk (clk), .reset (reset), @@ -53,10 +53,10 @@ module VX_elastic_buffer #( wire pop = valid_out && ready_out; VX_fifo_queue #( - .DATAW (DATAW), - .SIZE (SIZE), - .OUTPUT_REG (OUTPUT_REG), - .LUTRAM (LUTRAM) + .DATAW (DATAW), + .SIZE (SIZE), + .OUT_REG (OUT_REG), + .LUTRAM (LUTRAM) ) queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_fifo_queue.v b/hw/rtl/libs/VX_fifo_queue.v index 
5a6e63ae..cc812cfc 100644 --- a/hw/rtl/libs/VX_fifo_queue.v +++ b/hw/rtl/libs/VX_fifo_queue.v @@ -8,7 +8,7 @@ module VX_fifo_queue #( parameter ALM_EMPTY = 1, parameter ADDRW = $clog2(SIZE), parameter SIZEW = $clog2(SIZE+1), - parameter OUTPUT_REG = 0, + parameter OUT_REG = 0, parameter LUTRAM = 1 ) ( input wire clk, @@ -103,7 +103,7 @@ module VX_fifo_queue #( if (SIZE == 2) begin - if (0 == OUTPUT_REG) begin + if (0 == OUT_REG) begin reg [DATAW-1:0] shift_reg [1:0]; @@ -138,7 +138,7 @@ module VX_fifo_queue #( end else begin - if (0 == OUTPUT_REG) begin + if (0 == OUT_REG) begin reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] wr_ptr_r; @@ -154,10 +154,10 @@ module VX_fifo_queue #( end VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .OUTPUT_REG (0), - .LUTRAM (LUTRAM) + .DATAW (DATAW), + .SIZE (SIZE), + .OUT_REG (0), + .LUTRAM (LUTRAM) ) dp_ram ( .clk(clk), .wren (push), @@ -197,10 +197,10 @@ module VX_fifo_queue #( end VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .OUTPUT_REG (0), - .LUTRAM (LUTRAM) + .DATAW (DATAW), + .SIZE (SIZE), + .OUT_REG (0), + .LUTRAM (LUTRAM) ) dp_ram ( .clk (clk), .wren (push), diff --git a/hw/rtl/libs/VX_skid_buffer.v b/hw/rtl/libs/VX_skid_buffer.v index 67fd2cd0..d295db55 100644 --- a/hw/rtl/libs/VX_skid_buffer.v +++ b/hw/rtl/libs/VX_skid_buffer.v @@ -5,7 +5,7 @@ module VX_skid_buffer #( parameter DATAW = 1, parameter PASSTHRU = 0, parameter NOBACKPRESSURE = 0, - parameter OUTPUT_REG = 0 + parameter OUT_REG = 0 ) ( input wire clk, input wire reset, @@ -51,7 +51,7 @@ module VX_skid_buffer #( end else begin - if (OUTPUT_REG) begin + if (OUT_REG) begin reg [DATAW-1:0] data_out_r; reg [DATAW-1:0] buffer; diff --git a/hw/rtl/libs/VX_sp_ram.v b/hw/rtl/libs/VX_sp_ram.v index 65ec0837..2cf7bff0 100644 --- a/hw/rtl/libs/VX_sp_ram.v +++ b/hw/rtl/libs/VX_sp_ram.v @@ -5,7 +5,7 @@ module VX_sp_ram #( parameter DATAW = 1, parameter SIZE = 1, parameter BYTEENW = 1, - parameter OUTPUT_REG = 0, + parameter OUT_REG = 0, parameter NO_RWCHECK = 0, parameter 
ADDRW = $clog2(SIZE), parameter LUTRAM = 0, @@ -34,7 +34,7 @@ module VX_sp_ram #( `ifdef SYNTHESIS if (LUTRAM) begin - if (OUTPUT_REG) begin + if (OUT_REG) begin reg [DATAW-1:0] rdata_r; if (BYTEENW > 1) begin @@ -90,7 +90,7 @@ module VX_sp_ram #( end end end else begin - if (OUTPUT_REG) begin + if (OUT_REG) begin reg [DATAW-1:0] rdata_r; if (BYTEENW > 1) begin @@ -173,7 +173,7 @@ module VX_sp_ram #( end end `else - if (OUTPUT_REG) begin + if (OUT_REG) begin reg [DATAW-1:0] rdata_r; if (BYTEENW > 1) begin reg [BYTEENW-1:0][7:0] ram [SIZE-1:0]; diff --git a/hw/rtl/libs/VX_stream_arbiter.v b/hw/rtl/libs/VX_stream_arbiter.v index f1d89b60..1c89bb4f 100644 --- a/hw/rtl/libs/VX_stream_arbiter.v +++ b/hw/rtl/libs/VX_stream_arbiter.v @@ -98,31 +98,13 @@ module VX_stream_arbiter #( if (LANES > 1) begin wire [NUM_REQS-1:0][(LANES * (1 + DATAW))-1:0] valid_data_in; - for (genvar i = 0; i < NUM_REQS; i++) begin assign valid_data_in[i] = {valid_in[i], data_in[i]}; end - - VX_mux #( - .DATAW (LANES * (1 + DATAW)), - .N (NUM_REQS) - ) data_in_mux ( - .data_in (valid_data_in), - .sel_in (sel_index), - .data_out ({valid_in_sel, data_in_sel}) - ); - + assign {valid_in_sel, data_in_sel} = valid_data_in[sel_index]; `UNUSED_VAR (sel_valid) end else begin - VX_mux #( - .DATAW (DATAW), - .N (NUM_REQS) - ) data_in_mux ( - .data_in (data_in), - .sel_in (sel_index), - .data_out (data_in_sel) - ); - + assign data_in_sel = data_in[sel_index]; assign valid_in_sel = sel_valid; end @@ -132,9 +114,9 @@ module VX_stream_arbiter #( for (genvar i = 0; i < LANES; ++i) begin VX_skid_buffer #( - .DATAW (DATAW), - .PASSTHRU (0 == BUFFERED), - .OUTPUT_REG (2 == BUFFERED) + .DATAW (DATAW), + .PASSTHRU (0 == BUFFERED), + .OUT_REG (2 == BUFFERED) ) out_buffer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_stream_demux.v b/hw/rtl/libs/VX_stream_demux.v index e50d7a7c..e55007b8 100644 --- a/hw/rtl/libs/VX_stream_demux.v +++ b/hw/rtl/libs/VX_stream_demux.v @@ -37,9 +37,9 @@ module VX_stream_demux 
#( for (genvar i = 0; i < NUM_REQS; i++) begin VX_skid_buffer #( - .DATAW (DATAW), - .PASSTHRU (0 == BUFFERED), - .OUTPUT_REG (2 == BUFFERED) + .DATAW (DATAW), + .PASSTHRU (0 == BUFFERED), + .OUT_REG (2 == BUFFERED) ) out_buffer ( .clk (clk), .reset (reset), diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index ccb37bf2..6adf457c 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -66,7 +66,12 @@ Simulator::Simulator() { Verilated::assertOn(false); ram_ = nullptr; + +#ifdef AXI_BUS + vortex_ = new VVortex_axi(); +#else vortex_ = new VVortex(); +#endif #ifdef VCD_OUTPUT Verilated::traceEverOn(true); @@ -103,15 +108,18 @@ void Simulator::attach_ram(RAM* ram) { void Simulator::reset() { print_bufs_.clear(); + for (int b = 0; b < MEMORY_BANKS; ++b) { mem_rsp_vec_[b].clear(); } last_mem_rsp_bank_ = 0; - mem_rsp_active_ = false; - vortex_->mem_rsp_valid = 0; - vortex_->mem_req_ready = 0; +#ifdef AXI_BUS + this->reset_axi_bus(); +#else + this->reset_mem_bus(); +#endif vortex_->reset = 1; @@ -133,12 +141,20 @@ void Simulator::step() { vortex_->clk = 0; this->eval(); - mem_rsp_ready_ = vortex_->mem_rsp_ready; - +#ifdef AXI_BUS + this->eval_axi_bus(0); +#else + this->eval_mem_bus(0); +#endif + vortex_->clk = 1; this->eval(); - this->eval_mem_bus(); +#ifdef AXI_BUS + this->eval_axi_bus(1); +#else + this->eval_mem_bus(1); +#endif #ifndef NDEBUG fflush(stdout); @@ -155,7 +171,158 @@ void Simulator::eval() { ++timestamp; } -void Simulator::eval_mem_bus() { +#ifdef AXI_BUS + +void Simulator::reset_axi_bus() { + vortex_->m_axi_wready = 0; + vortex_->m_axi_awready = 0; + vortex_->m_axi_arready = 0; + vortex_->m_axi_rvalid = 0; +} + +void Simulator::eval_axi_bus(bool clk) { + if (!clk) { + mem_rsp_ready_ = vortex_->m_axi_rready; + return; + } + if (ram_ == nullptr) { + vortex_->m_axi_wready = 0; + vortex_->m_axi_awready = 0; + vortex_->m_axi_arready = 0; + return; + } + + // update memory responses schedule + for (int b = 0; b < MEMORY_BANKS; 
++b) { + for (auto& rsp : mem_rsp_vec_[b]) { + if (rsp.cycles_left > 0) + rsp.cycles_left -= 1; + } + } + + bool has_response = false; + + // schedule memory responses that are ready + for (int i = 0; i < MEMORY_BANKS; ++i) { + uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS; + if (!mem_rsp_vec_[b].empty() + && (mem_rsp_vec_[b].begin()->cycles_left) <= 0) { + has_response = true; + last_mem_rsp_bank_ = b; + break; + } + } + + // send memory response + if (mem_rsp_active_ + && vortex_->m_axi_rvalid && mem_rsp_ready_) { + mem_rsp_active_ = false; + } + if (!mem_rsp_active_) { + if (has_response) { + vortex_->m_axi_rvalid = 1; + auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); /* oldest pending response of the selected bank; 'auto' avoids spelling the container's element type */ + /* + printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ + memcpy((uint8_t*)vortex_->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE); + vortex_->m_axi_rid = mem_rsp_it->tag; + mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it); + mem_rsp_active_ = true; + } else { + vortex_->m_axi_rvalid = 0; + } + } + + // select the memory bank + uint32_t req_addr = vortex_->m_axi_wvalid ? vortex_->m_axi_awaddr : vortex_->m_axi_araddr; + uint32_t req_bank = (MEMORY_BANKS >= 2) ? 
((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0; + + // handle memory stalls + bool mem_stalled = false; +#ifdef ENABLE_MEM_STALLS + if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { + mem_stalled = true; + } else + if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) { + mem_stalled = true; + } +#endif + + // process memory requests + if (!mem_stalled) { + if (vortex_->m_axi_wvalid || vortex_->m_axi_arvalid) { + if (vortex_->m_axi_wvalid) { + uint64_t byteen = vortex_->m_axi_wstrb; + unsigned base_addr = vortex_->m_axi_awaddr; + uint8_t* data = (uint8_t*)(vortex_->m_axi_wdata); + if (base_addr >= IO_COUT_ADDR + && base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) { + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } + } + } + } else { + /* + printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[base_addr + i] = data[i]; + } + } + } + } else { + mem_req_t mem_req; + mem_req.tag = vortex_->m_axi_arid; + mem_req.addr = vortex_->m_axi_araddr; + ram_->read(vortex_->m_axi_araddr, MEM_BLOCK_SIZE, mem_req.block.data()); + mem_req.cycles_left = MEM_LATENCY; + for (auto& rsp : mem_rsp_vec_[req_bank]) { + if (mem_req.addr == rsp.addr) { + // duplicate requests receive the same cycle delay + mem_req.cycles_left = rsp.cycles_left; + break; + } + } + mem_rsp_vec_[req_bank].emplace_back(mem_req); + } + } + } + + vortex_->m_axi_wready = !mem_stalled; + vortex_->m_axi_awready = !mem_stalled; + vortex_->m_axi_arready = !mem_stalled; +} + +#else + +void Simulator::reset_mem_bus() { + vortex_->mem_req_ready = 0; + vortex_->mem_rsp_valid = 0; +} + +void 
Simulator::eval_mem_bus(bool clk) { + if (!clk) { + mem_rsp_ready_ = vortex_->mem_rsp_ready; + return; + } + if (ram_ == nullptr) { vortex_->mem_req_ready = 0; return; @@ -276,6 +443,8 @@ void Simulator::eval_mem_bus() { vortex_->mem_req_ready = !mem_stalled; } +#endif + void Simulator::wait(uint32_t cycles) { for (int i = 0; i < cycles; ++i) { this->step(); @@ -309,11 +478,19 @@ int Simulator::run() { } bool Simulator::get_ebreak() const { +#ifdef AXI_BUS + return (int)vortex_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak; +#else return (int)vortex_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak; +#endif } int Simulator::get_last_wb_value(int reg) const { +#ifdef AXI_BUS + return (int)vortex_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; +#else return (int)vortex_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; +#endif } void Simulator::load_bin(const char* program_file) { diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index fe64babe..d867ea83 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -1,8 +1,14 @@ #pragma once #include + +#ifdef AXI_BUS +#include "VVortex_axi.h" +#include "VVortex_axi__Syms.h" +#else #include "VVortex.h" #include "VVortex__Syms.h" +#endif #ifdef VCD_OUTPUT #include @@ -58,8 +64,14 @@ private: std::unordered_map print_bufs_; void eval(); - - void eval_mem_bus(); + +#ifdef AXI_BUS + void reset_axi_bus(); + void eval_axi_bus(bool clk); +#else + void reset_mem_bus(); + void eval_mem_bus(bool clk); +#endif int get_last_wb_value(int reg) const; @@ -73,7 +85,13 @@ private: bool mem_rsp_ready_; RAM *ram_; + +#ifdef AXI_BUS + VVortex_axi *vortex_; +#else VVortex *vortex_; +#endif + #ifdef VCD_OUTPUT 
VerilatedVcdC *trace_; #endif