From e770824d47fe4286ac06ec36cc6ded7fe1b3d886 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 10 Jan 2021 20:26:15 -0800 Subject: [PATCH] fixed afu cci write bug, fixed profile cache write miss bug, fixed bram byteenable inferance --- hw/rtl/VX_config.vh | 2 +- hw/rtl/afu/VX_avs_wrapper.v | 12 +++-- hw/rtl/afu/vortex_afu.sv | 99 ++++++++++++++++++----------------- hw/rtl/cache/VX_bank.v | 7 ++- hw/rtl/cache/VX_cache.v | 4 +- hw/rtl/cache/VX_data_access.v | 21 ++------ hw/rtl/cache/VX_data_store.v | 3 +- hw/rtl/fp_cores/VX_fp_cvt.v | 9 ++-- hw/rtl/libs/VX_dp_ram.v | 53 +++++++------------ hw/rtl/libs/VX_pending_size.v | 30 +++++++---- hw/syn/quartus/top16/Makefile | 12 ++--- 11 files changed, 122 insertions(+), 130 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index b3a1081f..a7a959e4 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -264,7 +264,7 @@ // Size of cache in bytes `ifndef ICACHE_SIZE -`define ICACHE_SIZE 2048 +`define ICACHE_SIZE 8192 `endif // Core Request Queue Size diff --git a/hw/rtl/afu/VX_avs_wrapper.v b/hw/rtl/afu/VX_avs_wrapper.v index e4167ad7..dcf19ee0 100644 --- a/hw/rtl/afu/VX_avs_wrapper.v +++ b/hw/rtl/afu/VX_avs_wrapper.v @@ -53,6 +53,7 @@ module VX_avs_wrapper #( wire avs_rspq_empty; wire rsp_queue_going_full; + wire [RD_QUEUE_ADDRW-1:0] rsp_queue_size; VX_pending_size #( .SIZE (RD_QUEUE_SIZE) ) pending_size ( @@ -60,8 +61,11 @@ module VX_avs_wrapper #( .reset (reset), .push (avs_reqq_push), .pop (avs_rspq_pop), - .full (rsp_queue_going_full) + `UNUSED_PIN (empty), + .full (rsp_queue_going_full), + .size (rsp_queue_size) ); + `UNUSED_VAR (rsp_queue_size) always @(posedge clk) begin avs_burstcount_r <= 1; @@ -71,7 +75,6 @@ module VX_avs_wrapper #( VX_fifo_queue #( .DATAW (REQ_TAGW), .SIZE (RD_QUEUE_SIZE), - .BUFFERED(1), .FASTRAM (1) ) rd_req_queue ( .clk (clk), @@ -88,7 +91,6 @@ module VX_avs_wrapper #( VX_fifo_queue #( .DATAW (AVS_DATAW), .SIZE (RD_QUEUE_SIZE), - .BUFFERED(1), .FASTRAM (1) ) 
rd_rsp_queue ( .clk (clk), @@ -119,10 +121,10 @@ module VX_avs_wrapper #( if (dram_req_rw) $display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, dram_req_data); else - $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag); + $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, rsp_queue_size); end if (dram_rsp_valid && dram_rsp_ready) begin - $display("%t: AVS Rd Rsp: tag=%0h, data=%0h", $time, dram_rsp_tag, dram_rsp_data); + $display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, dram_rsp_tag, dram_rsp_data, rsp_queue_size); end end `endif diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index b6ba25c6..a3c681d7 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -497,17 +497,12 @@ wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout; //-- -assign cci_dram_req_valid = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid; - -assign cci_dram_req_addr = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_addr : cci_dram_rd_req_addr; - -assign cci_dram_req_rw = (CMD_MEM_WRITE == state); - +assign cci_dram_req_valid = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid; +assign cci_dram_req_addr = (CMD_MEM_WRITE == state) ? 
cci_dram_wr_req_addr : cci_dram_rd_req_addr; +assign cci_dram_req_rw = (CMD_MEM_WRITE == state); assign cci_dram_req_byteen = {64{1'b1}}; - -assign cci_dram_req_data = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; - -assign cci_dram_req_tag = AVS_REQ_TAGW'(0); +assign cci_dram_req_data = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; +assign cci_dram_req_tag = AVS_REQ_TAGW'(0); `UNUSED_VAR (cci_dram_rsp_tag) @@ -619,8 +614,6 @@ VX_avs_wrapper #( // CCI-P Read Request /////////////////////////////////////////////////////////// -reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads; -wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next; reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_ctr; reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr; wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next; @@ -653,11 +646,22 @@ assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? assign cci_rdq_pop = cci_dram_wr_req_fire; assign cci_rdq_push = cci_rd_rsp_fire; -assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag}; +assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag}; -assign cci_pending_reads_next = cci_pending_reads - + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 : - (!cci_rd_req_fire && cci_rdq_pop) ? 
-1 : 0); +wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads; +wire cci_pending_reads_full; +VX_pending_size #( + .SIZE (CCI_RD_QUEUE_SIZE) +) cci_rd_pending_size ( + .clk (clk), + .reset (reset), + .push (cci_rd_req_fire), + .pop (cci_rdq_pop), + `UNUSED_PIN (empty), + .full (cci_pending_reads_full), + .size (cci_pending_reads) +); +`UNUSED_VAR (cci_pending_reads) assign cci_dram_wr_req_valid = !cci_rdq_empty; @@ -673,7 +677,6 @@ always @(posedge clk) begin cci_rd_req_addr <= 0; cci_rd_req_ctr <= 0; cci_rd_rsp_ctr <= 0; - cci_pending_reads <= 0; cci_rd_req_enable <= 0; cci_rd_req_wait <= 0; cci_dram_wr_req_ctr <= 0; @@ -685,7 +688,6 @@ always @(posedge clk) begin cci_rd_req_addr <= cmd_io_addr; cci_rd_req_ctr <= 0; cci_rd_rsp_ctr <= 0; - cci_pending_reads <= 0; cci_rd_req_enable <= (cmd_data_size != 0); cci_rd_req_wait <= 0; cci_dram_wr_req_ctr <= 0; @@ -694,7 +696,7 @@ always @(posedge clk) begin cci_rd_req_enable <= (STATE_WRITE == state) && (cci_rd_req_ctr_next != cmd_data_size) - && (cci_pending_reads_next != CCI_RD_QUEUE_SIZE) + && !cci_pending_reads_full && !cp2af_sRxPort.c0TxAlmFull; if (cci_rd_req_fire) begin @@ -704,7 +706,7 @@ always @(posedge clk) begin cci_rd_req_wait <= 1; // end current request batch end `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next); + $display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads); `endif end @@ -720,7 +722,7 @@ always @(posedge clk) begin /*if (cci_rdq_pop) begin `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads_next); + $display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads); `endif end*/ @@ -728,8 +730,6 @@ always @(posedge clk) begin cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + 
((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1); end - - cci_pending_reads <= cci_pending_reads_next; end end @@ -771,11 +771,9 @@ VX_fifo_queue #( // CCI-P Write Request ////////////////////////////////////////////////////////// -reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes; -wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next; reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr; reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; -reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_unqual; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_r; t_ccip_clAddr cci_wr_req_addr; always @(*) begin @@ -785,23 +783,34 @@ always @(*) begin af2cp_sTxPort.c1.data = t_ccip_clData'(cci_dram_rsp_data); end -wire cci_wr_req_fire = af2cp_sTxPort.c1.valid; +wire cci_dram_rd_req_fire = cci_dram_rd_req_valid && cci_dram_req_ready; +wire cci_dram_rd_rsp_fire = cci_dram_rsp_valid && cci_dram_rsp_ready; + +wire cci_wr_req_fire = cci_dram_rd_rsp_fire; wire cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; -wire cci_dram_rd_req_fire = cci_dram_rd_req_valid && cci_dram_req_ready; - -assign cci_pending_writes_next = cci_pending_writes - + $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 : - (!cci_wr_req_fire && cci_wr_rsp_fire) ? 
-1 : 0); +wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes; +wire cci_pending_writes_empty; +VX_pending_size #( + .SIZE (CCI_RW_QUEUE_SIZE) +) cci_wr_pending_size ( + .clk (clk), + .reset (reset), + .push (cci_wr_req_fire), + .pop (cci_wr_rsp_fire), + .empty (cci_pending_writes_empty), + `UNUSED_PIN (full), + .size (cci_pending_writes) +); +`UNUSED_VAR (cci_pending_writes) assign cci_dram_rd_req_valid = (cci_dram_rd_req_ctr != 0); - -assign cci_dram_rd_req_addr = cci_dram_rd_req_addr_unqual; +assign cci_dram_rd_req_addr = cci_dram_rd_req_addr_r; assign af2cp_sTxPort.c1.valid = cci_dram_rsp_valid; assign cci_dram_rsp_ready = !cp2af_sRxPort.c1TxAlmFull; -assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); +assign cmd_read_done = (0 == cci_wr_req_ctr) && cci_pending_writes_empty; // Send write requests to CCI always @(posedge clk) @@ -809,18 +818,16 @@ begin if (reset) begin cci_wr_req_addr <= 0; cci_wr_req_ctr <= 0; - cci_pending_writes <= 0; cci_dram_rd_req_ctr <= 0; - cci_dram_rd_req_addr_unqual <= 0; + cci_dram_rd_req_addr_r <= 0; end else begin if ((STATE_IDLE == state) && (CMD_MEM_READ == cmd_type)) begin - cci_wr_req_addr <= cmd_io_addr; - cci_wr_req_ctr <= cmd_data_size; - cci_pending_writes <= 0; - cci_dram_rd_req_ctr <= cmd_data_size; - cci_dram_rd_req_addr_unqual <= cmd_mem_addr; + cci_wr_req_addr <= cmd_io_addr; + cci_wr_req_ctr <= cmd_data_size; + cci_dram_rd_req_ctr <= cmd_data_size; + cci_dram_rd_req_addr_r <= cmd_mem_addr; end if (cci_wr_req_fire) begin @@ -828,22 +835,20 @@ begin cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE - $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, af2cp_sTxPort.c1.data); + $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, 
af2cp_sTxPort.c1.data); `endif end /*`ifdef DBG_PRINT_OPAE if (cci_wr_rsp_fire) begin - $display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes_next); + $display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes); end `endif*/ if (cci_dram_rd_req_fire) begin - cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1); + cci_dram_rd_req_addr_r <= cci_dram_rd_req_addr_r + DRAM_ADDR_WIDTH'(1); cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1); end - - cci_pending_writes <= cci_pending_writes_next; end end diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 9cc8f36c..6682933a 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -441,7 +441,6 @@ end .wwsel_in (wsel_st01), .wbyteen_in (byteen_st01), .writeword_in (writeword_st01), - .readdata_in (readdata_st1), .filldata_in (filldata_st1) ); @@ -683,9 +682,9 @@ end `SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); `ifdef PERF_ENABLE - assign perf_read_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && !mem_rw_st1; - assign perf_write_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && mem_rw_st1; - assign perf_pipe_stalls = pipeline_stall || mshr_almost_full || dreq_going_full; + assign perf_read_misses = valid_st1 && !pipeline_stall && !is_fill_st1 && !is_mshr_st1 && miss_st1 && !mem_rw_st1; + assign perf_write_misses = valid_st1 && !pipeline_stall && !is_fill_st1 && !is_mshr_st1 && miss_st1 && mem_rw_st1; + assign perf_pipe_stalls = pipeline_stall || mshr_almost_full || dreq_almost_full; assign perf_mshr_stalls = mshr_almost_full; `endif diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index c67e3abd..42990f42 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -438,10 +438,10 @@ module VX_cache #( perf_pipe_stalls <= 0; perf_crsp_stalls <= 0; end else begin - perf_core_reads <= perf_core_reads + 64'(perf_core_reads_per_cycle); + perf_core_reads <= perf_core_reads + 
64'(perf_core_reads_per_cycle); perf_core_writes <= perf_core_writes + 64'(perf_core_writes_per_cycle); perf_read_misses <= perf_read_misses + 64'(perf_read_miss_per_cycle); - perf_write_misses <= perf_write_misses + 64'(perf_write_miss_per_cycle); + perf_write_misses <= perf_write_misses+ 64'(perf_write_miss_per_cycle); perf_mshr_stalls <= perf_mshr_stalls + 64'(perf_mshr_stall_per_cycle); perf_pipe_stalls <= perf_pipe_stalls + 64'(perf_pipe_stall_per_cycle); perf_crsp_stalls <= perf_crsp_stalls + 64'(perf_crsp_stall_per_cycle); diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index e7d6332f..5c9c79d8 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -56,7 +56,6 @@ module VX_data_access #( input wire [WORD_SIZE-1:0] wbyteen_in, input wire wfill_in, input wire [`WORD_WIDTH-1:0] writeword_in, - input wire [`CACHE_LINE_WIDTH-1:0] readdata_in, input wire [`CACHE_LINE_WIDTH-1:0] filldata_in ); @@ -98,24 +97,14 @@ module VX_data_access #( wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] writedata_qual; if (`WORD_SELECT_BITS != 0) begin - for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin - wire [`WORD_WIDTH-1:0] readdata_sel = readdata_in[i * `WORD_WIDTH +: `WORD_WIDTH]; - wire [`WORD_WIDTH-1:0] writeword_qual; - for (genvar j = 0; j < WORD_SIZE; j++) begin - assign writeword_qual[j * 8 +: 8] = wbyteen_in[j] ? writeword_in[j * 8 +: 8] : readdata_sel[j * 8 +: 8]; - end - wire wenable = (wwsel_in == `WORD_SELECT_BITS'(i)); - assign wbyteen_qual[i] = wenable ? wbyteen_in : {WORD_SIZE{1'b0}}; - assign writedata_qual[i] = wenable ? writeword_qual : readdata_sel; + for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin + assign wbyteen_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? 
wbyteen_in : {WORD_SIZE{1'b0}}; + assign writedata_qual[i] = writeword_in; end end else begin `UNUSED_VAR (wwsel_in) - wire [`WORD_WIDTH-1:0] writeword_qual; - for (genvar i = 0; i < WORD_SIZE; i++) begin - assign writeword_qual[i * 8 +: 8] = wbyteen_in[i] ? writeword_in[i * 8 +: 8] : readdata_in[i * 8 +: 8]; - end assign wbyteen_qual = wbyteen_in; - assign writedata_qual = writeword_qual; + assign writedata_qual = writeword_in; end assign write_enable = writeen_in && !stall; @@ -141,7 +130,7 @@ module VX_data_access #( for (genvar i = 0; i < WORD_SIZE; i++) begin assign writeword_qual[i * 8 +: 8] = wbyteen_in[i] ? writeword_in[i * 8 +: 8] : read_data[i * 8 +: 8]; end - assign dirtyb_out = read_dirtyb | ({WORD_SIZE{rw_hazard}} & wbyteen_in); + assign dirtyb_out = read_dirtyb | ({WORD_SIZE{rw_hazard}} & wbyteen_in); assign readdata_out = rw_hazard ? (wfill_in ? filldata_in : writeword_qual) : read_data; end diff --git a/hw/rtl/cache/VX_data_store.v b/hw/rtl/cache/VX_data_store.v index 8afd6e02..fee94756 100644 --- a/hw/rtl/cache/VX_data_store.v +++ b/hw/rtl/cache/VX_data_store.v @@ -48,13 +48,14 @@ module VX_data_store #( VX_dp_ram #( .DATAW(CACHE_LINE_SIZE * 8), .SIZE(`LINES_PER_BANK), + .BYTEENW(CACHE_LINE_SIZE), .RWCHECK(1) ) data ( .clk(clk), .waddr(write_addr), .raddr(read_addr), .wren(write_enable), - .byteen(1'b1), + .byteen(byte_enable), .rden(1'b1), .din(write_data), .dout(read_data) diff --git a/hw/rtl/fp_cores/VX_fp_cvt.v b/hw/rtl/fp_cores/VX_fp_cvt.v index 3d99d178..43fd6dd3 100644 --- a/hw/rtl/fp_cores/VX_fp_cvt.v +++ b/hw/rtl/fp_cores/VX_fp_cvt.v @@ -121,8 +121,7 @@ module VX_fp_cvt #( VX_pipe_register #( .DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 +INT_EXP_WIDTH + INT_MAN_WIDTH + LZC_RESULT_WIDTH + 1)), - .RESETW (1), - .DEPTH (1) + .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), @@ -182,8 +181,7 @@ module VX_fp_cvt #( VX_pipe_register #( .DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 
INT_MAN_WIDTH + 2*INT_EXP_WIDTH)), - .RESETW (1), - .DEPTH (1) + .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), @@ -310,8 +308,7 @@ module VX_fp_cvt #( VX_pipe_register #( .DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1)), - .RESETW (1), - .DEPTH (1) + .RESETW (1) ) pipe_reg2 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index 90bdb547..ef7f018e 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -23,23 +23,18 @@ module VX_dp_ram #( `STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter")) - localparam DATA32W = DATAW / 32; - localparam BYTEEN32W = BYTEENW / 4; - if (FASTRAM) begin if (BUFFERED) begin reg [DATAW-1:0] dout_r; if (BYTEENW > 1) begin - `USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + `USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0]; always @(posedge clk) begin if (wren) begin - for (integer j = 0; j < BYTEEN32W; j++) begin - for (integer i = 0; i < 4; i++) begin - if (byteen[j * 4 + i]) - mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; - end + for (integer i = 0; i < BYTEENW; i++) begin + if (byteen[i]) + mem[waddr][i] <= din[i * 8 +: 8]; end end if (rden) @@ -60,15 +55,13 @@ module VX_dp_ram #( `UNUSED_VAR (rden) if (BYTEENW > 1) begin - `USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + `USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0]; always @(posedge clk) begin if (wren) begin - for (integer j = 0; j < BYTEEN32W; j++) begin - for (integer i = 0; i < 4; i++) begin - if (byteen[j * 4 + i]) - mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; - end + for (integer i = 0; i < BYTEENW; i++) begin + if (byteen[i]) + mem[waddr][i] <= din[i * 8 +: 8]; end end end @@ -88,15 +81,13 @@ module VX_dp_ram #( reg [DATAW-1:0] dout_r; if (BYTEENW > 1) begin - reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + reg [BYTEENW-1:0][7:0] mem [SIZE-1:0]; always @(posedge clk) begin if (wren) begin - for (integer j = 0; j < 
BYTEEN32W; j++) begin - for (integer i = 0; i < 4; i++) begin - if (byteen[j * 4 + i]) - mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; - end + for (integer i = 0; i < BYTEENW; i++) begin + if (byteen[i]) + mem[waddr][i] <= din[i * 8 +: 8]; end end if (rden) @@ -118,15 +109,13 @@ module VX_dp_ram #( if (RWCHECK) begin if (BYTEENW > 1) begin - reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + reg [BYTEENW-1:0][7:0] mem [SIZE-1:0]; always @(posedge clk) begin if (wren) begin - for (integer j = 0; j < BYTEEN32W; j++) begin - for (integer i = 0; i < 4; i++) begin - if (byteen[j * 4 + i]) - mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; - end + for (integer i = 0; i < BYTEENW; i++) begin + if (byteen[i]) + mem[waddr][i] <= din[i * 8 +: 8]; end end end @@ -142,15 +131,13 @@ module VX_dp_ram #( end end else begin if (BYTEENW > 1) begin - `NO_RW_RAM_CHECK reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [BYTEENW-1:0][7:0] mem [SIZE-1:0]; always @(posedge clk) begin if (wren) begin - for (integer j = 0; j < BYTEEN32W; j++) begin - for (integer i = 0; i < 4; i++) begin - if (byteen[j * 4 + i]) - mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8]; - end + for (integer i = 0; i < BYTEENW; i++) begin + if (byteen[i]) + mem[waddr][i] <= din[i * 8 +: 8]; end end end
diff --git a/hw/rtl/libs/VX_pending_size.v b/hw/rtl/libs/VX_pending_size.v
index 595b10d0..bdffa566 100644
--- a/hw/rtl/libs/VX_pending_size.v
+++ b/hw/rtl/libs/VX_pending_size.v
@@ -1,35 +1,47 @@
 `include "VX_platform.vh"
 
 module VX_pending_size #(
-    parameter SIZE = 1
+    parameter SIZE = 1,                 // capacity: full is raised once SIZE entries are pending
+    parameter SIZEW = $clog2(SIZE+1)    // bit width of the size output
 ) (
     input wire clk,
     input wire reset,
     input wire push,
     input wire pop,
-    output wire full
+    output wire empty,                  // registered flag: no entries pending
+    output wire full,                   // registered flag: SIZE entries pending
+    output wire [SIZEW-1:0] size        // pending-entry count, built from {full_r, used_r}
 );
     localparam ADDRW = $clog2(SIZE);
 
-    reg [ADDRW-1:0] size_r;
+    reg [ADDRW-1:0] used_r;             // up/down counter: +1 on push-only, -1 on pop-only
+    reg empty_r;                        // drives the empty output
     reg full_r;
 
     always @(posedge clk) begin
         if (reset) begin
-            size_r <= 0;
-            full_r <= 0;
+            used_r <= 0;
+            empty_r <= 1;               // nothing is pending after reset, so start out empty
+            full_r <= 0;
         end else begin
             assert(!push || !full);
             if (push) begin
-                if (!pop && (used_r == ADDRW'(SIZE-1)))
-                    full_r <= 1;
+                if (!pop) begin         // push with a simultaneous pop leaves the count unchanged
+                    empty_r <= 0;
+                    if (used_r == ADDRW'(SIZE-1))
+                        full_r <= 1;    // this push takes the last free slot
+                end
             end else if (pop) begin
                 full_r <= 0;
+                if (used_r == ADDRW'(1))
+                    empty_r <= 1;       // this pop removes the last pending entry
             end
-            size_r <= size_r + ADDRW'($signed(2'(push && !pop) - 2'(pop && !push)));
+            used_r <= used_r + ADDRW'($signed(2'(push && !pop) - 2'(pop && !push)));
         end
     end
 
-    assign full = full_r;
+    assign empty = empty_r;
+    assign full = full_r;
+    assign size = {full_r, used_r};
 
 endmodule
\ No newline at end of file
diff --git a/hw/syn/quartus/top16/Makefile b/hw/syn/quartus/top16/Makefile
index 9c411fa6..3583a832 100644
--- a/hw/syn/quartus/top16/Makefile
+++ b/hw/syn/quartus/top16/Makefile
@@ -1,10 +1,10 @@
-#FAMILY = "Arria 10"
-#DEVICE = 10AX115N3F40E2SG
-#FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
+FAMILY = "Arria 10"
+DEVICE = 10AX115N3F40E2SG
+FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
 
-FAMILY = "Stratix 10"
-DEVICE = 1SX280HN2F43E2VG
-FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
+#FAMILY = "Stratix 10"
+#DEVICE = 1SX280HN2F43E2VG
+#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
 
 PROJECT = vortex_afu
 TOP_LEVEL_ENTITY = vortex_afu