vlsim fix, verilator fst trace, use ram optimization

This commit is contained in:
Blaise Tine
2020-10-25 16:40:50 -07:00
parent 81dc8c7279
commit 43ae82e788
23 changed files with 424 additions and 422 deletions

View File

@@ -6,11 +6,6 @@
///////////////////////////////////////////////////////////////////////////////
// `define SYNTHESIS 1
// `define ASIC 1
///////////////////////////////////////////////////////////////////////////////
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NT_BITS `LOG2UP(`NUM_THREADS)

View File

@@ -10,131 +10,24 @@ module VX_gpr_ram (
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
output wire [`NUM_THREADS-1:0][31:0] rs2_data
);
`ifndef ASIC
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
reg [`NUM_THREADS-1:0][31:0] q1, q2;
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (we[i]) begin
mem[waddr][i][0] <= wdata[i][07:00];
mem[waddr][i][1] <= wdata[i][15:08];
mem[waddr][i][2] <= wdata[i][23:16];
mem[waddr][i][3] <= wdata[i][31:24];
end
end
q1 <= mem[rs1];
q2 <= mem[rs2];
end
assign rs1_data = q1;
assign rs2_data = q2;
`else
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
reg [`NUM_THREADS-1:0][31:0] q1, q2;
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
assign write_bit_mask[i] = {32{~we[i]}};
end
wire cenb = 0;
wire cena_1 = 0;
wire cena_2 = 0;
wire [`NUM_THREADS-1:0][31:0] tmp_a;
wire [`NUM_THREADS-1:0][31:0] tmp_b;
`ifndef SYNTHESIS
for (integer i = 0; i < `NUM_THREADS; i++) begin
for (integer j = 0; j < 32; j++) begin
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
if (we[i]) begin
mem[waddr][i][0] <= wdata[i][07:00];
mem[waddr][i][1] <= wdata[i][15:08];
mem[waddr][i][2] <= wdata[i][23:16];
mem[waddr][i][3] <= wdata[i][31:24];
end
end
`else
assign rs1_data = tmp_a;
assign rs2_data = tmp_b;
`endif
for (integer i = 0; i < 'NT; i=i+4) begin
`IGNORE_WARNINGS_BEGIN
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(tmp_a[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(rs1[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
q1 <= mem[rs1];
q2 <= mem[rs2];
end
rf2_`NUM_GPRSx128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(tmp_b[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(rs2[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
`IGNORE_WARNINGS_END
end
`endif
assign rs1_data = q1;
assign rs2_data = q2;
endmodule

View File

@@ -20,8 +20,8 @@ module VX_icache_stage #(
);
`UNUSED_VAR (reset)
reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
`NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
`NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;

View File

@@ -16,8 +16,8 @@ module VX_ipdom_stack #(
);
localparam STACK_SIZE = 2 ** DEPTH;
reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
reg is_part [0:STACK_SIZE-1];
reg [DEPTH-1:0] rd_ptr, wr_ptr;

View File

@@ -52,7 +52,7 @@
///////////////////////////////////////////////////////////////////////////////
`define USE_FAST_BRAM (* ramstyle="mlab" *)
`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *)
`define NO_RW_RAM_CHECK (* ramstyle="mlab, no_rw_check" *)
///////////////////////////////////////////////////////////////////////////////

View File

@@ -306,9 +306,9 @@ module VX_bank #(
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped
//decides which request to deal with. Priority: 1) DRAM fill, 2) Miss reserve 3) Core req 4) Snp req
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
mrvq_pop_unqual ? mrvq_addr_st0 :
//Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req
assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 :
dfpq_pop_unqual ? dfpq_addr_st0 :
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
snrq_pop_unqual ? snrq_addr_st0 :
0;

View File

@@ -56,7 +56,7 @@ module VX_cache_miss_resrv #(
output wire miss_resrv_is_snp_st0,
output wire miss_resrv_snp_invalidate_st0
);
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table;
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
reg [MRVQ_SIZE-1:0] valid_table;
@@ -72,8 +72,8 @@ module VX_cache_miss_resrv #(
assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE));
assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock
wire enqueue_possible = !miss_resrv_full;
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
wire enqueue_possible = !miss_resrv_full;
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
reg [MRVQ_SIZE-1:0] make_ready;
reg [MRVQ_SIZE-1:0] make_ready_push;
@@ -86,11 +86,11 @@ module VX_cache_miss_resrv #(
assign pending_hazard_st1 = |(valid_address_match);
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
assign miss_resrv_valid_st0 = dequeue_possible;
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign {miss_resrv_data_st0,
miss_resrv_tid_st0,
miss_resrv_tag_st0,
@@ -98,7 +98,7 @@ module VX_cache_miss_resrv #(
miss_resrv_byteen_st0,
miss_resrv_wsel_st0,
miss_resrv_is_snp_st0,
miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index];
miss_resrv_snp_invalidate_st0} = metadata_table;
wire mrvq_push = miss_add && enqueue_possible && !is_mrvq;
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
@@ -125,7 +125,6 @@ module VX_cache_miss_resrv #(
valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= mrvq_init_ready_state;
addr_table[enqueue_index] <= miss_add_addr;
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate};
tail_ptr <= tail_ptr + $bits(tail_ptr)'(1);
end else if (increment_head) begin
valid_table[head_ptr] <= 0;
@@ -155,6 +154,22 @@ module VX_cache_miss_resrv #(
end
end
VX_dp_ram #(
.DATAW(`MRVQ_METADATA_WIDTH),
.SIZE(MRVQ_SIZE),
.BYTEENW(1),
.BUFFERED(0),
.RWCHECK(1)
) metadata_ram (
.clk(clk),
.waddr(enqueue_index),
.raddr(dequeue_index),
.wren(mrvq_push),
.rden(1'b1),
.din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}),
.dout(metadata_table)
);
`ifdef DBG_PRINT_CACHE_MSRQ
always @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin

View File

@@ -183,15 +183,15 @@ module VX_tag_data_access #(
if (valid_req_st1) begin
if ((| use_write_enable)) begin
if (writefill_st1) begin
$display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
$display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
end else begin
$display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
$display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
end
end else
if (miss_st1) begin
$display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
$display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
end else begin
$display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
$display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
end
end
end

View File

@@ -78,7 +78,7 @@ module VX_tag_data_store #(
.SIZE(`BANK_LINE_COUNT),
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE),
.BUFFERED(0),
.RWCHECK(0)
.RWCHECK(1)
) dp_ram (
.clk(clk),
.waddr(write_addr),

View File

@@ -6,6 +6,7 @@ module VX_dp_ram #(
parameter BYTEENW = 1,
parameter BUFFERED = 1,
parameter RWCHECK = 1,
parameter RWBYPASS = 0,
parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1)
) (
@@ -29,19 +30,46 @@ module VX_dp_ram #(
if (wren[i])
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
end
if (rden)
dout_r <= mem[raddr];
end
end else begin
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
if (rden)
dout_r <= mem[raddr];
end
end
end
always @(posedge clk) begin
if (rden)
dout_r <= mem[raddr];
end
if (RWBYPASS) begin
reg [DATAW-1:0] din_r;
wire writing;
if (BYTEENW > 1) begin
assign writing = (| wren);
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8];
end
end
end else begin
assign writing = wren;
always @(posedge clk) begin
din_r <= din;
end
end
reg bypass_r;
always @(posedge clk) begin
bypass_r <= writing && (raddr == waddr);
end
assign dout = bypass_r ? din_r : dout_r;
end else begin
assign dout = dout_r;
end
end else begin
@@ -65,7 +93,7 @@ module VX_dp_ram #(
end
end
`ifdef SYNTHESIS
if (RWBYPASS) begin
reg [DATAW-1:0] din_r;
wire writing;
@@ -89,13 +117,13 @@ module VX_dp_ram #(
end
assign dout = bypass_r ? din_r : mem[raddr];
`else
end else begin
assign dout = mem[raddr];
`endif
end
end else begin
reg [DATAW-1:0] mem [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
if (BYTEENW > 1) begin
always @(posedge clk) begin

View File

@@ -85,7 +85,7 @@ module VX_generic_queue #(
.DATAW(DATAW),
.SIZE(SIZE),
.BUFFERED(0),
.RWCHECK(0)
.RWCHECK(1)
) dp_ram (
.clk(clk),
.waddr(wr_ptr_a),

View File

@@ -36,8 +36,9 @@ module VX_scope #(
localparam GET_COUNT = 3'd3;
localparam GET_OFFSET = 3'd6;
reg [DATAW-1:0] data_store [SIZE-1:0];
reg [DELTAW-1:0] delta_store [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0];
reg [UPDW-1:0] prev_trigger_id;
reg [DELTAW-1:0] delta;
reg [BUSW-1:0] bus_out_r;