vlsim fix, verilator fst trace, use ram optimization
This commit is contained in:
@@ -6,11 +6,6 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// `define SYNTHESIS 1
|
||||
// `define ASIC 1
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define NW_BITS `LOG2UP(`NUM_WARPS)
|
||||
|
||||
`define NT_BITS `LOG2UP(`NUM_THREADS)
|
||||
|
||||
@@ -10,131 +10,24 @@ module VX_gpr_ram (
|
||||
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rs2_data
|
||||
);
|
||||
`ifndef ASIC
|
||||
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_THREADS-1:0][31:0] q1, q2;
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (we[i]) begin
|
||||
mem[waddr][i][0] <= wdata[i][07:00];
|
||||
mem[waddr][i][1] <= wdata[i][15:08];
|
||||
mem[waddr][i][2] <= wdata[i][23:16];
|
||||
mem[waddr][i][3] <= wdata[i][31:24];
|
||||
end
|
||||
end
|
||||
q1 <= mem[rs1];
|
||||
q2 <= mem[rs2];
|
||||
end
|
||||
|
||||
assign rs1_data = q1;
|
||||
assign rs2_data = q2;
|
||||
|
||||
`else
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
|
||||
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_THREADS-1:0][31:0] q1, q2;
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign write_bit_mask[i] = {32{~we[i]}};
|
||||
end
|
||||
|
||||
wire cenb = 0;
|
||||
wire cena_1 = 0;
|
||||
wire cena_2 = 0;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_a;
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_b;
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (integer j = 0; j < 32; j++) begin
|
||||
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
|
||||
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
|
||||
if (we[i]) begin
|
||||
mem[waddr][i][0] <= wdata[i][07:00];
|
||||
mem[waddr][i][1] <= wdata[i][15:08];
|
||||
mem[waddr][i][2] <= wdata[i][23:16];
|
||||
mem[waddr][i][3] <= wdata[i][31:24];
|
||||
end
|
||||
end
|
||||
`else
|
||||
assign rs1_data = tmp_a;
|
||||
assign rs2_data = tmp_b;
|
||||
`endif
|
||||
for (integer i = 0; i < 'NT; i=i+4) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(tmp_a[(i+3):(i)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(rs1[(i+3):(i)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(i+3):(i)]),
|
||||
.AB(waddr[(i+3):(i)]),
|
||||
.DB(wdata[(i+3):(i)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
q1 <= mem[rs1];
|
||||
q2 <= mem[rs2];
|
||||
end
|
||||
|
||||
rf2_`NUM_GPRSx128_wm1 second_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(tmp_b[(i+3):(i)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(rs2[(i+3):(i)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(i+3):(i)]),
|
||||
.AB(waddr[(i+3):(i)]),
|
||||
.DB(wdata[(i+3):(i)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
`endif
|
||||
assign rs1_data = q1;
|
||||
assign rs2_data = q2;
|
||||
|
||||
endmodule
|
||||
@@ -20,8 +20,8 @@ module VX_icache_stage #(
|
||||
);
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
|
||||
reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
|
||||
`NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
|
||||
`NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
|
||||
|
||||
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@ module VX_ipdom_stack #(
|
||||
);
|
||||
localparam STACK_SIZE = 2 ** DEPTH;
|
||||
|
||||
reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
|
||||
reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
|
||||
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
|
||||
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
|
||||
reg is_part [0:STACK_SIZE-1];
|
||||
|
||||
reg [DEPTH-1:0] rd_ptr, wr_ptr;
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define USE_FAST_BRAM (* ramstyle="mlab" *)
|
||||
`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* ramstyle="mlab, no_rw_check" *)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
6
hw/rtl/cache/VX_bank.v
vendored
6
hw/rtl/cache/VX_bank.v
vendored
@@ -306,9 +306,9 @@ module VX_bank #(
|
||||
|
||||
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped
|
||||
|
||||
//decides which request to deal with. Priority: 1) DRAM fill, 2) Miss reserve 3) Core req 4) Snp req
|
||||
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
|
||||
mrvq_pop_unqual ? mrvq_addr_st0 :
|
||||
// Selects the line address of the request to service. Priority: 1) Miss reserve (mrvq) 2) DRAM fill (dfpq) 3) Core req (reqq) 4) Snp req (snrq)
|
||||
assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 :
|
||||
dfpq_pop_unqual ? dfpq_addr_st0 :
|
||||
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
|
||||
snrq_pop_unqual ? snrq_addr_st0 :
|
||||
0;
|
||||
|
||||
29
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
29
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
@@ -56,7 +56,7 @@ module VX_cache_miss_resrv #(
|
||||
output wire miss_resrv_is_snp_st0,
|
||||
output wire miss_resrv_snp_invalidate_st0
|
||||
);
|
||||
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
|
||||
wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table;
|
||||
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
|
||||
|
||||
reg [MRVQ_SIZE-1:0] valid_table;
|
||||
@@ -72,8 +72,8 @@ module VX_cache_miss_resrv #(
|
||||
assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE));
|
||||
assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // assert stop once fewer than 5 entries are free: a 5-cycle margin is needed to prevent pipeline lock
|
||||
|
||||
wire enqueue_possible = !miss_resrv_full;
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
wire enqueue_possible = !miss_resrv_full;
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
|
||||
reg [MRVQ_SIZE-1:0] make_ready;
|
||||
reg [MRVQ_SIZE-1:0] make_ready_push;
|
||||
@@ -86,11 +86,11 @@ module VX_cache_miss_resrv #(
|
||||
|
||||
assign pending_hazard_st1 = |(valid_address_match);
|
||||
|
||||
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
|
||||
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
|
||||
|
||||
assign miss_resrv_valid_st0 = dequeue_possible;
|
||||
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
|
||||
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
|
||||
assign {miss_resrv_data_st0,
|
||||
miss_resrv_tid_st0,
|
||||
miss_resrv_tag_st0,
|
||||
@@ -98,7 +98,7 @@ module VX_cache_miss_resrv #(
|
||||
miss_resrv_byteen_st0,
|
||||
miss_resrv_wsel_st0,
|
||||
miss_resrv_is_snp_st0,
|
||||
miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index];
|
||||
miss_resrv_snp_invalidate_st0} = metadata_table;
|
||||
|
||||
wire mrvq_push = miss_add && enqueue_possible && !is_mrvq;
|
||||
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
|
||||
@@ -125,7 +125,6 @@ module VX_cache_miss_resrv #(
|
||||
valid_table[enqueue_index] <= 1;
|
||||
ready_table[enqueue_index] <= mrvq_init_ready_state;
|
||||
addr_table[enqueue_index] <= miss_add_addr;
|
||||
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate};
|
||||
tail_ptr <= tail_ptr + $bits(tail_ptr)'(1);
|
||||
end else if (increment_head) begin
|
||||
valid_table[head_ptr] <= 0;
|
||||
@@ -155,6 +154,22 @@ module VX_cache_miss_resrv #(
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(`MRVQ_METADATA_WIDTH),
|
||||
.SIZE(MRVQ_SIZE),
|
||||
.BYTEENW(1),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) metadata_ram (
|
||||
.clk(clk),
|
||||
.waddr(enqueue_index),
|
||||
.raddr(dequeue_index),
|
||||
.wren(mrvq_push),
|
||||
.rden(1'b1),
|
||||
.din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}),
|
||||
.dout(metadata_table)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_MSRQ
|
||||
always @(posedge clk) begin
|
||||
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
||||
|
||||
8
hw/rtl/cache/VX_tag_data_access.v
vendored
8
hw/rtl/cache/VX_tag_data_access.v
vendored
@@ -183,15 +183,15 @@ module VX_tag_data_access #(
|
||||
if (valid_req_st1) begin
|
||||
if ((| use_write_enable)) begin
|
||||
if (writefill_st1) begin
|
||||
$display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
|
||||
$display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
|
||||
$display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
|
||||
end
|
||||
end else
|
||||
if (miss_st1) begin
|
||||
$display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
|
||||
$display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
|
||||
$display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
2
hw/rtl/cache/VX_tag_data_store.v
vendored
2
hw/rtl/cache/VX_tag_data_store.v
vendored
@@ -78,7 +78,7 @@ module VX_tag_data_store #(
|
||||
.SIZE(`BANK_LINE_COUNT),
|
||||
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(0)
|
||||
.RWCHECK(1)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(write_addr),
|
||||
|
||||
@@ -6,6 +6,7 @@ module VX_dp_ram #(
|
||||
parameter BYTEENW = 1,
|
||||
parameter BUFFERED = 1,
|
||||
parameter RWCHECK = 1,
|
||||
parameter RWBYPASS = 0,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter SIZEW = $clog2(SIZE+1)
|
||||
) (
|
||||
@@ -29,19 +30,46 @@ module VX_dp_ram #(
|
||||
if (wren[i])
|
||||
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
|
||||
end
|
||||
if (rden)
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
if (rden)
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (rden)
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
|
||||
if (RWBYPASS) begin
|
||||
reg [DATAW-1:0] din_r;
|
||||
wire writing;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
assign writing = (| wren);
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
assign writing = wren;
|
||||
always @(posedge clk) begin
|
||||
din_r <= din;
|
||||
end
|
||||
end
|
||||
|
||||
reg bypass_r;
|
||||
always @(posedge clk) begin
|
||||
bypass_r <= writing && (raddr == waddr);
|
||||
end
|
||||
|
||||
assign dout = bypass_r ? din_r : dout_r;
|
||||
end else begin
|
||||
assign dout = dout_r;
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
@@ -65,7 +93,7 @@ module VX_dp_ram #(
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
if (RWBYPASS) begin
|
||||
reg [DATAW-1:0] din_r;
|
||||
wire writing;
|
||||
|
||||
@@ -89,13 +117,13 @@ module VX_dp_ram #(
|
||||
end
|
||||
|
||||
assign dout = bypass_r ? din_r : mem[raddr];
|
||||
`else
|
||||
end else begin
|
||||
assign dout = mem[raddr];
|
||||
`endif
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
always @(posedge clk) begin
|
||||
|
||||
@@ -85,7 +85,7 @@ module VX_generic_queue #(
|
||||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(0)
|
||||
.RWCHECK(1)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_a),
|
||||
|
||||
@@ -36,8 +36,9 @@ module VX_scope #(
|
||||
localparam GET_COUNT = 3'd3;
|
||||
localparam GET_OFFSET = 3'd6;
|
||||
|
||||
reg [DATAW-1:0] data_store [SIZE-1:0];
|
||||
reg [DELTAW-1:0] delta_store [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0];
|
||||
|
||||
reg [UPDW-1:0] prev_trigger_id;
|
||||
reg [DELTAW-1:0] delta;
|
||||
reg [BUSW-1:0] bus_out_r;
|
||||
|
||||
Reference in New Issue
Block a user