decoupled load/store commits

This commit is contained in:
Blaise Tine
2020-12-03 15:08:48 -08:00
parent c3ec4c9e90
commit fb60d0af87
9 changed files with 93 additions and 89 deletions

View File

@@ -8,7 +8,8 @@ module VX_commit #(
// inputs
VX_commit_if alu_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if fpu_commit_if,
@@ -18,37 +19,44 @@ module VX_commit #(
VX_writeback_if writeback_if,
VX_cmt_to_csr_if cmt_to_csr_if
);
localparam CMTW = $clog2(`NUM_THREADS+1);
localparam CMTW = $clog2(3*`NUM_THREADS+1);
// CSRs update
wire alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready;
wire lsu_commit_fire = lsu_commit_if.valid && lsu_commit_if.ready;
wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready;
wire st_commit_fire = st_commit_if.valid && st_commit_if.ready;
wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
wire mul_commit_fire = mul_commit_if.valid && mul_commit_if.ready;
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
wire commit_fire = alu_commit_fire
|| lsu_commit_fire
|| ld_commit_fire
|| st_commit_fire
|| csr_commit_fire
|| mul_commit_fire
|| fpu_commit_fire
|| gpu_commit_fire;
wire [`NUM_THREADS-1:0] commit_tmask = alu_commit_fire ? alu_commit_if.tmask:
lsu_commit_fire ? lsu_commit_if.tmask:
csr_commit_fire ? csr_commit_if.tmask:
mul_commit_fire ? mul_commit_if.tmask:
fpu_commit_fire ? fpu_commit_if.tmask:
gpu_commit_if.tmask;
wire [`NUM_THREADS-1:0] commit_tmask1, commit_tmask2, commit_tmask3;
assign commit_tmask1 = alu_commit_fire ? alu_commit_if.tmask:
ld_commit_fire ? ld_commit_if.tmask:
csr_commit_fire ? csr_commit_if.tmask:
mul_commit_fire ? mul_commit_if.tmask:
fpu_commit_fire ? fpu_commit_if.tmask:
0;
assign commit_tmask2 = st_commit_fire ? st_commit_if.tmask : 0;
assign commit_tmask3 = gpu_commit_fire ? gpu_commit_if.tmask : 0;
wire [CMTW-1:0] commit_size;
VX_countones #(
.N(`NUM_THREADS)
) commit_ctr (
.valids(commit_tmask),
.N(3*`NUM_THREADS)
) commit_ctr1 (
.valids({commit_tmask3, commit_tmask2, commit_tmask1}),
.count (commit_size)
);
@@ -64,22 +72,28 @@ module VX_commit #(
.reset (reset),
.alu_commit_if (alu_commit_if),
.lsu_commit_if (lsu_commit_if),
.ld_commit_if (ld_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
.fpu_commit_if (fpu_commit_if),
.writeback_if (writeback_if)
);
// store and gpu commits don't writeback
assign st_commit_if.ready = 1'b1;
assign gpu_commit_if.ready = 1'b1;
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (alu_commit_if.valid && alu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data);
end
if (lsu_commit_if.valid && lsu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.wid, lsu_commit_if.PC, lsu_commit_if.tmask, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
if (ld_commit_if.valid && ld_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.wb, ld_commit_if.rd, ld_commit_if.data);
end
if (st_commit_if.valid && st_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd, st_commit_if.data);
end
if (csr_commit_if.valid && csr_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);

View File

@@ -31,7 +31,8 @@ module VX_execute #(
VX_branch_ctl_if branch_ctl_if,
VX_warp_ctl_if warp_ctl_if,
VX_commit_if alu_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
@@ -63,7 +64,8 @@ module VX_execute #(
.dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if),
.lsu_req_if (lsu_req_if),
.lsu_commit_if (lsu_commit_if)
.ld_commit_if (ld_commit_if),
.st_commit_if (st_commit_if)
);
VX_csr_unit #(

View File

@@ -16,7 +16,8 @@ module VX_lsu_unit #(
VX_lsu_req_if lsu_req_if,
// outputs
VX_commit_if lsu_commit_if
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if
);
wire [`NUM_THREADS-1:0] req_tmask;
wire req_rw;
@@ -135,7 +136,7 @@ module VX_lsu_unit #(
end
end
wire stall_out = ~lsu_commit_if.ready && lsu_commit_if.valid;
wire stall_out = ~ld_commit_if.ready && ld_commit_if.valid;
wire store_stall = valid_in && req_rw && stall_out;
// Core Request
@@ -168,18 +169,23 @@ module VX_lsu_unit #(
end
end
wire is_store_req = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready;
// send store commit
wire is_store_rsp = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready;
assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid;
assign st_commit_if.tmask = req_tmask;
assign st_commit_if.PC = req_pc;
assign st_commit_if.rd = 0;
assign st_commit_if.wb = 0;
assign st_commit_if.data = 0;
`UNUSED_VAR (st_commit_if.ready)
// send load commit
wire is_load_rsp = (| dcache_rsp_if.valid);
wire mem_rsp_stall = is_load_rsp && is_store_req; // arbitration prioritizes stores
wire arb_valid = is_store_req || is_load_rsp;
wire [`NW_BITS-1:0] arb_wid = is_store_req ? req_wid : rsp_wid;
wire [`NUM_THREADS-1:0] arb_tmask = is_store_req ? req_tmask : dcache_rsp_if.valid;
wire [31:0] arb_PC = is_store_req ? req_pc : rsp_pc;
wire [`NR_BITS-1:0] arb_rd = is_store_req ? 0 : rsp_rd;
wire arb_wb = is_store_req ? 0 : rsp_wb;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.R(1)
@@ -188,12 +194,12 @@ module VX_lsu_unit #(
.reset (reset),
.stall (stall_out),
.flush (1'b0),
.in ({arb_valid, arb_wid, arb_tmask, arb_PC, arb_rd, arb_wb, rsp_data}),
.out ({lsu_commit_if.valid, lsu_commit_if.wid, lsu_commit_if.tmask, lsu_commit_if.PC, lsu_commit_if.rd, lsu_commit_if.wb, lsu_commit_if.data})
.in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}),
.out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data})
);
// Can accept new cache response?
assign dcache_rsp_if.ready = ~(stall_out || mem_rsp_stall);
assign dcache_rsp_if.ready = ~stall_out;
// scope registration
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}});

View File

@@ -113,8 +113,9 @@ module VX_pipeline #(
VX_wstall_if wstall_if();
VX_join_if join_if();
VX_commit_if alu_commit_if();
VX_commit_if lsu_commit_if();
VX_commit_if csr_commit_if();
VX_commit_if ld_commit_if();
VX_commit_if st_commit_if();
VX_commit_if csr_commit_if();
VX_commit_if mul_commit_if();
VX_commit_if fpu_commit_if();
VX_commit_if gpu_commit_if();
@@ -191,7 +192,8 @@ module VX_pipeline #(
.warp_ctl_if (warp_ctl_if),
.branch_ctl_if (branch_ctl_if),
.alu_commit_if (alu_commit_if),
.lsu_commit_if (lsu_commit_if),
.ld_commit_if (ld_commit_if),
.st_commit_if (st_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
@@ -208,7 +210,8 @@ module VX_pipeline #(
.reset (reset),
.alu_commit_if (alu_commit_if),
.lsu_commit_if (lsu_commit_if),
.ld_commit_if (ld_commit_if),
.st_commit_if (st_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),

View File

@@ -8,17 +8,16 @@ module VX_writeback #(
// inputs
VX_commit_if alu_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if ld_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if,
VX_commit_if fpu_commit_if,
// outputs
VX_writeback_if writeback_if
);
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
wire lsu_valid = lsu_commit_if.valid && lsu_commit_if.wb;
wire ld_valid = ld_commit_if.valid /*&& ld_commit_if.wb*/;
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
@@ -31,42 +30,42 @@ module VX_writeback #(
wire [`NUM_THREADS-1:0][31:0] wb_data;
assign wb_valid = alu_valid ? alu_commit_if.valid :
lsu_valid ? lsu_commit_if.valid :
ld_valid ? ld_commit_if.valid :
csr_valid ? csr_commit_if.valid :
mul_valid ? mul_commit_if.valid :
fpu_valid ? fpu_commit_if.valid :
0;
assign wb_wid = alu_valid ? alu_commit_if.wid :
lsu_valid ? lsu_commit_if.wid :
ld_valid ? ld_commit_if.wid :
csr_valid ? csr_commit_if.wid :
mul_valid ? mul_commit_if.wid :
fpu_valid ? fpu_commit_if.wid :
0;
assign wb_PC = alu_valid ? alu_commit_if.PC :
lsu_valid ? lsu_commit_if.PC :
ld_valid ? ld_commit_if.PC :
csr_valid ? csr_commit_if.PC :
mul_valid ? mul_commit_if.PC :
fpu_valid ? fpu_commit_if.PC :
0;
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
lsu_valid ? lsu_commit_if.tmask :
ld_valid ? ld_commit_if.tmask :
csr_valid ? csr_commit_if.tmask :
mul_valid ? mul_commit_if.tmask :
fpu_valid ? fpu_commit_if.tmask :
0;
assign wb_rd = alu_valid ? alu_commit_if.rd :
lsu_valid ? lsu_commit_if.rd :
ld_valid ? ld_commit_if.rd :
csr_valid ? csr_commit_if.rd :
mul_valid ? mul_commit_if.rd :
fpu_valid ? fpu_commit_if.rd :
0;
assign wb_data = alu_valid ? alu_commit_if.data :
lsu_valid ? lsu_commit_if.data :
ld_valid ? ld_commit_if.data :
csr_valid ? csr_commit_if.data :
mul_valid ? mul_commit_if.data :
fpu_valid ? fpu_commit_if.data :
@@ -88,11 +87,10 @@ module VX_writeback #(
);
assign alu_commit_if.ready = !stall;
assign lsu_commit_if.ready = !stall && !alu_valid;
assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid;
assign mul_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid;
assign fpu_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid && !mul_valid;
assign gpu_commit_if.ready = 1'b1;
assign ld_commit_if.ready = !stall && !alu_valid;
assign csr_commit_if.ready = !stall && !alu_valid && !ld_valid;
assign mul_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid;
assign fpu_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid && !mul_valid;
// special workaround to get RISC-V tests Pass/Fail status
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;

View File

@@ -835,10 +835,12 @@ end
wire dwbq_pop = dram_req_valid && dram_req_ready;
wire [`LINE_ADDR_WIDTH-1:0] dwbq_addr = do_writeback_st3 ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} :
addr_st3;
wire writeback = WRITE_ENABLE && do_writeback_st3;
wire [BANK_LINE_SIZE-1:0] dwbq_byteen = do_writeback_st3 ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}};
wire [`LINE_ADDR_WIDTH-1:0] dwbq_addr = writeback ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} :
addr_st3;
wire [BANK_LINE_SIZE-1:0] dwbq_byteen = writeback ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}};
if (DRAM_ENABLE) begin
VX_generic_queue #(
@@ -850,8 +852,8 @@ end
.reset (reset),
.push (dwbq_push),
.pop (dwbq_pop),
.data_in ({do_writeback_st3, dwbq_byteen, dwbq_addr, readdata_st3}),
.data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
.data_in ({writeback, dwbq_byteen, dwbq_addr, readdata_st3}),
.data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
.empty (dwbq_empty),
.full (dwbq_full),
`UNUSED_PIN (size)
@@ -864,6 +866,7 @@ end
`UNUSED_VAR (readtag_st3)
`UNUSED_VAR (dirtyb_st3)
`UNUSED_VAR (readdata_st3)
`UNUSED_VAR (writeback)
`UNUSED_VAR (dram_req_ready)
assign dwbq_empty = 1;
assign dwbq_full = 0;
@@ -895,8 +898,8 @@ end
if (FLUSH_ENABLE) begin
VX_generic_queue #(
.DATAW (SNP_TAG_WIDTH),
.SIZE (SNPQ_SIZE),
.DATAW (SNP_TAG_WIDTH),
.SIZE (SNPQ_SIZE),
.BUFFERED(1)
) snp_rsp_queue (
.clk (clk),
@@ -933,7 +936,7 @@ end
`SCOPE_ASSIGN (valid_st2, valid_st2);
`SCOPE_ASSIGN (valid_st3, valid_st3);
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
`SCOPE_ASSIGN (miss_st1, miss_st1);
`SCOPE_ASSIGN (dirty_st1, dirty_st1);

View File

@@ -34,10 +34,11 @@ module VX_cache_core_rsp_merge #(
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [NUM_BANKS-1:0] core_rsp_bank_select;
reg [CORE_TAG_ID_BITS-1:0] sel_tag_id;
if (CORE_TAG_ID_BITS != 0) begin
reg [CORE_TAG_ID_BITS-1:0] sel_tag_id;
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x;

View File

@@ -6,7 +6,7 @@
// Wire bundle named for a commit-to-CSR connection: a single-bit valid flag
// plus a commit_size count. No modports are declared, so signal direction is
// not fixed by the interface itself.
interface VX_cmt_to_csr_if ();
wire valid;
// NOTE(review): commit_size appears twice below with different widths. This
// text looks like a diff hunk shown without +/- markers, so these are most
// likely the removed and the added version of one line — confirm before use.
// Width $clog2(`NUM_THREADS+1): can represent counts 0..`NUM_THREADS.
wire [$clog2(`NUM_THREADS+1)-1:0] commit_size;
// Width $clog2(3*`NUM_THREADS+1): can represent counts 0..3*`NUM_THREADS.
wire [$clog2(3*`NUM_THREADS+1)-1:0] commit_size;
endinterface

View File

@@ -1,23 +0,0 @@
`ifndef VX_FPU_TO_CMT_IF
`define VX_FPU_TO_CMT_IF
`include "VX_define.vh"
// Wire bundle named for an FPU-to-commit connection. It contains declarations
// only: no modports, no logic, so signal direction is not fixed here.
interface VX_fpu_to_cmt_if ();
// NOTE(review): valid and ready (last field) read like a handshake pair by
// name — confirm against the modules that connect this interface.
wire valid;
// `NW_BITS-wide field; presumably a warp identifier — TODO confirm.
wire [`NW_BITS-1:0] wid;
// One bit per `NUM_THREADS; presumably the active-thread mask — TODO confirm.
wire [`NUM_THREADS-1:0] tmask;
// 32-bit field.
wire [31:0] PC;
// Packed array: `NUM_THREADS entries of 32 bits each.
wire [`NUM_THREADS-1:0][31:0] data;
// `NR_BITS-wide field; presumably a destination register index — TODO confirm.
wire [`NR_BITS-1:0] rd;
// Single-bit flag; presumably "result is written back" — TODO confirm.
wire wb;
// Single-bit flag; presumably marks the fflags field below as meaningful — TODO confirm.
wire has_fflags;
// `NUM_THREADS entries of type fflags_t. The type is not declared in this
// block; presumably it comes from an included header — verify.
fflags_t [`NUM_THREADS-1:0] fflags;
wire ready;
endinterface
`endif