Implement WU architecture support
This commit is contained in:
@@ -25,27 +25,40 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
VX_mem_bus_if.master icache_bus_if,
|
||||
|
||||
// inputs
|
||||
VX_schedule_if.slave schedule_if,
|
||||
VX_schedule_if.slave scalar_schedule_if,
|
||||
VX_schedule_if.slave tensor_schedule_if,
|
||||
|
||||
// outputs
|
||||
VX_fetch_if.master fetch_if
|
||||
VX_fetch_if.master scalar_fetch_if,
|
||||
VX_fetch_if.master tensor_fetch_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire icache_req_valid;
|
||||
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
|
||||
wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag;
|
||||
wire icache_req_ready;
|
||||
|
||||
wire rsp_domain;
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [`NW_WIDTH-1:0] req_tag, rsp_tag;
|
||||
wire [`NW_WIDTH-1:0] req_tag, rsp_tag;
|
||||
reg fetch_domain_rr;
|
||||
|
||||
wire icache_req_fire = icache_req_valid && icache_req_ready;
|
||||
|
||||
assign req_tag = schedule_if.data.wid;
|
||||
wire scalar_req_valid = scalar_schedule_if.valid;
|
||||
wire tensor_req_valid = tensor_schedule_if.valid;
|
||||
wire select_tensor_req = tensor_req_valid && (!scalar_req_valid || fetch_domain_rr);
|
||||
wire selected_domain = select_tensor_req ? WU_DOMAIN_TENSOR : WU_DOMAIN_SCALAR;
|
||||
wire selected_valid = scalar_req_valid || tensor_req_valid;
|
||||
wire [`NW_WIDTH-1:0] selected_wid = select_tensor_req ? tensor_schedule_if.data.wid : scalar_schedule_if.data.wid;
|
||||
wire [`XLEN-1:0] selected_pc = select_tensor_req ? tensor_schedule_if.data.PC : scalar_schedule_if.data.PC;
|
||||
wire [`NUM_THREADS-1:0] selected_tmask = select_tensor_req ? tensor_schedule_if.data.tmask : scalar_schedule_if.data.tmask;
|
||||
wire [`UUID_WIDTH-1:0] selected_uuid = select_tensor_req ? tensor_schedule_if.data.uuid : scalar_schedule_if.data.uuid;
|
||||
|
||||
assign req_tag = selected_wid;
|
||||
|
||||
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;
|
||||
assign {rsp_domain, rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;
|
||||
|
||||
wire [`XLEN-1:0] rsp_PC;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
@@ -60,7 +73,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
.write (icache_req_fire),
|
||||
`UNUSED_PIN (wren),
|
||||
.waddr (req_tag),
|
||||
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
|
||||
.wdata ({selected_pc, selected_tmask}),
|
||||
.raddr (rsp_tag),
|
||||
.rdata ({rsp_PC, rsp_tmask})
|
||||
);
|
||||
@@ -69,7 +82,8 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
// Ensure that the ibuffer doesn't fill up.
|
||||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
|
||||
// This issue is particularly prevalent when the icache and dcache is disabled and both requests share the same bus.
|
||||
wire [ISSUE_ISW-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
|
||||
wire [ISSUE_ISW-1:0] schedule_isw = wid_to_isw(selected_wid);
|
||||
wire [`ISSUE_WIDTH-1:0] domain_ibuf_pop = scalar_fetch_if.ibuf_pop | tensor_fetch_if.ibuf_pop;
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
@@ -79,7 +93,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (icache_req_fire && schedule_isw == i),
|
||||
.decr (fetch_if.ibuf_pop[i]),
|
||||
.decr (domain_ibuf_pop[i]),
|
||||
.full (pending_ibuf_full[i]),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
@@ -90,15 +104,24 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
wire ibuf_ready = 1'b1;
|
||||
`endif
|
||||
|
||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
`RUNTIME_ASSERT((!selected_valid || selected_pc != 0),
|
||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, selected_pc, selected_wid, selected_tmask, selected_uuid))
|
||||
|
||||
// Icache Request
|
||||
|
||||
assign icache_req_valid = schedule_if.valid && ibuf_ready;
|
||||
assign icache_req_addr = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2];
|
||||
assign icache_req_tag = {schedule_if.data.uuid, req_tag};
|
||||
assign schedule_if.ready = icache_req_ready && ibuf_ready;
|
||||
assign icache_req_valid = selected_valid && ibuf_ready;
|
||||
assign icache_req_addr = selected_pc[`MEM_ADDR_WIDTH-1:2];
|
||||
assign icache_req_tag = {selected_domain, selected_uuid, req_tag};
|
||||
assign scalar_schedule_if.ready = icache_req_ready && ibuf_ready && selected_valid && !select_tensor_req;
|
||||
assign tensor_schedule_if.ready = icache_req_ready && ibuf_ready && selected_valid && select_tensor_req;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
fetch_domain_rr <= 1'b0;
|
||||
end else if (icache_req_fire && scalar_req_valid && tensor_req_valid) begin
|
||||
fetch_domain_rr <= ~fetch_domain_rr;
|
||||
end
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (ICACHE_ADDR_WIDTH + ICACHE_TAG_WIDTH),
|
||||
@@ -121,18 +144,26 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
|
||||
// Icache Response
|
||||
|
||||
assign fetch_if.valid = icache_bus_if.rsp_valid;
|
||||
assign fetch_if.data.tmask = rsp_tmask;
|
||||
assign fetch_if.data.wid = rsp_tag;
|
||||
assign fetch_if.data.PC = rsp_PC;
|
||||
assign fetch_if.data.instr = icache_bus_if.rsp_data.data;
|
||||
assign fetch_if.data.uuid = rsp_uuid;
|
||||
assign icache_bus_if.rsp_ready = fetch_if.ready;
|
||||
assign scalar_fetch_if.valid = icache_bus_if.rsp_valid && (rsp_domain == WU_DOMAIN_SCALAR);
|
||||
assign scalar_fetch_if.data.tmask = rsp_tmask;
|
||||
assign scalar_fetch_if.data.wid = rsp_tag;
|
||||
assign scalar_fetch_if.data.PC = rsp_PC;
|
||||
assign scalar_fetch_if.data.instr = icache_bus_if.rsp_data.data;
|
||||
assign scalar_fetch_if.data.uuid = rsp_uuid;
|
||||
|
||||
assign tensor_fetch_if.valid = icache_bus_if.rsp_valid && (rsp_domain == WU_DOMAIN_TENSOR);
|
||||
assign tensor_fetch_if.data.tmask = rsp_tmask;
|
||||
assign tensor_fetch_if.data.wid = rsp_tag;
|
||||
assign tensor_fetch_if.data.PC = rsp_PC;
|
||||
assign tensor_fetch_if.data.instr = icache_bus_if.rsp_data.data;
|
||||
assign tensor_fetch_if.data.uuid = rsp_uuid;
|
||||
|
||||
assign icache_bus_if.rsp_ready = (rsp_domain == WU_DOMAIN_TENSOR) ? tensor_fetch_if.ready : scalar_fetch_if.ready;
|
||||
|
||||
`ifdef DBG_SCOPE_FETCH
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire schedule_fire = icache_req_fire;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
@@ -150,7 +181,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
icache_rsp_fire
|
||||
}),
|
||||
.probes({
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
selected_uuid, selected_wid, selected_tmask, selected_pc,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
@@ -161,7 +192,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
`ifdef CHIPSCOPE
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
|
||||
.probe0 ({reset, selected_uuid, selected_wid, selected_tmask, selected_pc, icache_req_ready, selected_valid}),
|
||||
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
|
||||
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
|
||||
);
|
||||
@@ -172,14 +203,18 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_ICACHE
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
wire schedule_fire = icache_req_fire;
|
||||
wire scalar_fetch_fire = scalar_fetch_if.valid && scalar_fetch_if.ready;
|
||||
wire tensor_fetch_fire = tensor_fetch_if.valid && tensor_fetch_if.ready;
|
||||
always @(posedge clk) begin
|
||||
if (schedule_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, schedule_if.data.PC, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
`TRACE(1, ("%d: I$%0d req: domain=%0d wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, selected_domain, selected_wid, selected_pc, selected_tmask, selected_uuid));
|
||||
end
|
||||
if (fetch_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, fetch_if.data.PC, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
if (scalar_fetch_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d scalar rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, scalar_fetch_if.data.wid, scalar_fetch_if.data.PC, scalar_fetch_if.data.tmask, scalar_fetch_if.data.instr, scalar_fetch_if.data.uuid));
|
||||
end
|
||||
if (tensor_fetch_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d tensor rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, tensor_fetch_if.data.wid, tensor_fetch_if.data.PC, tensor_fetch_if.data.tmask, tensor_fetch_if.data.instr, tensor_fetch_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
Reference in New Issue
Block a user