cache uuid support

This commit is contained in:
Blaise Tine
2021-12-09 20:43:22 -05:00
parent 0e2de4f13a
commit d7737542e4
36 changed files with 159 additions and 200 deletions

View File

@@ -32,4 +32,20 @@
`define TEX_FORMAT_L8 `TEX_FORMAT_BITS'(5)
`define TEX_FORMAT_A8 `TEX_FORMAT_BITS'(6)
task trace_tex_state (
input [`CSR_ADDR_BITS-1:0] state
);
case (state)
`CSR_TEX_ADDR: dpi_trace("ADDR");
`CSR_TEX_WIDTH: dpi_trace("WIDTH");
`CSR_TEX_HEIGHT: dpi_trace("HEIGHT");
`CSR_TEX_FORMAT: dpi_trace("FORMAT");
`CSR_TEX_FILTER: dpi_trace("FILTER");
`CSR_TEX_WRAPU: dpi_trace("WRAPU");
`CSR_TEX_WRAPV: dpi_trace("WRAPV");
//`CSR_TEX_MIPOFF
default: dpi_trace("MIPOFF");
endcase
endtask
`endif

View File

@@ -75,6 +75,9 @@ module VX_tex_mem #(
wire [`TEX_LGSTRIDE_BITS-1:0] q_req_lgstride;
wire [3:0][NUM_REQS-1:0][1:0] q_align_offs;
wire [3:0] q_dup_reqs;
wire [`NW_BITS-1:0] q_req_wid;
wire [31:0] q_req_PC;
wire [`UUID_BITS-1:0] q_req_uuid;
assign reqq_push = req_valid && req_ready;
@@ -105,12 +108,8 @@ module VX_tex_mem #(
wire sent_all_ready, last_texel_sent;
wire req_texel_dup;
wire [NUM_REQS-1:0][29:0] req_texel_addr;
reg [`DBG_CACHE_REQ_IDW-1:0] req_id;
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id;
reg [1:0] req_texel_idx;
reg req_texels_done;
`UNUSED_VAR (rsp_req_id)
always @(posedge clk) begin
if (reset || last_texel_sent) begin
@@ -156,22 +155,16 @@ module VX_tex_mem #(
wire [NUM_REQS-1:0] req_dup_mask = {{(NUM_REQS-1){~req_texel_dup}}, 1'b1};
assign {q_req_wid, q_req_PC, q_req_uuid} = q_req_info[`NW_BITS+32+`UUID_BITS-1:0];
`UNUSED_VAR (q_req_wid)
`UNUSED_VAR (q_req_PC)
assign dcache_req_if.valid = {NUM_REQS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask;
assign dcache_req_if.rw = {NUM_REQS{1'b0}};
assign dcache_req_if.addr = req_texel_addr;
assign dcache_req_if.byteen = {NUM_REQS{4'b0}};
assign dcache_req_if.data = 'x;
assign dcache_req_if.tag = {NUM_REQS{req_id, `LSU_TAG_ID_BITS'(req_texel_idx), `CACHE_ADDR_TYPE_BITS'(0)}};
always @(posedge clk) begin
if (reset) begin
req_id <= `DBG_CACHE_REQ_ID(2, 0);
end else begin
if (dcache_req_fire_any) begin
req_id <= req_id + 1;
end
end
end
assign dcache_req_if.tag = {NUM_REQS{q_req_uuid, `LSU_TAG_ID_BITS'(req_texel_idx), `CACHE_ADDR_TYPE_BITS'(0)}};
// Dcache Response
@@ -188,7 +181,6 @@ module VX_tex_mem #(
wire rsp_texel_dup;
assign rsp_texel_idx = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: 2];
assign rsp_req_id = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS + `LSU_TAG_ID_BITS +: `DBG_CACHE_REQ_IDW];
`UNUSED_VAR (dcache_rsp_if.tag)
assign rsp_texel_dup = q_dup_reqs[rsp_texel_idx];
@@ -285,25 +277,25 @@ module VX_tex_mem #(
// Can accept new cache response?
assign dcache_rsp_if.ready = ~(is_last_rsp && stall_out);
`ifdef DBG_TRACE_TEX
wire [`NW_BITS-1:0] q_req_wid, req_wid, rsp_wid;
wire [31:0] q_req_PC, req_PC, rsp_PC;
assign {q_req_wid, q_req_PC} = q_req_info[`NW_BITS+32-1:0];
assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0];
assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];
`ifdef DBG_TRACE_TEX
wire [`NW_BITS-1:0] req_wid, rsp_wid;
wire [31:0] req_PC, rsp_PC;
wire [`UUID_BITS-1:0] req_uuid, rsp_uuid;
assign {req_wid, req_PC, req_uuid} = req_info[`NW_BITS+32+`UUID_BITS-1:0];
assign {rsp_wid, rsp_PC, rsp_uuid} = rsp_info[`NW_BITS+32+`UUID_BITS-1:0];
always @(posedge clk) begin
if (dcache_req_fire_any) begin
dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, addr=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_id, req_texel_idx);
dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_texel_idx);
`TRACE_ARRAY1D(req_texel_addr, NUM_REQS);
dpi_trace(", is_dup=%b\n", req_texel_dup);
dpi_trace(", is_dup=%b (#%0d)\n", req_texel_dup, q_req_uuid);
end
if (dcache_rsp_fire) begin
dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, data=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_req_id, rsp_texel_idx);
dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_texel_idx);
`TRACE_ARRAY1D(dcache_rsp_if.data, NUM_REQS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", q_req_uuid);
end
if (req_valid && req_ready) begin
dpi_trace("%d: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, lgstride=%0d, baseaddr=",
@@ -311,13 +303,13 @@ module VX_tex_mem #(
`TRACE_ARRAY1D(req_baseaddr, NUM_REQS);
dpi_trace(", addr=");
`TRACE_ARRAY2D(req_addr, 4, NUM_REQS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", req_uuid);
end
if (rsp_valid && rsp_ready) begin
dpi_trace("%d: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
$time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask);
`TRACE_ARRAY2D(rsp_data, 4, NUM_REQS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", rsp_uuid);
end
end
`endif

View File

@@ -23,11 +23,11 @@ module VX_tex_unit #(
VX_tex_rsp_if.master tex_rsp_if
);
localparam REQ_INFO_W = 64 + `NR_BITS + 1 + `NW_BITS + 32;
localparam REQ_INFO_W = `NR_BITS + 1 + `NW_BITS + 32 + `UUID_BITS;
localparam BLEND_FRAC_W = (2 * `NUM_THREADS * `TEX_BLEND_FRAC);
reg [$clog2(`NUM_TEX_UNITS)-1:0] csr_tex_unit;
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][`TEX_LOD_MAX+1-1:0];
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(`TEX_LOD_MAX+1)-1:0];
reg [1:0][`TEX_LOD_BITS-1:0] tex_logdims [`NUM_TEX_UNITS-1:0];
reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0];
reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0];
@@ -36,9 +36,6 @@ module VX_tex_unit #(
// CSRs programming
reg csrs_dirty [`NUM_TEX_UNITS-1:0];
`UNUSED_VAR (csrs_dirty)
always @(posedge clk) begin
if (tex_csr_if.write_enable) begin
case (tex_csr_if.write_addr)
@@ -47,50 +44,39 @@ module VX_tex_unit #(
end
`CSR_TEX_ADDR: begin
tex_baddr[csr_tex_unit] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_FORMAT: begin
tex_format[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_WRAPU: begin
tex_wraps[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_WRAPV: begin
tex_wraps[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_FILTER: begin
tex_filter[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
tex_filter[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
end
`CSR_TEX_WIDTH: begin
tex_logdims[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_HEIGHT: begin
tex_logdims[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
default: begin
for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin
`IGNORE_WARNINGS_BEGIN
if (tex_csr_if.write_addr == `CSR_TEX_MIPOFF(j)) begin
`IGNORE_WARNINGS_END
tex_mipoff[csr_tex_unit][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
tex_mipoff[csr_tex_unit][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
end
end
end
endcase
end
if (reset || (tex_req_if.valid && tex_req_if.ready)) begin
for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
csrs_dirty[i] <= 0;
end
end
end
wire [`UUID_BITS-1:0] write_uuid = tex_csr_if.write_uuid;
`UNUSED_VAR (write_uuid);
// mipmap attributes
@@ -136,7 +122,7 @@ module VX_tex_unit #(
.mip_level (mip_level),
.req_mipoff (sel_mipoff),
.req_logdims(sel_logdims),
.req_info ({tex_format[tex_req_if.unit], tex_req_if.uuid, tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}),
.req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC, tex_req_if.uuid}),
.req_ready (tex_req_if.ready),
.rsp_valid (mem_req_valid),
@@ -211,9 +197,9 @@ module VX_tex_unit #(
.rsp_valid (tex_rsp_if.valid),
.rsp_tmask (tex_rsp_if.tmask),
.rsp_data (tex_rsp_if.data),
.rsp_info ({tex_rsp_if.uuid, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC}),
.rsp_info ({tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.uuid}),
.rsp_ready (tex_rsp_if.ready)
);
);
`ifdef PERF_ENABLE
wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_req_per_cycle;
@@ -255,31 +241,24 @@ module VX_tex_unit #(
`ifdef DBG_TRACE_TEX
always @(posedge clk) begin
if (tex_csr_if.write_enable) begin
dpi_trace("%d: core%0d-tex-csr: unit=%0d, state=", $time, CORE_ID, csr_tex_unit);
trace_tex_state(tex_csr_if.write_addr);
dpi_trace(", data=%0h (#%0d)\n", tex_csr_if.write_data, tex_csr_if.write_uuid);
end
if (tex_req_if.valid && tex_req_if.ready) begin
for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
if (csrs_dirty[i]) begin
dpi_trace("%d: core%0d-tex-csr: tex%0d_addr=%0h\n", $time, CORE_ID, i, tex_baddr[i]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_logwidth=%0h\n", $time, CORE_ID, i, tex_logdims[i][0]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_logheight=%0h\n", $time, CORE_ID, i, tex_logdims[i][1]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_format=%0h\n", $time, CORE_ID, i, tex_format[i]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_u=%0h\n", $time, CORE_ID, i, tex_wraps[i][0]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_v=%0h\n", $time, CORE_ID, i, tex_wraps[i][1]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_filter=%0h\n", $time, CORE_ID, i, tex_filter[i]);
end
end
dpi_trace("%d: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=",
$time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod);
$time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod);
`TRACE_ARRAY1D(tex_req_if.coords[0], `NUM_THREADS);
dpi_trace(", v=");
`TRACE_ARRAY1D(tex_req_if.coords[1], `NUM_THREADS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", tex_req_if.uuid);
end
if (tex_rsp_if.valid && tex_rsp_if.ready) begin
dpi_trace("%d: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
$time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask);
`TRACE_ARRAY1D(tex_rsp_if.data, `NUM_THREADS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", tex_rsp_if.uuid);
end
end
`endif