Files
vortex/hw/rtl/tex_unit/VX_tex_unit.sv
2021-12-09 20:43:22 -05:00

266 lines
9.8 KiB
Systemverilog

`include "VX_tex_define.vh"
module VX_tex_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
VX_perf_tex_if.master perf_tex_if,
`endif
// Texture unit <-> Memory Unit
VX_dcache_req_if.master dcache_req_if,
VX_dcache_rsp_if.slave dcache_rsp_if,
// Inputs
VX_tex_req_if.slave tex_req_if,
VX_tex_csr_if.slave tex_csr_if,
// Outputs
VX_tex_rsp_if.master tex_rsp_if
);
localparam REQ_INFO_W = `NR_BITS + 1 + `NW_BITS + 32 + `UUID_BITS;
localparam BLEND_FRAC_W = (2 * `NUM_THREADS * `TEX_BLEND_FRAC);
reg [$clog2(`NUM_TEX_UNITS)-1:0] csr_tex_unit;
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(`TEX_LOD_MAX+1)-1:0];
reg [1:0][`TEX_LOD_BITS-1:0] tex_logdims [`NUM_TEX_UNITS-1:0];
reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0];
reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0];
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0];
// CSRs programming
always @(posedge clk) begin
if (tex_csr_if.write_enable) begin
case (tex_csr_if.write_addr)
`CSR_TEX_UNIT: begin
csr_tex_unit <= tex_csr_if.write_data[$clog2(`NUM_TEX_UNITS)-1:0];
end
`CSR_TEX_ADDR: begin
tex_baddr[csr_tex_unit] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
end
`CSR_TEX_FORMAT: begin
tex_format[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
end
`CSR_TEX_WRAPU: begin
tex_wraps[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
end
`CSR_TEX_WRAPV: begin
tex_wraps[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
end
`CSR_TEX_FILTER: begin
tex_filter[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
end
`CSR_TEX_WIDTH: begin
tex_logdims[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
end
`CSR_TEX_HEIGHT: begin
tex_logdims[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
end
default: begin
for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin
`IGNORE_WARNINGS_BEGIN
if (tex_csr_if.write_addr == `CSR_TEX_MIPOFF(j)) begin
`IGNORE_WARNINGS_END
tex_mipoff[csr_tex_unit][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
end
end
end
endcase
end
end
wire [`UUID_BITS-1:0] write_uuid = tex_csr_if.write_uuid;
`UNUSED_VAR (write_uuid);
// mipmap attributes
wire [`NUM_THREADS-1:0][`TEX_LOD_BITS-1:0] mip_level;
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] sel_mipoff;
wire [`NUM_THREADS-1:0][1:0][`TEX_LOD_BITS-1:0] sel_logdims;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0];
assign mip_level[i] = tex_req_if.lod[i][`TEX_LOD_BITS-1:0];
assign sel_mipoff[i] = tex_mipoff[unit][mip_level[i]];
assign sel_logdims[i][0] = tex_logdims[unit][0];
assign sel_logdims[i][1] = tex_logdims[unit][1];
end
// address generation
wire mem_req_valid;
wire [`NUM_THREADS-1:0] mem_req_tmask;
wire [`TEX_FILTER_BITS-1:0] mem_req_filter;
wire [`TEX_LGSTRIDE_BITS-1:0] mem_req_lgstride;
wire [`NUM_THREADS-1:0][1:0][`TEX_BLEND_FRAC-1:0] mem_req_blends;
wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr;
wire [`NUM_THREADS-1:0][31:0] mem_req_baseaddr;
wire [(`TEX_FORMAT_BITS + REQ_INFO_W)-1:0] mem_req_info;
wire mem_req_ready;
VX_tex_addr #(
.CORE_ID (CORE_ID),
.REQ_INFOW (`TEX_FORMAT_BITS + REQ_INFO_W),
.NUM_REQS (`NUM_THREADS)
) tex_addr (
.clk (clk),
.reset (reset),
.req_valid (tex_req_if.valid),
.req_tmask (tex_req_if.tmask),
.req_coords (tex_req_if.coords),
.req_format (tex_format[tex_req_if.unit]),
.req_filter (tex_filter[tex_req_if.unit]),
.req_wraps (tex_wraps[tex_req_if.unit]),
.req_baseaddr(tex_baddr[tex_req_if.unit]),
.mip_level (mip_level),
.req_mipoff (sel_mipoff),
.req_logdims(sel_logdims),
.req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC, tex_req_if.uuid}),
.req_ready (tex_req_if.ready),
.rsp_valid (mem_req_valid),
.rsp_tmask (mem_req_tmask),
.rsp_filter (mem_req_filter),
.rsp_lgstride(mem_req_lgstride),
.rsp_baseaddr(mem_req_baseaddr),
.rsp_addr (mem_req_addr),
.rsp_blends (mem_req_blends),
.rsp_info (mem_req_info),
.rsp_ready (mem_req_ready)
);
// retrieve texel values from memory
wire mem_rsp_valid;
wire [`NUM_THREADS-1:0] mem_rsp_tmask;
wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data;
wire [(BLEND_FRAC_W + `TEX_FORMAT_BITS + REQ_INFO_W)-1:0] mem_rsp_info;
wire mem_rsp_ready;
VX_tex_mem #(
.CORE_ID (CORE_ID),
.REQ_INFOW (BLEND_FRAC_W + `TEX_FORMAT_BITS + REQ_INFO_W),
.NUM_REQS (`NUM_THREADS)
) tex_mem (
.clk (clk),
.reset (reset),
// memory interface
.dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if),
// inputs
.req_valid (mem_req_valid),
.req_tmask (mem_req_tmask),
.req_filter(mem_req_filter),
.req_lgstride(mem_req_lgstride),
.req_baseaddr(mem_req_baseaddr),
.req_addr (mem_req_addr),
.req_info ({mem_req_blends, mem_req_info}),
.req_ready (mem_req_ready),
// outputs
.rsp_valid (mem_rsp_valid),
.rsp_tmask (mem_rsp_tmask),
.rsp_data (mem_rsp_data),
.rsp_info (mem_rsp_info),
.rsp_ready (mem_rsp_ready)
);
// apply sampler
VX_tex_sampler #(
.CORE_ID (CORE_ID),
.REQ_INFOW (REQ_INFO_W),
.NUM_REQS (`NUM_THREADS)
) tex_sampler (
.clk (clk),
.reset (reset),
// inputs
.req_valid (mem_rsp_valid),
.req_tmask (mem_rsp_tmask),
.req_data (mem_rsp_data),
.req_blends (mem_rsp_info[(REQ_INFO_W+`TEX_FORMAT_BITS) +: BLEND_FRAC_W]),
.req_format (mem_rsp_info[REQ_INFO_W +: `TEX_FORMAT_BITS]),
.req_info (mem_rsp_info[0 +: REQ_INFO_W]),
.req_ready (mem_rsp_ready),
// outputs
.rsp_valid (tex_rsp_if.valid),
.rsp_tmask (tex_rsp_if.tmask),
.rsp_data (tex_rsp_if.data),
.rsp_info ({tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.uuid}),
.rsp_ready (tex_rsp_if.ready)
);
`ifdef PERF_ENABLE
wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_req_per_cycle;
wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_rsp_per_cycle;
wire [`NUM_THREADS-1:0] perf_mem_req_per_mask = dcache_req_if.valid & dcache_req_if.ready;
wire [`NUM_THREADS-1:0] perf_mem_rsp_per_mask = dcache_rsp_if.tmask & {`NUM_THREADS{dcache_rsp_if.valid & dcache_rsp_if.ready}};
`POP_COUNT(perf_mem_req_per_cycle, perf_mem_req_per_mask);
`POP_COUNT(perf_mem_rsp_per_cycle, perf_mem_rsp_per_mask);
reg [`PERF_CTR_BITS-1:0] perf_pending_reads;
wire [$clog2(`NUM_THREADS+1)+1-1:0] perf_pending_reads_cycle = perf_mem_req_per_cycle - perf_mem_rsp_per_cycle;
always @(posedge clk) begin
if (reset) begin
perf_pending_reads <= 0;
end else begin
perf_pending_reads <= perf_pending_reads + `PERF_CTR_BITS'($signed(perf_pending_reads_cycle));
end
end
reg [`PERF_CTR_BITS-1:0] perf_mem_reads;
reg [`PERF_CTR_BITS-1:0] perf_mem_latency;
always @(posedge clk) begin
if (reset) begin
perf_mem_reads <= 0;
perf_mem_latency <= 0;
end else begin
perf_mem_reads <= perf_mem_reads + `PERF_CTR_BITS'(perf_mem_req_per_cycle);
perf_mem_latency <= perf_mem_latency + `PERF_CTR_BITS'(perf_pending_reads);
end
end
assign perf_tex_if.mem_reads = perf_mem_reads;
assign perf_tex_if.mem_latency = perf_mem_latency;
`endif
`ifdef DBG_TRACE_TEX
always @(posedge clk) begin
if (tex_csr_if.write_enable) begin
dpi_trace("%d: core%0d-tex-csr: unit=%0d, state=", $time, CORE_ID, csr_tex_unit);
trace_tex_state(tex_csr_if.write_addr);
dpi_trace(", data=%0h (#%0d)\n", tex_csr_if.write_data, tex_csr_if.write_uuid);
end
if (tex_req_if.valid && tex_req_if.ready) begin
dpi_trace("%d: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=",
$time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod);
`TRACE_ARRAY1D(tex_req_if.coords[0], `NUM_THREADS);
dpi_trace(", v=");
`TRACE_ARRAY1D(tex_req_if.coords[1], `NUM_THREADS);
dpi_trace(" (#%0d)\n", tex_req_if.uuid);
end
if (tex_rsp_if.valid && tex_rsp_if.ready) begin
dpi_trace("%d: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
$time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask);
`TRACE_ARRAY1D(tex_rsp_if.data, `NUM_THREADS);
dpi_trace(" (#%0d)\n", tex_rsp_if.uuid);
end
end
`endif
endmodule