fixes: texture unit mem access sometimes going to smem, bilinear texture filtering; new: cache req_id

This commit is contained in:
Blaise Tine
2021-11-24 00:00:17 -05:00
parent 1501360f4b
commit 18762dffce
70 changed files with 3818 additions and 1727 deletions

View File

@@ -12,13 +12,13 @@ module VX_tex_addr #(
input wire req_valid,
input wire [NUM_REQS-1:0] req_tmask,
input wire [1:0][NUM_REQS-1:0][31:0] req_coords,
input wire [1:0][NUM_REQS-1:0][`TEX_FXD_BITS-1:0] req_coords,
input wire [`TEX_FORMAT_BITS-1:0] req_format,
input wire [`TEX_FILTER_BITS-1:0] req_filter,
input wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps,
input wire [`TEX_ADDR_BITS-1:0] req_baseaddr,
input wire [NUM_REQS-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoff,
input wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] req_logdims,
input wire [NUM_REQS-1:0][1:0][`TEX_LOD_BITS-1:0] req_logdims,
input wire [REQ_INFOW-1:0] req_info,
output wire req_ready,
@@ -27,31 +27,33 @@ module VX_tex_addr #(
output wire rsp_valid,
output wire [NUM_REQS-1:0] rsp_tmask,
output wire [`TEX_FILTER_BITS-1:0] rsp_filter,
output wire [`TEX_STRIDE_BITS-1:0] rsp_stride,
output wire [`TEX_LGSTRIDE_BITS-1:0] rsp_lgstride,
output wire [NUM_REQS-1:0][3:0][31:0] rsp_addr,
output wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] rsp_blends,
output wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] rsp_blends,
output wire [REQ_INFOW-1:0] rsp_info,
input wire rsp_ready
);
`UNUSED_PARAM (CORE_ID)
localparam PITCH_BITS = `MAX(`TEX_DIM_BITS, `TEX_STRIDE_BITS) + 1;
localparam SCALED_U_W = `FIXED_INT + `TEX_STRIDE_BITS;
localparam SCALED_X_W = (2 * `FIXED_INT);
localparam SCALED_V_W = SCALED_X_W + `TEX_STRIDE_BITS;
localparam SHIFT_BITS = $clog2(`TEX_FXD_FRAC+1);
localparam PITCH_BITS = `MAX(`TEX_LOD_BITS, `TEX_LGSTRIDE_BITS) + 1;
localparam SCALED_X_W = `TEX_DIM_BITS + `TEX_BLEND_FRAC;
localparam OFFSET_U_W = `TEX_DIM_BITS + `TEX_LGSTRIDE_MAX;
localparam OFFSET_V_W = `TEX_DIM_BITS + `TEX_DIM_BITS + `TEX_LGSTRIDE_MAX;
wire valid_s0;
wire [NUM_REQS-1:0] tmask_s0;
wire [`TEX_FILTER_BITS-1:0] filter_s0;
wire [REQ_INFOW-1:0] req_info_s0;
wire [NUM_REQS-1:0][1:0][`FIXED_FRAC-1:0] clamped_lo, clamped_lo_s0;
wire [NUM_REQS-1:0][1:0][`FIXED_FRAC-1:0] clamped_hi, clamped_hi_s0;
wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0;
wire [NUM_REQS-1:0][1:0][`TEX_FXD_FRAC-1:0] clamped_lo, clamped_lo_s0;
wire [NUM_REQS-1:0][1:0][`TEX_FXD_FRAC-1:0] clamped_hi, clamped_hi_s0;
wire [NUM_REQS-1:0][1:0][SHIFT_BITS-1:0] dim_shift, dim_shift_s0;
wire [`TEX_LGSTRIDE_BITS-1:0] log_stride, log_stride_s0;
wire [NUM_REQS-1:0][31:0] mip_addr, mip_addr_s0;
wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] log_dims_s0;
wire [NUM_REQS-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
wire [NUM_REQS-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
wire stall_out;
// stride
@@ -67,9 +69,9 @@ module VX_tex_addr #(
for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar j = 0; j < 2; ++j) begin
wire [`FIXED_FRAC-1:0] delta = (`FIXED_HALF >> req_logdims[i][j]);
wire [31:0] coord_lo = req_filter ? (req_coords[j][i] - 32'(delta)) : req_coords[j][i];
wire [31:0] coord_hi = req_filter ? (req_coords[j][i] + 32'(delta)) : req_coords[j][i];
wire [`TEX_FXD_FRAC-1:0] delta = (`TEX_FXD_HALF >> req_logdims[i][j]);
wire [`TEX_FXD_BITS-1:0] coord_lo = req_filter ? (req_coords[j][i] - `TEX_FXD_BITS'(delta)) : req_coords[j][i];
wire [`TEX_FXD_BITS-1:0] coord_hi = req_filter ? (req_coords[j][i] + `TEX_FXD_BITS'(delta)) : req_coords[j][i];
VX_tex_wrap #(
.CORE_ID (CORE_ID)
@@ -86,66 +88,72 @@ module VX_tex_addr #(
.coord_i (coord_hi),
.coord_o (clamped_hi[i][j])
);
assign dim_shift[i][j] = (`TEX_FXD_FRAC - `TEX_BLEND_FRAC - req_logdims[i][j]);
end
assign log_pitch[i] = PITCH_BITS'(req_logdims[i][0]) + PITCH_BITS'(log_stride);
assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]);
end
VX_pipe_register #(
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + 2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + 2 * SHIFT_BITS + 32 + 2 * 2 * `TEX_FXD_FRAC)),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, req_logdims, mip_addr, clamped_lo, clamped_hi}),
.data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
.data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, dim_shift, mip_addr, clamped_lo, clamped_hi}),
.data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, dim_shift_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
);
// addresses generation
wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_lo;
wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_hi;
wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] blends;
wire [NUM_REQS-1:0][1:0][SCALED_X_W-1:0] scaled_lo;
wire [NUM_REQS-1:0][1:0][SCALED_X_W-1:0] scaled_hi;
wire [NUM_REQS-1:0][OFFSET_U_W-1:0] offset_u_lo;
wire [NUM_REQS-1:0][OFFSET_U_W-1:0] offset_u_hi;
wire [NUM_REQS-1:0][OFFSET_V_W-1:0] offset_v_lo;
wire [NUM_REQS-1:0][OFFSET_V_W-1:0] offset_v_hi;
wire [NUM_REQS-1:0][31:0] base_addr_lo;
wire [NUM_REQS-1:0][31:0] base_addr_hi;
wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] blends;
wire [NUM_REQS-1:0][3:0][31:0] addr;
for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar j = 0; j < 2; ++j) begin
assign scaled_lo[i][j] = scale_to_dim(clamped_lo_s0[i][j], log_dims_s0[i][j]);
assign scaled_hi[i][j] = scale_to_dim(clamped_hi_s0[i][j], log_dims_s0[i][j]);
assign blends[i][j] = filter_s0 ? clamped_lo_s0[i][j][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0);
assign scaled_lo[i][j] = SCALED_X_W'(clamped_lo_s0[i][j] >> dim_shift_s0[i][j]);
assign scaled_hi[i][j] = SCALED_X_W'(clamped_hi_s0[i][j] >> dim_shift_s0[i][j]);
assign blends[i][j] = filter_s0 ? scaled_lo[i][j][`TEX_BLEND_FRAC-1:0] : `TEX_BLEND_FRAC'(0);
end
end
`UNUSED_VAR (log_pitch_s0)
for (genvar i = 0; i < NUM_REQS; ++i) begin
wire [SCALED_U_W-1:0] offset_u_lo = SCALED_U_W'(scaled_lo[i][0]) << log_stride_s0;
wire [SCALED_U_W-1:0] offset_u_hi = SCALED_U_W'(scaled_hi[i][0]) << log_stride_s0;
assign offset_u_lo[i] = OFFSET_U_W'(scaled_lo[i][0][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_stride_s0;
assign offset_u_hi[i] = OFFSET_U_W'(scaled_hi[i][0][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_stride_s0;
wire [SCALED_V_W-1:0] offset_v_lo = SCALED_V_W'(scaled_lo[i][1]) << log_pitch_s0[i];
wire [SCALED_V_W-1:0] offset_v_hi = SCALED_V_W'(scaled_hi[i][1]) << log_pitch_s0[i];
assign offset_v_lo[i] = OFFSET_V_W'(scaled_lo[i][1][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_pitch_s0[i];
assign offset_v_hi[i] = OFFSET_V_W'(scaled_hi[i][1][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_pitch_s0[i];
wire [31:0] base_addr_lo = mip_addr_s0[i] + 32'(offset_v_lo);
wire [31:0] base_addr_hi = mip_addr_s0[i] + 32'(offset_v_hi);
assign base_addr_lo[i] = mip_addr_s0[i] + 32'(offset_v_lo[i]);
assign base_addr_hi[i] = mip_addr_s0[i] + 32'(offset_v_hi[i]);
assign addr[i][0] = base_addr_lo + 32'(offset_u_lo);
assign addr[i][1] = base_addr_lo + 32'(offset_u_hi);
assign addr[i][2] = base_addr_hi + 32'(offset_u_lo);
assign addr[i][3] = base_addr_hi + 32'(offset_u_hi);
assign addr[i][0] = base_addr_lo[i] + 32'(offset_u_lo[i]);
assign addr[i][1] = base_addr_lo[i] + 32'(offset_u_hi[i]);
assign addr[i][2] = base_addr_hi[i] + 32'(offset_u_lo[i]);
assign addr[i][3] = base_addr_hi[i] + 32'(offset_u_hi[i]);
end
assign stall_out = rsp_valid && ~rsp_ready;
VX_pipe_register #(
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `BLEND_FRAC) + REQ_INFOW),
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `TEX_BLEND_FRAC) + REQ_INFOW),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({valid_s0, tmask_s0, filter_s0, log_stride_s0, addr, blends, req_info_s0}),
.data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_stride, rsp_addr, rsp_blends, rsp_info})
.data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_lgstride, rsp_addr, rsp_blends, rsp_info})
);
assign req_ready = ~stall_out;
@@ -157,22 +165,47 @@ module VX_tex_addr #(
assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];
always @(posedge clk) begin
if (req_valid && ~stall_out) begin
dpi_trace("%d: *** log_pitch=", $time);
`TRACE_ARRAY1D(log_pitch, NUM_REQS);
dpi_trace(", mip_addr=");
`TRACE_ARRAY1D(mip_addr, NUM_REQS);
dpi_trace(", req_logdims=");
`TRACE_ARRAY2D(req_logdims, 2, NUM_REQS);
dpi_trace(", clamped_lo=");
`TRACE_ARRAY2D(clamped_lo, 2, NUM_REQS);
dpi_trace(", clamped_hi=");
`TRACE_ARRAY2D(clamped_hi, 2, NUM_REQS);
dpi_trace("\n");
end
if (valid_s0 && ~stall_out) begin
dpi_trace("%d: *** scaled_lo=", $time);
`TRACE_ARRAY2D(scaled_lo, 2, NUM_REQS);
dpi_trace(", scaled_hi=");
`TRACE_ARRAY2D(scaled_hi, 2, NUM_REQS);
dpi_trace(", offset_u_lo=");
`TRACE_ARRAY1D(offset_u_lo, NUM_REQS);
dpi_trace(", offset_u_hi=");
`TRACE_ARRAY1D(offset_u_hi, NUM_REQS);
dpi_trace(", offset_v_lo=");
`TRACE_ARRAY1D(offset_v_lo, NUM_REQS);
dpi_trace(", offset_v_hi=");
`TRACE_ARRAY1D(offset_v_hi, NUM_REQS);
dpi_trace(", base_addr_lo=");
`TRACE_ARRAY1D(base_addr_lo, NUM_REQS);
dpi_trace(", base_addr_hi=");
`TRACE_ARRAY1D(base_addr_hi, NUM_REQS);
dpi_trace("\n");
end
if (rsp_valid && rsp_ready) begin
dpi_trace("%d: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, req_filter=%0d, tride=%0d, addr=",
$time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_stride);
dpi_trace("%d: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, req_filter=%0d, lgstride=%0d, addr=",
$time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_lgstride);
`TRACE_ARRAY2D(rsp_addr, 4, NUM_REQS);
dpi_trace("\n");
end
end
`endif
// scale_to_dim: widens src to FIXED_BITS bits, shifts it left by dim, and
// returns the FIXED_INT bits located directly above the low FIXED_FRAC bits
// of the shifted value.
//   src : FIXED_FRAC-bit input value
//   dim : TEX_DIM_BITS-bit left-shift amount
// NOTE(review): the only visible call sites pass a clamped coordinate and a
// per-axis log dimension, so the result is presumably the integer texel
// index for that axis -- confirm against the callers.
// NOTE(review): those call sites appear next to shift-based assignments
// (clamped >> dim_shift) in this listing, so this helper may be the
// pre-change implementation -- verify it is still in use before relying on it.
function logic [`FIXED_INT-1:0] scale_to_dim (input logic [`FIXED_FRAC-1:0] src,
input logic [`TEX_DIM_BITS-1:0] dim);
`IGNORE_WARNINGS_BEGIN
logic [`FIXED_BITS-1:0] out;
`IGNORE_WARNINGS_END
// Widen before shifting so bits moved above FIXED_FRAC are kept in out.
out = `FIXED_BITS'(src) << dim;
// Indexed part-select: FIXED_INT bits starting at bit FIXED_FRAC.
return out[`FIXED_FRAC +: `FIXED_INT];
endfunction
endmodule

View File

@@ -3,31 +3,26 @@
`include "VX_define.vh"
`define FIXED_BITS 32
`define FIXED_FRAC 20
`define FIXED_INT (`FIXED_BITS - `FIXED_FRAC)
`define FIXED_ONE (2 ** `FIXED_FRAC)
`define FIXED_HALF (`FIXED_ONE >> 1)
`define FIXED_MASK (`FIXED_ONE - 1)
`define TEX_FXD_INT (`TEX_FXD_BITS - `TEX_FXD_FRAC)
`define TEX_FXD_ONE (2 ** `TEX_FXD_FRAC)
`define TEX_FXD_HALF (`TEX_FXD_ONE >> 1)
`define TEX_FXD_MASK (`TEX_FXD_ONE - 1)
`define TEX_ADDR_BITS 32
`define TEX_FORMAT_BITS 3
`define TEX_WRAP_BITS 2
`define TEX_DIM_BITS 4
`define TEX_FILTER_BITS 1
`define TEX_MIPOFF_BITS (2*`TEX_DIM_BITS+1)
`define TEX_MIPOFF_BITS (2*12+1)
`define TEX_STRIDE_BITS 2
`define TEX_LOD_BITS 4
`define TEX_MIP_BITS (`NTEX_BITS + `TEX_LOD_BITS)
`define TEX_LGSTRIDE_MAX 2
`define TEX_LGSTRIDE_BITS 2
`define TEX_WRAP_CLAMP 0
`define TEX_WRAP_REPEAT 1
`define TEX_WRAP_MIRROR 2
`define BLEND_FRAC 8
`define BLEND_ONE (2 ** `BLEND_FRAC)
`define TEX_BLEND_FRAC 8
`define TEX_BLEND_ONE (2 ** `TEX_BLEND_FRAC)
`define TEX_FORMAT_R8G8B8A8 `TEX_FORMAT_BITS'(0)
`define TEX_FORMAT_R5G6B5 `TEX_FORMAT_BITS'(1)

View File

@@ -15,7 +15,7 @@ module VX_tex_mem #(
input wire req_valid,
input wire [NUM_REQS-1:0] req_tmask,
input wire [`TEX_FILTER_BITS-1:0] req_filter,
input wire [`TEX_STRIDE_BITS-1:0] req_stride,
input wire [`TEX_LGSTRIDE_BITS-1:0] req_lgstride,
input wire [NUM_REQS-1:0][3:0][31:0] req_addr,
input wire [REQ_INFOW-1:0] req_info,
output wire req_ready,
@@ -63,23 +63,23 @@ module VX_tex_mem #(
wire [NUM_REQS-1:0] q_req_tmask;
wire [`TEX_FILTER_BITS-1:0] q_req_filter;
wire [REQ_INFOW-1:0] q_req_info;
wire [`TEX_STRIDE_BITS-1:0] q_req_stride;
wire [`TEX_LGSTRIDE_BITS-1:0] q_req_lgstride;
wire [3:0][NUM_REQS-1:0][1:0] q_align_offs;
wire [3:0] q_dup_reqs;
assign reqq_push = req_valid && req_ready;
VX_fifo_queue #(
.DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFOW + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * NUM_REQS * 2) + 4),
.SIZE (`LSUQ_SIZE),
.DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFOW + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (4 * NUM_REQS * 2) + 4),
.SIZE (`TEXQ_SIZE),
.OUT_REG (1)
) req_queue (
.clk (clk),
.reset (reset),
.push (reqq_push),
.pop (reqq_pop),
.data_in ({req_addr_w, req_tmask, req_info, req_filter, req_stride, align_offs, dup_reqs}),
.data_out ({q_req_addr, q_req_tmask, q_req_info, q_req_filter, q_req_stride, q_align_offs, q_dup_reqs}),
.data_in ({req_addr_w, req_tmask, req_info, req_filter, req_lgstride, align_offs, dup_reqs}),
.data_out ({q_req_addr, q_req_tmask, q_req_info, q_req_filter, q_req_lgstride, q_align_offs, q_dup_reqs}),
.empty (reqq_empty),
.full (reqq_full),
`UNUSED_PIN (alm_full),
@@ -96,8 +96,12 @@ module VX_tex_mem #(
wire sent_all_ready, last_texel_sent;
wire req_texel_dup;
wire [NUM_REQS-1:0][29:0] req_texel_addr;
reg [`DBG_CACHE_REQ_IDW-1:0] req_id;
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id;
reg [1:0] req_texel_idx;
reg req_texels_done;
`UNUSED_VAR (rsp_req_id)
always @(posedge clk) begin
if (reset || last_texel_sent) begin
@@ -146,14 +150,19 @@ module VX_tex_mem #(
assign dcache_req_if.valid = {NUM_REQS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask;
assign dcache_req_if.rw = {NUM_REQS{1'b0}};
assign dcache_req_if.addr = req_texel_addr;
assign dcache_req_if.byteen = {NUM_REQS{4'b1111}};
assign dcache_req_if.byteen = {NUM_REQS{4'b0}};
assign dcache_req_if.data = 'x;
assign dcache_req_if.tag = {NUM_REQS{req_id, `LSU_TAG_ID_BITS'(req_texel_idx), `CACHE_ADDR_TYPE_BITS'(0)}};
`ifdef DBG_CACHE_REQ_INFO
assign dcache_req_if.tag = {NUM_REQS{q_req_info[`DBG_CACHE_REQ_MDATAW-1:0], req_texel_idx}};
`else
assign dcache_req_if.tag = {NUM_REQS{req_texel_idx}};
`endif
always @(posedge clk) begin
if (reset) begin
req_id <= `DBG_CACHE_REQ_ID(2, 0);
end else begin
if (dcache_req_fire_any) begin
req_id <= req_id + 1;
end
end
end
// Dcache Response
@@ -162,14 +171,17 @@ module VX_tex_mem #(
reg [NUM_REQS-1:0][31:0] rsp_data_qual;
reg [RSP_CTR_W-1:0] rsp_rem_ctr, rsp_rem_ctr_init;
wire [RSP_CTR_W-1:0] rsp_rem_ctr_n;
wire [NUM_REQS-1:0][1:0] rsp_align_offs;
wire dcache_rsp_fire;
wire [1:0] rsp_texel_idx;
wire rsp_texel_dup;
assign rsp_texel_idx = dcache_rsp_if.tag[1:0];
assign rsp_texel_idx = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: 2];
assign rsp_req_id = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS + `LSU_TAG_ID_BITS +: `DBG_CACHE_REQ_IDW];
`UNUSED_VAR (dcache_rsp_if.tag)
assign rsp_texel_dup = q_dup_reqs[rsp_texel_idx];
assign rsp_align_offs = q_align_offs[rsp_texel_idx];
assign dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;
@@ -180,12 +192,12 @@ module VX_tex_mem #(
reg [31:0] rsp_data_shifted;
always @(*) begin
rsp_data_shifted[31:16] = src_data[31:16];
rsp_data_shifted[15:0] = q_align_offs[rsp_texel_idx][i][1] ? src_data[31:16] : src_data[15:0];
rsp_data_shifted[7:0] = q_align_offs[rsp_texel_idx][i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0];
rsp_data_shifted[15:0] = rsp_align_offs[i][1] ? src_data[31:16] : src_data[15:0];
rsp_data_shifted[7:0] = rsp_align_offs[i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0];
end
always @(*) begin
case (q_req_stride)
case (q_req_lgstride)
0: rsp_data_qual[i] = 32'(rsp_data_shifted[7:0]);
1: rsp_data_qual[i] = 32'(rsp_data_shifted[15:0]);
default: rsp_data_qual[i] = rsp_data_shifted;
@@ -266,20 +278,20 @@ module VX_tex_mem #(
always @(posedge clk) begin
if (dcache_req_fire_any) begin
dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_texel_idx);
dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, addr=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_id, req_texel_idx);
`TRACE_ARRAY1D(req_texel_addr, NUM_REQS);
dpi_trace(", is_dup=%b\n", req_texel_dup);
end
if (dcache_rsp_fire) begin
dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_texel_idx);
dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, data=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_req_id, rsp_texel_idx);
`TRACE_ARRAY1D(dcache_rsp_if.data, NUM_REQS);
dpi_trace("\n");
end
if (req_valid && req_ready) begin
dpi_trace("%d: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, stride=%0d, addr=",
$time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_stride);
dpi_trace("%d: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, lgstride=%0d, addr=",
$time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_lgstride);
`TRACE_ARRAY2D(req_addr, 4, NUM_REQS);
dpi_trace("\n");
end

View File

@@ -12,7 +12,7 @@ module VX_tex_sampler #(
input wire req_valid,
input wire [NUM_REQS-1:0] req_tmask,
input wire [`TEX_FORMAT_BITS-1:0] req_format,
input wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] req_blends,
input wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] req_blends,
input wire [NUM_REQS-1:0][3:0][31:0] req_data,
input wire [REQ_INFOW-1:0] req_info,
output wire req_ready,
@@ -32,7 +32,7 @@ module VX_tex_sampler #(
wire [REQ_INFOW-1:0] req_info_s0;
wire [NUM_REQS-1:0][31:0] texel_ul, texel_uh;
wire [NUM_REQS-1:0][31:0] texel_ul_s0, texel_uh_s0;
wire [NUM_REQS-1:0][`BLEND_FRAC-1:0] blend_v, blend_v_s0;
wire [NUM_REQS-1:0][`TEX_BLEND_FRAC-1:0] blend_v, blend_v_s0;
wire [NUM_REQS-1:0][31:0] texel_v;
wire stall_out;
@@ -52,7 +52,7 @@ module VX_tex_sampler #(
end
wire [7:0] beta = req_blends[i][0];
wire [8:0] alpha = `BLEND_ONE - beta;
wire [8:0] alpha = `TEX_BLEND_ONE - beta;
VX_tex_lerp #(
) tex_lerp_ul (
@@ -76,7 +76,7 @@ module VX_tex_sampler #(
end
VX_pipe_register #(
.DATAW (1 + NUM_REQS + REQ_INFOW + (NUM_REQS * `BLEND_FRAC) + (2 * NUM_REQS * 32)),
.DATAW (1 + NUM_REQS + REQ_INFOW + (NUM_REQS * `TEX_BLEND_FRAC) + (2 * NUM_REQS * 32)),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
@@ -88,7 +88,7 @@ module VX_tex_sampler #(
for (genvar i = 0; i < NUM_REQS; i++) begin
wire [7:0] beta = blend_v_s0[i];
wire [8:0] alpha = `BLEND_ONE - beta;
wire [8:0] alpha = `TEX_BLEND_ONE - beta;
VX_tex_lerp #(
) tex_lerp_v (

View File

@@ -4,11 +4,11 @@ module VX_tex_stride #(
parameter CORE_ID = 0
) (
input wire [`TEX_FORMAT_BITS-1:0] format,
output wire [`TEX_STRIDE_BITS-1:0] log_stride
output wire [`TEX_LGSTRIDE_BITS-1:0] log_stride
);
`UNUSED_PARAM (CORE_ID)
reg [`TEX_STRIDE_BITS-1:0] log_stride_r;
reg [`TEX_LGSTRIDE_BITS-1:0] log_stride_r;
always @(*) begin
case (format)

View File

@@ -20,13 +20,13 @@ module VX_tex_unit #(
localparam REQ_INFOW_S = `NR_BITS + 1 + `NW_BITS + 32;
localparam REQ_INFOW_A = `TEX_FORMAT_BITS + REQ_INFOW_S;
localparam REQ_INFOW_M = (2 * `NUM_THREADS * `BLEND_FRAC) + REQ_INFOW_A;
localparam REQ_INFOW_M = (2 * `NUM_THREADS * `TEX_BLEND_FRAC) + REQ_INFOW_A;
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
reg [1:0][`TEX_DIM_BITS-1:0] tex_dims [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][`TEX_LOD_MAX+1-1:0];
reg [1:0][`TEX_LOD_BITS-1:0] tex_logdims [`NUM_TEX_UNITS-1:0];
reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0];
reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0];
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0];
reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0];
// CSRs programming
@@ -35,38 +35,46 @@ module VX_tex_unit #(
`UNUSED_VAR (csrs_dirty)
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
wire [`TEX_LOD_BITS-1:0] mip_level = tex_csr_if.write_data[28 +: `TEX_LOD_BITS];
always @(posedge clk) begin
if (tex_csr_if.write_enable) begin
case (tex_csr_if.write_addr)
`CSR_TEX_ADDR(i) : begin
`CSR_TEX(i, `TEX_STATE_ADDR) : begin
tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
csrs_dirty[i] <= 1;
end
`CSR_TEX_FORMAT(i) : begin
`CSR_TEX(i, `TEX_STATE_FORMAT) : begin
tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
csrs_dirty[i] <= 1;
end
`CSR_TEX_WRAP(i) : begin
tex_wraps[i][0] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
tex_wraps[i][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
`CSR_TEX(i, `TEX_STATE_WRAPU) : begin
tex_wraps[i][0] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
csrs_dirty[i] <= 1;
end
`CSR_TEX_FILTER(i) : begin
`CSR_TEX(i, `TEX_STATE_WRAPV) : begin
tex_wraps[i][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
csrs_dirty[i] <= 1;
end
`CSR_TEX(i, `TEX_STATE_FILTER) : begin
tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
csrs_dirty[i] <= 1;
end
`CSR_TEX_MIPOFF(i) : begin
tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
`CSR_TEX(i, `TEX_STATE_WIDTH) : begin
tex_logdims[i][0] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
csrs_dirty[i] <= 1;
end
`CSR_TEX_WIDTH(i) : begin
tex_dims[i][mip_level][0] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
`CSR_TEX(i, `TEX_STATE_HEIGHT) : begin
tex_logdims[i][1] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
csrs_dirty[i] <= 1;
end
`CSR_TEX_HEIGHT(i) : begin
tex_dims[i][mip_level][1] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
csrs_dirty[i] <= 1;
default: begin
for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin
`IGNORE_WARNINGS_BEGIN
if (tex_csr_if.write_addr == `CSR_ADDR_BITS'(`CSR_TEX(i, `TEX_STATE_MIPOFF(j)))) begin
`IGNORE_WARNINGS_END
tex_mipoff[i][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
csrs_dirty[i] <= 1;
end
end
end
endcase
end
@@ -78,14 +86,15 @@ module VX_tex_unit #(
// mipmap attributes
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] sel_mipoff;
wire [`NUM_THREADS-1:0][1:0][`TEX_DIM_BITS-1:0] sel_dims;
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] sel_mipoff;
wire [`NUM_THREADS-1:0][1:0][`TEX_LOD_BITS-1:0] sel_logdims;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0];
wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][20+:`TEX_LOD_BITS];
assign sel_mipoff[i] = tex_mipoff[unit][mip_level];
assign sel_dims[i] = tex_dims[unit][mip_level];
wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][`TEX_LOD_BITS-1:0];
assign sel_mipoff[i] = tex_mipoff[unit][mip_level];
assign sel_logdims[i][0] = (tex_logdims[unit][0] - mip_level);
assign sel_logdims[i][1] = (tex_logdims[unit][1] - mip_level);
end
// address generation
@@ -93,8 +102,8 @@ module VX_tex_unit #(
wire mem_req_valid;
wire [`NUM_THREADS-1:0] mem_req_tmask;
wire [`TEX_FILTER_BITS-1:0] mem_req_filter;
wire [`TEX_STRIDE_BITS-1:0] mem_req_stride;
wire [`NUM_THREADS-1:0][1:0][`BLEND_FRAC-1:0] mem_req_blends;
wire [`TEX_LGSTRIDE_BITS-1:0] mem_req_lgstride;
wire [`NUM_THREADS-1:0][1:0][`TEX_BLEND_FRAC-1:0] mem_req_blends;
wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr;
wire [REQ_INFOW_A-1:0] mem_req_info;
wire mem_req_ready;
@@ -113,16 +122,16 @@ module VX_tex_unit #(
.req_format (tex_format[tex_req_if.unit]),
.req_filter (tex_filter[tex_req_if.unit]),
.req_wraps (tex_wraps[tex_req_if.unit]),
.req_baseaddr (tex_baddr[tex_req_if.unit]),
.req_baseaddr(tex_baddr[tex_req_if.unit]),
.req_mipoff (sel_mipoff),
.req_logdims (sel_dims),
.req_logdims(sel_logdims),
.req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}),
.req_ready (tex_req_if.ready),
.rsp_valid (mem_req_valid),
.rsp_tmask (mem_req_tmask),
.rsp_filter (mem_req_filter),
.rsp_stride (mem_req_stride),
.rsp_lgstride(mem_req_lgstride),
.rsp_addr (mem_req_addr),
.rsp_blends (mem_req_blends),
.rsp_info (mem_req_info),
@@ -142,8 +151,8 @@ module VX_tex_unit #(
.REQ_INFOW (REQ_INFOW_M),
.NUM_REQS (`NUM_THREADS)
) tex_mem (
.clk (clk),
.reset (reset),
.clk (clk),
.reset (reset),
// memory interface
.dcache_req_if (dcache_req_if),
@@ -153,7 +162,7 @@ module VX_tex_unit #(
.req_valid (mem_req_valid),
.req_tmask (mem_req_tmask),
.req_filter(mem_req_filter),
.req_stride(mem_req_stride),
.req_lgstride(mem_req_lgstride),
.req_addr (mem_req_addr),
.req_info ({mem_req_blends, mem_req_info}),
.req_ready (mem_req_ready),
@@ -168,7 +177,7 @@ module VX_tex_unit #(
// apply sampler
wire [`NUM_THREADS-1:0][1:0][`BLEND_FRAC-1:0] rsp_blends;
wire [`NUM_THREADS-1:0][1:0][`TEX_BLEND_FRAC-1:0] rsp_blends;
wire [`TEX_FORMAT_BITS-1:0] rsp_format;
wire [REQ_INFOW_S-1:0] rsp_info;
@@ -205,13 +214,12 @@ module VX_tex_unit #(
for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
if (csrs_dirty[i]) begin
dpi_trace("%d: core%0d-tex-csr: tex%0d_addr=%0h\n", $time, CORE_ID, i, tex_baddr[i]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_logwidth=%0h\n", $time, CORE_ID, i, tex_logdims[i][0]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_logheight=%0h\n", $time, CORE_ID, i, tex_logdims[i][1]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_format=%0h\n", $time, CORE_ID, i, tex_format[i]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_u=%0h\n", $time, CORE_ID, i, tex_wraps[i][0]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_v=%0h\n", $time, CORE_ID, i, tex_wraps[i][1]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_filter=%0h\n", $time, CORE_ID, i, tex_filter[i]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_mipoff[0]=%0h\n", $time, CORE_ID, i, tex_mipoff[i][0]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_width[0]=%0h\n", $time, CORE_ID, i, tex_dims[i][0][0]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_height[0]=%0h\n", $time, CORE_ID, i, tex_dims[i][0][1]);
end
end

View File

@@ -4,19 +4,19 @@ module VX_tex_wrap #(
parameter CORE_ID = 0
) (
input wire [`TEX_WRAP_BITS-1:0] wrap_i,
input wire [31:0] coord_i,
output wire [`FIXED_FRAC-1:0] coord_o
input wire [`TEX_FXD_BITS-1:0] coord_i,
output wire [`TEX_FXD_FRAC-1:0] coord_o
);
`UNUSED_PARAM (CORE_ID)
reg [`FIXED_FRAC-1:0] coord_r;
reg [`TEX_FXD_FRAC-1:0] coord_r;
wire [`FIXED_FRAC-1:0] clamp;
wire [`TEX_FXD_FRAC-1:0] clamp;
VX_tex_sat #(
.IN_W (32),
.OUT_W (`FIXED_FRAC)
.IN_W (`TEX_FXD_BITS),
.OUT_W (`TEX_FXD_FRAC)
) sat_fx (
.data_in (coord_i),
.data_out (clamp)
@@ -27,9 +27,9 @@ module VX_tex_wrap #(
`TEX_WRAP_CLAMP:
coord_r = clamp;
`TEX_WRAP_MIRROR:
coord_r = coord_i[`FIXED_FRAC-1:0] ^ {`FIXED_FRAC{coord_i[`FIXED_FRAC]}};
coord_r = coord_i[`TEX_FXD_FRAC-1:0] ^ {`TEX_FXD_FRAC{coord_i[`TEX_FXD_FRAC]}};
default: //`TEX_WRAP_REPEAT
coord_r = coord_i[`FIXED_FRAC-1:0];
coord_r = coord_i[`TEX_FXD_FRAC-1:0];
endcase
end