fixes: texture unit memory access sometimes going to smem, bilinear texture filtering; new: cache req_id
This commit is contained in:
@@ -12,13 +12,13 @@ module VX_tex_addr #(
|
||||
|
||||
input wire req_valid,
|
||||
input wire [NUM_REQS-1:0] req_tmask,
|
||||
input wire [1:0][NUM_REQS-1:0][31:0] req_coords,
|
||||
input wire [1:0][NUM_REQS-1:0][`TEX_FXD_BITS-1:0] req_coords,
|
||||
input wire [`TEX_FORMAT_BITS-1:0] req_format,
|
||||
input wire [`TEX_FILTER_BITS-1:0] req_filter,
|
||||
input wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps,
|
||||
input wire [`TEX_ADDR_BITS-1:0] req_baseaddr,
|
||||
input wire [NUM_REQS-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoff,
|
||||
input wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] req_logdims,
|
||||
input wire [NUM_REQS-1:0][1:0][`TEX_LOD_BITS-1:0] req_logdims,
|
||||
input wire [REQ_INFOW-1:0] req_info,
|
||||
output wire req_ready,
|
||||
|
||||
@@ -27,31 +27,33 @@ module VX_tex_addr #(
|
||||
output wire rsp_valid,
|
||||
output wire [NUM_REQS-1:0] rsp_tmask,
|
||||
output wire [`TEX_FILTER_BITS-1:0] rsp_filter,
|
||||
output wire [`TEX_STRIDE_BITS-1:0] rsp_stride,
|
||||
output wire [`TEX_LGSTRIDE_BITS-1:0] rsp_lgstride,
|
||||
output wire [NUM_REQS-1:0][3:0][31:0] rsp_addr,
|
||||
output wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] rsp_blends,
|
||||
output wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] rsp_blends,
|
||||
output wire [REQ_INFOW-1:0] rsp_info,
|
||||
input wire rsp_ready
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam PITCH_BITS = `MAX(`TEX_DIM_BITS, `TEX_STRIDE_BITS) + 1;
|
||||
localparam SCALED_U_W = `FIXED_INT + `TEX_STRIDE_BITS;
|
||||
localparam SCALED_X_W = (2 * `FIXED_INT);
|
||||
localparam SCALED_V_W = SCALED_X_W + `TEX_STRIDE_BITS;
|
||||
localparam SHIFT_BITS = $clog2(`TEX_FXD_FRAC+1);
|
||||
localparam PITCH_BITS = `MAX(`TEX_LOD_BITS, `TEX_LGSTRIDE_BITS) + 1;
|
||||
localparam SCALED_X_W = `TEX_DIM_BITS + `TEX_BLEND_FRAC;
|
||||
localparam OFFSET_U_W = `TEX_DIM_BITS + `TEX_LGSTRIDE_MAX;
|
||||
localparam OFFSET_V_W = `TEX_DIM_BITS + `TEX_DIM_BITS + `TEX_LGSTRIDE_MAX;
|
||||
|
||||
wire valid_s0;
|
||||
wire [NUM_REQS-1:0] tmask_s0;
|
||||
wire [`TEX_FILTER_BITS-1:0] filter_s0;
|
||||
wire [REQ_INFOW-1:0] req_info_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_FRAC-1:0] clamped_lo, clamped_lo_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_FRAC-1:0] clamped_hi, clamped_hi_s0;
|
||||
wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`TEX_FXD_FRAC-1:0] clamped_lo, clamped_lo_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`TEX_FXD_FRAC-1:0] clamped_hi, clamped_hi_s0;
|
||||
wire [NUM_REQS-1:0][1:0][SHIFT_BITS-1:0] dim_shift, dim_shift_s0;
|
||||
wire [`TEX_LGSTRIDE_BITS-1:0] log_stride, log_stride_s0;
|
||||
wire [NUM_REQS-1:0][31:0] mip_addr, mip_addr_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] log_dims_s0;
|
||||
wire [NUM_REQS-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
|
||||
|
||||
wire [NUM_REQS-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
|
||||
|
||||
wire stall_out;
|
||||
|
||||
// stride
|
||||
@@ -67,9 +69,9 @@ module VX_tex_addr #(
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar j = 0; j < 2; ++j) begin
|
||||
wire [`FIXED_FRAC-1:0] delta = (`FIXED_HALF >> req_logdims[i][j]);
|
||||
wire [31:0] coord_lo = req_filter ? (req_coords[j][i] - 32'(delta)) : req_coords[j][i];
|
||||
wire [31:0] coord_hi = req_filter ? (req_coords[j][i] + 32'(delta)) : req_coords[j][i];
|
||||
wire [`TEX_FXD_FRAC-1:0] delta = (`TEX_FXD_HALF >> req_logdims[i][j]);
|
||||
wire [`TEX_FXD_BITS-1:0] coord_lo = req_filter ? (req_coords[j][i] - `TEX_FXD_BITS'(delta)) : req_coords[j][i];
|
||||
wire [`TEX_FXD_BITS-1:0] coord_hi = req_filter ? (req_coords[j][i] + `TEX_FXD_BITS'(delta)) : req_coords[j][i];
|
||||
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
@@ -86,66 +88,72 @@ module VX_tex_addr #(
|
||||
.coord_i (coord_hi),
|
||||
.coord_o (clamped_hi[i][j])
|
||||
);
|
||||
|
||||
assign dim_shift[i][j] = (`TEX_FXD_FRAC - `TEX_BLEND_FRAC - req_logdims[i][j]);
|
||||
end
|
||||
assign log_pitch[i] = PITCH_BITS'(req_logdims[i][0]) + PITCH_BITS'(log_stride);
|
||||
assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]);
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + 2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + 2 * SHIFT_BITS + 32 + 2 * 2 * `TEX_FXD_FRAC)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, req_logdims, mip_addr, clamped_lo, clamped_hi}),
|
||||
.data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
|
||||
.data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, dim_shift, mip_addr, clamped_lo, clamped_hi}),
|
||||
.data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, dim_shift_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
|
||||
);
|
||||
|
||||
// addresses generation
|
||||
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_lo;
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_hi;
|
||||
wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] blends;
|
||||
wire [NUM_REQS-1:0][1:0][SCALED_X_W-1:0] scaled_lo;
|
||||
wire [NUM_REQS-1:0][1:0][SCALED_X_W-1:0] scaled_hi;
|
||||
wire [NUM_REQS-1:0][OFFSET_U_W-1:0] offset_u_lo;
|
||||
wire [NUM_REQS-1:0][OFFSET_U_W-1:0] offset_u_hi;
|
||||
wire [NUM_REQS-1:0][OFFSET_V_W-1:0] offset_v_lo;
|
||||
wire [NUM_REQS-1:0][OFFSET_V_W-1:0] offset_v_hi;
|
||||
wire [NUM_REQS-1:0][31:0] base_addr_lo;
|
||||
wire [NUM_REQS-1:0][31:0] base_addr_hi;
|
||||
wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] blends;
|
||||
wire [NUM_REQS-1:0][3:0][31:0] addr;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar j = 0; j < 2; ++j) begin
|
||||
assign scaled_lo[i][j] = scale_to_dim(clamped_lo_s0[i][j], log_dims_s0[i][j]);
|
||||
assign scaled_hi[i][j] = scale_to_dim(clamped_hi_s0[i][j], log_dims_s0[i][j]);
|
||||
assign blends[i][j] = filter_s0 ? clamped_lo_s0[i][j][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0);
|
||||
assign scaled_lo[i][j] = SCALED_X_W'(clamped_lo_s0[i][j] >> dim_shift_s0[i][j]);
|
||||
assign scaled_hi[i][j] = SCALED_X_W'(clamped_hi_s0[i][j] >> dim_shift_s0[i][j]);
|
||||
assign blends[i][j] = filter_s0 ? scaled_lo[i][j][`TEX_BLEND_FRAC-1:0] : `TEX_BLEND_FRAC'(0);
|
||||
end
|
||||
end
|
||||
|
||||
`UNUSED_VAR (log_pitch_s0)
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
wire [SCALED_U_W-1:0] offset_u_lo = SCALED_U_W'(scaled_lo[i][0]) << log_stride_s0;
|
||||
wire [SCALED_U_W-1:0] offset_u_hi = SCALED_U_W'(scaled_hi[i][0]) << log_stride_s0;
|
||||
assign offset_u_lo[i] = OFFSET_U_W'(scaled_lo[i][0][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_stride_s0;
|
||||
assign offset_u_hi[i] = OFFSET_U_W'(scaled_hi[i][0][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_stride_s0;
|
||||
|
||||
wire [SCALED_V_W-1:0] offset_v_lo = SCALED_V_W'(scaled_lo[i][1]) << log_pitch_s0[i];
|
||||
wire [SCALED_V_W-1:0] offset_v_hi = SCALED_V_W'(scaled_hi[i][1]) << log_pitch_s0[i];
|
||||
assign offset_v_lo[i] = OFFSET_V_W'(scaled_lo[i][1][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_pitch_s0[i];
|
||||
assign offset_v_hi[i] = OFFSET_V_W'(scaled_hi[i][1][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_pitch_s0[i];
|
||||
|
||||
wire [31:0] base_addr_lo = mip_addr_s0[i] + 32'(offset_v_lo);
|
||||
wire [31:0] base_addr_hi = mip_addr_s0[i] + 32'(offset_v_hi);
|
||||
assign base_addr_lo[i] = mip_addr_s0[i] + 32'(offset_v_lo[i]);
|
||||
assign base_addr_hi[i] = mip_addr_s0[i] + 32'(offset_v_hi[i]);
|
||||
|
||||
assign addr[i][0] = base_addr_lo + 32'(offset_u_lo);
|
||||
assign addr[i][1] = base_addr_lo + 32'(offset_u_hi);
|
||||
assign addr[i][2] = base_addr_hi + 32'(offset_u_lo);
|
||||
assign addr[i][3] = base_addr_hi + 32'(offset_u_hi);
|
||||
assign addr[i][0] = base_addr_lo[i] + 32'(offset_u_lo[i]);
|
||||
assign addr[i][1] = base_addr_lo[i] + 32'(offset_u_hi[i]);
|
||||
assign addr[i][2] = base_addr_hi[i] + 32'(offset_u_lo[i]);
|
||||
assign addr[i][3] = base_addr_hi[i] + 32'(offset_u_hi[i]);
|
||||
end
|
||||
|
||||
assign stall_out = rsp_valid && ~rsp_ready;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `BLEND_FRAC) + REQ_INFOW),
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `TEX_BLEND_FRAC) + REQ_INFOW),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({valid_s0, tmask_s0, filter_s0, log_stride_s0, addr, blends, req_info_s0}),
|
||||
.data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_stride, rsp_addr, rsp_blends, rsp_info})
|
||||
.data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_lgstride, rsp_addr, rsp_blends, rsp_info})
|
||||
);
|
||||
|
||||
assign req_ready = ~stall_out;
|
||||
@@ -157,22 +165,47 @@ module VX_tex_addr #(
|
||||
assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (req_valid && ~stall_out) begin
|
||||
dpi_trace("%d: *** log_pitch=", $time);
|
||||
`TRACE_ARRAY1D(log_pitch, NUM_REQS);
|
||||
dpi_trace(", mip_addr=");
|
||||
`TRACE_ARRAY1D(mip_addr, NUM_REQS);
|
||||
dpi_trace(", req_logdims=");
|
||||
`TRACE_ARRAY2D(req_logdims, 2, NUM_REQS);
|
||||
dpi_trace(", clamped_lo=");
|
||||
`TRACE_ARRAY2D(clamped_lo, 2, NUM_REQS);
|
||||
dpi_trace(", clamped_hi=");
|
||||
`TRACE_ARRAY2D(clamped_hi, 2, NUM_REQS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
|
||||
if (valid_s0 && ~stall_out) begin
|
||||
dpi_trace("%d: *** scaled_lo=", $time);
|
||||
`TRACE_ARRAY2D(scaled_lo, 2, NUM_REQS);
|
||||
dpi_trace(", scaled_hi=");
|
||||
`TRACE_ARRAY2D(scaled_hi, 2, NUM_REQS);
|
||||
dpi_trace(", offset_u_lo=");
|
||||
`TRACE_ARRAY1D(offset_u_lo, NUM_REQS);
|
||||
dpi_trace(", offset_u_hi=");
|
||||
`TRACE_ARRAY1D(offset_u_hi, NUM_REQS);
|
||||
dpi_trace(", offset_v_lo=");
|
||||
`TRACE_ARRAY1D(offset_v_lo, NUM_REQS);
|
||||
dpi_trace(", offset_v_hi=");
|
||||
`TRACE_ARRAY1D(offset_v_hi, NUM_REQS);
|
||||
dpi_trace(", base_addr_lo=");
|
||||
`TRACE_ARRAY1D(base_addr_lo, NUM_REQS);
|
||||
dpi_trace(", base_addr_hi=");
|
||||
`TRACE_ARRAY1D(base_addr_hi, NUM_REQS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
|
||||
if (rsp_valid && rsp_ready) begin
|
||||
dpi_trace("%d: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, req_filter=%0d, tride=%0d, addr=",
|
||||
$time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_stride);
|
||||
dpi_trace("%d: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, req_filter=%0d, lgstride=%0d, addr=",
|
||||
$time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_lgstride);
|
||||
`TRACE_ARRAY2D(rsp_addr, 4, NUM_REQS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
// scale_to_dim: zero-extends/resizes src to FIXED_BITS bits, shifts it left by
// dim, and returns the FIXED_INT-bit slice that begins at bit FIXED_FRAC.
//   src : FIXED_FRAC-bit input (the call sites in this file pass the
//         clamped_lo_s0 / clamped_hi_s0 entries)
//   dim : TEX_DIM_BITS-bit shift amount (the call sites pass log_dims_s0 entries)
// NOTE(review): presumably this maps a normalized fractional coordinate onto an
// integer texel index for a dimension of 2^dim texels; that reading assumes
// FIXED_BITS == FIXED_INT + FIXED_FRAC - confirm against the macro definitions.
function logic [`FIXED_INT-1:0] scale_to_dim (input logic [`FIXED_FRAC-1:0] src,
|
||||
input logic [`TEX_DIM_BITS-1:0] dim);
|
||||
// NOTE(review): the warning guard presumably exists because only the slice
// out[FIXED_FRAC +: FIXED_INT] is ever read, leaving the other bits unused - confirm.
`IGNORE_WARNINGS_BEGIN
|
||||
logic [`FIXED_BITS-1:0] out;
|
||||
`IGNORE_WARNINGS_END
|
||||
// Widen before shifting so bits moved above FIXED_FRAC-1 are kept in out.
out = `FIXED_BITS'(src) << dim;
|
||||
// Indexed part-select: FIXED_INT bits starting at bit position FIXED_FRAC.
return out[`FIXED_FRAC +: `FIXED_INT];
|
||||
endfunction
|
||||
|
||||
endmodule
|
||||
Reference in New Issue
Block a user