rebase master update
This commit is contained in:
151
hw/rtl/tex_unit/VX_tex_addr.v
Normal file
151
hw/rtl/tex_unit/VX_tex_addr.v
Normal file
@@ -0,0 +1,151 @@
|
||||
`include "VX_tex_define.vh"
|
||||
|
||||
// Texture address generator.
//
// Two-stage pipeline that converts fixed-point texture coordinates into the
// byte addresses of the four texels of a 2x2 footprint, together with the
// per-axis blend fractions.
//   stage 0: derive the texel stride from the format, offset the coordinates
//            by (FIXED_HALF >> log_dim) when filtering is enabled, apply the
//            wrap mode and compute the per-request mip base address;
//   stage 1: scale the wrapped coordinates to texel indices and assemble the
//            four addresses.
// Index 0 of the leading [1:0] dimension is the axis that is added directly
// into the texel offset (x); index 1 is shifted by log_dims[0] (y).
// A request is accepted whenever the response side is not stalled.
module VX_tex_addr #(
    parameter CORE_ID        = 0,
    parameter REQ_INFO_WIDTH = 1,
    parameter NUM_REQS       = 1
) (
    input wire clk,
    input wire reset,

    // inputs

    input wire                                        req_valid,
    input wire [NUM_REQS-1:0]                         req_tmask,
    input wire [1:0][NUM_REQS-1:0][31:0]              req_coords,
    input wire [`TEX_FORMAT_BITS-1:0]                 req_format,
    input wire [`TEX_FILTER_BITS-1:0]                 req_filter,
    input wire [1:0][`TEX_WRAP_BITS-1:0]              req_wraps,
    input wire [`TEX_ADDR_BITS-1:0]                   req_baseaddr,
    input wire [NUM_REQS-1:0][`TEX_MIPOFF_BITS-1:0]   req_mipoffset,
    input wire [1:0][NUM_REQS-1:0][`TEX_DIM_BITS-1:0] req_logdims,
    input wire [REQ_INFO_WIDTH-1:0]                   req_info,
    output wire                                       req_ready,

    // outputs

    output wire                                       rsp_valid,
    output wire [NUM_REQS-1:0]                        rsp_tmask,
    output wire [`TEX_FILTER_BITS-1:0]                rsp_filter,
    output wire [`TEX_STRIDE_BITS-1:0]                rsp_stride,
    output wire [NUM_REQS-1:0][3:0][31:0]             rsp_addr,
    output wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0]  rsp_blends,
    output wire [REQ_INFO_WIDTH-1:0]                  rsp_info,
    input wire                                        rsp_ready
);

    `UNUSED_PARAM (CORE_ID)

    // stage-0 registered copies carry the _s0 suffix
    wire                          valid_s0;
    wire [NUM_REQS-1:0]           tmask_s0;
    wire [`TEX_FILTER_BITS-1:0]   filter_s0;
    wire [REQ_INFO_WIDTH-1:0]     req_info_s0;

    wire [1:0][NUM_REQS-1:0][`FIXED_FRAC-1:0]   clamped_lo, clamped_lo_s0;
    wire [1:0][NUM_REQS-1:0][`FIXED_FRAC-1:0]   clamped_hi, clamped_hi_s0;
    wire [`TEX_STRIDE_BITS-1:0]                 log_stride, log_stride_s0;
    wire [NUM_REQS-1:0][31:0]                   mip_addr, mip_addr_s0;
    wire [1:0][NUM_REQS-1:0][`TEX_DIM_BITS-1:0] log_dims_s0;

    wire stall_out;

    // stride

    VX_tex_stride #(
        .CORE_ID (CORE_ID)
    ) tex_stride (
        .format     (req_format),
        .log_stride (log_stride)
    );

    // addressing mode

    for (genvar i = 0; i < NUM_REQS; ++i) begin
        for (genvar j = 0; j < 2; ++j) begin
            wire [31:0] coord_lo, coord_hi;

            // with filtering on, sample FIXED_HALF >> log_dim on either side
            // of the requested coordinate; otherwise both equal the request
            assign coord_lo = req_coords[j][i] - (req_filter ? (`FIXED_HALF >> req_logdims[j][i]) : 0);
            assign coord_hi = req_coords[j][i] + (req_filter ? (`FIXED_HALF >> req_logdims[j][i]) : 0);

            VX_tex_wrap #(
                .CORE_ID (CORE_ID)
            ) tex_wrap_lo (
                .wrap_i  (req_wraps[j]),
                .coord_i (coord_lo),
                .coord_o (clamped_lo[j][i])
            );

            VX_tex_wrap #(
                .CORE_ID (CORE_ID)
            ) tex_wrap_hi (
                .wrap_i  (req_wraps[j]),
                .coord_i (coord_hi),
                .coord_o (clamped_hi[j][i])
            );
        end
        // base address of the selected mip level for this request
        assign mip_addr[i] = req_baseaddr + 32'(req_mipoffset[i]);
    end

    VX_pipe_register #(
        .DATAW  (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + NUM_REQS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
        .RESETW (1)
    ) pipe_reg0 (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({req_valid, req_tmask, req_filter, log_stride,    req_info,    req_logdims, mip_addr,    clamped_lo,    clamped_hi}),
        .data_out ({valid_s0,  tmask_s0,  filter_s0,  log_stride_s0, req_info_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
    );

    // addresses generation

    wire [1:0][NUM_REQS-1:0][`FIXED_INT-1:0]  scaled_lo, scaled_hi;
    wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] blends;
    wire [NUM_REQS-1:0][3:0][31:0]            addr;

    for (genvar i = 0; i < NUM_REQS; ++i) begin
        for (genvar j = 0; j < 2; ++j) begin
            // keep the top log_dim fractional bits: the integer texel index
            assign scaled_lo[j][i] = `FIXED_INT'(clamped_lo_s0[j][i] >> ((`FIXED_FRAC) - log_dims_s0[j][i]));
            assign scaled_hi[j][i] = `FIXED_INT'(clamped_hi_s0[j][i] >> ((`FIXED_FRAC) - log_dims_s0[j][i]));
            // NOTE(review): the blend weight is taken from the lowest
            // BLEND_FRAC bits of the wrapped coordinate, independent of
            // log_dims -- confirm this is the intended sub-texel fraction.
            assign blends[j][i] = filter_s0 ? clamped_lo_s0[j][i][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0);
        end
    end

    for (genvar i = 0; i < NUM_REQS; ++i) begin
        // addr = mip_base + ((x + (y << log_width)) << log_stride)
        // Binary '+' binds tighter than '<<', so the byte scaling must be
        // parenthesized; otherwise the mip base address is shifted as well.
        assign addr[i][0] = mip_addr_s0[i] + ((32'(scaled_lo[0][i]) + (32'(scaled_lo[1][i]) << log_dims_s0[0][i])) << log_stride_s0);
        assign addr[i][1] = mip_addr_s0[i] + ((32'(scaled_hi[0][i]) + (32'(scaled_lo[1][i]) << log_dims_s0[0][i])) << log_stride_s0);
        assign addr[i][2] = mip_addr_s0[i] + ((32'(scaled_lo[0][i]) + (32'(scaled_hi[1][i]) << log_dims_s0[0][i])) << log_stride_s0);
        assign addr[i][3] = mip_addr_s0[i] + ((32'(scaled_hi[0][i]) + (32'(scaled_hi[1][i]) << log_dims_s0[0][i])) << log_stride_s0);
    end

    // hold both stages while the consumer has not taken the current response
    assign stall_out = rsp_valid && ~rsp_ready;

    VX_pipe_register #(
        .DATAW  (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `BLEND_FRAC) + REQ_INFO_WIDTH),
        .RESETW (1)
    ) pipe_reg1 (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({valid_s0,  tmask_s0,  filter_s0,  log_stride_s0, addr,     blends,     req_info_s0}),
        .data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_stride,    rsp_addr, rsp_blends, rsp_info})
    );

    assign req_ready = ~stall_out;

`ifdef DBG_PRINT_TEX
    wire [`NW_BITS-1:0] rsp_wid;
    wire [31:0]         rsp_PC;
    assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];

    always @(posedge clk) begin
        if (rsp_valid && rsp_ready) begin
            $write("%t: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, req_filter=%0d, stride=%0d, addr=",
                    $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_stride);
            `PRINT_ARRAY2D(rsp_addr, 4, NUM_REQS);
            $write("\n");
        end
    end
`endif

endmodule
|
||||
42
hw/rtl/tex_unit/VX_tex_define.vh
Normal file
42
hw/rtl/tex_unit/VX_tex_define.vh
Normal file
@@ -0,0 +1,42 @@
|
||||
`ifndef VX_TEX_DEFINE
`define VX_TEX_DEFINE

`include "VX_define.vh"

// Fixed-point coordinate layout within a 32-bit word:
// FIXED_FRAC fractional bits, the remaining FIXED_INT bits are integer.
`define FIXED_FRAC  20
`define FIXED_INT   (32 - `FIXED_FRAC)
`define FIXED_ONE   (2 ** `FIXED_FRAC)
`define FIXED_HALF  (`FIXED_ONE >> 1)
`define FIXED_MASK  (`FIXED_ONE - 1)

// Clamp x into [lo, hi]. Arguments are parenthesized so that expressions can
// be passed safely.
// NOTE(review): the lower bound is compared as signed, the upper bound with
// the operands' native signedness -- confirm this asymmetry is intended.
`define CLAMP(x,lo,hi)  (($signed(x) < $signed(lo)) ? (lo) : (((x) > (hi)) ? (hi) : (x)))

// Field widths of the texture state.
`define TEX_ADDR_BITS   32
`define TEX_FORMAT_BITS 3
`define TEX_WRAP_BITS   2
`define TEX_DIM_BITS    4
`define TEX_FILTER_BITS 1

`define TEX_MIPOFF_BITS (2*12+1)
`define TEX_STRIDE_BITS 2

`define TEX_LOD_BITS    4
`define TEX_MIP_BITS    (`NTEX_BITS + `TEX_LOD_BITS)

// Wrap mode encodings.
`define TEX_WRAP_CLAMP  0
`define TEX_WRAP_REPEAT 1
`define TEX_WRAP_MIRROR 2

// Width of one colour channel after format conversion.
`define TEX_COLOR_BITS  8

// Blend weights are BLEND_FRAC-bit fractions; BLEND_ONE is the value 1.0.
`define BLEND_FRAC      8
`define BLEND_ONE       (2 ** `BLEND_FRAC)

// Texel format encodings.
`define TEX_FORMAT_R8G8B8A8 `TEX_FORMAT_BITS'(0)
`define TEX_FORMAT_R5G6B5   `TEX_FORMAT_BITS'(1)
`define TEX_FORMAT_R4G4B4A4 `TEX_FORMAT_BITS'(2)
`define TEX_FORMAT_L8A8     `TEX_FORMAT_BITS'(3)
`define TEX_FORMAT_L8       `TEX_FORMAT_BITS'(4)
`define TEX_FORMAT_A8       `TEX_FORMAT_BITS'(5)

`endif
|
||||
58
hw/rtl/tex_unit/VX_tex_format.v
Normal file
58
hw/rtl/tex_unit/VX_tex_format.v
Normal file
@@ -0,0 +1,58 @@
|
||||
`include "VX_tex_define.vh"
|
||||
|
||||
// Texel format expander.
//
// Purely combinational: converts one raw texel (right-aligned in texel_in)
// of the given format into four 8-bit channels packed as
// texel_out = {byte3, byte2, byte1, byte0}, with byte3 carrying alpha.
// Narrow channels are widened to 8 bits by replicating their top bits into
// the low bits; formats without alpha produce an all-ones byte3.
// Any format value not listed below is treated as R8G8B8A8 (pass-through).
module VX_tex_format #(
    parameter CORE_ID = 0
) (
    input wire [`TEX_FORMAT_BITS-1:0] format,
    input wire [31:0]                 texel_in,
    output wire [31:0]                texel_out
);
    `UNUSED_PARAM (CORE_ID)

    reg [31:0] texel_out_r;

    always @(*) begin
        case (format)
        `TEX_FORMAT_R5G6B5: begin
            // 5/6/5-bit channels: append the top 3/2/3 bits to reach 8 bits
            texel_out_r[07:00] = `TEX_COLOR_BITS'({texel_in[15:11],texel_in[15:13]});
            texel_out_r[15:08] = `TEX_COLOR_BITS'({texel_in[10:5],texel_in[10:9]});
            texel_out_r[23:16] = `TEX_COLOR_BITS'({texel_in[4:0],texel_in[4:2]});
            texel_out_r[31:24] = {`TEX_COLOR_BITS{1'b1}};
        end
        `TEX_FORMAT_R4G4B4A4: begin
            // every 4-bit channel is replicated into both nibbles; byte 0 uses
            // its own nibble [11:8] twice, like the other three channels
            texel_out_r[07:00] = `TEX_COLOR_BITS'({2{texel_in[11:8]}});
            texel_out_r[15:08] = `TEX_COLOR_BITS'({2{texel_in[7:4]}});
            texel_out_r[23:16] = `TEX_COLOR_BITS'({2{texel_in[3:0]}});
            texel_out_r[31:24] = `TEX_COLOR_BITS'({2{texel_in[15:12]}});
        end
        `TEX_FORMAT_L8A8: begin
            // luminance broadcast to the three colour bytes, alpha from [15:8]
            texel_out_r[07:00] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[15:08] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[23:16] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[31:24] = `TEX_COLOR_BITS'(texel_in[15:8]);
        end
        `TEX_FORMAT_A8: begin
            // alpha only: colour bytes are zero
            texel_out_r[07:00] = `TEX_COLOR_BITS'(0);
            texel_out_r[15:08] = `TEX_COLOR_BITS'(0);
            texel_out_r[23:16] = `TEX_COLOR_BITS'(0);
            texel_out_r[31:24] = `TEX_COLOR_BITS'(texel_in[7:0]);
        end
        `TEX_FORMAT_L8: begin
            // luminance only: broadcast to colour bytes, alpha all ones
            texel_out_r[07:00] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[15:08] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[23:16] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[31:24] = {`TEX_COLOR_BITS{1'b1}};
        end
        // `TEX_FORMAT_R8G8B8A8
        default: begin
            texel_out_r[07:00] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[15:08] = `TEX_COLOR_BITS'(texel_in[15:8]);
            texel_out_r[23:16] = `TEX_COLOR_BITS'(texel_in[23:16]);
            texel_out_r[31:24] = `TEX_COLOR_BITS'(texel_in[31:24]);
        end
        endcase
    end

    assign texel_out = texel_out_r;

endmodule
|
||||
17
hw/rtl/tex_unit/VX_tex_lerp.v
Normal file
17
hw/rtl/tex_unit/VX_tex_lerp.v
Normal file
@@ -0,0 +1,17 @@
|
||||
`include "VX_tex_define.vh"
|
||||
|
||||
// Per-byte linear interpolation of two packed 4x8-bit texels.
//
// For each of the four byte lanes:
//   out = (in1 * (BLEND_ONE - blend) + in2 * blend) >> 8
// blend == 0 therefore returns in1 unchanged. Purely combinational.
module VX_tex_lerp #(
) (
    input wire [`BLEND_FRAC-1:0] blend,
    input wire [31:0]            in1,
    input wire [31:0]            in2,
    output wire [31:0]           out
);
    // complementary weight, shared by all four lanes (9 bits: may equal 256)
    wire [8:0] inv_blend = `BLEND_ONE - blend;

    for (genvar lane = 0; lane < 4; ++lane) begin
        // 17-bit weighted terms and their sum; only bits [15:8] are consumed
        wire [16:0] term_a = in1[lane*8+:8] * inv_blend;
        wire [16:0] term_b = in2[lane*8+:8] * blend;
        wire [16:0] acc    = term_a + term_b;
        `UNUSED_VAR (acc)
        assign out[lane*8+:8] = acc[15:8];
    end

endmodule
|
||||
128
hw/rtl/tex_unit/VX_tex_lsu_arb.v
Normal file
128
hw/rtl/tex_unit/VX_tex_lsu_arb.v
Normal file
@@ -0,0 +1,128 @@
|
||||
`include "../cache/VX_cache_define.vh"
|
||||
|
||||
// Multi-lane request arbiter / response demultiplexer.
//
// Request side: NUM_REQS requesters, each LANES wide, compete for a single
// LANES-wide output port through a round-robin arbiter. The granted
// requester's index is appended below its tag so the response can be routed
// back ({tag_in, index} -> tag_out).
// Response side: the low LOG_NUM_REQS bits of the response tag select the
// requester that receives the valid bits; data and the stripped tag are
// broadcast to all requesters.
// With NUM_REQS == 1 the module is a pass-through.
module VX_tex_lsu_arb #(
    parameter NUM_REQS      = 1,
    parameter LANES         = 1,
    parameter WORD_SIZE     = 1,
    parameter TAG_IN_WIDTH  = 1,
    parameter TAG_OUT_WIDTH = 1,
    parameter LOG_NUM_REQS  = `CLOG2(NUM_REQS)
) (
    input wire clk,
    input wire reset,

    // input requests
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_valid_in,
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_rw_in,
    input wire [NUM_REQS-1:0][LANES-1:0][WORD_SIZE-1:0]         req_byteen_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_ADDR_WIDTH-1:0]  req_addr_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]       req_data_in,
    input wire [NUM_REQS-1:0][LANES-1:0][TAG_IN_WIDTH-1:0]      req_tag_in,
    output wire [NUM_REQS-1:0][LANES-1:0]                       req_ready_in,

    // output request
    output wire [LANES-1:0]                         req_valid_out,
    output wire [LANES-1:0]                         req_rw_out,
    output wire [LANES-1:0][WORD_SIZE-1:0]          req_byteen_out,
    output wire [LANES-1:0][`WORD_ADDR_WIDTH-1:0]   req_addr_out,
    output wire [LANES-1:0][`WORD_WIDTH-1:0]        req_data_out,
    output wire [LANES-1:0][TAG_OUT_WIDTH-1:0]      req_tag_out,
    input wire [LANES-1:0]                          req_ready_out,

    // input response
    input wire [LANES-1:0]                          rsp_valid_in,
    input wire [LANES-1:0][`WORD_WIDTH-1:0]         rsp_data_in,
    input wire [TAG_OUT_WIDTH-1:0]                  rsp_tag_in,
    output wire                                     rsp_ready_in,

    // output responses
    output wire [NUM_REQS-1:0][LANES-1:0]                   rsp_valid_out,
    output wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]  rsp_data_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]            rsp_tag_out,
    input wire [NUM_REQS-1:0]                               rsp_ready_out
);
    // bits of one requester's bundle: per lane {valid, tag, addr, rw, byteen, data}
    localparam REQ_DATAW = LANES * (1 + TAG_IN_WIDTH + `WORD_ADDR_WIDTH + 1 + WORD_SIZE + `WORD_WIDTH);

    if (NUM_REQS > 1) begin

        // ---------------- request path ----------------

        wire [NUM_REQS-1:0][REQ_DATAW-1:0] packed_reqs;
        wire [NUM_REQS-1:0]                requester_active;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // a requester competes as soon as any of its lanes is valid
            assign requester_active[r] = (| req_valid_in[r]);
            assign packed_reqs[r] = {req_valid_in[r], req_tag_in[r], req_addr_in[r], req_rw_in[r], req_byteen_in[r], req_data_in[r]};
        end

        wire                    grant_valid;
        wire [LOG_NUM_REQS-1:0] grant_idx;
        wire [NUM_REQS-1:0]     grant_1hot;

        VX_rr_arbiter #(
            .NUM_REQS    (NUM_REQS),
            .LOCK_ENABLE (1)
        ) sel_arb (
            .clk          (clk),
            .reset        (reset),
            .requests     (requester_active),
            .enable       (| req_ready_out), // arbitrate when any output lane is ready
            .grant_valid  (grant_valid),
            .grant_index  (grant_idx),
            .grant_onehot (grant_1hot)
        );

        // unpack the granted bundle; valid and tag need post-processing
        wire [LANES-1:0]                   winner_valid;
        wire [LANES-1:0][TAG_IN_WIDTH-1:0] winner_tag;

        assign {winner_valid, winner_tag, req_addr_out, req_rw_out, req_byteen_out, req_data_out} = packed_reqs[grant_idx];

        // only forward valids while the arbiter reports a grant
        assign req_valid_out = grant_valid ? winner_valid : {LANES{1'b0}};

        for (genvar l = 0; l < LANES; l++) begin
            // requester index occupies the low tag bits
            assign req_tag_out[l] = {winner_tag[l], grant_idx};
        end

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // only the granted requester sees the downstream ready
            assign req_ready_in[r] = grant_1hot[r] ? req_ready_out : {LANES{1'b0}};
        end

        // ---------------- response path ----------------

        wire [LOG_NUM_REQS-1:0] rsp_owner = rsp_tag_in[LOG_NUM_REQS-1:0];

        reg [NUM_REQS-1:0][LANES-1:0] rsp_valid_demux;
        always @(*) begin
            rsp_valid_demux = '0;
            for (int r = 0; r < NUM_REQS; r++) begin
                if (rsp_owner == LOG_NUM_REQS'(r)) begin
                    rsp_valid_demux[r] = rsp_valid_in;
                end
            end
        end
        assign rsp_valid_out = rsp_valid_demux;

        // payload is broadcast; the valid bits above pick the consumer
        assign rsp_data_out = {NUM_REQS{rsp_data_in}};
        assign rsp_tag_out  = {NUM_REQS{rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH]}};

        assign rsp_ready_in = rsp_ready_out[rsp_owner];

    end else begin

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // single requester: straight wiring in both directions
        assign req_valid_out  = req_valid_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_addr_out   = req_addr_in;
        assign req_data_out   = req_data_in;
        assign req_tag_out    = req_tag_in;
        assign req_ready_in   = req_ready_out;

        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_data_out   = rsp_data_in;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_ready_in   = rsp_ready_out;

    end

endmodule
|
||||
288
hw/rtl/tex_unit/VX_tex_memory.v
Normal file
288
hw/rtl/tex_unit/VX_tex_memory.v
Normal file
@@ -0,0 +1,288 @@
|
||||
`include "VX_tex_define.vh"
|
||||
// Texture memory stage.
//
// Accepts a batch of texel addresses (up to 4 texels per request lane),
// queues it, and issues the texels to the data cache one texel index at a
// time across all active lanes. When every active lane of a texel index
// targets the same word address, a single request is sent on lane 0 and its
// response is shared by all lanes ("dup"). Responses are byte-aligned,
// truncated to the texel stride and accumulated; once all expected responses
// have arrived the batch is popped and the collected texels are emitted.
module VX_tex_memory #(
    parameter CORE_ID        = 0,
    parameter REQ_INFO_WIDTH = 1,
    parameter NUM_REQS       = 1
) (
    input wire clk,
    input wire reset,

    // memory interface
    VX_dcache_req_if dcache_req_if,
    VX_dcache_rsp_if dcache_rsp_if,

    // inputs
    input wire                           req_valid,
    input wire [NUM_REQS-1:0]            req_tmask,
    input wire [`TEX_FILTER_BITS-1:0]    req_filter,
    input wire [`TEX_STRIDE_BITS-1:0]    req_stride,
    input wire [NUM_REQS-1:0][3:0][31:0] req_addr,
    input wire [REQ_INFO_WIDTH-1:0]      req_info,
    output wire                          req_ready,

    // outputs
    output wire                           rsp_valid,
    output wire [NUM_REQS-1:0]            rsp_tmask,
    output wire [NUM_REQS-1:0][3:0][31:0] rsp_data,
    output wire [REQ_INFO_WIDTH-1:0]      rsp_info,
    input wire                            rsp_ready
);

    `UNUSED_PARAM (CORE_ID)

    // wide enough to count up to 4 responses per lane
    localparam RSP_CTR_W = $clog2(NUM_REQS * 4 + 1);

    wire [3:0]                     dup_reqs;
    wire [3:0][NUM_REQS-1:0][29:0] req_addr_w;
    wire [3:0][NUM_REQS-1:0][1:0]  align_offs;

    // reorder address into quads:
    // [lane][texel] -> [texel][lane], split into word address and byte offset

    for (genvar i = 0; i < NUM_REQS; ++i) begin
        for (genvar j = 0; j < 4; ++j) begin
            assign req_addr_w[j][i] = req_addr[i][j][31:2];
            assign align_offs[j][i] = req_addr[i][j][1:0];
        end
    end

    // find duplicate addresses:
    // texel i is a dup when lane 0 is active and every active lane matches it

    for (genvar i = 0; i < 4; ++i) begin
        wire [NUM_REQS-1:0] addr_matches;
        for (genvar j = 0; j < NUM_REQS; j++) begin
            assign addr_matches[j] = (req_addr_w[i][0] == req_addr_w[i][j]) || ~req_tmask[j];
        end
        assign dup_reqs[i] = req_tmask[0] && (& addr_matches);
    end

    // save request addresses into fifo

    wire reqq_push, reqq_pop, reqq_empty, reqq_full;

    wire [3:0][NUM_REQS-1:0][29:0] q_req_addr;
    wire [NUM_REQS-1:0]            q_req_tmask;
    wire [`TEX_FILTER_BITS-1:0]    q_req_filter;
    wire [REQ_INFO_WIDTH-1:0]      q_req_info;
    wire [`TEX_STRIDE_BITS-1:0]    q_req_stride;
    wire [3:0][NUM_REQS-1:0][1:0]  q_align_offs;
    wire [3:0]                     q_dup_reqs;

    assign reqq_push = req_valid && req_ready;

    VX_fifo_queue #(
        .DATAW      ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFO_WIDTH + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * NUM_REQS * 2) + 4),
        .SIZE       (`LSUQ_SIZE),
        .OUTPUT_REG (1)
    ) req_queue (
        .clk      (clk),
        .reset    (reset),
        .push     (reqq_push),
        .pop      (reqq_pop),
        .data_in  ({req_addr_w, req_tmask,   req_info,   req_filter,   req_stride,   align_offs,   dup_reqs}),
        .data_out ({q_req_addr, q_req_tmask, q_req_info, q_req_filter, q_req_stride, q_align_offs, q_dup_reqs}),
        .empty    (reqq_empty),
        .full     (reqq_full),
        `UNUSED_PIN (alm_full),
        `UNUSED_PIN (alm_empty),
        `UNUSED_PIN (size)
    );

    // can take more requests?
    assign req_ready = ~reqq_full;

    ///////////////////////////////////////////////////////////////////////////

    wire req_texel_valid;
    wire sent_all_ready, last_texel_sent;
    wire req_texel_dup;
    wire [NUM_REQS-1:0][29:0] req_texel_addr;
    reg [1:0] req_texel_idx;
    reg req_texels_done;

    // index of the texel currently being issued for the head-of-queue batch
    always @(posedge clk) begin
        if (reset || last_texel_sent) begin
            req_texel_idx <= 0;
        end else if (req_texel_valid && sent_all_ready) begin
            req_texel_idx <= req_texel_idx + 1;
        end
    end

    // set once the last texel has been issued; cleared when the batch pops
    always @(posedge clk) begin
        if (reset || reqq_pop) begin
            req_texels_done <= 0;
        end else if (last_texel_sent) begin
            req_texels_done <= 1;
        end
    end

    assign req_texel_valid = ~reqq_empty && ~req_texels_done;
    assign req_texel_addr  = q_req_addr[req_texel_idx];
    assign req_texel_dup   = q_dup_reqs[req_texel_idx];

    // four texels are fetched when filtering is enabled, otherwise only one
    wire is_last_texel = (req_texel_idx == (q_req_filter ? 3 : 0));
    assign last_texel_sent = req_texel_valid && sent_all_ready && is_last_texel;

    // DCache Request

    reg [NUM_REQS-1:0]  texel_sent_mask;
    wire [NUM_REQS-1:0] dcache_req_fire;
    wire [NUM_REQS-1:0] req_dup_mask;

    assign dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;

    // the current texel is complete when every active lane has fired or is
    // firing now, or -- for a dup -- when lane 0 is accepted
    assign sent_all_ready = (&(dcache_req_if.ready | texel_sent_mask | ~q_req_tmask))
                         || (req_texel_dup & dcache_req_if.ready[0]);

    // lanes already accepted for the current texel
    always @(posedge clk) begin
        if (reset || sent_all_ready) begin
            texel_sent_mask <= 0;
        end else begin
            texel_sent_mask <= texel_sent_mask | dcache_req_fire;
        end
    end

    // a dup texel is requested on lane 0 only
    assign req_dup_mask = {{(NUM_REQS-1){~req_texel_dup}}, 1'b1};

    assign dcache_req_if.valid  = {NUM_REQS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask;
    assign dcache_req_if.rw     = {NUM_REQS{1'b0}};
    assign dcache_req_if.addr   = req_texel_addr;
    assign dcache_req_if.byteen = {NUM_REQS{4'b1111}};
    assign dcache_req_if.data   = 'x;

    // the texel index travels in the low tag bits and is recovered on response
`ifdef DBG_CACHE_REQ_INFO
    wire [`NW_BITS-1:0] q_req_wid;
    wire [31:0]         q_req_PC;
    assign {q_req_wid, q_req_PC} = q_req_info[`NW_BITS+32-1:0];
    assign dcache_req_if.tag = {NUM_REQS{q_req_PC, q_req_wid, req_texel_idx}};
`else
    assign dcache_req_if.tag = {NUM_REQS{req_texel_idx}};
`endif

    // Dcache Response

    reg [3:0][NUM_REQS-1:0][31:0]   rsp_texels, rsp_texels_n;
    wire [NUM_REQS-1:0][3:0][31:0]  rsp_texels_qual;
    reg [NUM_REQS-1:0][31:0]        rsp_data_qual;
    reg [RSP_CTR_W-1:0]             rsp_rem_ctr;
    wire [NUM_REQS-1:0]             rsp_cur_tmask;
    wire [$clog2(NUM_REQS + 1)-1:0] rsp_cur_cnt;
    wire                            dcache_rsp_fire;
    wire [1:0]                      rsp_texel_idx;
    wire                            rsp_texel_dup;

    assign rsp_texel_idx = dcache_rsp_if.tag[1:0];

    assign rsp_texel_dup = q_dup_reqs[rsp_texel_idx];

    assign dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;

    // lanes satisfied by this response: all active lanes for a dup,
    // otherwise the lanes flagged by the cache
    assign rsp_cur_tmask = rsp_texel_dup ? q_req_tmask : dcache_rsp_if.tmask;

    assign rsp_cur_cnt = $countones(rsp_cur_tmask);

    for (genvar i = 0; i < NUM_REQS; i++) begin
        // Mask with the lanes this response actually serves. The mask covers
        // the whole selection, so lane 0 is also zeroed when a non-dup
        // response does not carry it; the data is OR-accumulated below and
        // must not pick up stale bits.
        wire [31:0] src_mask = {32{rsp_cur_tmask[i]}};
        wire [31:0] src_data = ((i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]) & src_mask;

        // move the addressed byte/halfword down to bit 0
        reg [31:0] rsp_data_shifted;
        always @(*) begin
            rsp_data_shifted[31:16] = src_data[31:16];
            rsp_data_shifted[15:0]  = q_align_offs[rsp_texel_idx][i][1] ? src_data[31:16] : src_data[15:0];
            rsp_data_shifted[7:0]   = q_align_offs[rsp_texel_idx][i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0];
        end

        // keep only as many bytes as the texel stride
        always @(*) begin
            case (q_req_stride)
            0:       rsp_data_qual[i] = 32'(rsp_data_shifted[7:0]);
            1:       rsp_data_qual[i] = 32'(rsp_data_shifted[15:0]);
            default: rsp_data_qual[i] = rsp_data_shifted;
            endcase
        end
    end

    // merge this response into the texels collected so far
    always @(*) begin
        rsp_texels_n = rsp_texels;
        rsp_texels_n[rsp_texel_idx] |= rsp_data_qual;
    end

    always @(posedge clk) begin
        if (reset || reqq_pop) begin
            rsp_texels <= '0;
        end else if (dcache_rsp_fire) begin
            rsp_texels <= rsp_texels_n;
        end
    end

    // outstanding lane-responses for the current batch; loaded on the first
    // request fire (active lanes x 4 when filtering, x 1 otherwise)
    always @(posedge clk) begin
        if (reset) begin
            rsp_rem_ctr <= 0;
        end else begin
            if ((| dcache_req_fire) && 0 == rsp_rem_ctr) begin
                rsp_rem_ctr <= q_req_filter ? {$countones(q_req_tmask), 2'b0} : {2'b0, $countones(q_req_tmask)};
            end else if (dcache_rsp_fire) begin
                rsp_rem_ctr <= rsp_rem_ctr - RSP_CTR_W'(rsp_cur_cnt);
            end
        end
    end

    // back to [lane][texel] order for the output
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        for (genvar j = 0; j < 4; ++j) begin
            assign rsp_texels_qual[i][j] = rsp_texels_n[j][i];
        end
    end

    wire stall_out = rsp_valid && ~rsp_ready;

    // the response being accepted now is the last one of the batch
    wire rsp_texels_done = dcache_rsp_fire && (rsp_rem_ctr == RSP_CTR_W'(rsp_cur_cnt));

    assign reqq_pop = rsp_texels_done && ~stall_out;

    VX_pipe_register #(
        .DATAW  (1 + NUM_REQS + REQ_INFO_WIDTH + (4 * NUM_REQS * 32)),
        .RESETW (1)
    ) rsp_pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({rsp_texels_done, q_req_tmask, q_req_info, rsp_texels_qual}),
        .data_out ({rsp_valid,       rsp_tmask,   rsp_info,   rsp_data})
    );

    // Can accept new cache response?
    // Only the final response of a batch has to wait for the output register.
    assign dcache_rsp_if.ready = ~stall_out || (rsp_rem_ctr != RSP_CTR_W'(rsp_cur_cnt));

`ifdef DBG_PRINT_TEX
    // cur_* decode the head-of-queue request info locally so this block does
    // not depend on DBG_CACHE_REQ_INFO being defined
    wire [`NW_BITS-1:0] req_wid, rsp_wid, cur_wid;
    wire [31:0]         req_PC, rsp_PC, cur_PC;
    assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0];
    assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];
    assign {cur_wid, cur_PC} = q_req_info[`NW_BITS+32-1:0];

    always @(posedge clk) begin
        if ((| dcache_req_fire)) begin
            $write("%t: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=",
                    $time, CORE_ID, cur_wid, cur_PC, dcache_req_fire, dcache_req_if.tag);
            `PRINT_ARRAY1D(req_texel_addr, NUM_REQS);
            $write(", is_dup=%b\n", req_texel_dup);
        end
        if (dcache_rsp_fire) begin
            $write("%t: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=",
                    $time, CORE_ID, cur_wid, cur_PC, dcache_rsp_if.tmask, dcache_rsp_if.tag);
            `PRINT_ARRAY1D(rsp_data_qual, NUM_REQS);
            $write("\n");
        end
        if (req_valid && req_ready) begin
            $write("%t: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, stride=%0d, addr=",
                    $time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_stride);
            `PRINT_ARRAY2D(req_addr, 4, NUM_REQS);
            $write("\n");
        end
        if (rsp_valid && rsp_ready) begin
            $write("%t: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
                    $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask);
            `PRINT_ARRAY2D(rsp_data, 4, NUM_REQS);
            $write("\n");
        end
    end
`endif

endmodule
|
||||
136
hw/rtl/tex_unit/VX_tex_sampler.v
Normal file
136
hw/rtl/tex_unit/VX_tex_sampler.v
Normal file
@@ -0,0 +1,136 @@
|
||||
`include "VX_tex_define.vh"
|
||||
|
||||
// Texture sampler.
//
// Two-stage pipeline that turns four raw texels per request lane into one
// filtered 32-bit colour:
//   stage 0: expand each texel to 4x8-bit channels (VX_tex_format) and
//            interpolate texel pairs (0,1) and (2,3) with req_blends[0];
//   stage 1: interpolate the two intermediate results with req_blends[1].
// A request is accepted whenever the response side is not stalled.
// The thread masks are NUM_REQS wide, matching the pipeline register widths
// below and the other texture-unit stages.
module VX_tex_sampler #(
    parameter CORE_ID        = 0,
    parameter REQ_INFO_WIDTH = 1,
    parameter NUM_REQS       = 1
) (
    input wire clk,
    input wire reset,

    // inputs
    input wire                                       req_valid,
    input wire [NUM_REQS-1:0]                        req_tmask,
    input wire [`TEX_FORMAT_BITS-1:0]                req_format,
    input wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0]  req_blends,
    input wire [NUM_REQS-1:0][3:0][31:0]             req_data,
    input wire [REQ_INFO_WIDTH-1:0]                  req_info,
    output wire                                      req_ready,

    // outputs
    output wire                       rsp_valid,
    output wire [NUM_REQS-1:0]        rsp_tmask,
    output wire [NUM_REQS-1:0][31:0]  rsp_data,
    output wire [REQ_INFO_WIDTH-1:0]  rsp_info,
    input wire                        rsp_ready
);

    `UNUSED_PARAM (CORE_ID)

    // stage-0 registered copies carry the _s0 suffix
    wire                                  valid_s0;
    wire [NUM_REQS-1:0]                   tmask_s0;
    wire [REQ_INFO_WIDTH-1:0]             req_info_s0;
    wire [NUM_REQS-1:0][31:0]             texel_ul, texel_uh;
    wire [NUM_REQS-1:0][31:0]             texel_ul_s0, texel_uh_s0;
    wire [NUM_REQS-1:0][`BLEND_FRAC-1:0]  blend_v_s0;
    wire [NUM_REQS-1:0][31:0]             texel_v;

    wire stall_out;

    for (genvar i = 0; i < NUM_REQS; i++) begin

        wire [3:0][31:0] fmt_texels;

        // expand every raw texel of this lane to 8 bits per channel
        for (genvar j = 0; j < 4; j++) begin
            VX_tex_format #(
                .CORE_ID (CORE_ID)
            ) tex_format (
                .format    (req_format),
                .texel_in  (req_data[i][j]),
                .texel_out (fmt_texels[j])
            );
        end

        // first-axis interpolation of texels 0 and 1
        VX_tex_lerp #(
        ) tex_lerp_ul (
            .blend (req_blends[0][i]),
            .in1   (fmt_texels[0]),
            .in2   (fmt_texels[1]),
            .out   (texel_ul[i])
        );

        // first-axis interpolation of texels 2 and 3 (same weight)
        VX_tex_lerp #(
        ) tex_lerp_uh (
            .blend (req_blends[0][i]),
            .in1   (fmt_texels[2]),
            .in2   (fmt_texels[3]),
            .out   (texel_uh[i])
        );
    end

    VX_pipe_register #(
        .DATAW  (1 + NUM_REQS + REQ_INFO_WIDTH + (NUM_REQS * `BLEND_FRAC) + (2 * NUM_REQS * 32)),
        .RESETW (1)
    ) pipe_reg0 (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({req_valid, req_tmask, req_info,    req_blends[1], texel_ul,    texel_uh}),
        .data_out ({valid_s0,  tmask_s0,  req_info_s0, blend_v_s0,    texel_ul_s0, texel_uh_s0})
    );

    // second-axis interpolation of the two intermediate results
    for (genvar i = 0; i < NUM_REQS; i++) begin
        VX_tex_lerp #(
        ) tex_lerp_v (
            .blend (blend_v_s0[i]),
            .in1   (texel_ul_s0[i]),
            .in2   (texel_uh_s0[i]),
            .out   (texel_v[i])
        );
    end

    // hold both stages while the consumer has not taken the current response
    assign stall_out = rsp_valid && ~rsp_ready;

    VX_pipe_register #(
        .DATAW  (1 + NUM_REQS + REQ_INFO_WIDTH + (NUM_REQS * 32)),
        .RESETW (1)
    ) pipe_reg1 (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({valid_s0,  tmask_s0,  req_info_s0, texel_v}),
        .data_out ({rsp_valid, rsp_tmask, rsp_info,    rsp_data})
    );

    // can accept new request?
    assign req_ready = ~stall_out;

`ifdef DBG_PRINT_TEX

    wire [`NW_BITS-1:0] req_wid, rsp_wid;
    wire [31:0]         req_PC, rsp_PC;

    assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0];
    assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];

    always @(posedge clk) begin
        if (req_valid && req_ready) begin
            $write("%t: core%0d-tex-sampler-req: wid=%0d, PC=%0h, tmask=%b, format=%0d, data=",
                    $time, CORE_ID, req_wid, req_PC, req_tmask, req_format);
            `PRINT_ARRAY2D(req_data, 4, NUM_REQS);
            $write(", u0=");
            `PRINT_ARRAY1D(req_blends[0], NUM_REQS);
            $write(", v0=");
            `PRINT_ARRAY1D(req_blends[1], NUM_REQS);
            $write("\n");
        end
        if (rsp_valid && rsp_ready) begin
            $write("%t: core%0d-tex-sampler-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
                    $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask);
            `PRINT_ARRAY1D(rsp_data, NUM_REQS);
            $write("\n");
        end
    end
`endif

endmodule
|
||||
27
hw/rtl/tex_unit/VX_tex_stride.v
Normal file
27
hw/rtl/tex_unit/VX_tex_stride.v
Normal file
@@ -0,0 +1,27 @@
|
||||
`include "VX_tex_define.vh"
|
||||
|
||||
// Texel stride lookup.
//
// Purely combinational: maps a texture format to log2 of its texel size in
// bytes (0 -> 1 byte, 1 -> 2 bytes, 2 -> 4 bytes). Any format not listed is
// treated as R8G8B8A8 (4 bytes).
module VX_tex_stride #(
    parameter CORE_ID = 0
) (
    input wire [`TEX_FORMAT_BITS-1:0]  format,
    output wire [`TEX_STRIDE_BITS-1:0] log_stride
);
    `UNUSED_PARAM (CORE_ID)

    reg [`TEX_STRIDE_BITS-1:0] stride_sel;

    always @(*) begin
        case (format)
            // single-byte texels
            `TEX_FORMAT_A8,
            `TEX_FORMAT_L8:       stride_sel = 0;
            // two-byte texels
            `TEX_FORMAT_L8A8,
            `TEX_FORMAT_R5G6B5,
            `TEX_FORMAT_R4G4B4A4: stride_sel = 1;
            // `TEX_FORMAT_R8G8B8A8 and anything unrecognized
            default:              stride_sel = 2;
        endcase
    end

    assign log_stride = stride_sel;

endmodule
|
||||
226
hw/rtl/tex_unit/VX_tex_unit.v
Normal file
226
hw/rtl/tex_unit/VX_tex_unit.v
Normal file
@@ -0,0 +1,226 @@
|
||||
`include "VX_tex_define.vh"
|
||||
|
||||
// Texture unit top level.
//
// Holds the per-texture-unit CSR state (base address, format, wrap modes,
// filter, and per-mip-level offset / width / height) and chains three
// sub-modules:
//   1. VX_tex_addr    - turns a texture request into texel addresses
//   2. VX_tex_memory  - fetches the texels through the dcache interfaces
//   3. VX_tex_sampler - produces the final per-thread response data
//
// Ports:
//   clk, reset                   : clock and reset forwarded to the sub-modules
//   dcache_req_if, dcache_rsp_if : memory interfaces used by VX_tex_memory
//   tex_req_if                   : incoming texture requests
//   tex_csr_if                   : CSR write port programming the texture state
//   tex_rsp_if                   : outgoing texture responses
module VX_tex_unit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // Texture unit <-> Memory Unit
    VX_dcache_req_if dcache_req_if,
    VX_dcache_rsp_if dcache_rsp_if,

    // Inputs
    VX_tex_req_if tex_req_if,
    VX_tex_csr_if tex_csr_if,

    // Outputs
    VX_tex_rsp_if tex_rsp_if
);

    // Side-band "info" widths at each pipeline boundary:
    //   _S : {rd, wb, wid, PC} as delivered to/from the sampler
    //   _A : texture format prepended to _S (through address generation)
    //   _M : both blend vectors prepended to _A (through the memory stage)
    localparam REQ_INFO_WIDTH_S = `NR_BITS + 1 + `NW_BITS + 32;
    localparam REQ_INFO_WIDTH_A = `TEX_FORMAT_BITS + REQ_INFO_WIDTH_S;
    localparam REQ_INFO_WIDTH_M = (2 * `NUM_THREADS * `BLEND_FRAC) + REQ_INFO_WIDTH_A;

    // Per-unit, per-mip-level state (tex_dims[0] = width, tex_dims[1] = height)
    reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
    reg [`TEX_DIM_BITS-1:0]    tex_dims   [1:0][`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];

    // Per-unit state (tex_wraps[0] = u wrap mode, tex_wraps[1] = v wrap mode)
    reg [`TEX_ADDR_BITS-1:0]   tex_baddr  [`NUM_TEX_UNITS-1:0];
    reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
    reg [`TEX_WRAP_BITS-1:0]   tex_wraps  [1:0][`NUM_TEX_UNITS-1:0];
    reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0];

    // CSRs programming

    for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
        // For the per-mip CSRs, the target mip level is taken from the write
        // data starting at bit 28.
        wire [`TEX_LOD_BITS-1:0] mip_level = tex_csr_if.write_data[28 +: `TEX_LOD_BITS];
        always @(posedge clk) begin
            if (tex_csr_if.write_enable) begin
                case (tex_csr_if.write_addr)
                    `CSR_TEX_ADDR(i) : begin
                        tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
                    end
                    `CSR_TEX_FORMAT(i) : begin
                        tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
                    end
                    `CSR_TEX_WRAP(i) : begin
                        // u wrap in the low field, v wrap in the next field up
                        tex_wraps[0][i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
                        tex_wraps[1][i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
                    end
                    `CSR_TEX_FILTER(i) : begin
                        tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
                    end
                    `CSR_TEX_MIPOFF(i) : begin
                        tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
                    end
                    `CSR_TEX_WIDTH(i) : begin
                        tex_dims[0][i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
                    end
                    `CSR_TEX_HEIGHT(i) : begin
                        tex_dims[1][i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
                    end
                endcase
            end
        end
    end

    // mipmap attributes

    wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0]   sel_mipoff;
    wire [1:0][`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] sel_dims;

    // Each thread selects its own mip level (lod bits starting at bit 20);
    // the texture unit index is shared by all threads of the request.
    // NOTE(review): these lookups index with `unit` truncated to `NTEX_BITS,
    // while the VX_tex_addr connections below index the CSR arrays with
    // tex_req_if.unit directly - confirm both widths agree.
    for (genvar i = 0; i < `NUM_THREADS; ++i) begin
        wire [`NTEX_BITS-1:0]    unit      = tex_req_if.unit[`NTEX_BITS-1:0];
        wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][20+:`TEX_LOD_BITS];
        assign sel_mipoff[i]  = tex_mipoff[unit][mip_level];
        assign sel_dims[0][i] = tex_dims[0][unit][mip_level];
        assign sel_dims[1][i] = tex_dims[1][unit][mip_level];
    end

    // address generation

    wire                        mem_req_valid;
    wire [`NUM_THREADS-1:0]     mem_req_tmask;
    wire [`TEX_FILTER_BITS-1:0] mem_req_filter;
    wire [`TEX_STRIDE_BITS-1:0] mem_req_stride;
    wire [1:0][`NUM_THREADS-1:0][`BLEND_FRAC-1:0] mem_req_blends;
    wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr;
    wire [REQ_INFO_WIDTH_A-1:0] mem_req_info;
    wire                        mem_req_ready;

    VX_tex_addr #(
        .CORE_ID        (CORE_ID),
        .REQ_INFO_WIDTH (REQ_INFO_WIDTH_A),
        .NUM_REQS       (`NUM_THREADS)
    ) tex_addr (
        .clk        (clk),
        .reset      (reset),

        .req_valid  (tex_req_if.valid),
        .req_tmask  (tex_req_if.tmask),
        .req_coords (tex_req_if.coords),
        .req_format (tex_format[tex_req_if.unit]),
        .req_filter (tex_filter[tex_req_if.unit]),
        .req_wraps  ({tex_wraps[1][tex_req_if.unit], tex_wraps[0][tex_req_if.unit]}),
        .req_baseaddr(tex_baddr[tex_req_if.unit]),
        .req_mipoffset(sel_mipoff),
        .req_logdims(sel_dims),
        // The format travels in the info side-band so the sampler can use it later.
        .req_info   ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}),
        .req_ready  (tex_req_if.ready),

        .rsp_valid  (mem_req_valid),
        .rsp_tmask  (mem_req_tmask),
        .rsp_filter (mem_req_filter),
        .rsp_stride (mem_req_stride),
        .rsp_addr   (mem_req_addr),
        .rsp_blends (mem_req_blends),
        .rsp_info   (mem_req_info),
        .rsp_ready  (mem_req_ready)
    );

    // retrieve texel values from memory

    wire                        mem_rsp_valid;
    wire [`NUM_THREADS-1:0]     mem_rsp_tmask;
    wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data;
    wire [REQ_INFO_WIDTH_M-1:0] mem_rsp_info;
    wire                        mem_rsp_ready;

    VX_tex_memory #(
        .CORE_ID        (CORE_ID),
        .REQ_INFO_WIDTH (REQ_INFO_WIDTH_M),
        .NUM_REQS       (`NUM_THREADS)
    ) tex_memory (
        .clk       (clk),
        .reset     (reset),

        // memory interface
        .dcache_req_if (dcache_req_if),
        .dcache_rsp_if (dcache_rsp_if),

        // inputs
        .req_valid (mem_req_valid),
        .req_tmask (mem_req_tmask),
        .req_filter(mem_req_filter),
        .req_stride(mem_req_stride),
        .req_addr  (mem_req_addr),
        // The blend factors ride through the memory stage inside the info bus.
        .req_info  ({mem_req_blends, mem_req_info}),
        .req_ready (mem_req_ready),

        // outputs
        .rsp_valid (mem_rsp_valid),
        .rsp_tmask (mem_rsp_tmask),
        .rsp_data  (mem_rsp_data),
        .rsp_info  (mem_rsp_info),
        .rsp_ready (mem_rsp_ready)
    );

    // apply sampler

    // NOTE(review): mem_req_blends is declared [1:0][`NUM_THREADS-1:0] but is
    // unpacked here as [`NUM_THREADS-1:0][1:0]. The total width matches, yet the
    // index order differs - confirm against VX_tex_sampler's req_blends port.
    wire [`NUM_THREADS-1:0][1:0][`BLEND_FRAC-1:0] rsp_blends;
    wire [`TEX_FORMAT_BITS-1:0] rsp_format;
    wire [REQ_INFO_WIDTH_S-1:0] rsp_info;

    // Split the side-band in the same order in which it was concatenated.
    assign {rsp_blends, rsp_format, rsp_info} = mem_rsp_info;

    VX_tex_sampler #(
        .CORE_ID        (CORE_ID),
        .REQ_INFO_WIDTH (REQ_INFO_WIDTH_S),
        .NUM_REQS       (`NUM_THREADS)
    ) tex_sampler (
        .clk        (clk),
        .reset      (reset),

        // inputs
        .req_valid  (mem_rsp_valid),
        .req_tmask  (mem_rsp_tmask),
        .req_data   (mem_rsp_data),
        .req_format (rsp_format),
        .req_blends (rsp_blends),
        .req_info   (rsp_info),
        .req_ready  (mem_rsp_ready),

        // outputs
        .rsp_valid  (tex_rsp_if.valid),
        .rsp_tmask  (tex_rsp_if.tmask),
        .rsp_data   (tex_rsp_if.data),
        .rsp_info   ({tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC}),
        .rsp_ready  (tex_rsp_if.ready)
    );

`ifdef DBG_PRINT_TEX
    // Dump the state of texture unit i whenever one of its CSRs is written.
    // The registers are updated with nonblocking assignments on this same
    // edge, so the values shown are the ones held before the write lands.
    for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
        always @(posedge clk) begin
            if (tex_csr_if.write_enable
             && (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i)
              && tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin
                $display("%t: core%0d-tex-csr: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]);
                $display("%t: core%0d-tex-csr: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]);
                $display("%t: core%0d-tex-csr: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wraps[0][i]);
                $display("%t: core%0d-tex-csr: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wraps[1][i]);
                $display("%t: core%0d-tex-csr: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]);
                $display("%t: core%0d-tex-csr: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]);
                $display("%t: core%0d-tex-csr: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_dims[0][i][0]);
                $display("%t: core%0d-tex-csr: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_dims[1][i][0]);
            end
        end
    end
    always @(posedge clk) begin
        if (tex_req_if.valid && tex_req_if.ready) begin
            // $write (not $display) because the line is continued by the
            // array dumps below and terminated by the explicit "\n".
            $write("%t: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=",
                    $time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod);
            `PRINT_ARRAY1D(tex_req_if.coords[0], `NUM_THREADS);
            $write(", v=");
            `PRINT_ARRAY1D(tex_req_if.coords[1], `NUM_THREADS);
            $write("\n");
        end
        if (tex_rsp_if.valid && tex_rsp_if.ready) begin
            $write("%t: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
                    $time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask);
            `PRINT_ARRAY1D(tex_rsp_if.data, `NUM_THREADS);
            $write("\n");
        end
    end
`endif

endmodule
|
||||
32
hw/rtl/tex_unit/VX_tex_wrap.v
Normal file
32
hw/rtl/tex_unit/VX_tex_wrap.v
Normal file
@@ -0,0 +1,32 @@
|
||||
`include "VX_tex_define.vh"
|
||||
|
||||
// Applies a texture wrap mode to one 32-bit coordinate and returns the
// resulting `FIXED_FRAC-bit value.
//   wrap_i  : wrap mode selector
//   coord_i : input coordinate
//   coord_o : wrapped coordinate
//     `TEX_WRAP_CLAMP  -> low bits of `CLAMP(coord_i, 0, `FIXED_MASK)
//     `TEX_WRAP_MIRROR -> low bits of coord_i, bitwise inverted when
//                         coord_i[`FIXED_FRAC] is set
//     any other mode   -> low bits of coord_i unchanged (repeat)
// CORE_ID is accepted but not used by this module.
module VX_tex_wrap #(
    parameter CORE_ID = 0
) (
    input  wire [`TEX_WRAP_BITS-1:0] wrap_i,
    input  wire [31:0]               coord_i,
    output wire [`FIXED_FRAC-1:0]    coord_o
);

    `UNUSED_PARAM (CORE_ID)

    // NOTE(review): coord_i is an unsigned vector, so if `CLAMP tests its
    // lower bound with a plain "<" against 0, that bound can never trigger -
    // confirm how negative coordinates are meant to be handled.
    wire [31:0] clamped = `CLAMP(coord_i, 0, `FIXED_MASK);

    // Only the low `FIXED_FRAC bits of the clamped value are consumed.
    `UNUSED_VAR (clamped)

    // Low (fractional) bits of the coordinate, and their mirrored form:
    // every bit is flipped when the bit just above the fraction is 1.
    wire [`FIXED_FRAC-1:0] frac_bits = coord_i[`FIXED_FRAC-1:0];
    wire [`FIXED_FRAC-1:0] mirrored  = frac_bits ^ {`FIXED_FRAC{coord_i[`FIXED_FRAC]}};

    reg [`FIXED_FRAC-1:0] wrapped;

    always @(*) begin
        // Repeat is the fallback; clamp and mirror override it.
        wrapped = frac_bits;
        case (wrap_i)
            `TEX_WRAP_CLAMP:  wrapped = clamped[`FIXED_FRAC-1:0];
            `TEX_WRAP_MIRROR: wrapped = mirrored;
            default: ; // `TEX_WRAP_REPEAT
        endcase
    end

    assign coord_o = wrapped;

endmodule
|
||||
Reference in New Issue
Block a user