rebase master update

This commit is contained in:
Blaise Tine
2021-07-30 21:03:14 -07:00
parent 79fd92a1b4
commit bb1ceffadd
86 changed files with 6111 additions and 132 deletions

View File

@@ -0,0 +1,151 @@
`include "VX_tex_define.vh"
// Texture address generator.
//
// Two-stage pipeline that turns per-lane fixed-point texture coordinates
// into the four texel byte addresses of the sampling footprint, plus the
// per-dimension blend fractions.
//
//   stage 0: decode the texel stride from the format, offset each coordinate
//            by -/+ half a texel when filtering is enabled, apply the wrap
//            mode, and add the mip offset to the base address.
//   stage 1: scale the wrapped coordinates to integer texel indices and
//            compute the addresses of the four (lo/hi x lo/hi) texels.
//
// Parameters:
//   CORE_ID        - only used by the debug trace.
//   REQ_INFO_WIDTH - width of the opaque side-band carried along the pipeline.
//   NUM_REQS       - number of parallel lanes.
//
// Handshake: both stages advance together unless the output is valid and not
// accepted (stall_out); req_ready is the inverse of that stall.
module VX_tex_addr #(
    parameter CORE_ID        = 0,
    parameter REQ_INFO_WIDTH = 1,
    parameter NUM_REQS       = 1
) (
    input wire clk,
    input wire reset,

    // inputs
    input wire                          req_valid,
    input wire [NUM_REQS-1:0]           req_tmask,
    input wire [1:0][NUM_REQS-1:0][31:0] req_coords,
    input wire [`TEX_FORMAT_BITS-1:0]   req_format,
    input wire [`TEX_FILTER_BITS-1:0]   req_filter,
    input wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps,
    input wire [`TEX_ADDR_BITS-1:0]     req_baseaddr,
    input wire [NUM_REQS-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoffset,
    input wire [1:0][NUM_REQS-1:0][`TEX_DIM_BITS-1:0] req_logdims,
    input wire [REQ_INFO_WIDTH-1:0]     req_info,
    output wire                         req_ready,

    // outputs
    output wire                         rsp_valid,
    output wire [NUM_REQS-1:0]          rsp_tmask,
    output wire [`TEX_FILTER_BITS-1:0]  rsp_filter,
    output wire [`TEX_STRIDE_BITS-1:0]  rsp_stride,
    output wire [NUM_REQS-1:0][3:0][31:0] rsp_addr,
    output wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] rsp_blends,
    output wire [REQ_INFO_WIDTH-1:0]    rsp_info,
    input wire                          rsp_ready
);

    `UNUSED_PARAM (CORE_ID)

    wire                        valid_s0;
    wire [NUM_REQS-1:0]         tmask_s0;
    wire [`TEX_FILTER_BITS-1:0] filter_s0;
    wire [REQ_INFO_WIDTH-1:0]   req_info_s0;
    wire [1:0][NUM_REQS-1:0][`FIXED_FRAC-1:0] clamped_lo, clamped_lo_s0;
    wire [1:0][NUM_REQS-1:0][`FIXED_FRAC-1:0] clamped_hi, clamped_hi_s0;
    wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0;
    wire [NUM_REQS-1:0][31:0]   mip_addr, mip_addr_s0;
    wire [1:0][NUM_REQS-1:0][`TEX_DIM_BITS-1:0] log_dims_s0;

    wire stall_out;

    // stride: log2 of the texel size in bytes, derived from the format

    VX_tex_stride #(
        .CORE_ID (CORE_ID)
    ) tex_stride (
        .format     (req_format),
        .log_stride (log_stride)
    );

    // addressing mode

    for (genvar i = 0; i < NUM_REQS; ++i) begin
        for (genvar j = 0; j < 2; ++j) begin
            wire [31:0] coord_lo, coord_hi;

            // (`FIXED_HALF >> logdim) is half a texel in normalized
            // coordinates; without filtering lo and hi are the same point.
            assign coord_lo = req_coords[j][i] - (req_filter ? (`FIXED_HALF >> req_logdims[j][i]) : 0);
            assign coord_hi = req_coords[j][i] + (req_filter ? (`FIXED_HALF >> req_logdims[j][i]) : 0);

            VX_tex_wrap #(
                .CORE_ID (CORE_ID)
            ) tex_wrap_lo (
                .wrap_i  (req_wraps[j]),
                .coord_i (coord_lo),
                .coord_o (clamped_lo[j][i])
            );

            VX_tex_wrap #(
                .CORE_ID (CORE_ID)
            ) tex_wrap_hi (
                .wrap_i  (req_wraps[j]),
                .coord_i (coord_hi),
                .coord_o (clamped_hi[j][i])
            );
        end
        assign mip_addr[i] = req_baseaddr + 32'(req_mipoffset[i]);
    end

    VX_pipe_register #(
        .DATAW  (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + NUM_REQS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
        .RESETW (1)
    ) pipe_reg0 (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({req_valid, req_tmask, req_filter, log_stride,    req_info,    req_logdims, mip_addr,    clamped_lo,    clamped_hi}),
        .data_out ({valid_s0,  tmask_s0,  filter_s0,  log_stride_s0, req_info_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
    );

    // addresses generation

    wire [1:0][NUM_REQS-1:0][`FIXED_INT-1:0]  scaled_lo, scaled_hi;
    wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] blends;
    wire [NUM_REQS-1:0][3:0][31:0]            addr;

    for (genvar i = 0; i < NUM_REQS; ++i) begin
        for (genvar j = 0; j < 2; ++j) begin
            // keep the top log_dims bits of the fraction: the integer texel index
            assign scaled_lo[j][i] = `FIXED_INT'(clamped_lo_s0[j][i] >> ((`FIXED_FRAC) - log_dims_s0[j][i]));
            assign scaled_hi[j][i] = `FIXED_INT'(clamped_hi_s0[j][i] >> ((`FIXED_FRAC) - log_dims_s0[j][i]));
            assign blends[j][i]    = filter_s0 ? clamped_lo_s0[j][i][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0);
        end
    end

    // addr = mip_addr + ((x + (y << log_width)) << log_stride)
    //
    // Binary '+' binds tighter than '<<' in SystemVerilog, so the shifted
    // texel offset must be parenthesized explicitly; otherwise the base
    // address would be shifted by log_stride as well.
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign addr[i][0] = mip_addr_s0[i] + ((32'(scaled_lo[0][i]) + (32'(scaled_lo[1][i]) << log_dims_s0[0][i])) << log_stride_s0);
        assign addr[i][1] = mip_addr_s0[i] + ((32'(scaled_hi[0][i]) + (32'(scaled_lo[1][i]) << log_dims_s0[0][i])) << log_stride_s0);
        assign addr[i][2] = mip_addr_s0[i] + ((32'(scaled_lo[0][i]) + (32'(scaled_hi[1][i]) << log_dims_s0[0][i])) << log_stride_s0);
        assign addr[i][3] = mip_addr_s0[i] + ((32'(scaled_hi[0][i]) + (32'(scaled_hi[1][i]) << log_dims_s0[0][i])) << log_stride_s0);
    end

    assign stall_out = rsp_valid && ~rsp_ready;

    VX_pipe_register #(
        .DATAW  (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `BLEND_FRAC) + REQ_INFO_WIDTH),
        .RESETW (1)
    ) pipe_reg1 (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({valid_s0,  tmask_s0,  filter_s0,  log_stride_s0, addr,     blends,     req_info_s0}),
        .data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_stride,    rsp_addr, rsp_blends, rsp_info})
    );

    // can accept a new request whenever the pipeline is not stalled
    assign req_ready = ~stall_out;

`ifdef DBG_PRINT_TEX
    wire [`NW_BITS-1:0] rsp_wid;
    wire [31:0]         rsp_PC;
    assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];

    always @(posedge clk) begin
        if (rsp_valid && rsp_ready) begin
            $write("%t: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, req_filter=%0d, tride=%0d, addr=",
                    $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_stride);
            `PRINT_ARRAY2D(rsp_addr, 4, NUM_REQS);
            $write("\n");
        end
    end
`endif

endmodule

View File

@@ -0,0 +1,42 @@
`ifndef VX_TEX_DEFINE
`define VX_TEX_DEFINE

`include "VX_define.vh"

// Fixed-point coordinate layout inside a 32-bit word:
// FIXED_FRAC fractional bits, the remaining FIXED_INT bits are integer.
`define FIXED_FRAC      20
`define FIXED_INT       (32 - `FIXED_FRAC)
// 1.0 and 0.5 in that fixed-point format, and the mask of the fractional bits
`define FIXED_ONE       (2 ** `FIXED_FRAC)
`define FIXED_HALF      (`FIXED_ONE >> 1)
`define FIXED_MASK      (`FIXED_ONE - 1)

// Clamp x into [lo, hi]. The lower bound is compared as signed so that a
// negative x clamps to lo; the upper bound uses the operands' own signedness.
// Every argument is parenthesized so that compound expressions passed by a
// caller cannot re-associate with the operators inside the macro.
`define CLAMP(x,lo,hi)  (($signed(x) < $signed(lo)) ? (lo) : (((x) > (hi)) ? (hi) : (x)))

// field widths of the texture state
`define TEX_ADDR_BITS   32
`define TEX_FORMAT_BITS 3
`define TEX_WRAP_BITS   2
`define TEX_DIM_BITS    4
`define TEX_FILTER_BITS 1
`define TEX_MIPOFF_BITS (2*12+1)
`define TEX_STRIDE_BITS 2
`define TEX_LOD_BITS    4
`define TEX_MIP_BITS    (`NTEX_BITS + `TEX_LOD_BITS)

// wrap mode encodings
`define TEX_WRAP_CLAMP  0
`define TEX_WRAP_REPEAT 1
`define TEX_WRAP_MIRROR 2

// bits per output color channel
`define TEX_COLOR_BITS  8

// blend factor precision; BLEND_ONE is 1.0 at that precision
`define BLEND_FRAC      8
`define BLEND_ONE       (2 ** `BLEND_FRAC)

// texel format encodings
`define TEX_FORMAT_R8G8B8A8 `TEX_FORMAT_BITS'(0)
`define TEX_FORMAT_R5G6B5   `TEX_FORMAT_BITS'(1)
`define TEX_FORMAT_R4G4B4A4 `TEX_FORMAT_BITS'(2)
`define TEX_FORMAT_L8A8     `TEX_FORMAT_BITS'(3)
`define TEX_FORMAT_L8       `TEX_FORMAT_BITS'(4)
`define TEX_FORMAT_A8       `TEX_FORMAT_BITS'(5)

`endif

View File

@@ -0,0 +1,58 @@
`include "VX_tex_define.vh"
// Texel format converter (purely combinational).
//
// Expands a raw texel read from memory into four 8-bit channels packed as
// texel_out[7:0], [15:8], [23:16] and [31:24]. Alpha lands in [31:24] (see
// the A8 / L8A8 cases); formats without alpha produce an all-ones alpha.
// Narrow channels are widened by replicating their most significant bits
// into the vacated low bits, so a full-scale input maps to 0xFF.
//
// Any format code without an explicit case is passed through as R8G8B8A8.
module VX_tex_format #(
    parameter CORE_ID = 0
) (
    input wire [`TEX_FORMAT_BITS-1:0] format,
    input wire [31:0]                 texel_in,
    output wire [31:0]                texel_out
);
    `UNUSED_PARAM (CORE_ID)

    reg [31:0] texel_out_r;

    always @(*) begin
        case (format)
        `TEX_FORMAT_R5G6B5: begin
            // 5/6/5-bit channels: append the top 3/2/3 bits to reach 8 bits
            texel_out_r[07:00] = `TEX_COLOR_BITS'({texel_in[15:11],texel_in[15:13]});
            texel_out_r[15:08] = `TEX_COLOR_BITS'({texel_in[10:5],texel_in[10:9]});
            texel_out_r[23:16] = `TEX_COLOR_BITS'({texel_in[4:0],texel_in[4:2]});
            texel_out_r[31:24] = {`TEX_COLOR_BITS{1'b1}};
        end
        `TEX_FORMAT_R4G4B4A4: begin
            // 4-bit channels: duplicate each nibble. The first channel must
            // replicate its own nibble [11:8]; filling its low half with the
            // alpha nibble [15:12] would leak alpha into the color value.
            texel_out_r[07:00] = `TEX_COLOR_BITS'({2{texel_in[11:8]}});
            texel_out_r[15:08] = `TEX_COLOR_BITS'({2{texel_in[7:4]}});
            texel_out_r[23:16] = `TEX_COLOR_BITS'({2{texel_in[3:0]}});
            texel_out_r[31:24] = `TEX_COLOR_BITS'({2{texel_in[15:12]}});
        end
        `TEX_FORMAT_L8A8: begin
            // luminance is broadcast to the three color channels
            texel_out_r[07:00] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[15:08] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[23:16] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[31:24] = `TEX_COLOR_BITS'(texel_in[15:8]);
        end
        `TEX_FORMAT_A8: begin
            // alpha only: color channels are zero
            texel_out_r[07:00] = `TEX_COLOR_BITS'(0);
            texel_out_r[15:08] = `TEX_COLOR_BITS'(0);
            texel_out_r[23:16] = `TEX_COLOR_BITS'(0);
            texel_out_r[31:24] = `TEX_COLOR_BITS'(texel_in[7:0]);
        end
        `TEX_FORMAT_L8: begin
            // luminance only: opaque alpha
            texel_out_r[07:00] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[15:08] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[23:16] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[31:24] = {`TEX_COLOR_BITS{1'b1}};
        end
        // `TEX_FORMAT_R8G8B8A8
        default: begin
            texel_out_r[07:00] = `TEX_COLOR_BITS'(texel_in[7:0]);
            texel_out_r[15:08] = `TEX_COLOR_BITS'(texel_in[15:8]);
            texel_out_r[23:16] = `TEX_COLOR_BITS'(texel_in[23:16]);
            texel_out_r[31:24] = `TEX_COLOR_BITS'(texel_in[31:24]);
        end
        endcase
    end

    assign texel_out = texel_out_r;

endmodule

View File

@@ -0,0 +1,17 @@
`include "VX_tex_define.vh"
// Per-channel linear interpolation of two packed 4x8-bit texels.
//
// For every byte lane: out = (in1 * (BLEND_ONE - blend) + in2 * blend) >> 8,
// i.e. blend == 0 returns in1 and larger blend values weight in2 more.
module VX_tex_lerp #(
) (
    input wire [`BLEND_FRAC-1:0] blend,
    input wire [31:0]            in1,
    input wire [31:0]            in2,
    output wire [31:0]           out
);
    for (genvar ch = 0; ch < 4; ++ch) begin
        // the two 8-bit channel values being mixed
        wire [7:0]  chan_a = in1[ch*8 +: 8];
        wire [7:0]  chan_b = in2[ch*8 +: 8];

        // complementary weight; needs one extra bit to hold BLEND_ONE itself
        wire [8:0]  weight_a = `BLEND_ONE - blend;

        // weighted sum; bit 16 is never consumed
        wire [16:0] mix = chan_a * weight_a + chan_b * blend;
        `UNUSED_VAR (mix)

        // drop the 8 fractional bits of the product
        assign out[ch*8 +: 8] = mix[15:8];
    end

endmodule

View File

@@ -0,0 +1,128 @@
`include "../cache/VX_cache_define.vh"
// Multi-lane request arbiter with tag-based response routing.
//
// Request side : NUM_REQS requesters, each LANES wide, share one LANES-wide
//                output port. A round-robin arbiter picks one requester and
//                its index is appended below the requester's tag.
// Response side: the low LOG_NUM_REQS bits of the returned tag select which
//                requester receives the response; the remaining bits are
//                handed back as the requester's original tag.
//
// With NUM_REQS == 1 the module degenerates to straight wiring.
module VX_tex_lsu_arb #(
    parameter NUM_REQS      = 1,
    parameter LANES         = 1,
    parameter WORD_SIZE     = 1,
    parameter TAG_IN_WIDTH  = 1,
    parameter TAG_OUT_WIDTH = 1,
    parameter LOG_NUM_REQS  = `CLOG2(NUM_REQS)
) (
    input wire clk,
    input wire reset,

    // input requests
    input wire [NUM_REQS-1:0][LANES-1:0]                       req_valid_in,
    input wire [NUM_REQS-1:0][LANES-1:0]                       req_rw_in,
    input wire [NUM_REQS-1:0][LANES-1:0][WORD_SIZE-1:0]        req_byteen_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_ADDR_WIDTH-1:0] req_addr_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]      req_data_in,
    input wire [NUM_REQS-1:0][LANES-1:0][TAG_IN_WIDTH-1:0]     req_tag_in,
    output wire [NUM_REQS-1:0][LANES-1:0]                      req_ready_in,

    // output request
    output wire [LANES-1:0]                       req_valid_out,
    output wire [LANES-1:0]                       req_rw_out,
    output wire [LANES-1:0][WORD_SIZE-1:0]        req_byteen_out,
    output wire [LANES-1:0][`WORD_ADDR_WIDTH-1:0] req_addr_out,
    output wire [LANES-1:0][`WORD_WIDTH-1:0]      req_data_out,
    output wire [LANES-1:0][TAG_OUT_WIDTH-1:0]    req_tag_out,
    input wire [LANES-1:0]                        req_ready_out,

    // input response
    input wire [LANES-1:0]                  rsp_valid_in,
    input wire [LANES-1:0][`WORD_WIDTH-1:0] rsp_data_in,
    input wire [TAG_OUT_WIDTH-1:0]          rsp_tag_in,
    output wire                             rsp_ready_in,

    // output responses
    output wire [NUM_REQS-1:0][LANES-1:0]                  rsp_valid_out,
    output wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0] rsp_data_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]           rsp_tag_out,
    input wire [NUM_REQS-1:0]                              rsp_ready_out
);
    // width of one requester's packed request (all lanes, all fields)
    localparam REQ_BUNDLE_W = LANES * (1 + TAG_IN_WIDTH + `WORD_ADDR_WIDTH + 1 + WORD_SIZE + `WORD_WIDTH);

    if (NUM_REQS > 1) begin

        // ---- request path ------------------------------------------------

        wire [NUM_REQS-1:0]                   req_pending;
        wire [NUM_REQS-1:0][REQ_BUNDLE_W-1:0] req_bundles;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // a requester competes as soon as any of its lanes is valid
            assign req_pending[r] = (| req_valid_in[r]);
            assign req_bundles[r] = {req_valid_in[r], req_tag_in[r], req_addr_in[r], req_rw_in[r], req_byteen_in[r], req_data_in[r]};
        end

        wire                    grant_valid;
        wire [LOG_NUM_REQS-1:0] grant_idx;
        wire [NUM_REQS-1:0]     grant_1hot;

        // the arbiter is enabled while at least one output lane is ready
        wire arb_enable = (| req_ready_out);

        VX_rr_arbiter #(
            .NUM_REQS(NUM_REQS),
            .LOCK_ENABLE(1)
        ) sel_arb (
            .clk          (clk),
            .reset        (reset),
            .requests     (req_pending),
            .enable       (arb_enable),
            .grant_valid  (grant_valid),
            .grant_index  (grant_idx),
            .grant_onehot (grant_1hot)
        );

        // unpack the granted requester's bundle onto the output port
        wire [LANES-1:0]                   granted_valid;
        wire [LANES-1:0][TAG_IN_WIDTH-1:0] granted_tag;

        assign {granted_valid, granted_tag, req_addr_out, req_rw_out, req_byteen_out, req_data_out} = req_bundles[grant_idx];

        assign req_valid_out = granted_valid & {LANES{grant_valid}};

        for (genvar l = 0; l < LANES; l++) begin
            // requester index goes into the low tag bits for response routing
            assign req_tag_out[l] = {granted_tag[l], grant_idx};
        end

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // only the granted requester observes the downstream ready
            assign req_ready_in[r] = req_ready_out & {LANES{grant_1hot[r]}};
        end

        // ---- response path -----------------------------------------------

        wire [LOG_NUM_REQS-1:0] rsp_target = rsp_tag_in[LOG_NUM_REQS-1:0];

        reg [NUM_REQS-1:0][LANES-1:0] rsp_valid_demux;

        always @(*) begin
            rsp_valid_demux = '0;
            rsp_valid_demux[rsp_target] = rsp_valid_in;
        end

        assign rsp_valid_out = rsp_valid_demux;
        assign rsp_ready_in  = rsp_ready_out[rsp_target];

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // data and tag are broadcast; rsp_valid_out qualifies the target
            assign rsp_tag_out[r]  = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
            assign rsp_data_out[r] = rsp_data_in;
        end

    end else begin

        // single requester: no arbitration state required
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // request pass-through
        assign req_valid_out  = req_valid_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_addr_out   = req_addr_in;
        assign req_data_out   = req_data_in;
        assign req_tag_out    = req_tag_in;
        assign req_ready_in   = req_ready_out;

        // response pass-through
        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_data_out   = rsp_data_in;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_ready_in   = rsp_ready_out;

    end

endmodule

View File

@@ -0,0 +1,288 @@
`include "VX_tex_define.vh"
// Texture memory stage.
//
// Accepts one request per cycle carrying, for each of NUM_REQS lanes, four
// texel byte addresses. Requests are queued; for the request at the head of
// the queue the module issues one dcache read per texel index (all four when
// req_filter is set, only texel 0 otherwise), gathers the responses, aligns
// and zero-extends each texel according to req_stride, and emits the
// collected texels together with the request's tmask and info.
//
// Parameters:
//   CORE_ID        - only used by the debug trace.
//   REQ_INFO_WIDTH - width of the opaque side-band carried with each request.
//   NUM_REQS       - number of parallel lanes.
module VX_tex_memory #(
parameter CORE_ID = 0,
parameter REQ_INFO_WIDTH = 1,
parameter NUM_REQS = 1
) (
input wire clk,
input wire reset,
// memory interface
VX_dcache_req_if dcache_req_if,
VX_dcache_rsp_if dcache_rsp_if,
// inputs
input wire req_valid,
input wire [NUM_REQS-1:0] req_tmask,
input wire [`TEX_FILTER_BITS-1:0] req_filter,
input wire [`TEX_STRIDE_BITS-1:0] req_stride,
input wire [NUM_REQS-1:0][3:0][31:0] req_addr,
input wire [REQ_INFO_WIDTH-1:0] req_info,
output wire req_ready,
// outputs
output wire rsp_valid,
output wire [NUM_REQS-1:0] rsp_tmask,
output wire [NUM_REQS-1:0][3:0][31:0] rsp_data,
output wire [REQ_INFO_WIDTH-1:0] rsp_info,
input wire rsp_ready
);
`UNUSED_PARAM (CORE_ID)
// wide enough to count up to NUM_REQS * 4 outstanding lane responses
localparam RSP_CTR_W = $clog2(NUM_REQS * 4 + 1);
wire [3:0] dup_reqs;
wire [3:0][NUM_REQS-1:0][29:0] req_addr_w;
wire [3:0][NUM_REQS-1:0][1:0] align_offs;
// reorder addresses from [lane][texel] to [texel][lane] and split each one
// into a 30-bit word address and a 2-bit byte offset within the word
for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar j = 0; j < 4; ++j) begin
assign req_addr_w[j][i] = req_addr[i][j][31:2];
assign align_offs[j][i] = req_addr[i][j][1:0];
end
end
// find duplicate addresses: dup_reqs[texel] is set when lane 0 is active
// and every active lane targets the same word as lane 0 (inactive lanes
// are treated as matching)
for (genvar i = 0; i < 4; ++i) begin
wire [NUM_REQS-1:0] addr_matches;
for (genvar j = 0; j < NUM_REQS; j++) begin
assign addr_matches[j] = (req_addr_w[i][0] == req_addr_w[i][j]) || ~req_tmask[j];
end
assign dup_reqs[i] = req_tmask[0] && (& addr_matches);
end
// save the request (addresses plus side-band) into a fifo; the q_* signals
// below are the fields of the entry at the head of the queue
wire reqq_push, reqq_pop, reqq_empty, reqq_full;
wire [3:0][NUM_REQS-1:0][29:0] q_req_addr;
wire [NUM_REQS-1:0] q_req_tmask;
wire [`TEX_FILTER_BITS-1:0] q_req_filter;
wire [REQ_INFO_WIDTH-1:0] q_req_info;
wire [`TEX_STRIDE_BITS-1:0] q_req_stride;
wire [3:0][NUM_REQS-1:0][1:0] q_align_offs;
wire [3:0] q_dup_reqs;
assign reqq_push = req_valid && req_ready;
VX_fifo_queue #(
.DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFO_WIDTH + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * NUM_REQS * 2) + 4),
.SIZE (`LSUQ_SIZE),
.OUTPUT_REG (1)
) req_queue (
.clk (clk),
.reset (reset),
.push (reqq_push),
.pop (reqq_pop),
.data_in ({req_addr_w, req_tmask, req_info, req_filter, req_stride, align_offs, dup_reqs}),
.data_out ({q_req_addr, q_req_tmask, q_req_info, q_req_filter, q_req_stride, q_align_offs, q_dup_reqs}),
.empty (reqq_empty),
.full (reqq_full),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
);
// can take more requests?
assign req_ready = ~reqq_full;
///////////////////////////////////////////////////////////////////////////
// Texel issue sequencer: walks req_texel_idx over the texels of the head
// request, one dcache request group per texel index.
wire req_texel_valid;
wire sent_all_ready, last_texel_sent;
wire req_texel_dup;
wire [NUM_REQS-1:0][29:0] req_texel_addr;
reg [1:0] req_texel_idx;
reg req_texels_done;
// advance to the next texel once every lane of the current one was accepted;
// restart from texel 0 after the last texel of the request
always @(posedge clk) begin
if (reset || last_texel_sent) begin
req_texel_idx <= 0;
end else if (req_texel_valid && sent_all_ready) begin
req_texel_idx <= req_texel_idx + 1;
end
end
// req_texels_done blocks further issue between sending the last texel and
// popping the head entry, so the same request is not issued twice
always @(posedge clk) begin
if (reset || reqq_pop) begin
req_texels_done <= 0;
end else if (last_texel_sent) begin
req_texels_done <= 1;
end
end
assign req_texel_valid = ~reqq_empty && ~req_texels_done;
assign req_texel_addr = q_req_addr[req_texel_idx];
assign req_texel_dup = q_dup_reqs[req_texel_idx];
// with filtering all four texels are fetched, otherwise only texel 0
wire is_last_texel = (req_texel_idx == (q_req_filter ? 3 : 0));
assign last_texel_sent = req_texel_valid && sent_all_ready && is_last_texel;
// DCache Request
reg [NUM_REQS-1:0] texel_sent_mask;
wire [NUM_REQS-1:0] dcache_req_fire;
wire [NUM_REQS-1:0] req_dup_mask;
assign dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
// the current texel is fully sent when every active lane is either accepted
// this cycle or was accepted earlier; for a duplicate only lane 0 matters
assign sent_all_ready = (&(dcache_req_if.ready | texel_sent_mask | ~q_req_tmask))
|| (req_texel_dup & dcache_req_if.ready[0]);
// remember which lanes were already accepted for the current texel
always @(posedge clk) begin
if (reset || sent_all_ready) begin
texel_sent_mask <= 0;
end else begin
texel_sent_mask <= texel_sent_mask | dcache_req_fire;
end
end
// for a duplicate texel only lane 0 issues a request
assign req_dup_mask = {{(NUM_REQS-1){~req_texel_dup}}, 1'b1};
assign dcache_req_if.valid = {NUM_REQS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask;
// read-only, full-word accesses
assign dcache_req_if.rw = {NUM_REQS{1'b0}};
assign dcache_req_if.addr = req_texel_addr;
assign dcache_req_if.byteen = {NUM_REQS{4'b1111}};
assign dcache_req_if.data = 'x;
// the texel index always occupies the low two tag bits so that the response
// side can recover it from dcache_rsp_if.tag[1:0]
`ifdef DBG_CACHE_REQ_INFO
wire [`NW_BITS-1:0] q_req_wid;
wire [31:0] q_req_PC;
assign {q_req_wid, q_req_PC} = q_req_info[`NW_BITS+32-1:0];
assign dcache_req_if.tag = {NUM_REQS{q_req_PC, q_req_wid, req_texel_idx}};
`else
assign dcache_req_if.tag = {NUM_REQS{req_texel_idx}};
`endif
// Dcache Response
reg [3:0][NUM_REQS-1:0][31:0] rsp_texels, rsp_texels_n;
wire [NUM_REQS-1:0][3:0][31:0] rsp_texels_qual;
reg [NUM_REQS-1:0][31:0] rsp_data_qual;
reg [RSP_CTR_W-1:0] rsp_rem_ctr;
wire [NUM_REQS-1:0] rsp_cur_tmask;
wire [$clog2(NUM_REQS + 1)-1:0] rsp_cur_cnt;
wire dcache_rsp_fire;
wire [1:0] rsp_texel_idx;
wire rsp_texel_dup;
assign rsp_texel_idx = dcache_rsp_if.tag[1:0];
assign rsp_texel_dup = q_dup_reqs[rsp_texel_idx];
assign dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;
// a duplicate response stands for every active lane of the request;
// otherwise only the lanes flagged in the response are covered
assign rsp_cur_tmask = rsp_texel_dup ? q_req_tmask : dcache_rsp_if.tmask;
assign rsp_cur_cnt = $countones(rsp_cur_tmask);
for (genvar i = 0; i < NUM_REQS; i++) begin
wire [31:0] src_mask = {32{dcache_rsp_if.tmask[i]}};
// lane 0 and duplicate texels take lane 0's word as is; every other lane
// takes its own word gated by its response tmask bit.
// NOTE(review): lane 0 is not gated by tmask[0]; since results are OR-ed
// into rsp_texels this presumably relies on data[0] being harmless when
// lane 0 is absent from a partial response - confirm against the cache.
wire [31:0] src_data = ((i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : (dcache_rsp_if.data[i]) & src_mask);
reg [31:0] rsp_data_shifted;
// move the addressed bytes down to bit 0: offset bit 1 selects the upper
// half-word, offset bit 0 then selects the upper byte of that half-word
always @(*) begin
rsp_data_shifted[31:16] = src_data[31:16];
rsp_data_shifted[15:0] = q_align_offs[rsp_texel_idx][i][1] ? src_data[31:16] : src_data[15:0];
rsp_data_shifted[7:0] = q_align_offs[rsp_texel_idx][i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0];
end
// keep only the texel's own bytes: stride 0 is one byte, 1 is two bytes,
// anything else the whole word; narrower texels are zero-extended
always @(*) begin
case (q_req_stride)
0: rsp_data_qual[i] = 32'(rsp_data_shifted[7:0]);
1: rsp_data_qual[i] = 32'(rsp_data_shifted[15:0]);
default: rsp_data_qual[i] = rsp_data_shifted;
endcase
end
end
// merge the incoming lanes into the texel slot named by the response tag;
// OR-merging lets partial responses for the same texel accumulate
always @(*) begin
rsp_texels_n = rsp_texels;
rsp_texels_n[rsp_texel_idx] |= rsp_data_qual;
end
// the accumulator is cleared whenever the head request is retired
always @(posedge clk) begin
if (reset || reqq_pop) begin
rsp_texels <= '0;
end else if (dcache_rsp_fire) begin
rsp_texels <= rsp_texels_n;
end
end
// rsp_rem_ctr counts the lane responses still outstanding. It is loaded
// when a dcache request fires while the counter is zero: active-lane count
// times four with filtering, times one without. Each accepted response
// subtracts the number of lanes it covers.
always @(posedge clk) begin
if (reset) begin
rsp_rem_ctr <= 0;
end else begin
if ((| dcache_req_fire) && 0 == rsp_rem_ctr) begin
rsp_rem_ctr <= q_req_filter ? {$countones(q_req_tmask), 2'b0} : {2'b0, $countones(q_req_tmask)};
end else if (dcache_rsp_fire) begin
rsp_rem_ctr <= rsp_rem_ctr - RSP_CTR_W'(rsp_cur_cnt);
end
end
end
// transpose back to [lane][texel]; uses the combinational next-state value
// so the final response's data is included in the same cycle
for (genvar i = 0; i < NUM_REQS; ++i) begin
for (genvar j = 0; j < 4; ++j) begin
assign rsp_texels_qual[i][j] = rsp_texels_n[j][i];
end
end
wire stall_out = rsp_valid && ~rsp_ready;
// the request completes on the response that brings the counter to zero
wire rsp_texels_done = dcache_rsp_fire && (rsp_rem_ctr == RSP_CTR_W'(rsp_cur_cnt));
assign reqq_pop = rsp_texels_done && ~stall_out;
VX_pipe_register #(
.DATAW (1 + NUM_REQS + REQ_INFO_WIDTH + (4 * NUM_REQS * 32)),
.RESETW (1)
) rsp_pipe_reg (
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({rsp_texels_done, q_req_tmask, q_req_info, rsp_texels_qual}),
.data_out ({rsp_valid, rsp_tmask, rsp_info, rsp_data})
);
// Can accept new cache response? Always, unless this response would finish
// the request while the output register is stalled.
assign dcache_rsp_if.ready = ~stall_out || (rsp_rem_ctr != RSP_CTR_W'(rsp_cur_cnt));
// Debug trace.
// NOTE(review): q_req_wid / q_req_PC used below are only declared when
// DBG_CACHE_REQ_INFO is defined as well.
`ifdef DBG_PRINT_TEX
wire [`NW_BITS-1:0] req_wid, rsp_wid;
wire [31:0] req_PC, rsp_PC;
assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0];
assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];
always @(posedge clk) begin
if ((| dcache_req_fire)) begin
$write("%t: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, dcache_req_if.tag);
`PRINT_ARRAY1D(req_texel_addr, NUM_REQS);
$write(", is_dup=%b\n", req_texel_dup);
end
if (dcache_rsp_fire) begin
$write("%t: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.valid, dcache_rsp_if.tag);
`PRINT_ARRAY1D(rsp_data_qual, NUM_REQS);
$write("\n");
end
if (req_valid && req_ready) begin
$write("%t: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, stride=%0d, addr=",
$time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_stride);
`PRINT_ARRAY2D(req_addr, 4, NUM_REQS);
$write("\n");
end
if (rsp_valid && rsp_ready) begin
$write("%t: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
$time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask);
`PRINT_ARRAY2D(rsp_data, 4, NUM_REQS);
$write("\n");
end
end
`endif
endmodule

View File

@@ -0,0 +1,136 @@
`include "VX_tex_define.vh"
// Texture sampler.
//
// Two-stage pipeline that converts the four fetched texels of every lane to
// the common 4x8-bit layout and blends them into one output texel:
//
//   stage 0: format conversion, then two interpolations along dimension 0
//            (texels 0/1 and texels 2/3) using req_blends[0].
//   stage 1: one interpolation along dimension 1 between the two stage-0
//            results using req_blends[1].
//
// Parameters:
//   CORE_ID        - only used by the debug trace.
//   REQ_INFO_WIDTH - width of the opaque side-band carried along the pipeline.
//   NUM_REQS       - number of parallel lanes; also the width of the thread
//                    masks, matching the widths budgeted in the pipeline
//                    registers below.
//
// Handshake: both stages advance together unless the output is valid and not
// accepted (stall_out); req_ready is the inverse of that stall.
module VX_tex_sampler #(
    parameter CORE_ID        = 0,
    parameter REQ_INFO_WIDTH = 1,
    parameter NUM_REQS       = 1
) (
    input wire clk,
    input wire reset,

    // inputs
    input wire                          req_valid,
    input wire [NUM_REQS-1:0]           req_tmask,
    input wire [`TEX_FORMAT_BITS-1:0]   req_format,
    input wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] req_blends,
    input wire [NUM_REQS-1:0][3:0][31:0] req_data,
    input wire [REQ_INFO_WIDTH-1:0]     req_info,
    output wire                         req_ready,

    // outputs
    output wire                         rsp_valid,
    output wire [NUM_REQS-1:0]          rsp_tmask,
    output wire [NUM_REQS-1:0][31:0]    rsp_data,
    output wire [REQ_INFO_WIDTH-1:0]    rsp_info,
    input wire                          rsp_ready
);

    `UNUSED_PARAM (CORE_ID)

    wire                      valid_s0;
    wire [NUM_REQS-1:0]       tmask_s0;
    wire [REQ_INFO_WIDTH-1:0] req_info_s0;
    wire [NUM_REQS-1:0][31:0] texel_ul, texel_uh;
    wire [NUM_REQS-1:0][31:0] texel_ul_s0, texel_uh_s0;
    wire [NUM_REQS-1:0][`BLEND_FRAC-1:0] blend_v_s0;
    wire [NUM_REQS-1:0][31:0] texel_v;

    wire stall_out;

    for (genvar i = 0; i < NUM_REQS; i++) begin

        wire [3:0][31:0] fmt_texels;

        // bring all four texels of the lane to the common channel layout
        for (genvar j = 0; j < 4; j++) begin
            VX_tex_format #(
                .CORE_ID (CORE_ID)
            ) tex_format (
                .format    (req_format),
                .texel_in  (req_data[i][j]),
                .texel_out (fmt_texels[j])
            );
        end

        // first pass: blend texel pairs (0,1) and (2,3) with the same factor
        VX_tex_lerp #(
        ) tex_lerp_ul (
            .blend (req_blends[0][i]),
            .in1   (fmt_texels[0]),
            .in2   (fmt_texels[1]),
            .out   (texel_ul[i])
        );

        VX_tex_lerp #(
        ) tex_lerp_uh (
            .blend (req_blends[0][i]),
            .in1   (fmt_texels[2]),
            .in2   (fmt_texels[3]),
            .out   (texel_uh[i])
        );
    end

    VX_pipe_register #(
        .DATAW  (1 + NUM_REQS + REQ_INFO_WIDTH + (NUM_REQS * `BLEND_FRAC) + (2 * NUM_REQS * 32)),
        .RESETW (1)
    ) pipe_reg0 (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({req_valid, req_tmask, req_info,    req_blends[1], texel_ul,    texel_uh}),
        .data_out ({valid_s0,  tmask_s0,  req_info_s0, blend_v_s0,    texel_ul_s0, texel_uh_s0})
    );

    // second pass: blend the two first-pass results
    for (genvar i = 0; i < NUM_REQS; i++) begin
        VX_tex_lerp #(
        ) tex_lerp_v (
            .blend (blend_v_s0[i]),
            .in1   (texel_ul_s0[i]),
            .in2   (texel_uh_s0[i]),
            .out   (texel_v[i])
        );
    end

    assign stall_out = rsp_valid && ~rsp_ready;

    VX_pipe_register #(
        .DATAW  (1 + NUM_REQS + REQ_INFO_WIDTH + (NUM_REQS * 32)),
        .RESETW (1)
    ) pipe_reg1 (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({valid_s0,  tmask_s0,  req_info_s0, texel_v}),
        .data_out ({rsp_valid, rsp_tmask, rsp_info,    rsp_data})
    );

    // can accept new request?
    assign req_ready = ~stall_out;

`ifdef DBG_PRINT_TEX
    wire [`NW_BITS-1:0] req_wid, rsp_wid;
    wire [31:0]         req_PC, rsp_PC;
    assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0];
    assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];

    always @(posedge clk) begin
        if (req_valid && req_ready) begin
            $write("%t: core%0d-tex-sampler-req: wid=%0d, PC=%0h, tmask=%b, format=%0d, data=",
                    $time, CORE_ID, req_wid, req_PC, req_tmask, req_format);
            `PRINT_ARRAY2D(req_data, 4, NUM_REQS);
            $write(", u0=");
            `PRINT_ARRAY1D(req_blends[0], NUM_REQS);
            $write(", v0=");
            `PRINT_ARRAY1D(req_blends[1], NUM_REQS);
            $write("\n");
        end
        if (rsp_valid && rsp_ready) begin
            $write("%t: core%0d-tex-sampler-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
                    $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask);
            `PRINT_ARRAY1D(rsp_data, NUM_REQS);
            $write("\n");
        end
    end
`endif

endmodule

View File

@@ -0,0 +1,27 @@
`include "VX_tex_define.vh"
// Texel stride decoder (purely combinational).
//
// Maps a texture format code to log2 of the texel size in bytes:
//   A8, L8                     -> 0 (1 byte)
//   L8A8, R5G6B5, R4G4B4A4     -> 1 (2 bytes)
//   anything else (R8G8B8A8)   -> 2 (4 bytes)
module VX_tex_stride #(
    parameter CORE_ID = 0
) (
    input wire [`TEX_FORMAT_BITS-1:0]  format,
    output wire [`TEX_STRIDE_BITS-1:0] log_stride
);
    `UNUSED_PARAM (CORE_ID)

    reg [`TEX_STRIDE_BITS-1:0] stride_sel;

    always @(*) begin
        case (format)
            // single-byte texels
            `TEX_FORMAT_A8,
            `TEX_FORMAT_L8:
                stride_sel = 0;
            // two-byte texels
            `TEX_FORMAT_L8A8,
            `TEX_FORMAT_R5G6B5,
            `TEX_FORMAT_R4G4B4A4:
                stride_sel = 1;
            // four-byte texels: `TEX_FORMAT_R8G8B8A8 and unassigned codes
            default:
                stride_sel = 2;
        endcase
    end

    assign log_stride = stride_sel;

endmodule

View File

@@ -0,0 +1,226 @@
`include "VX_tex_define.vh"
// Texture unit top level.
//
// Holds the per-texture state programmed through CSR writes and chains the
// three processing stages:
//
//   VX_tex_addr    - coordinates -> four texel addresses + blend factors
//   VX_tex_memory  - texel addresses -> texel data (through the dcache)
//   VX_tex_sampler - texel data + blend factors -> one filtered texel per lane
//
// Information a later stage needs is threaded through the earlier stages'
// opaque "info" side-band:
//   REQ_INFO_WIDTH_S : {rd, wb, wid, PC}            (sampler pass-through)
//   REQ_INFO_WIDTH_A : {format, <S fields>}         (address stage)
//   REQ_INFO_WIDTH_M : {blends, <A fields>}         (memory stage)
module VX_tex_unit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // Texture unit <-> Memory Unit
    VX_dcache_req_if dcache_req_if,
    VX_dcache_rsp_if dcache_rsp_if,

    // Inputs
    VX_tex_req_if   tex_req_if,
    VX_tex_csr_if   tex_csr_if,

    // Outputs
    VX_tex_rsp_if   tex_rsp_if
);

    localparam REQ_INFO_WIDTH_S = `NR_BITS + 1 + `NW_BITS + 32;
    localparam REQ_INFO_WIDTH_A = `TEX_FORMAT_BITS + REQ_INFO_WIDTH_S;
    localparam REQ_INFO_WIDTH_M = (2 * `NUM_THREADS * `BLEND_FRAC) + REQ_INFO_WIDTH_A;

    // per-texture state; mip offset and dimensions are kept per mip level
    reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
    reg [`TEX_DIM_BITS-1:0]    tex_dims   [1:0][`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
    reg [`TEX_ADDR_BITS-1:0]   tex_baddr  [`NUM_TEX_UNITS-1:0];
    reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
    reg [`TEX_WRAP_BITS-1:0]   tex_wraps  [1:0][`NUM_TEX_UNITS-1:0];
    reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0];

    // CSRs programming

    for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
        // for the per-level registers the target mip level is carried in the
        // write data starting at bit 28
        wire [`TEX_LOD_BITS-1:0] mip_level = tex_csr_if.write_data[28 +: `TEX_LOD_BITS];
        always @(posedge clk) begin
            if (tex_csr_if.write_enable) begin
                case (tex_csr_if.write_addr)
                    `CSR_TEX_ADDR(i) : begin
                        tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
                    end
                    `CSR_TEX_FORMAT(i) : begin
                        tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
                    end
                    `CSR_TEX_WRAP(i) : begin
                        // both wrap modes share one CSR: dimension 0 low, dimension 1 above
                        tex_wraps[0][i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
                        tex_wraps[1][i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
                    end
                    `CSR_TEX_FILTER(i) : begin
                        tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
                    end
                    `CSR_TEX_MIPOFF(i) : begin
                        tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
                    end
                    `CSR_TEX_WIDTH(i) : begin
                        tex_dims[0][i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
                    end
                    `CSR_TEX_HEIGHT(i) : begin
                        tex_dims[1][i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
                    end
                endcase
            end
        end
    end

    // mipmap attributes: each lane selects its own mip level from its lod value

    wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0]    sel_mipoff;
    wire [1:0][`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0]  sel_dims;

    for (genvar i = 0; i < `NUM_THREADS; ++i) begin
        wire [`NTEX_BITS-1:0]    unit      = tex_req_if.unit[`NTEX_BITS-1:0];
        wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][20+:`TEX_LOD_BITS];
        assign sel_mipoff[i]  = tex_mipoff[unit][mip_level];
        assign sel_dims[0][i] = tex_dims[0][unit][mip_level];
        assign sel_dims[1][i] = tex_dims[1][unit][mip_level];
    end

    // address generation

    wire                        mem_req_valid;
    wire [`NUM_THREADS-1:0]     mem_req_tmask;
    wire [`TEX_FILTER_BITS-1:0] mem_req_filter;
    wire [`TEX_STRIDE_BITS-1:0] mem_req_stride;
    wire [1:0][`NUM_THREADS-1:0][`BLEND_FRAC-1:0] mem_req_blends;
    wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr;
    wire [REQ_INFO_WIDTH_A-1:0] mem_req_info;
    wire                        mem_req_ready;

    VX_tex_addr #(
        .CORE_ID        (CORE_ID),
        .REQ_INFO_WIDTH (REQ_INFO_WIDTH_A),
        .NUM_REQS       (`NUM_THREADS)
    ) tex_addr (
        .clk        (clk),
        .reset      (reset),

        .req_valid  (tex_req_if.valid),
        .req_tmask  (tex_req_if.tmask),
        .req_coords (tex_req_if.coords),
        .req_format (tex_format[tex_req_if.unit]),
        .req_filter (tex_filter[tex_req_if.unit]),
        .req_wraps  ({tex_wraps[1][tex_req_if.unit], tex_wraps[0][tex_req_if.unit]}),
        .req_baseaddr(tex_baddr[tex_req_if.unit]),
        .req_mipoffset(sel_mipoff),
        .req_logdims(sel_dims),
        .req_info   ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}),
        .req_ready  (tex_req_if.ready),

        .rsp_valid  (mem_req_valid),
        .rsp_tmask  (mem_req_tmask),
        .rsp_filter (mem_req_filter),
        .rsp_stride (mem_req_stride),
        .rsp_addr   (mem_req_addr),
        .rsp_blends (mem_req_blends),
        .rsp_info   (mem_req_info),
        .rsp_ready  (mem_req_ready)
    );

    // retrieve texel values from memory

    wire                        mem_rsp_valid;
    wire [`NUM_THREADS-1:0]     mem_rsp_tmask;
    wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data;
    wire [REQ_INFO_WIDTH_M-1:0] mem_rsp_info;
    wire                        mem_rsp_ready;

    VX_tex_memory #(
        .CORE_ID        (CORE_ID),
        .REQ_INFO_WIDTH (REQ_INFO_WIDTH_M),
        .NUM_REQS       (`NUM_THREADS)
    ) tex_memory (
        .clk       (clk),
        .reset     (reset),

        // memory interface
        .dcache_req_if (dcache_req_if),
        .dcache_rsp_if (dcache_rsp_if),

        // inputs
        .req_valid (mem_req_valid),
        .req_tmask (mem_req_tmask),
        .req_filter(mem_req_filter),
        .req_stride(mem_req_stride),
        .req_addr  (mem_req_addr),
        .req_info  ({mem_req_blends, mem_req_info}),
        .req_ready (mem_req_ready),

        // outputs
        .rsp_valid (mem_rsp_valid),
        .rsp_tmask (mem_rsp_tmask),
        .rsp_data  (mem_rsp_data),
        .rsp_info  (mem_rsp_info),
        .rsp_ready (mem_rsp_ready)
    );

    // apply sampler

    // Same [dimension][lane] layout as mem_req_blends and the sampler's
    // req_blends port, so the unpacked field can be indexed meaningfully
    // (the bit-level content is identical either way since packed arrays
    // are copied flat).
    wire [1:0][`NUM_THREADS-1:0][`BLEND_FRAC-1:0] rsp_blends;
    wire [`TEX_FORMAT_BITS-1:0] rsp_format;
    wire [REQ_INFO_WIDTH_S-1:0] rsp_info;

    assign {rsp_blends, rsp_format, rsp_info} = mem_rsp_info;

    VX_tex_sampler #(
        .CORE_ID        (CORE_ID),
        .REQ_INFO_WIDTH (REQ_INFO_WIDTH_S),
        .NUM_REQS       (`NUM_THREADS)
    ) tex_sampler (
        .clk        (clk),
        .reset      (reset),

        // inputs
        .req_valid  (mem_rsp_valid),
        .req_tmask  (mem_rsp_tmask),
        .req_data   (mem_rsp_data),
        .req_format (rsp_format),
        .req_blends (rsp_blends),
        .req_info   (rsp_info),
        .req_ready  (mem_rsp_ready),

        // outputs
        .rsp_valid  (tex_rsp_if.valid),
        .rsp_tmask  (tex_rsp_if.tmask),
        .rsp_data   (tex_rsp_if.data),
        .rsp_info   ({tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC}),
        .rsp_ready  (tex_rsp_if.ready)
    );

`ifdef DBG_PRINT_TEX
    for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
        always @(posedge clk) begin
            if (tex_csr_if.write_enable
             && (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i)
              && tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin
                $display("%t: core%0d-tex-csr: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]);
                $display("%t: core%0d-tex-csr: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]);
                $display("%t: core%0d-tex-csr: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wraps[0][i]);
                $display("%t: core%0d-tex-csr: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wraps[1][i]);
                $display("%t: core%0d-tex-csr: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]);
                $display("%t: core%0d-tex-csr: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]);
                $display("%t: core%0d-tex-csr: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_dims[0][i][0]);
                $display("%t: core%0d-tex-csr: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_dims[1][i][0]);
            end
        end
    end

    always @(posedge clk) begin
        if (tex_req_if.valid && tex_req_if.ready) begin
            // $write (not $display) so the record stays on one line until
            // the explicit "\n" below, like every other trace in this unit
            $write("%t: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=",
                    $time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod);
            `PRINT_ARRAY1D(tex_req_if.coords[0], `NUM_THREADS);
            $write(", v=");
            `PRINT_ARRAY1D(tex_req_if.coords[1], `NUM_THREADS);
            $write("\n");
        end
        if (tex_rsp_if.valid && tex_rsp_if.ready) begin
            $write("%t: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
                    $time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask);
            `PRINT_ARRAY1D(tex_rsp_if.data, `NUM_THREADS);
            $write("\n");
        end
    end
`endif

endmodule

View File

@@ -0,0 +1,32 @@
`include "VX_tex_define.vh"
// Texture coordinate wrap (purely combinational).
//
// Reduces a 32-bit fixed-point coordinate to its FIXED_FRAC-bit fractional
// range according to the wrap mode:
//   CLAMP  - saturate to [0, FIXED_MASK]
//   MIRROR - fractional bits, inverted when the lowest integer bit is set
//   REPEAT - fractional bits unchanged (also used for unassigned codes)
module VX_tex_wrap #(
    parameter CORE_ID = 0
) (
    input wire [`TEX_WRAP_BITS-1:0] wrap_i,
    input wire [31:0]               coord_i,
    output wire [`FIXED_FRAC-1:0]   coord_o
);
    `UNUSED_PARAM (CORE_ID)

    // saturated coordinate; only its low FIXED_FRAC bits are consumed
    wire [31:0] saturated = `CLAMP(coord_i, 0, `FIXED_MASK);
    `UNUSED_VAR (saturated)

    // fractional part, and an all-ones mask on odd integer periods
    wire [`FIXED_FRAC-1:0] frac_bits   = coord_i[`FIXED_FRAC-1:0];
    wire [`FIXED_FRAC-1:0] mirror_mask = {`FIXED_FRAC{coord_i[`FIXED_FRAC]}};

    reg [`FIXED_FRAC-1:0] wrapped;

    always @(*) begin
        case (wrap_i)
            `TEX_WRAP_CLAMP:  wrapped = saturated[`FIXED_FRAC-1:0];
            `TEX_WRAP_MIRROR: wrapped = frac_bits ^ mirror_mask;
            default:          wrapped = frac_bits; // `TEX_WRAP_REPEAT
        endcase
    end

    assign coord_o = wrapped;

endmodule