diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 2c6c1093..b48e11d1 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -288,11 +288,11 @@ // Core request tag bits `ifdef EXT_TEX_ENABLE -`define LSU_DACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSUQ_ADDR_BITS) -`define TEX_DACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + 2) -`define LSU_TEX_DACHE_TAG_BITS `MAX(`LSU_DACHE_TAG_BITS, `TEX_DACHE_TAG_BITS) +`define LSU_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSUQ_ADDR_BITS) +`define TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + 2) +`define LSU_TEX_DCACHE_TAG_BITS `MAX(`LSU_DCACHE_TAG_BITS, `TEX_DCACHE_TAG_BITS) `define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + 1) -`define DCORE_TAG_WIDTH (`LSU_TEX_DACHE_TAG_BITS + 1) +`define DCORE_TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS + 1) `else `define DCORE_TAG_ID_BITS `LSUQ_ADDR_BITS `define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `LSUQ_ADDR_BITS) diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 860d3775..09304144 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -51,31 +51,31 @@ module VX_execute #( VX_dcache_core_req_if #( .LANES(`NUM_THREADS), .WORD_SIZE(4), - .CORE_TAG_WIDTH(`LSU_DACHE_TAG_BITS) + .CORE_TAG_WIDTH(`LSU_DCACHE_TAG_BITS) ) lsu_dcache_req_if(); VX_dcache_core_rsp_if #( .LANES(`NUM_THREADS), .WORD_SIZE(4), - .CORE_TAG_WIDTH(`LSU_DACHE_TAG_BITS) + .CORE_TAG_WIDTH(`LSU_DCACHE_TAG_BITS) ) lsu_dcache_rsp_if(); VX_dcache_core_req_if #( .LANES(`NUM_THREADS), .WORD_SIZE(4), - .CORE_TAG_WIDTH(`TEX_DACHE_TAG_BITS) + .CORE_TAG_WIDTH(`TEX_DCACHE_TAG_BITS) ) tex_dcache_req_if(); VX_dcache_core_rsp_if #( .LANES(`NUM_THREADS), .WORD_SIZE(4), - .CORE_TAG_WIDTH(`TEX_DACHE_TAG_BITS) + .CORE_TAG_WIDTH(`TEX_DCACHE_TAG_BITS) ) tex_dcache_rsp_if(); VX_tex_csr_if tex_csr_if(); - wire [`NUM_THREADS-1:0][`LSU_TEX_DACHE_TAG_BITS-1:0] tex_tag_in; - wire [`LSU_TEX_DACHE_TAG_BITS-1:0] tex_tag_out; + wire [`NUM_THREADS-1:0][`LSU_TEX_DCACHE_TAG_BITS-1:0] tex_tag_in; + wire [`LSU_TEX_DCACHE_TAG_BITS-1:0] tex_tag_out; for (genvar i = 0; i < `NUM_THREADS; ++i) begin assign tex_tag_in[i][`LSUQ_ADDR_BITS-1:0] = `LSUQ_ADDR_BITS'(tex_dcache_req_if.tag[i][1:0]); @@ -93,7 +93,7 @@ module VX_execute #( .NUM_REQS (2), .LANES (`NUM_THREADS), .WORD_SIZE (4), - .TAG_IN_WIDTH (`LSU_TEX_DACHE_TAG_BITS), + .TAG_IN_WIDTH (`LSU_TEX_DCACHE_TAG_BITS), .TAG_OUT_WIDTH (`DCORE_TAG_WIDTH) ) tex_lsu_arb ( .clk (clk), diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index f1d39ebc..0817f447 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -115,8 +115,8 @@ module VX_gpu_unit #( assign tex_req_if.wb = gpu_req_if.wb; assign tex_req_if.unit = gpu_req_if.op_mod[`NTEX_BITS-1:0]; - assign tex_req_if.u = gpu_req_if.rs1_data; - assign tex_req_if.v = gpu_req_if.rs2_data; + assign tex_req_if.coords[0] = gpu_req_if.rs1_data; + assign tex_req_if.coords[1] = gpu_req_if.rs2_data; assign tex_req_if.lod = gpu_req_if.rs3_data; VX_tex_unit #( diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 25f28ac8..f2c1fac0 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -275,7 +275,7 @@ module VX_lsu_unit #( `ifdef DBG_PRINT_CORE_DCACHE `IGNORE_WARNINGS_BEGIN - reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH:0] pending_reqs; + reg [`LSUQ_SIZE-1:0][`LSU_DCACHE_TAG_BITS:0] pending_reqs; `IGNORE_WARNINGS_END always @(posedge clk) begin @@ -312,7 +312,7 @@ module VX_lsu_unit #( $write("%t: *** D$%0d queue-full:", $time, CORE_ID); for (integer j = 0; j < `LSUQ_SIZE; j++) begin if (pending_reqs[j][0]) begin - $write(" %0d->%0h", j, pending_reqs[j][1 +: `DCORE_TAG_WIDTH]); + $write(" %0d->%0h", j, pending_reqs[j][1 +: `LSU_DCACHE_TAG_BITS]); end end $write("\n"); diff --git a/hw/rtl/fp_cores/VX_fpu_fpnew.v b/hw/rtl/fp_cores/VX_fpu_fpnew.v index 3a8a8106..450a8594 100644 --- a/hw/rtl/fp_cores/VX_fpu_fpnew.v +++ b/hw/rtl/fp_cores/VX_fpu_fpnew.v @@ -3,8 +3,7 @@ `include "defs_div_sqrt_mvp.sv" `TRACING_OFF -module VX_fpu_fpnew -#( +module VX_fpu_fpnew #( parameter TAGW = 1, parameter FMULADD = 1, parameter FDIVSQRT = 1, diff --git a/hw/rtl/interfaces/VX_tex_req_if.v b/hw/rtl/interfaces/VX_tex_req_if.v index d2430404..e00a2e0e 100644 --- a/hw/rtl/interfaces/VX_tex_req_if.v +++ b/hw/rtl/interfaces/VX_tex_req_if.v @@ -13,8 +13,7 @@ interface VX_tex_req_if (); wire wb; wire [`NTEX_BITS-1:0] unit; - wire [`NUM_THREADS-1:0][31:0] u; - wire [`NUM_THREADS-1:0][31:0] v; + wire [1:0][`NUM_THREADS-1:0][31:0] coords; wire [`NUM_THREADS-1:0][31:0] lod; wire ready; diff --git a/hw/rtl/tex_unit/VX_tex_addr.v b/hw/rtl/tex_unit/VX_tex_addr.v index af96e0f9..149af193 100644 --- a/hw/rtl/tex_unit/VX_tex_addr.v +++ b/hw/rtl/tex_unit/VX_tex_addr.v @@ -1,67 +1,51 @@ `include "VX_tex_define.vh" module VX_tex_addr #( - parameter CORE_ID = 0, - parameter REQ_INFO_WIDTH = 1 + parameter CORE_ID = 0, + parameter REQ_INFO_WIDTH = 1, + parameter NUM_REQS = 1 ) ( input wire clk, input wire reset, - // handshake - - input wire valid_in, - output wire ready_in, - // inputs - input wire [`NW_BITS-1:0] req_wid, - input wire [`NUM_THREADS-1:0] req_tmask, - input wire [31:0] req_PC, - input wire [REQ_INFO_WIDTH-1:0] req_info, - - input wire [`TEX_FORMAT_BITS-1:0] format, - input wire [`TEX_FILTER_BITS-1:0] filter, - input wire [`TEX_WRAP_BITS-1:0] wrap_u, - input wire [`TEX_WRAP_BITS-1:0] wrap_v, - - input wire [`TEX_ADDR_BITS-1:0] base_addr, - - input wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] mip_offsets, - input wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_widths, - input wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_heights, - - input wire [`NUM_THREADS-1:0][31:0] coord_u, - input wire [`NUM_THREADS-1:0][31:0] coord_v, + input wire req_valid, + input wire [NUM_REQS-1:0] req_tmask, + input wire [1:0][NUM_REQS-1:0][31:0] req_coords, + input wire [`TEX_FORMAT_BITS-1:0] req_format, + input wire [`TEX_FILTER_BITS-1:0] req_filter, + input wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps, + input wire [`TEX_ADDR_BITS-1:0] req_baseaddr, + input wire [NUM_REQS-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoffset, + input wire [1:0][NUM_REQS-1:0][`TEX_DIM_BITS-1:0] req_logdims, + input wire [REQ_INFO_WIDTH-1:0] req_info, + output wire req_ready, // outputs - output wire rsp_valid, - output wire [`NW_BITS-1:0] rsp_wid, - output wire [`NUM_THREADS-1:0] rsp_tmask, - output wire [31:0] rsp_PC, - output wire [`TEX_FILTER_BITS-1:0] rsp_filter, - output wire [`TEX_STRIDE_BITS-1:0] rsp_stride, - output wire [`NUM_THREADS-1:0][3:0][31:0] rsp_addr, - output wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] rsp_blend_u, - output wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] rsp_blend_v, - output wire [REQ_INFO_WIDTH-1:0] rsp_info, - input wire rsp_ready + output wire rsp_valid, + output wire [NUM_REQS-1:0] rsp_tmask, + output wire [`TEX_FILTER_BITS-1:0] rsp_filter, + output wire [`TEX_STRIDE_BITS-1:0] rsp_stride, + output wire [NUM_REQS-1:0][3:0][31:0] rsp_addr, + output wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] rsp_blends, + output wire [REQ_INFO_WIDTH-1:0] rsp_info, + input wire rsp_ready ); `UNUSED_PARAM (CORE_ID) - wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_u, clamped_v, clamped_u_s0, clamped_v_s0; - wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0; - wire [`NUM_THREADS-1:0][31:0] mip_addr, mip_addr_s0; - - wire valid_in_s0; - wire [`NW_BITS-1:0] req_wid_s0; - wire [`NUM_THREADS-1:0] req_tmask_s0; - wire [31:0] req_PC_s0; - wire [REQ_INFO_WIDTH-1:0] req_info_s0; + wire valid_s0; + wire [NUM_REQS-1:0] tmask_s0; wire [`TEX_FILTER_BITS-1:0] filter_s0; - wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_widths_s0; - wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_heights_s0; + wire [REQ_INFO_WIDTH-1:0] req_info_s0; + + wire [1:0][NUM_REQS-1:0][`FIXED_FRAC-1:0] clamped_lo, clamped_lo_s0; + wire [1:0][NUM_REQS-1:0][`FIXED_FRAC-1:0] clamped_hi, clamped_hi_s0; + wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0; + wire [NUM_REQS-1:0][31:0] mip_addr, mip_addr_s0; + wire [1:0][NUM_REQS-1:0][`TEX_DIM_BITS-1:0] log_dims_s0; wire stall_out; @@ -70,112 +54,95 @@ module VX_tex_addr #( VX_tex_stride #( .CORE_ID (CORE_ID) ) tex_stride ( - .format (format), + .format (req_format), .log_stride (log_stride) ); // addressing mode - for (genvar i = 0; i < `NUM_THREADS; ++i) begin - wire [1:0][31:0] fu, fv; + for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar j = 0; j < 2; ++j) begin + wire [31:0] coord_lo, coord_hi; - assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0); - assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_widths[i]) : 0); + assign coord_lo = req_coords[j][i] - (req_filter ? (`FIXED_HALF >> req_logdims[j][i]) : 0); + assign coord_hi = req_coords[j][i] + (req_filter ? (`FIXED_HALF >> req_logdims[j][i]) : 0); - assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_heights[i]) : 0); - assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log_heights[i]) : 0); + VX_tex_wrap #( + .CORE_ID (CORE_ID) + ) tex_wrap_lo ( + .wrap_i (req_wraps[j]), + .coord_i (coord_lo), + .coord_o (clamped_lo[j][i]) + ); - VX_tex_wrap #( - .CORE_ID (CORE_ID) - ) tex_wrap_u0 ( - .wrap_i (wrap_u), - .coord_i (fu[0]), - .coord_o (clamped_u[i][0]) - ); - - VX_tex_wrap #( - .CORE_ID (CORE_ID) - ) tex_wrap_u1 ( - .wrap_i (wrap_u), - .coord_i (fu[1]), - .coord_o (clamped_u[i][1]) - ); - - VX_tex_wrap #( - .CORE_ID (CORE_ID) - ) tex_wrap_v0 ( - .wrap_i (wrap_v), - .coord_i (fv[0]), - .coord_o (clamped_v[i][0]) - ); - - VX_tex_wrap #( - .CORE_ID (CORE_ID) - ) tex_wrap_v1 ( - .wrap_i (wrap_v), - .coord_i (fv[1]), - .coord_o (clamped_v[i][1]) - ); - - assign mip_addr[i] = base_addr + 32'(mip_offsets[i]); + VX_tex_wrap #( + .CORE_ID (CORE_ID) + ) tex_wrap_hi ( + .wrap_i (req_wraps[j]), + .coord_i (coord_hi), + .coord_o (clamped_hi[j][i]) + ); + end + assign mip_addr[i] = req_baseaddr + 32'(req_mipoffset[i]); end VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + `NUM_THREADS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)), + .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + NUM_REQS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, req_info, log_widths, log_heights, mip_addr, clamped_u, clamped_v}), - .data_out ({valid_in_s0, req_wid_s0, req_tmask_s0, req_PC_s0, filter_s0, log_stride_s0, req_info_s0, log_widths_s0, log_heights_s0, mip_addr_s0, clamped_u_s0, clamped_v_s0}) + .data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, req_logdims, mip_addr, clamped_lo, clamped_hi}), + .data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0}) ); // addresses generation - wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] blend_u, blend_v; - wire [`NUM_THREADS-1:0][3:0][31:0] addr; + wire [1:0][NUM_REQS-1:0][`FIXED_INT-1:0] scaled_lo, scaled_hi; + wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] blends; + wire [NUM_REQS-1:0][3:0][31:0] addr; - for (genvar i = 0; i < `NUM_THREADS; ++i) begin - wire [1:0][`FIXED_INT-1:0] x, y; - - assign x[0] = `FIXED_INT'(clamped_u_s0[i][0] >> ((`FIXED_FRAC) - log_widths_s0[i])); - assign x[1] = `FIXED_INT'(clamped_u_s0[i][1] >> ((`FIXED_FRAC) - log_widths_s0[i])); - assign y[0] = `FIXED_INT'(clamped_v_s0[i][0] >> ((`FIXED_FRAC) - log_heights_s0[i])); - assign y[1] = `FIXED_INT'(clamped_v_s0[i][1] >> ((`FIXED_FRAC) - log_heights_s0[i])); - - assign addr[i][0] = mip_addr_s0[i] + (32'(x[0]) + (32'(y[0]) << log_widths_s0[i])) << log_stride_s0; - assign addr[i][1] = mip_addr_s0[i] + (32'(x[1]) + (32'(y[0]) << log_widths_s0[i])) << log_stride_s0; - assign addr[i][2] = mip_addr_s0[i] + (32'(x[0]) + (32'(y[1]) << log_widths_s0[i])) << log_stride_s0; - assign addr[i][3] = mip_addr_s0[i] + (32'(x[1]) + (32'(y[1]) << log_widths_s0[i])) << log_stride_s0; + for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar j = 0; j < 2; ++j) begin + assign scaled_lo[j][i] = `FIXED_INT'(clamped_lo_s0[j][i] >> ((`FIXED_FRAC) - log_dims_s0[j][i])); + assign scaled_hi[j][i] = `FIXED_INT'(clamped_hi_s0[j][i] >> ((`FIXED_FRAC) - log_dims_s0[j][i])); + assign blends[j][i] = filter_s0 ? clamped_lo_s0[j][i][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0); + end end - for (genvar i = 0; i < `NUM_THREADS; ++i) begin - assign blend_u[i] = filter_s0 ? clamped_u_s0[i][0][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0); - assign blend_v[i] = filter_s0 ? clamped_v_s0[i][0][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0); + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign addr[i][0] = mip_addr_s0[i] + (32'(scaled_lo[0][i]) + (32'(scaled_lo[1][i]) << log_dims_s0[0][i])) << log_stride_s0; + assign addr[i][1] = mip_addr_s0[i] + (32'(scaled_hi[0][i]) + (32'(scaled_lo[1][i]) << log_dims_s0[0][i])) << log_stride_s0; + assign addr[i][2] = mip_addr_s0[i] + (32'(scaled_lo[0][i]) + (32'(scaled_hi[1][i]) << log_dims_s0[0][i])) << log_stride_s0; + assign addr[i][3] = mip_addr_s0[i] + (32'(scaled_hi[0][i]) + (32'(scaled_hi[1][i]) << log_dims_s0[0][i])) << log_stride_s0; end assign stall_out = rsp_valid && ~rsp_ready; VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (`NUM_THREADS * 4 * 32) + (2*`NUM_THREADS * `BLEND_FRAC) + REQ_INFO_WIDTH), + .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `BLEND_FRAC) + REQ_INFO_WIDTH), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({valid_in_s0, req_wid_s0, req_tmask_s0, req_PC_s0, filter_s0, log_stride_s0, addr, blend_u, blend_v, req_info_s0}), - .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_stride, rsp_addr, rsp_blend_u, rsp_blend_v, rsp_info}) + .data_in ({valid_s0, tmask_s0, filter_s0, log_stride_s0, addr, blends, req_info_s0}), + .data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_stride, rsp_addr, rsp_blends, rsp_info}) ); - assign ready_in = ~stall_out; + assign req_ready = ~stall_out; `ifdef DBG_PRINT_TEX + wire [`NW_BITS-1:0] rsp_wid; + wire [31:0] rsp_PC; + assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0]; + always @(posedge clk) begin if (rsp_valid && rsp_ready) begin - $write("%t: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, filter=%0d, tride=%0d, addr=", + $write("%t: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, req_filter=%0d, tride=%0d, addr=", $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_stride); - `PRINT_ARRAY2D(rsp_addr, 4, `NUM_THREADS); + `PRINT_ARRAY2D(rsp_addr, 4, NUM_REQS); $write("\n"); end end diff --git a/hw/rtl/tex_unit/VX_tex_memory.v b/hw/rtl/tex_unit/VX_tex_memory.v index 84344a22..784097dd 100644 --- a/hw/rtl/tex_unit/VX_tex_memory.v +++ b/hw/rtl/tex_unit/VX_tex_memory.v @@ -1,7 +1,8 @@ `include "VX_tex_define.vh" module VX_tex_memory #( parameter CORE_ID = 0, - parameter REQ_INFO_WIDTH = 1 + parameter REQ_INFO_WIDTH = 1, + parameter NUM_REQS = 1 ) ( input wire clk, input wire reset, @@ -12,36 +13,32 @@ module VX_tex_memory #( // inputs input wire req_valid, - input wire [`NW_BITS-1:0] req_wid, - input wire [`NUM_THREADS-1:0] req_tmask, - input wire [31:0] req_PC, + input wire [NUM_REQS-1:0] req_tmask, input wire [`TEX_FILTER_BITS-1:0] req_filter, input wire [`TEX_STRIDE_BITS-1:0] req_stride, - input wire [`NUM_THREADS-1:0][3:0][31:0] req_addr, + input wire [NUM_REQS-1:0][3:0][31:0] req_addr, input wire [REQ_INFO_WIDTH-1:0] req_info, output wire req_ready, // outputs output wire rsp_valid, - output wire [`NW_BITS-1:0] rsp_wid, - output wire [`NUM_THREADS-1:0] rsp_tmask, - output wire [31:0] rsp_PC, - output wire [`NUM_THREADS-1:0][3:0][31:0] rsp_data, + output wire [NUM_REQS-1:0] rsp_tmask, + output wire [NUM_REQS-1:0][3:0][31:0] rsp_data, output wire [REQ_INFO_WIDTH-1:0] rsp_info, input wire rsp_ready ); `UNUSED_PARAM (CORE_ID) - localparam RSP_CTR_W = $clog2(`NUM_THREADS * 4 + 1); + localparam RSP_CTR_W = $clog2(NUM_REQS * 4 + 1); wire [3:0] dup_reqs; - wire [3:0][`NUM_THREADS-1:0][29:0] req_addr_w; - wire [3:0][`NUM_THREADS-1:0][1:0] align_offs; + wire [3:0][NUM_REQS-1:0][29:0] req_addr_w; + wire [3:0][NUM_REQS-1:0][1:0] align_offs; // reorder address into quads - for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar j = 0; j < 4; ++j) begin assign req_addr_w[j][i] = req_addr[i][j][31:2]; assign align_offs[j][i] = req_addr[i][j][1:0]; @@ -51,8 +48,8 @@ module VX_tex_memory #( // find duplicate addresses for (genvar i = 0; i < 4; ++i) begin - wire [`NUM_THREADS-1:0] addr_matches; - for (genvar j = 0; j < `NUM_THREADS; j++) begin + wire [NUM_REQS-1:0] addr_matches; + for (genvar j = 0; j < NUM_REQS; j++) begin assign addr_matches[j] = (req_addr_w[i][0] == req_addr_w[i][j]) || ~req_tmask[j]; end assign dup_reqs[i] = req_tmask[0] && (& addr_matches); @@ -62,20 +59,18 @@ module VX_tex_memory #( wire reqq_push, reqq_pop, reqq_empty, reqq_full; - wire [3:0][`NUM_THREADS-1:0][29:0] q_req_addr; - wire [`NW_BITS-1:0] q_req_wid; - wire [`NUM_THREADS-1:0] q_req_tmask; - wire [31:0] q_req_PC; + wire [3:0][NUM_REQS-1:0][29:0] q_req_addr; + wire [NUM_REQS-1:0] q_req_tmask; wire [`TEX_FILTER_BITS-1:0] q_req_filter; wire [REQ_INFO_WIDTH-1:0] q_req_info; wire [`TEX_STRIDE_BITS-1:0] q_req_stride; - wire [3:0][`NUM_THREADS-1:0][1:0] q_align_offs; + wire [3:0][NUM_REQS-1:0][1:0] q_align_offs; wire [3:0] q_dup_reqs; assign reqq_push = req_valid && req_ready; VX_fifo_queue #( - .DATAW ((`NUM_THREADS * 4 * 30) + `NW_BITS + `NUM_THREADS + 32 + REQ_INFO_WIDTH + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * `NUM_THREADS * 2) + 4), + .DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFO_WIDTH + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * NUM_REQS * 2) + 4), .SIZE (`LSUQ_SIZE), .BUFFERED (1) ) req_queue ( @@ -83,8 +78,8 @@ module VX_tex_memory #( .reset (reset), .push (reqq_push), .pop (reqq_pop), - .data_in ({req_addr_w, req_wid, req_tmask, req_PC, req_info, req_filter, req_stride, align_offs, dup_reqs}), - .data_out ({q_req_addr, q_req_wid, q_req_tmask, q_req_PC, q_req_info, q_req_filter, q_req_stride, q_align_offs, q_dup_reqs}), + .data_in ({req_addr_w, req_tmask, req_info, req_filter, req_stride, align_offs, dup_reqs}), + .data_out ({q_req_addr, q_req_tmask, q_req_info, q_req_filter, q_req_stride, q_align_offs, q_dup_reqs}), .empty (reqq_empty), .full (reqq_full), `UNUSED_PIN (alm_full), @@ -100,7 +95,7 @@ module VX_tex_memory #( wire req_texel_valid; wire sent_all_ready, last_texel_sent; wire req_texel_dup; - wire [`NUM_THREADS-1:0][29:0] req_texel_addr; + wire [NUM_REQS-1:0][29:0] req_texel_addr; reg [1:0] req_texel_idx; reg req_texels_done; @@ -129,9 +124,9 @@ module VX_tex_memory #( // DCache Request - reg [`NUM_THREADS-1:0] texel_sent_mask; - wire [`NUM_THREADS-1:0] dcache_req_fire; - wire [`NUM_THREADS-1:0] req_dup_mask; + reg [NUM_REQS-1:0] texel_sent_mask; + wire [NUM_REQS-1:0] dcache_req_fire; + wire [NUM_REQS-1:0] req_dup_mask; assign dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; @@ -146,30 +141,31 @@ module VX_tex_memory #( end end - assign req_dup_mask = {{(`NUM_THREADS-1){~req_texel_dup}}, 1'b1}; + assign req_dup_mask = {{(NUM_REQS-1){~req_texel_dup}}, 1'b1}; - assign dcache_req_if.valid = {`NUM_THREADS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask; - assign dcache_req_if.rw = {`NUM_THREADS{1'b0}}; + assign dcache_req_if.valid = {NUM_REQS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask; + assign dcache_req_if.rw = {NUM_REQS{1'b0}}; assign dcache_req_if.addr = req_texel_addr; - assign dcache_req_if.byteen = {`NUM_THREADS{4'b1111}}; + assign dcache_req_if.byteen = {NUM_REQS{4'b1111}}; assign dcache_req_if.data = 'x; `ifdef DBG_CACHE_REQ_INFO - assign dcache_req_if.tag = {`NUM_THREADS{q_req_PC, q_req_wid, req_texel_idx}}; + wire [`NW_BITS-1:0] q_req_wid; + wire [31:0] q_req_PC; + assign {q_req_wid, q_req_PC} = q_req_info[`NW_BITS+32-1:0]; + assign dcache_req_if.tag = {NUM_REQS{q_req_PC, q_req_wid, req_texel_idx}}; `else - assign dcache_req_if.tag = {`NUM_THREADS{req_texel_idx}}; - `UNUSED_VAR (q_req_wid) - `UNUSED_VAR (q_req_PC) + assign dcache_req_if.tag = {NUM_REQS{req_texel_idx}}; `endif // Dcache Response - reg [3:0][`NUM_THREADS-1:0][31:0] rsp_texels, rsp_texels_n; - wire [`NUM_THREADS-1:0][3:0][31:0] rsp_texels_qual; - reg [`NUM_THREADS-1:0][31:0] rsp_data_qual; + reg [3:0][NUM_REQS-1:0][31:0] rsp_texels, rsp_texels_n; + wire [NUM_REQS-1:0][3:0][31:0] rsp_texels_qual; + reg [NUM_REQS-1:0][31:0] rsp_data_qual; reg [RSP_CTR_W-1:0] rsp_rem_ctr; - wire [`NUM_THREADS-1:0] rsp_cur_tmask; - wire [$clog2(`NUM_THREADS + 1)-1:0] rsp_cur_cnt; + wire [NUM_REQS-1:0] rsp_cur_tmask; + wire [$clog2(NUM_REQS + 1)-1:0] rsp_cur_cnt; wire dcache_rsp_fire; wire [1:0] rsp_texel_idx; wire rsp_texel_dup; @@ -184,7 +180,7 @@ module VX_tex_memory #( assign rsp_cur_cnt = $countones(rsp_cur_tmask); - for (genvar i = 0; i < `NUM_THREADS; i++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin wire [31:0] src_mask = {32{dcache_rsp_if.valid[i]}}; wire [31:0] src_data = ((i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]) & src_mask; @@ -229,7 +225,7 @@ module VX_tex_memory #( end end - for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar j = 0; j < 4; ++j) begin assign rsp_texels_qual[i][j] = rsp_texels_n[j][i]; end @@ -242,43 +238,48 @@ module VX_tex_memory #( assign reqq_pop = rsp_texels_done && ~stall_out; VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + (4 * `NUM_THREADS * 32) + REQ_INFO_WIDTH), + .DATAW (1 + NUM_REQS + REQ_INFO_WIDTH + (4 * NUM_REQS * 32)), .RESETW (1) ) rsp_pipe_reg ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({rsp_texels_done, q_req_wid, q_req_tmask, q_req_PC, rsp_texels_qual, q_req_info}), - .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_data, rsp_info}) + .data_in ({rsp_texels_done, q_req_tmask, q_req_info, rsp_texels_qual}), + .data_out ({rsp_valid, rsp_tmask, rsp_info, rsp_data}) ); // Can accept new cache response? assign dcache_rsp_if.ready = ~stall_out || (rsp_rem_ctr != RSP_CTR_W'(rsp_cur_cnt)); `ifdef DBG_PRINT_TEX - always @(posedge clk) begin + wire [`NW_BITS-1:0] req_wid, rsp_wid; + wire [31:0] req_PC, rsp_PC; + assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0]; + assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0]; + + always @(posedge clk) begin if ((| dcache_req_fire)) begin $write("%t: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=", $time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, dcache_req_if.tag); - `PRINT_ARRAY1D(req_texel_addr, `NUM_THREADS); + `PRINT_ARRAY1D(req_texel_addr, NUM_REQS); $write(", is_dup=%b\n", req_texel_dup); end if (dcache_rsp_fire) begin $write("%t: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=", $time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.valid, dcache_rsp_if.tag); - `PRINT_ARRAY1D(rsp_data_qual, `NUM_THREADS); + `PRINT_ARRAY1D(rsp_data_qual, NUM_REQS); $write("\n"); end if (req_valid && req_ready) begin $write("%t: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, stride=%0d, addr=", $time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_stride); - `PRINT_ARRAY2D(req_addr, 4, `NUM_THREADS); + `PRINT_ARRAY2D(req_addr, 4, NUM_REQS); $write("\n"); end if (rsp_valid && rsp_ready) begin $write("%t: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, tmask=%b, data=", $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask); - `PRINT_ARRAY2D(rsp_data, 4, `NUM_THREADS); + `PRINT_ARRAY2D(rsp_data, 4, NUM_REQS); $write("\n"); end end diff --git a/hw/rtl/tex_unit/VX_tex_sampler.v b/hw/rtl/tex_unit/VX_tex_sampler.v index d7c8ed21..75d0a5bb 100644 --- a/hw/rtl/tex_unit/VX_tex_sampler.v +++ b/hw/rtl/tex_unit/VX_tex_sampler.v @@ -1,52 +1,43 @@ `include "VX_tex_define.vh" module VX_tex_sampler #( - parameter CORE_ID = 0 + parameter CORE_ID = 0, + parameter REQ_INFO_WIDTH = 1, + parameter NUM_REQS = 1 ) ( input wire clk, input wire reset, // inputs - input wire req_valid, - input wire [`NW_BITS-1:0] req_wid, - input wire [`NUM_THREADS-1:0] req_tmask, - input wire [31:0] req_PC, - input wire [`NR_BITS-1:0] req_rd, - input wire req_wb, - input wire [`TEX_FORMAT_BITS-1:0] req_format, - input wire [`NUM_THREADS-1:0][3:0][31:0] req_data, - input wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] req_blend_u, - input wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] req_blend_v, + input wire req_valid, + input wire [`NUM_THREADS-1:0] req_tmask, + input wire [`TEX_FORMAT_BITS-1:0] req_format, + input wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] req_blends, + input wire [NUM_REQS-1:0][3:0][31:0] req_data, + input wire [REQ_INFO_WIDTH-1:0] req_info, output wire req_ready, // ouputs - output wire rsp_valid, - output wire [`NW_BITS-1:0] rsp_wid, - output wire [`NUM_THREADS-1:0] rsp_tmask, - output wire [31:0] rsp_PC, - output wire [`NR_BITS-1:0] rsp_rd, - output wire rsp_wb, - output wire [`NUM_THREADS-1:0][31:0] rsp_data, - input wire rsp_ready + output wire rsp_valid, + output wire [`NUM_THREADS-1:0] rsp_tmask, + output wire [NUM_REQS-1:0][31:0] rsp_data, + output wire [REQ_INFO_WIDTH-1:0] rsp_info, + input wire rsp_ready ); `UNUSED_PARAM (CORE_ID) - wire [`NUM_THREADS-1:0][31:0] texel_ul, texel_uh; - wire [`NUM_THREADS-1:0][31:0] texel_ul_s0, texel_uh_s0; - wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] blend_v_s0; - wire [`NUM_THREADS-1:0][31:0] texel_v; - - wire req_valid_s0; - wire [`NW_BITS-1:0] req_wid_s0; - wire [`NUM_THREADS-1:0] req_tmask_s0; - wire [31:0] req_PC_s0; - wire [`NR_BITS-1:0] req_rd_s0; - wire req_wb_s0; + wire valid_s0; + wire [`NUM_THREADS-1:0] tmask_s0; + wire [REQ_INFO_WIDTH-1:0] req_info_s0; + wire [NUM_REQS-1:0][31:0] texel_ul, texel_uh; + wire [NUM_REQS-1:0][31:0] texel_ul_s0, texel_uh_s0; + wire [NUM_REQS-1:0][`BLEND_FRAC-1:0] blend_v_s0; + wire [NUM_REQS-1:0][31:0] texel_v; wire stall_out; - for (genvar i = 0; i < `NUM_THREADS; i++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin wire [3:0][31:0] fmt_texels; @@ -62,7 +53,7 @@ module VX_tex_sampler #( VX_tex_lerp #( ) tex_lerp_ul ( - .blend (req_blend_u[i]), + .blend (req_blends[0][i]), .in1 (fmt_texels[0]), .in2 (fmt_texels[1]), .out (texel_ul[i]) @@ -70,7 +61,7 @@ module VX_tex_sampler #( VX_tex_lerp #( ) tex_lerp_uh ( - .blend (req_blend_u[i]), + .blend (req_blends[0][i]), .in1 (fmt_texels[2]), .in2 (fmt_texels[3]), .out (texel_uh[i]) @@ -78,17 +69,17 @@ module VX_tex_sampler #( end VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * `BLEND_FRAC) + (2 * `NUM_THREADS * 32)), + .DATAW (1 + NUM_REQS + REQ_INFO_WIDTH + (NUM_REQS * `BLEND_FRAC) + (2 * NUM_REQS * 32)), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({req_valid, req_wid, req_tmask, req_PC, req_rd, req_wb, req_blend_v, texel_ul, texel_uh}), - .data_out ({req_valid_s0, req_wid_s0, req_tmask_s0, req_PC_s0, req_rd_s0, req_wb_s0, blend_v_s0, texel_ul_s0, texel_uh_s0}) + .data_in ({req_valid, req_tmask, req_info, req_blends[1], texel_ul, texel_uh}), + .data_out ({valid_s0, tmask_s0, req_info_s0, blend_v_s0, texel_ul_s0, texel_uh_s0}) ); - for (genvar i = 0; i < `NUM_THREADS; i++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin VX_tex_lerp #( ) tex_lerp_v ( .blend (blend_v_s0[i]), @@ -101,35 +92,42 @@ module VX_tex_sampler #( assign stall_out = rsp_valid && ~rsp_ready; VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .DATAW (1 + NUM_REQS + REQ_INFO_WIDTH + (NUM_REQS * 32)), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({req_valid_s0, req_wid_s0, req_tmask_s0, req_PC_s0, req_rd_s0, req_wb_s0, texel_v}), - .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}) + .data_in ({valid_s0, tmask_s0, req_info_s0, texel_v}), + .data_out ({rsp_valid, rsp_tmask, rsp_info, rsp_data}) ); // can accept new request? assign req_ready = ~stall_out; `ifdef DBG_PRINT_TEX - always @(posedge clk) begin + + wire [`NW_BITS-1:0] req_wid, rsp_wid; + wire [31:0] req_PC, rsp_PC; + + assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0]; + assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0]; + + always @(posedge clk) begin if (req_valid && req_ready) begin $write("%t: core%0d-tex-sampler-req: wid=%0d, PC=%0h, tmask=%b, format=%0d, data=", $time, CORE_ID, req_wid, req_PC, req_tmask, req_format); - `PRINT_ARRAY2D(req_data, 4, `NUM_THREADS); + `PRINT_ARRAY2D(req_data, 4, NUM_REQS); $write(", u0="); - `PRINT_ARRAY1D(req_blend_u, `NUM_THREADS); + `PRINT_ARRAY1D(req_blends[0], NUM_REQS); $write(", v0="); - `PRINT_ARRAY1D(req_blend_v, `NUM_THREADS); + `PRINT_ARRAY1D(req_blends[1], NUM_REQS); $write("\n"); end if (rsp_valid && rsp_ready) begin $write("%t: core%0d-tex-sampler-rsp: wid=%0d, PC=%0h, tmask=%b, data=", $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask); - `PRINT_ARRAY1D(rsp_data, `NUM_THREADS); + `PRINT_ARRAY1D(rsp_data, NUM_REQS); $write("\n"); end end diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index 59b59a8c..89f83368 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -18,20 +18,16 @@ module VX_tex_unit #( VX_tex_rsp_if tex_rsp_if ); - localparam REQ_INFO_WIDTH_A = `TEX_FORMAT_BITS + `NR_BITS + 1; + localparam REQ_INFO_WIDTH_S = `NR_BITS + 1 + `NW_BITS + 32; + localparam REQ_INFO_WIDTH_A = `TEX_FORMAT_BITS + REQ_INFO_WIDTH_S; localparam REQ_INFO_WIDTH_M = (2 * `NUM_THREADS * `BLEND_FRAC) + REQ_INFO_WIDTH_A; - - `UNUSED_PARAM (CORE_ID) - `UNUSED_VAR (reset) - + reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; - reg [`TEX_DIM_BITS-1:0] tex_width [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; - reg [`TEX_DIM_BITS-1:0] tex_height [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; + reg [`TEX_DIM_BITS-1:0] tex_dims [1:0][`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0]; reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0]; - reg [`TEX_WRAP_BITS-1:0] tex_wrap_u [`NUM_TEX_UNITS-1:0]; - reg [`TEX_WRAP_BITS-1:0] tex_wrap_v [`NUM_TEX_UNITS-1:0]; + reg [`TEX_WRAP_BITS-1:0] tex_wraps [1:0][`NUM_TEX_UNITS-1:0]; reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0]; // CSRs programming @@ -48,8 +44,8 @@ module VX_tex_unit #( tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0]; end `CSR_TEX_WRAP(i) : begin - tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS]; - tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS]; + tex_wraps[0][i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS]; + tex_wraps[1][i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS]; end `CSR_TEX_FILTER(i) : begin tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0]; @@ -58,10 +54,10 @@ module VX_tex_unit #( tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0]; end `CSR_TEX_WIDTH(i) : begin - tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; + tex_dims[0][i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; end `CSR_TEX_HEIGHT(i) : begin - tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; + tex_dims[1][i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; end default: assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0) @@ -73,84 +69,70 @@ module VX_tex_unit #( // mipmap attributes - wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] tex_mipoffs; - wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] tex_widths; - wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] tex_heights; + wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] sel_mipoff; + wire [1:0][`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] sel_dims; for (genvar i = 0; i < `NUM_THREADS; ++i) begin wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0]; wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][20+:`TEX_LOD_BITS]; - assign tex_mipoffs[i] = tex_mipoff[unit][mip_level]; - assign tex_widths[i] = tex_width[unit][mip_level]; - assign tex_heights[i] = tex_height[unit][mip_level]; + assign sel_mipoff[i] = tex_mipoff[unit][mip_level]; + assign sel_dims[0][i] = tex_dims[0][unit][mip_level]; + assign sel_dims[1][i] = tex_dims[1][unit][mip_level]; end // address generation wire mem_req_valid; - wire [`NW_BITS-1:0] mem_req_wid; wire [`NUM_THREADS-1:0] mem_req_tmask; - wire [31:0] mem_req_PC; wire [`TEX_FILTER_BITS-1:0] mem_req_filter; wire [`TEX_STRIDE_BITS-1:0] mem_req_stride; - wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] mem_req_blend_u; - wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] mem_req_blend_v; + wire [1:0][`NUM_THREADS-1:0][`BLEND_FRAC-1:0] mem_req_blends; wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr; wire [REQ_INFO_WIDTH_A-1:0] mem_req_info; wire mem_req_ready; - - wire mem_rsp_valid; - wire [`NW_BITS-1:0] mem_rsp_wid; - wire [`NUM_THREADS-1:0] mem_rsp_tmask; - wire [31:0] mem_rsp_PC; - wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data; - wire [REQ_INFO_WIDTH_M-1:0] mem_rsp_info; - wire mem_rsp_ready; VX_tex_addr #( - .REQ_INFO_WIDTH (REQ_INFO_WIDTH_A) + .CORE_ID (CORE_ID), + .REQ_INFO_WIDTH (REQ_INFO_WIDTH_A), + .NUM_REQS (`NUM_THREADS) ) tex_addr ( .clk (clk), .reset (reset), - .valid_in (tex_req_if.valid), - .ready_in (tex_req_if.ready), - - .req_wid (tex_req_if.wid), + .req_valid (tex_req_if.valid), .req_tmask (tex_req_if.tmask), - .req_PC (tex_req_if.PC), - .req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb}), - - .format (tex_format[tex_req_if.unit]), - .filter (tex_filter[tex_req_if.unit]), - .wrap_u (tex_wrap_u[tex_req_if.unit]), - .wrap_v (tex_wrap_v[tex_req_if.unit]), - - .base_addr (tex_baddr[tex_req_if.unit]), - .mip_offsets(tex_mipoffs), - .log_widths (tex_widths), - .log_heights(tex_heights), - - .coord_u (tex_req_if.u), - .coord_v (tex_req_if.v), + .req_coords (tex_req_if.coords), + .req_format (tex_format[tex_req_if.unit]), + .req_filter (tex_filter[tex_req_if.unit]), + .req_wraps ({tex_wraps[1][tex_req_if.unit], tex_wraps[0][tex_req_if.unit]}), + .req_baseaddr(tex_baddr[tex_req_if.unit]), + .req_mipoffset(sel_mipoff), + .req_logdims(sel_dims), + .req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}), + .req_ready (tex_req_if.ready), .rsp_valid (mem_req_valid), - .rsp_wid (mem_req_wid), - .rsp_tmask (mem_req_tmask), - .rsp_PC (mem_req_PC), + .rsp_tmask (mem_req_tmask), .rsp_filter (mem_req_filter), .rsp_stride (mem_req_stride), .rsp_addr (mem_req_addr), - .rsp_blend_u(mem_req_blend_u), - .rsp_blend_v(mem_req_blend_v), + .rsp_blends (mem_req_blends), .rsp_info (mem_req_info), .rsp_ready (mem_req_ready) ); - // retrieve texel values from memory + // retrieve texel values from memory + + wire mem_rsp_valid; + wire [`NUM_THREADS-1:0] mem_rsp_tmask; + wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data; + wire [REQ_INFO_WIDTH_M-1:0] mem_rsp_info; + wire mem_rsp_ready; + VX_tex_memory #( .CORE_ID (CORE_ID), - .REQ_INFO_WIDTH (REQ_INFO_WIDTH_M) + .REQ_INFO_WIDTH (REQ_INFO_WIDTH_M), + .NUM_REQS (`NUM_THREADS) ) tex_memory ( .clk (clk), .reset (reset), @@ -161,20 +143,16 @@ module VX_tex_unit #( // inputs .req_valid (mem_req_valid), - .req_wid (mem_req_wid), - .req_tmask (mem_req_tmask), - .req_PC (mem_req_PC), + .req_tmask (mem_req_tmask), .req_filter(mem_req_filter), .req_stride(mem_req_stride), .req_addr (mem_req_addr), - .req_info ({mem_req_blend_u, mem_req_blend_v, mem_req_info}), + .req_info ({mem_req_blends, mem_req_info}), .req_ready (mem_req_ready), // outputs .rsp_valid (mem_rsp_valid), - .rsp_wid (mem_rsp_wid), - .rsp_tmask (mem_rsp_tmask), - .rsp_PC (mem_rsp_PC), + .rsp_tmask (mem_rsp_tmask), .rsp_data (mem_rsp_data), .rsp_info (mem_rsp_info), .rsp_ready (mem_rsp_ready) @@ -182,40 +160,34 @@ module VX_tex_unit #( // apply sampler - wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] rsp_blend_u, rsp_blend_v; + wire [`NUM_THREADS-1:0][1:0][`BLEND_FRAC-1:0] rsp_blends; wire [`TEX_FORMAT_BITS-1:0] rsp_format; - wire [`NR_BITS-1:0] rsp_rd; - wire rsp_wb; + wire [REQ_INFO_WIDTH_S-1:0] rsp_info; - assign {rsp_blend_u, rsp_blend_v, rsp_format, rsp_rd, rsp_wb} = mem_rsp_info; + assign {rsp_blends, rsp_format, rsp_info} = mem_rsp_info; VX_tex_sampler #( - .CORE_ID (CORE_ID) + .CORE_ID (CORE_ID), + .REQ_INFO_WIDTH (REQ_INFO_WIDTH_S), + .NUM_REQS (`NUM_THREADS) ) tex_sampler ( .clk (clk), .reset (reset), // inputs .req_valid (mem_rsp_valid), - .req_wid (mem_rsp_wid), - .req_tmask (mem_rsp_tmask), - .req_PC (mem_rsp_PC), + .req_tmask (mem_rsp_tmask), .req_data (mem_rsp_data), .req_format (rsp_format), - .req_blend_u(rsp_blend_u), - .req_blend_v(rsp_blend_v), - .req_rd (rsp_rd), - .req_wb (rsp_wb), + .req_blends (rsp_blends), + .req_info (rsp_info), .req_ready (mem_rsp_ready), // outputs .rsp_valid (tex_rsp_if.valid), - .rsp_wid (tex_rsp_if.wid), .rsp_tmask (tex_rsp_if.tmask), - .rsp_PC (tex_rsp_if.PC), - .rsp_rd (tex_rsp_if.rd), - .rsp_wb (tex_rsp_if.wb), .rsp_data (tex_rsp_if.data), + .rsp_info ({tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC}), .rsp_ready (tex_rsp_if.ready) ); @@ -227,12 +199,12 @@ module VX_tex_unit #( && tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin $display("%t: core%0d-tex-csr: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]); $display("%t: core%0d-tex-csr: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]); - $display("%t: core%0d-tex-csr: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]); - $display("%t: core%0d-tex-csr: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]); + $display("%t: core%0d-tex-csr: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wraps[0][i]); + $display("%t: core%0d-tex-csr: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wraps[1][i]); $display("%t: core%0d-tex-csr: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]); $display("%t: core%0d-tex-csr: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]); - $display("%t: core%0d-tex-csr: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]); - $display("%t: core%0d-tex-csr: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]); + $display("%t: core%0d-tex-csr: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_dims[0][i][0]); + $display("%t: core%0d-tex-csr: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_dims[1][i][0]); end end end @@ -240,9 +212,9 @@ module VX_tex_unit #( if (tex_req_if.valid && tex_req_if.ready) begin $display("%t: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=", $time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod); - `PRINT_ARRAY1D(tex_req_if.u, `NUM_THREADS); + `PRINT_ARRAY1D(tex_req_if.coords[0], `NUM_THREADS); $write(", v="); - `PRINT_ARRAY1D(tex_req_if.v, `NUM_THREADS); + `PRINT_ARRAY1D(tex_req_if.coords[1], `NUM_THREADS); $write("\n"); end if (tex_rsp_if.valid && tex_rsp_if.ready) begin