diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 69763381..6a2a8125 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -281,13 +281,19 @@ // Word size in bytes `define DWORD_SIZE 4 -// TAG sharing enable -`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE) +// TAG sharing enable +`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE) +`ifdef EXT_TEX_ENABLE +`define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + 2) +`else +`define DCORE_TAG_ID_BITS `LSUQ_ADDR_BITS +`endif // Core request tag bits `ifdef EXT_TEX_ENABLE -`define LSU_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS) -`define DCORE_TAG_WIDTH (`LSU_TAG_WIDTH+1) +`define LSU_DACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSUQ_ADDR_BITS) +`define TEX_DACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + 2 + `LSUQ_ADDR_BITS) +`define DCORE_TAG_WIDTH (`MAX(`LSU_DACHE_TAG_BITS, `TEX_DACHE_TAG_BITS) + 1) `else `define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS) `endif diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 015093cb..c690ae75 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -51,34 +51,37 @@ module VX_execute #( VX_dcache_core_req_if #( .LANES(`NUM_THREADS), .WORD_SIZE(4), - .CORE_TAG_WIDTH(`LSU_TAG_WIDTH) - ) tex_dcache_req_if(); - - VX_dcache_core_rsp_if #( - .LANES(`NUM_THREADS), - .WORD_SIZE(4), - .CORE_TAG_WIDTH(`LSU_TAG_WIDTH) - ) tex_dcache_rsp_if(); - - VX_dcache_core_req_if #( - .LANES(`NUM_THREADS), - .WORD_SIZE(4), - .CORE_TAG_WIDTH(`LSU_TAG_WIDTH) + .CORE_TAG_WIDTH(`LSU_DACHE_TAG_BITS) ) lsu_dcache_req_if(); VX_dcache_core_rsp_if #( .LANES(`NUM_THREADS), .WORD_SIZE(4), - .CORE_TAG_WIDTH(`LSU_TAG_WIDTH) + .CORE_TAG_WIDTH(`LSU_DACHE_TAG_BITS) ) lsu_dcache_rsp_if(); + VX_dcache_core_req_if #( + .LANES(`NUM_THREADS), + .WORD_SIZE(4), + .CORE_TAG_WIDTH(`TEX_DACHE_TAG_BITS) + ) tex_dcache_req_if(); + + VX_dcache_core_rsp_if #( + .LANES(`NUM_THREADS), + .WORD_SIZE(4), + .CORE_TAG_WIDTH(`TEX_DACHE_TAG_BITS) + ) tex_dcache_rsp_if(); + VX_tex_csr_if tex_csr_if(); + wire [1:0] 
tmp; + `UNUSED_VAR (tmp) + VX_tex_lsu_arb #( .NUM_REQS (2), .LANES (`NUM_THREADS), .WORD_SIZE (4), - .TAG_IN_WIDTH (`LSU_TAG_WIDTH), + .TAG_IN_WIDTH (`MAX(`LSU_DACHE_TAG_BITS, `TEX_DACHE_TAG_BITS)), .TAG_OUT_WIDTH (`DCORE_TAG_WIDTH) ) tex_lsu_arb ( .clk (clk), @@ -90,7 +93,7 @@ module VX_execute #( .req_byteen_in ({tex_dcache_req_if.byteen, lsu_dcache_req_if.byteen}), .req_addr_in ({tex_dcache_req_if.addr, lsu_dcache_req_if.addr}), .req_data_in ({tex_dcache_req_if.data, lsu_dcache_req_if.data}), - .req_tag_in ({tex_dcache_req_if.tag, lsu_dcache_req_if.tag}), + .req_tag_in ({tex_dcache_req_if.tag, {2'b0, lsu_dcache_req_if.tag}}), .req_ready_in ({tex_dcache_req_if.ready, lsu_dcache_req_if.ready}), // Dcache request @@ -105,7 +108,7 @@ module VX_execute #( // Tex/LSU response .rsp_valid_out ({tex_dcache_rsp_if.valid, lsu_dcache_rsp_if.valid}), .rsp_data_out ({tex_dcache_rsp_if.data, lsu_dcache_rsp_if.data}), - .rsp_tag_out ({tex_dcache_rsp_if.tag, lsu_dcache_rsp_if.tag}), + .rsp_tag_out ({tex_dcache_rsp_if.tag, {tmp, lsu_dcache_rsp_if.tag}}), .rsp_ready_out ({tex_dcache_rsp_if.ready, lsu_dcache_rsp_if.ready}), // Dcache response diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 01a5f00b..42cb79b0 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -45,7 +45,7 @@ module VX_lsu_unit #( wire is_dup_load = lsu_req_if.wb && lsu_req_if.tmask[0] && (& addr_matches); `IGNORE_WARNINGS_BEGIN - reg [`LSUQ_SIZE-1:0][`DCORE_TAG_ID_BITS-1:0] pending_tags; + reg [`LSUQ_SIZE-1:0][`LSUQ_ADDR_BITS-1:0] pending_tags; `IGNORE_WARNINGS_END wire ready_in; @@ -75,12 +75,12 @@ module VX_lsu_unit #( `UNUSED_VAR (rsp_type) reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask; - reg [`NUM_THREADS-1:0] rsp_rem_mask_n; + wire [`NUM_THREADS-1:0] rsp_rem_mask_n; reg [`NUM_THREADS-1:0] req_sent_mask; wire req_sent_all; - wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr; + wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr; wire mbuf_full; wire [`NUM_THREADS-1:0][1:0] 
req_offset, rsp_offset; @@ -88,15 +88,17 @@ module VX_lsu_unit #( assign req_offset[i] = req_addr[i][1:0]; end - wire mbuf_push = (| (dcache_req_if.valid & dcache_req_if.ready)) + wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; + + wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; + + wire mbuf_push = (| dcache_req_fire) && (0 == req_sent_mask) // first submission only && req_wb; // loads only - wire mbuf_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; - - wire mbuf_pop = mbuf_pop_part && (rsp_rem_mask_n == 0 || rsp_is_dup); + wire mbuf_pop = dcache_rsp_fire && (rsp_rem_mask_n == 0 || rsp_is_dup); - assign mbuf_raddr = dcache_rsp_if.tag[`DCORE_TAG_ID_BITS-1:0]; + assign mbuf_raddr = dcache_rsp_if.tag[`LSUQ_ADDR_BITS-1:0]; VX_index_buffer #( .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1), @@ -114,23 +116,20 @@ module VX_lsu_unit #( .full (mbuf_full) ); - assign req_sent_all = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask)) - || (req_is_dup && dcache_req_if.ready[0]); + assign req_sent_all = (&(dcache_req_fire | req_sent_mask | ~req_tmask)) + || (req_is_dup & dcache_req_if.valid[0] & dcache_req_if.ready[0]); always @(posedge clk) begin - if (reset) begin + if (reset || req_sent_all) begin req_sent_mask <= 0; - end else begin - if (req_sent_all) - req_sent_mask <= 0; - else - req_sent_mask <= req_sent_mask | (dcache_req_if.valid & dcache_req_if.ready); + end else if (!req_sent_all) begin + req_sent_mask <= req_sent_mask | dcache_req_fire; end end // need to hold the acquired tag index until the full request is submitted - reg [`DCORE_TAG_ID_BITS-1:0] req_tag_hold; - wire [`DCORE_TAG_ID_BITS-1:0] req_tag = (0 == req_sent_mask) ? mbuf_waddr : req_tag_hold; + reg [`LSUQ_ADDR_BITS-1:0] req_tag_hold; + wire [`LSUQ_ADDR_BITS-1:0] req_tag = (0 == req_sent_mask) ? 
mbuf_waddr : req_tag_hold; always @(posedge clk) begin if (mbuf_push) req_tag_hold <= mbuf_waddr; @@ -142,12 +141,13 @@ module VX_lsu_unit #( rsp_rem_mask[mbuf_waddr] <= req_tmask; pending_tags[mbuf_waddr] <= req_tag; end - if (mbuf_pop_part) begin + if (dcache_rsp_fire) begin rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n; end end - wire req_ready_dep = (req_wb && ~mbuf_full) || (~req_wb && st_commit_if.ready); + wire req_ready_dep = (req_wb && ~mbuf_full) + || (~req_wb && st_commit_if.ready); wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1}; @@ -188,7 +188,7 @@ module VX_lsu_unit #( assign dcache_req_if.data = mem_req_data; `ifdef DBG_CACHE_REQ_INFO - assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_wid, req_tag}}}; + assign dcache_req_if.tag = {`NUM_THREADS{req_pc, req_wid, req_tag}}; `else assign dcache_req_if.tag = {`NUM_THREADS{req_tag}}; `endif @@ -255,7 +255,7 @@ module VX_lsu_unit #( assign dcache_rsp_if.ready = ~load_rsp_stall; // scope registration - `SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & dcache_req_if.ready); + `SCOPE_ASSIGN (dcache_req_fire, dcache_req_fire); `SCOPE_ASSIGN (dcache_req_wid, req_wid); `SCOPE_ASSIGN (dcache_req_pc, req_pc); `SCOPE_ASSIGN (dcache_req_addr, req_addr); @@ -269,15 +269,15 @@ module VX_lsu_unit #( `ifdef DBG_PRINT_CORE_DCACHE always @(posedge clk) begin - if ((| (dcache_req_if.valid & dcache_req_if.ready))) begin + if ((| dcache_req_fire)) begin if ((| dcache_req_if.rw)) $display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h", - $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data); + $time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data); else $display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b", - $time, CORE_ID, req_wid, req_pc, 
(dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup); + $time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup); end - if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin + if (dcache_rsp_fire) begin $display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b", $time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data, rsp_is_dup); end diff --git a/hw/rtl/tex_unit/VX_tex_addr_gen.v b/hw/rtl/tex_unit/VX_tex_addr_gen.v index f6eba173..e38671fb 100644 --- a/hw/rtl/tex_unit/VX_tex_addr_gen.v +++ b/hw/rtl/tex_unit/VX_tex_addr_gen.v @@ -2,7 +2,7 @@ module VX_tex_addr_gen #( parameter CORE_ID = 0, - parameter REQ_TAG_WIDTH = 1 + parameter REQ_INFO_WIDTH = 1 ) ( input wire clk, input wire reset, @@ -14,17 +14,19 @@ module VX_tex_addr_gen #( // inputs + input wire [`NW_BITS-1:0] req_wid, input wire [`NUM_THREADS-1:0] req_tmask, - input wire [REQ_TAG_WIDTH-1:0] req_tag, + input wire [31:0] req_PC, + input wire [REQ_INFO_WIDTH-1:0] req_info, input wire [`TEX_FILTER_BITS-1:0] filter, input wire [`TEX_WRAP_BITS-1:0] wrap_u, input wire [`TEX_WRAP_BITS-1:0] wrap_v, input wire [`TEX_ADDR_BITS-1:0] base_addr, - input wire [`TEX_STRIDE_BITS-1:0] log2_stride, - input wire [`TEX_WIDTH_BITS-1:0] log2_width, - input wire [`TEX_HEIGHT_BITS-1:0] log2_height, + input wire [`TEX_STRIDE_BITS-1:0] log_stride, + input wire [`TEX_WIDTH_BITS-1:0] log_width, + input wire [`TEX_HEIGHT_BITS-1:0] log_height, input wire [`NUM_THREADS-1:0][31:0] coord_u, input wire [`NUM_THREADS-1:0][31:0] coord_v, @@ -32,14 +34,17 @@ module VX_tex_addr_gen #( // outputs - output wire mem_req_valid, - output wire [`NUM_THREADS-1:0] mem_req_tmask, + output wire mem_req_valid, + output wire [`NW_BITS-1:0] mem_req_wid, + output wire [`NUM_THREADS-1:0] mem_req_tmask, + output wire [31:0] mem_req_PC, output wire 
[`TEX_FILTER_BITS-1:0] mem_req_filter, + output wire [`TEX_STRIDE_BITS-1:0] mem_req_stride, output wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] mem_req_u, output wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] mem_req_v, - output wire [REQ_TAG_WIDTH-1:0] mem_req_tag, + output wire [REQ_INFO_WIDTH-1:0] mem_req_info, output wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr, - input wire mem_req_ready + input wire mem_req_ready ); `UNUSED_PARAM (CORE_ID) @@ -55,10 +60,10 @@ module VX_tex_addr_gen #( wire [31:0] fu[1:0]; wire [31:0] fv[1:0]; - assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log2_width) : 0); - assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log2_height) : 0); - assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log2_width) : 0); - assign fv[1] = coord_v[i] + (filter ? (`FIXED_HALF >> log2_height) : 0); + assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_width) : 0); + assign fv[0] = coord_v[i] - (filter ? (`FIXED_HALF >> log_height) : 0); + assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_width) : 0); + assign fv[1] = coord_v[i] + (filter ? 
(`FIXED_HALF >> log_height) : 0); VX_tex_wrap #( .CORE_ID (CORE_ID) @@ -102,28 +107,28 @@ module VX_tex_addr_gen #( wire [`FIXED_FRAC-1:0] x [1:0]; wire [`FIXED_FRAC-1:0] y [1:0]; - assign x[0] = u[0][i] >> ((`FIXED_FRAC) - log2_width); - assign x[1] = u[1][i] >> ((`FIXED_FRAC) - log2_width); - assign y[0] = v[0][i] >> ((`FIXED_FRAC) - log2_height); - assign y[1] = v[1][i] >> ((`FIXED_FRAC) - log2_height); + assign x[0] = u[0][i] >> ((`FIXED_FRAC) - log_width); + assign x[1] = u[1][i] >> ((`FIXED_FRAC) - log_width); + assign y[0] = v[0][i] >> ((`FIXED_FRAC) - log_height); + assign y[1] = v[1][i] >> ((`FIXED_FRAC) - log_height); - assign addr[i][0] = base_addr + (32'(x[0]) + (32'(y[0]) << log2_width)) << log2_stride; - assign addr[i][1] = base_addr + (32'(x[1]) + (32'(y[0]) << log2_width)) << log2_stride; - assign addr[i][2] = base_addr + (32'(x[0]) + (32'(y[1]) << log2_width)) << log2_stride; - assign addr[i][3] = base_addr + (32'(x[1]) + (32'(y[1]) << log2_width)) << log2_stride; + assign addr[i][0] = base_addr + ((32'(x[0]) + (32'(y[0]) << log_width)) << log_stride); /* only the texel offset is scaled by the stride: addition binds tighter than shift, so the inner parentheses are required */ + assign addr[i][1] = base_addr + ((32'(x[1]) + (32'(y[0]) << log_width)) << log_stride); + assign addr[i][2] = base_addr + ((32'(x[0]) + (32'(y[1]) << log_width)) << log_stride); + assign addr[i][3] = base_addr + ((32'(x[1]) + (32'(y[1]) << log_width)) << log_stride); end wire stall_out = mem_req_valid && ~mem_req_ready; VX_pipe_register #( - .DATAW (1 + `NUM_THREADS + `TEX_FILTER_BITS + REQ_TAG_WIDTH + (`NUM_THREADS * 4 * 32) + (2*`NUM_THREADS * `FIXED_FRAC)), + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + (`NUM_THREADS * 4 * 32) + (2*`NUM_THREADS * `FIXED_FRAC)), .RESETW (1) ) pipe_reg ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({valid_in, req_tmask, filter, req_tag, addr, u[0], v[0]}), - .data_out ({mem_req_valid, mem_req_tmask, mem_req_filter, mem_req_tag, mem_req_addr, mem_req_u, mem_req_v}) + .data_in ({valid_in, req_wid, 
req_tmask, req_PC, filter, log_stride, req_info, addr, u[0], v[0]}), + .data_out ({mem_req_valid, mem_req_wid, mem_req_tmask, mem_req_PC, mem_req_filter, mem_req_stride, mem_req_info, mem_req_addr, mem_req_u, mem_req_v}) ); assign ready_in = ~stall_out; diff --git a/hw/rtl/VX_tex_lsu_arb.v b/hw/rtl/tex_unit/VX_tex_lsu_arb.v similarity index 100% rename from hw/rtl/VX_tex_lsu_arb.v rename to hw/rtl/tex_unit/VX_tex_lsu_arb.v diff --git a/hw/rtl/tex_unit/VX_tex_memory.v b/hw/rtl/tex_unit/VX_tex_memory.v index 77e5d018..dbf3a734 100644 --- a/hw/rtl/tex_unit/VX_tex_memory.v +++ b/hw/rtl/tex_unit/VX_tex_memory.v @@ -1,11 +1,9 @@ `include "VX_tex_define.vh" module VX_tex_memory #( - parameter CORE_ID = 0, - parameter REQ_TAG_WIDTH = 1 + parameter CORE_ID = 0, + parameter REQ_INFO_WIDTH = 1 ) ( - `SCOPE_IO_VX_lsu_unit - input wire clk, input wire reset, @@ -14,102 +12,66 @@ module VX_tex_memory #( VX_dcache_core_rsp_if dcache_rsp_if, // inputs - input wire req_valid, - input wire [`NUM_THREADS-1:0] req_tmask, - input wire [`TEX_FILTER_BITS-1:0] req_filter, + input wire req_valid, + input wire [`NW_BITS-1:0] req_wid, + input wire [`NUM_THREADS-1:0] req_tmask, + input wire [31:0] req_PC, + input wire [`TEX_FILTER_BITS-1:0] req_filter, + input wire [`TEX_STRIDE_BITS-1:0] req_stride, input wire [`NUM_THREADS-1:0][3:0][31:0] req_addr, - input wire [REQ_TAG_WIDTH-1:0] req_tag, - output wire req_ready, + input wire [REQ_INFO_WIDTH-1:0] req_info, + output wire req_ready, // outputs - output wire rsp_valid, - output wire [`NUM_THREADS-1:0] rsp_tmask, - output wire [`TEX_FILTER_BITS-1:0] rsp_filter, + output wire rsp_valid, + output wire [`NW_BITS-1:0] rsp_wid, + output wire [`NUM_THREADS-1:0] rsp_tmask, + output wire [31:0] rsp_PC, + output wire [`TEX_FILTER_BITS-1:0] rsp_filter, output wire [`NUM_THREADS-1:0][3:0][31:0] rsp_data, - output wire [REQ_TAG_WIDTH-1:0] rsp_tag, - input wire rsp_ready + output wire [REQ_INFO_WIDTH-1:0] rsp_info, + input wire rsp_ready ); `UNUSED_PARAM 
(CORE_ID) - - /*wire req_valid; - wire [`NUM_THREADS-1:0] req_tmask; - wire [`NUM_THREADS-1:0][31:0] req_addr; - wire [`LSU_BITS-1:0] req_type; - wire [`NUM_THREADS-1:0][31:0] req_data; - wire [`NR_BITS-1:0] req_rd; - wire req_wb; - wire [`NW_BITS-1:0] req_wid; - wire [31:0] req_pc; - wire req_is_dup; - wire [`NUM_THREADS-1:0][31:0] full_address; - for (genvar i = 0; i < `NUM_THREADS; i++) begin - assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset; + wire [3:0] dup_reqs; + wire [3:0][`NUM_THREADS-1:0][29:0] req_addr_w; + wire [3:0][`NUM_THREADS-1:0][1:0] align_offs; + + // reorder address into quads + + for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar j = 0; j < 4; ++j) begin + assign req_addr_w[j][i] = req_addr[i][j][31:2]; + assign align_offs[j][i] = req_addr[i][j][1:0]; + end end - wire [`NUM_THREADS-1:0] addr_matches; - for (genvar i = 0; i < `NUM_THREADS; i++) begin - assign addr_matches[i] = (full_address[0][31:2] == full_address[i][31:2]) || ~lsu_req_if.tmask[i]; - end - wire is_dup_load = lsu_req_if.wb && lsu_req_if.tmask[0] && (& addr_matches); - -`IGNORE_WARNINGS_BEGIN - reg [`LSUQ_SIZE-1:0][`DCORE_TAG_ID_BITS-1:0] pending_tags; -`IGNORE_WARNINGS_END + // find duplicate addresses - wire ready_in; - wire stall_in = ~ready_in && req_valid; - - VX_pipe_register #( - .DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + `LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)), - .RESETW (1) - ) req_pipe_reg ( - .clk (clk), - .reset (reset), - .enable (!stall_in), - .data_in ({lsu_req_if.valid, is_dup_load, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_address, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}), - .data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_type, req_rd, req_wb, req_data}) - ); - - // Can accept new request? 
- assign lsu_req_if.ready = ~stall_in; - - wire [`NW_BITS-1:0] rsp_wid; - wire [31:0] rsp_pc; - wire [`NR_BITS-1:0] rsp_rd; - wire rsp_wb; - wire [`LSU_BITS-1:0] rsp_type; - wire rsp_is_dup; - - `UNUSED_VAR (rsp_type) - - reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask; - reg [`NUM_THREADS-1:0] rsp_rem_mask_n; - - reg [`NUM_THREADS-1:0] req_sent_mask; - wire req_sent_all; - - wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr; - wire mbuf_full; - - wire [`NUM_THREADS-1:0][1:0] req_offset, rsp_offset; - for (genvar i = 0; i < `NUM_THREADS; i++) begin - assign req_offset[i] = req_addr[i][1:0]; + for (genvar i = 0; i < 4; ++i) begin + wire [`NUM_THREADS-1:0] addr_matches; + for (genvar j = 0; j < `NUM_THREADS; j++) begin + assign addr_matches[j] = (req_addr_w[i][0] == req_addr_w[i][j]) || ~req_tmask[j]; + end + assign dup_reqs[i] = req_tmask[0] && (& addr_matches); end - wire mbuf_push = (| (dcache_req_if.valid & dcache_req_if.ready)) - && (0 == req_sent_mask) // first submission only - && req_wb; // loads only + // save request metadata into index buffer - wire mbuf_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; - - wire mbuf_pop = mbuf_pop_part && (rsp_rem_mask_n == 0 || rsp_is_dup); - - assign mbuf_raddr = dcache_rsp_if.tag[`DCORE_TAG_ID_BITS-1:0]; + wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr; + wire mbuf_push, mbuf_pop, mbuf_full; + wire [REQ_INFO_WIDTH-1:0] ib_req_info; + wire [`TEX_FILTER_BITS-1:0] ib_req_filter; + wire [`TEX_STRIDE_BITS-1:0] ib_stride; + wire [3:0][`NUM_THREADS-1:0][1:0] ib_align_offs; + wire [3:0] ib_dup_reqs; + assign mbuf_push = req_valid && req_ready; + VX_index_buffer #( - .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1), + .DATAW (REQ_INFO_WIDTH + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * `NUM_THREADS * 2) + 4), .SIZE (`LSUQ_SIZE) ) req_metadata ( .clk (clk), @@ -117,189 +79,135 @@ module VX_tex_memory #( .write_addr (mbuf_waddr), .acquire_slot (mbuf_push), .read_addr (mbuf_raddr), - 
.write_data ({req_wid, req_pc, req_rd, req_wb, req_type, req_offset, req_is_dup}), - .read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}), + .write_data ({req_info, req_filter, req_stride, align_offs, dup_reqs}), + .read_data ({ib_req_info, ib_req_filter, ib_stride, ib_align_offs, ib_dup_reqs}), .release_addr (mbuf_raddr), .release_slot (mbuf_pop), .full (mbuf_full) ); - assign req_sent_all = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask)) - || (req_is_dup && dcache_req_if.ready[0]); + // can take more requests? + assign req_ready = ~mbuf_full; + + // save request addresses into fifo + + wire reqq_empty, reqq_full; + wire reqq_push, reqq_pop; + wire [3:0][`NUM_THREADS-1:0][29:0] q_req_addr; + wire [`LSUQ_ADDR_BITS-1:0] q_ib_waddr; + wire [`NW_BITS-1:0] q_req_wid; + wire [`NUM_THREADS-1:0] q_req_tmask; + wire [31:0] q_req_PC; + wire [`TEX_FILTER_BITS-1:0] q_req_filter; + wire [3:0] q_dup_reqs; + + assign reqq_push = mbuf_push; + + VX_fifo_queue #( + .DATAW (`NUM_THREADS * 4 * 30 + `LSUQ_ADDR_BITS + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + 4), + .SIZE (`LSUQ_SIZE), + .BUFFERED (1) + ) req_queue ( + .clk (clk), + .reset (reset), + .push (reqq_push), + .pop (reqq_pop), + .data_in ({req_addr_w, mbuf_waddr, req_wid, req_tmask, req_PC, req_filter, dup_reqs}), + .data_out ({q_req_addr, q_ib_waddr, q_req_wid, q_req_tmask, q_req_PC, q_req_filter, q_dup_reqs}), + .empty (reqq_empty), + `UNUSED_PIN (full), + `UNUSED_PIN (alm_full), + `UNUSED_PIN (alm_empty), + `UNUSED_PIN (size) + ); + + /////////////////////////////////////////////////////////////////////////// + + wire [`NUM_THREADS-1:0][29:0] texel_addr; + wire texel_valid, texel_sent, last_texel_sent; + wire texel_is_dup; + reg [1:0] texel_idx; always @(posedge clk) begin - if (reset) begin - req_sent_mask <= 0; - end else begin - if (req_sent_all) - req_sent_mask <= 0; - else - req_sent_mask <= req_sent_mask | (dcache_req_if.valid & dcache_req_if.ready); - end - end - - // need 
to hold the acquired tag index until the full request is submitted - reg [`DCORE_TAG_ID_BITS-1:0] req_tag_hold; - wire [`DCORE_TAG_ID_BITS-1:0] req_tag = (0 == req_sent_mask) ? mbuf_waddr : req_tag_hold; - always @(posedge clk) begin - if (mbuf_push) - req_tag_hold <= mbuf_waddr; - end - - assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid; - always @(posedge clk) begin - if (mbuf_push) begin - rsp_rem_mask[mbuf_waddr] <= req_tmask; - pending_tags[mbuf_waddr] <= req_tag; - end - if (mbuf_pop_part) begin - rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n; + if (reset || last_texel_sent) begin + texel_idx <= 0; + end else if (texel_sent) begin + texel_idx <= texel_idx + 1; end end - // wire req_ready_dep = (req_wb && ~mbuf_full) || (~req_wb && st_commit_if.ready); - wire req_ready_dep = (req_wb && ~mbuf_full); + assign texel_valid = ~reqq_empty; + assign texel_addr = q_req_addr[texel_idx]; + assign texel_is_dup = q_dup_reqs[texel_idx]; - wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1}; + wire is_last_texel = (texel_idx == (q_req_filter ? 
3 : 0)); + assign last_texel_sent = texel_sent && is_last_texel; + + assign reqq_pop = last_texel_sent; // DCache Request - reg [`NUM_THREADS-1:0][29:0] mem_req_addr; - reg [`NUM_THREADS-1:0][3:0] mem_req_byteen; - reg [`NUM_THREADS-1:0][31:0] mem_req_data; + reg [`NUM_THREADS-1:0] texel_sent_mask; + wire [`NUM_THREADS-1:0] dcache_req_fire; - always @(*) begin - for (integer i = 0; i < `NUM_THREADS; i++) begin - mem_req_byteen[i] = {4{req_wb}}; - case (`LSU_WSIZE(req_type)) - 0: mem_req_byteen[i][req_offset[i]] = 1; - 1: begin - mem_req_byteen[i][req_offset[i]] = 1; - mem_req_byteen[i][{req_addr[i][1], 1'b1}] = 1; - end - default : mem_req_byteen[i] = {4{1'b1}}; - endcase + assign dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; - mem_req_data[i] = 'x; - case (req_offset[i]) - 1: mem_req_data[i][31:8] = req_data[i][23:0]; - 2: mem_req_data[i][31:16] = req_data[i][15:0]; - 3: mem_req_data[i][31:24] = req_data[i][7:0]; - default: mem_req_data[i] = req_data[i]; - endcase + assign texel_sent = (&(dcache_req_fire | texel_sent_mask | ~q_req_tmask)) + || (texel_is_dup & dcache_req_if.valid[0] & dcache_req_if.ready[0]); - mem_req_addr[i] = req_addr[i][31:2]; + always @(posedge clk) begin + if (reset) begin + texel_sent_mask <= 0; + end else begin + if (texel_sent) + texel_sent_mask <= 0; + else + texel_sent_mask <= texel_sent_mask | (dcache_req_if.valid & dcache_req_if.ready); end end - assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & dup_mask & ~req_sent_mask; - assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}}; - assign dcache_req_if.addr = mem_req_addr; - assign dcache_req_if.byteen = mem_req_byteen; - assign dcache_req_if.data = mem_req_data; + wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~texel_is_dup}}, 1'b1}; + + assign dcache_req_if.valid = {`NUM_THREADS{texel_valid}} & q_req_tmask & dup_mask & ~texel_sent_mask; + assign dcache_req_if.rw = {`NUM_THREADS{1'b0}}; + assign dcache_req_if.addr = texel_addr; + 
assign dcache_req_if.byteen = {`NUM_THREADS{4'b1111}}; + assign dcache_req_if.data = 'x; `ifdef DBG_CACHE_REQ_INFO - assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_wid, req_tag}}}; + assign dcache_req_if.tag = {`NUM_THREADS{q_req_PC, q_req_wid, texel_idx, q_ib_waddr}}; `else - assign dcache_req_if.tag = {`NUM_THREADS{req_tag}}; + assign dcache_req_if.tag = {`NUM_THREADS{texel_idx, q_ib_waddr}}; /* texel_idx must travel in the tag in both build modes: the response path reads it back from the tag */ `endif - - assign ready_in = req_ready_dep && req_sent_all; - // send store commit + // Dcache Response - //wire is_store_rsp = req_valid && ~req_wb && req_sent_all; + reg [3:0][`NUM_THREADS-1:0][31:0] rsp_texels; + reg [`LSUQ_SIZE-1:0][3:0][`NUM_THREADS-1:0] rsp_rem_mask; + wire dcache_rsp_fire; + wire [1:0] rsp_texel_idx; + wire rsp_is_dup; - // assign st_commit_if.valid = is_store_rsp; - // assign st_commit_if.wid = req_wid; - // assign st_commit_if.tmask = req_tmask; - // assign st_commit_if.PC = req_pc; - // assign st_commit_if.rd = 0; - // assign st_commit_if.wb = 0; - // assign st_commit_if.eop = 1'b1; - // assign st_commit_if.data = 0; + assign dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; - // load response formatting - - reg [`NUM_THREADS-1:0][31:0] rsp_data; - wire [`NUM_THREADS-1:0] rsp_tmask; - - for (genvar i = 0; i < `NUM_THREADS; i++) begin - wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]; - - reg [31:0] rsp_data_shifted; - always @(*) begin - rsp_data_shifted[31:16] = src_data[31:16]; - rsp_data_shifted[15:0] = rsp_offset[i][1] ? src_data[31:16] : src_data[15:0]; - rsp_data_shifted[7:0] = rsp_offset[i][0] ? 
rsp_data_shifted[15:8] : rsp_data_shifted[7:0]; - end - - always @(*) begin - case (`LSU_FMT(rsp_type)) - `FMT_B: rsp_data[i] = 32'(signed'(rsp_data_shifted[7:0])); - `FMT_H: rsp_data[i] = 32'(signed'(rsp_data_shifted[15:0])); - `FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[7:0])); - `FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[15:0])); - default: rsp_data[i] = rsp_data_shifted; - endcase - end - end - - assign rsp_tmask = rsp_is_dup ? rsp_rem_mask[mbuf_raddr] : dcache_rsp_if.valid; - - // send load commit - - wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid; - - VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1), - .RESETW (1) - ) rsp_pipe_reg ( - .clk (clk), - .reset (reset), - .enable (!load_rsp_stall), - .data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}), - .data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop}) - ); - - // Can accept new cache response? 
- assign dcache_rsp_if.ready = ~load_rsp_stall; - - // scope registration - `SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & dcache_req_if.ready); - `SCOPE_ASSIGN (dcache_req_wid, req_wid); - `SCOPE_ASSIGN (dcache_req_pc, req_pc); - `SCOPE_ASSIGN (dcache_req_addr, req_addr); - `SCOPE_ASSIGN (dcache_req_rw, ~req_wb); - `SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen); - `SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data); - `SCOPE_ASSIGN (dcache_req_tag, req_tag); - `SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.valid & {`NUM_THREADS{dcache_rsp_if.ready}}); - `SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data); - `SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr); - -`ifdef DBG_PRINT_CORE_DCACHE - always @(posedge clk) begin - if ((| (dcache_req_if.valid & dcache_req_if.ready))) begin - if ((| dcache_req_if.rw)) - $display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h", - $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data); - else - $display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b", - $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup); - end - if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin - $display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b", - $time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data, rsp_is_dup); - end - if (mbuf_full) begin - $write("%t: D$%0d queue-full:", $time, CORE_ID); - for (integer j = 0; j < `LSUQ_SIZE; j++) begin - $write(" tag%0d=%0h", j, pending_tags[j]); - end - $write("\n"); + wire [`NUM_THREADS-1:0] rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr][rsp_texel_idx] & ~dcache_rsp_if.valid; + always @(posedge clk) begin + if ((|dcache_req_fire) && (0 == 
texel_sent_mask)) begin + rsp_rem_mask[q_ib_waddr][texel_idx] <= q_req_tmask; /* request side: initialise the entry of the texel being issued, not the one currently being answered */ + end + if (dcache_rsp_fire) begin + rsp_rem_mask[mbuf_raddr][rsp_texel_idx] <= rsp_rem_mask_n; end end -`endif*/ - + + assign mbuf_raddr = dcache_rsp_if.tag[`LSUQ_ADDR_BITS-1:0]; + + assign rsp_texel_idx = dcache_rsp_if.tag[`LSUQ_ADDR_BITS +: 2]; /* the two texel-index bits sit directly above the index-buffer slot bits in the tag */ + + assign rsp_is_dup = ib_dup_reqs[rsp_texel_idx]; + + assign rsp_tmask = rsp_is_dup ? rsp_rem_mask[mbuf_raddr][rsp_texel_idx]: dcache_rsp_if.valid; + + assign mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n || rsp_is_dup); + endmodule diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index 180f1a16..42e976d2 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -18,8 +18,8 @@ module VX_tex_unit #( VX_tex_rsp_if tex_rsp_if ); - localparam REQ_TAG_WIDTH_A = `TEX_FORMAT_BITS + `NW_BITS + 32 + `NR_BITS + 1; - localparam REQ_TAG_WIDTH_M = (2 * `NUM_THREADS * `FIXED_FRAC) + REQ_TAG_WIDTH_A; + localparam REQ_INFO_WIDTH_A = `TEX_FORMAT_BITS + `NR_BITS + 1; + localparam REQ_INFO_WIDTH_M = (2 * `NUM_THREADS * `FIXED_FRAC) + REQ_INFO_WIDTH_A; `UNUSED_PARAM (CORE_ID) `UNUSED_VAR (reset) @@ -69,23 +69,28 @@ module VX_tex_unit #( // address generation wire mem_req_valid; + wire [`NW_BITS-1:0] mem_req_wid; wire [`NUM_THREADS-1:0] mem_req_tmask; + wire [31:0] mem_req_PC; wire [`TEX_FILTER_BITS-1:0] mem_req_filter; + wire [`TEX_STRIDE_BITS-1:0] mem_req_stride; wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] mem_req_u; wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] mem_req_v; wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr; - wire [REQ_TAG_WIDTH_A-1:0] mem_req_tag; + wire [REQ_INFO_WIDTH_A-1:0] mem_req_info; wire mem_req_ready; wire mem_rsp_valid; + wire [`NW_BITS-1:0] mem_rsp_wid; wire [`NUM_THREADS-1:0] mem_rsp_tmask; + wire [31:0] mem_rsp_PC; wire [`TEX_FILTER_BITS-1:0] mem_rsp_filter; wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data; - wire [REQ_TAG_WIDTH_M-1:0] mem_rsp_tag; + wire [REQ_INFO_WIDTH_M-1:0] 
mem_rsp_info; wire mem_rsp_ready; VX_tex_addr_gen #( - .REQ_TAG_WIDTH (REQ_TAG_WIDTH_A) + .REQ_INFO_WIDTH (REQ_INFO_WIDTH_A) ) tex_addr_gen ( .clk (clk), .reset (reset), @@ -93,27 +98,33 @@ module VX_tex_unit #( .valid_in (tex_req_if.valid), .ready_in (tex_req_if.ready), + .req_wid (tex_req_if.wid), + .req_tmask (tex_req_if.tmask), + .req_PC (tex_req_if.PC), + .req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb}), + .filter (tex_filter[tex_req_if.unit]), .wrap_u (tex_wrap_u[tex_req_if.unit]), - .wrap_v (tex_wrap_v[tex_req_if.unit]), - .req_tmask (tex_req_if.tmask), - .req_tag ({tex_format[tex_req_if.unit], tex_req_if.wid, tex_req_if.PC, tex_req_if.rd, tex_req_if.wb}), - + .wrap_v (tex_wrap_v[tex_req_if.unit]), + .base_addr (tex_addr[tex_req_if.unit]), - .log2_stride (tex_stride[tex_req_if.unit]), - .log2_width (tex_width[tex_req_if.unit]), - .log2_height (tex_height[tex_req_if.unit]), + .log_stride (tex_stride[tex_req_if.unit]), + .log_width (tex_width[tex_req_if.unit]), + .log_height (tex_height[tex_req_if.unit]), .coord_u (tex_req_if.u), .coord_v (tex_req_if.v), .lod (tex_req_if.lod), - .mem_req_valid (mem_req_valid), - .mem_req_tmask (mem_req_tmask), + .mem_req_valid (mem_req_valid), + .mem_req_wid (mem_req_wid), + .mem_req_tmask (mem_req_tmask), + .mem_req_PC (mem_req_PC), .mem_req_filter (mem_req_filter), + .mem_req_stride (mem_req_stride), .mem_req_u (mem_req_u), .mem_req_v (mem_req_v), - .mem_req_tag (mem_req_tag), + .mem_req_info (mem_req_info), .mem_req_addr (mem_req_addr), .mem_req_ready (mem_req_ready) ); @@ -121,8 +132,8 @@ module VX_tex_unit #( // retrieve texel values from memory VX_tex_memory #( - .CORE_ID (CORE_ID), - .REQ_TAG_WIDTH (REQ_TAG_WIDTH_M) + .CORE_ID (CORE_ID), + .REQ_INFO_WIDTH (REQ_INFO_WIDTH_M) ) tex_memory ( .clk (clk), .reset (reset), @@ -133,18 +144,23 @@ module VX_tex_unit #( // inputs .req_valid (mem_req_valid), + .req_wid (mem_req_wid), .req_tmask (mem_req_tmask), + .req_PC (mem_req_PC), 
.req_filter(mem_req_filter), + .req_stride(mem_req_stride), .req_addr (mem_req_addr), - .req_tag ({mem_req_u, mem_req_v, mem_req_tag}), + .req_info ({mem_req_u, mem_req_v, mem_req_info}), .req_ready (mem_req_ready), // outputs .rsp_valid (mem_rsp_valid), + .rsp_wid (mem_rsp_wid), .rsp_tmask (mem_rsp_tmask), + .rsp_PC (mem_rsp_PC), .rsp_filter(mem_rsp_filter), .rsp_data (mem_rsp_data), - .rsp_tag (mem_rsp_tag), + .rsp_info (mem_rsp_info), .rsp_ready (mem_rsp_ready) ); @@ -153,12 +169,10 @@ module VX_tex_unit #( wire [`TEX_FORMAT_BITS-1:0] rsp_format; wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] rsp_u; wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] rsp_v; - wire [`NW_BITS-1:0] rsp_wid; - wire [31:0] rsp_PC; wire [`NR_BITS-1:0] rsp_rd; wire rsp_wb; - assign {rsp_format, rsp_u, rsp_v, rsp_wid, rsp_PC, rsp_rd, rsp_wb} = mem_rsp_tag; + assign {rsp_u, rsp_v, rsp_format, rsp_rd, rsp_wb} = mem_rsp_info; /* unpack in the same field order that req_info is packed (u, v, then format/rd/wb); assumes VX_tex_memory hands req_info back unchanged, TODO confirm */ VX_tex_sampler #( .CORE_ID (CORE_ID) @@ -168,14 +182,14 @@ module VX_tex_unit #( // inputs .req_valid (mem_rsp_valid), + .req_wid (mem_rsp_wid), .req_tmask (mem_rsp_tmask), + .req_PC (mem_rsp_PC), .req_texels (mem_rsp_data), .req_filter (mem_rsp_filter), .req_format (rsp_format), .req_u (rsp_u), - .req_v (rsp_v), - .req_wid (rsp_wid), - .req_PC (rsp_PC), + .req_v (rsp_v), .req_rd (rsp_rd), .req_wb (rsp_wb), .req_ready (mem_rsp_ready),