diff --git a/hw/rtl/tex_unit/VX_tex_addr.v b/hw/rtl/tex_unit/VX_tex_addr.v index dda0b297..ca665f43 100644 --- a/hw/rtl/tex_unit/VX_tex_addr.v +++ b/hw/rtl/tex_unit/VX_tex_addr.v @@ -35,17 +35,17 @@ module VX_tex_addr #( // outputs - output wire mem_req_valid, - output wire [`NW_BITS-1:0] mem_req_wid, - output wire [`NUM_THREADS-1:0] mem_req_tmask, - output wire [31:0] mem_req_PC, - output wire [`TEX_FILTER_BITS-1:0] mem_req_filter, - output wire [`TEX_STRIDE_BITS-1:0] mem_req_stride, - output wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr, - output wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] mem_req_u, - output wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] mem_req_v, - output wire [REQ_INFO_WIDTH-1:0] mem_req_info, - input wire mem_req_ready + output wire rsp_valid, + output wire [`NW_BITS-1:0] rsp_wid, + output wire [`NUM_THREADS-1:0] rsp_tmask, + output wire [31:0] rsp_PC, + output wire [`TEX_FILTER_BITS-1:0] rsp_filter, + output wire [`TEX_STRIDE_BITS-1:0] rsp_stride, + output wire [`NUM_THREADS-1:0][3:0][31:0] rsp_addr, + output wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] rsp_blend_u, + output wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] rsp_blend_v, + output wire [REQ_INFO_WIDTH-1:0] rsp_info, + input wire rsp_ready ); `UNUSED_PARAM (CORE_ID) @@ -130,34 +130,34 @@ module VX_tex_addr #( assign addr[i][3] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[1]) << log_widths[i])) << log_stride; end - wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] u0, v0; + wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] blend_u, blend_v; for (genvar i = 0; i < `NUM_THREADS; ++i) begin - assign u0[i] = clamped_u[i][0]; - assign v0[i] = clamped_v[i][0]; + assign blend_u[i] = clamped_u[i][0][`BLEND_FRAC-1:0]; + assign blend_v[i] = clamped_v[i][0][`BLEND_FRAC-1:0]; end - wire stall_out = mem_req_valid && ~mem_req_ready; + wire stall_out = rsp_valid && ~rsp_ready; VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (`NUM_THREADS * 4 * 32) + (2*`NUM_THREADS 
* `FIXED_FRAC) + REQ_INFO_WIDTH), + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (`NUM_THREADS * 4 * 32) + (2*`NUM_THREADS * `BLEND_FRAC) + REQ_INFO_WIDTH), .RESETW (1) ) pipe_reg ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, u0, v0, req_info}), - .data_out ({mem_req_valid, mem_req_wid, mem_req_tmask, mem_req_PC, mem_req_filter, mem_req_stride, mem_req_addr, mem_req_u, mem_req_v, mem_req_info}) + .data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, blend_u, blend_v, req_info}), + .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_stride, rsp_addr, rsp_blend_u, rsp_blend_v, rsp_info}) ); assign ready_in = ~stall_out; `ifdef DBG_PRINT_TEX always @(posedge clk) begin - if (mem_req_valid && mem_req_ready) begin - $write("%t: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, filter=%0d, tride=%0d, addr=", - $time, CORE_ID, mem_req_wid, mem_req_PC, mem_req_tmask, mem_req_filter, mem_req_stride); - `PRINT_ARRAY2D(mem_req_addr, 4, `NUM_THREADS); + if (rsp_valid && rsp_ready) begin + $write("%t: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, filter=%0d, stride=%0d, addr=", + $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_stride); + `PRINT_ARRAY2D(rsp_addr, 4, `NUM_THREADS); $write("\n"); end end diff --git a/hw/rtl/tex_unit/VX_tex_memory.v b/hw/rtl/tex_unit/VX_tex_memory.v index a77293ac..67eb0ac3 100644 --- a/hw/rtl/tex_unit/VX_tex_memory.v +++ b/hw/rtl/tex_unit/VX_tex_memory.v @@ -273,9 +273,15 @@ module VX_tex_memory #( `PRINT_ARRAY1D(rsp_data_qual, `NUM_THREADS); $write("\n"); end + if (req_valid && req_ready) begin + $write("%t: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, stride=%0d, addr=", + $time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_stride); + `PRINT_ARRAY2D(req_addr, 4, `NUM_THREADS); + $write("\n"); + end if (rsp_valid && rsp_ready) begin - $write("%t: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, filter=%0d, data=", - $time, CORE_ID, rsp_wid, rsp_PC, rsp_filter); + 
$write("%t: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, tmask=%b, filter=%0d, data=", + $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter); `PRINT_ARRAY2D(rsp_data, 4, `NUM_THREADS); $write("\n"); end diff --git a/hw/rtl/tex_unit/VX_tex_sampler.v b/hw/rtl/tex_unit/VX_tex_sampler.v index d8cb2ff9..8f8d04c5 100644 --- a/hw/rtl/tex_unit/VX_tex_sampler.v +++ b/hw/rtl/tex_unit/VX_tex_sampler.v @@ -15,9 +15,9 @@ module VX_tex_sampler #( input wire req_wb, input wire [`TEX_FILTER_BITS-1:0] req_filter, input wire [`TEX_FORMAT_BITS-1:0] req_format, - input wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] req_u, - input wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] req_v, - input wire [`NUM_THREADS-1:0][3:0][31:0] req_texels, + input wire [`NUM_THREADS-1:0][3:0][31:0] req_data, + input wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] req_blend_u, + input wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] req_blend_v, output wire req_ready, // ouputs @@ -33,7 +33,7 @@ module VX_tex_sampler #( `UNUSED_PARAM (CORE_ID) - wire [`NUM_THREADS-1:0][31:0] req_data; + wire [`NUM_THREADS-1:0][31:0] result; wire stall_out; @@ -42,22 +42,19 @@ module VX_tex_sampler #( wire [3:0][31:0] fmt_texels; wire [31:0] texel_ul, texel_uh, texel_v; - wire [`BLEND_FRAC-1:0] blend_u = req_u[i][`BLEND_FRAC-1:0]; - wire [`BLEND_FRAC-1:0] blend_v = req_v[i][`BLEND_FRAC-1:0]; - for (genvar j = 0; j < 4; j++) begin VX_tex_format #( .CORE_ID (CORE_ID) ) tex_format ( .format (req_format), - .texel_in (req_texels[i][j]), + .texel_in (req_data[i][j]), .texel_out (fmt_texels[j]) ); end VX_tex_lerp #( ) tex_lerp_ul ( - .blend (blend_u), + .blend (req_blend_u[i]), .in1 (fmt_texels[0]), .in2 (fmt_texels[1]), .out (texel_ul) @@ -65,7 +62,7 @@ module VX_tex_sampler #( VX_tex_lerp #( ) tex_lerp_uh ( - .blend (blend_u), + .blend (req_blend_u[i]), .in1 (fmt_texels[2]), .in2 (fmt_texels[3]), .out (texel_uh) @@ -73,13 +70,13 @@ module VX_tex_sampler #( VX_tex_lerp #( ) tex_lerp_v ( - .blend (blend_v), + .blend (req_blend_v[i]), .in1 (texel_ul), .in2 
(texel_uh), .out (texel_v) ); - assign req_data[i] = req_filter ? texel_v : fmt_texels[0]; + assign result[i] = req_filter ? texel_v : fmt_texels[0]; end assign stall_out = rsp_valid && ~rsp_ready; @@ -91,11 +88,33 @@ module VX_tex_sampler #( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({req_valid, req_wid, req_tmask, req_PC, req_rd, req_wb, req_data}), + .data_in ({req_valid, req_wid, req_tmask, req_PC, req_rd, req_wb, result}), .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}) ); // can accept new request? - assign req_ready = ~stall_out; + assign req_ready = ~stall_out; + +`ifdef DBG_PRINT_TEX + /* trace: req_data carries 4 texels per thread (2D print); req_blend_u, req_blend_v and rsp_data (registered copy of result) carry one value per thread (1D print) */ + always @(posedge clk) begin + if (req_valid && req_ready) begin + $write("%t: core%0d-sampler-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, format=%0d, data=", + $time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_format); + `PRINT_ARRAY2D(req_data, 4, `NUM_THREADS); + $write("u0="); + `PRINT_ARRAY1D(req_blend_u, `NUM_THREADS); + $write("v0="); + `PRINT_ARRAY1D(req_blend_v, `NUM_THREADS); + $write("\n"); + end + if (rsp_valid && rsp_ready) begin + $write("%t: core%0d-sampler-rsp: wid=%0d, PC=%0h, tmask=%b, data=", + $time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask); + `PRINT_ARRAY1D(rsp_data, `NUM_THREADS); + $write("\n"); + end + end +`endif endmodule \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index 921c49c2..2fe8a42f 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -19,7 +19,7 @@ module VX_tex_unit #( ); localparam REQ_INFO_WIDTH_A = `TEX_FORMAT_BITS + `NR_BITS + 1; - localparam REQ_INFO_WIDTH_M = (2 * `NUM_THREADS * `FIXED_FRAC) + REQ_INFO_WIDTH_A; + localparam REQ_INFO_WIDTH_M = (2 * `NUM_THREADS * `BLEND_FRAC) + REQ_INFO_WIDTH_A; `UNUSED_PARAM (CORE_ID) `UNUSED_VAR (reset) @@ -93,8 +93,8 @@ module VX_tex_unit #( wire [31:0] mem_req_PC; wire [`TEX_FILTER_BITS-1:0] mem_req_filter; wire [`TEX_STRIDE_BITS-1:0] mem_req_stride; - wire 
[`NUM_THREADS-1:0][`FIXED_FRAC-1:0] mem_req_u; - wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] mem_req_v; + wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] mem_req_blend_u; + wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] mem_req_blend_v; wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr; wire [REQ_INFO_WIDTH_A-1:0] mem_req_info; wire mem_req_ready; @@ -111,41 +111,41 @@ module VX_tex_unit #( VX_tex_addr #( .REQ_INFO_WIDTH (REQ_INFO_WIDTH_A) ) tex_addr ( - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), - .valid_in (tex_req_if.valid), - .ready_in (tex_req_if.ready), + .valid_in (tex_req_if.valid), + .ready_in (tex_req_if.ready), - .req_wid (tex_req_if.wid), - .req_tmask (tex_req_if.tmask), - .req_PC (tex_req_if.PC), - .req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb}), + .req_wid (tex_req_if.wid), + .req_tmask (tex_req_if.tmask), + .req_PC (tex_req_if.PC), + .req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb}), - .format (tex_format[tex_req_if.unit]), - .filter (tex_filter[tex_req_if.unit]), - .wrap_u (tex_wrap_u[tex_req_if.unit]), - .wrap_v (tex_wrap_v[tex_req_if.unit]), + .format (tex_format[tex_req_if.unit]), + .filter (tex_filter[tex_req_if.unit]), + .wrap_u (tex_wrap_u[tex_req_if.unit]), + .wrap_v (tex_wrap_v[tex_req_if.unit]), - .base_addr (tex_baddr[tex_req_if.unit]), - .mip_offsets (tex_mipoffs), - .log_widths (tex_widths), - .log_heights (tex_heights), + .base_addr (tex_baddr[tex_req_if.unit]), + .mip_offsets(tex_mipoffs), + .log_widths (tex_widths), + .log_heights(tex_heights), - .coord_u (tex_req_if.u), - .coord_v (tex_req_if.v), + .coord_u (tex_req_if.u), + .coord_v (tex_req_if.v), - .mem_req_valid (mem_req_valid), - .mem_req_wid (mem_req_wid), - .mem_req_tmask (mem_req_tmask), - .mem_req_PC (mem_req_PC), - .mem_req_filter (mem_req_filter), - .mem_req_stride (mem_req_stride), - .mem_req_addr (mem_req_addr), - .mem_req_u (mem_req_u), - .mem_req_v (mem_req_v), - .mem_req_info (mem_req_info), - .mem_req_ready 
(mem_req_ready) + .rsp_valid (mem_req_valid), + .rsp_wid (mem_req_wid), + .rsp_tmask (mem_req_tmask), + .rsp_PC (mem_req_PC), + .rsp_filter (mem_req_filter), + .rsp_stride (mem_req_stride), + .rsp_addr (mem_req_addr), + .rsp_blend_u(mem_req_blend_u), + .rsp_blend_v(mem_req_blend_v), + .rsp_info (mem_req_info), + .rsp_ready (mem_req_ready) ); // retrieve texel values from memory @@ -168,7 +168,7 @@ module VX_tex_unit #( .req_filter(mem_req_filter), .req_stride(mem_req_stride), .req_addr (mem_req_addr), - .req_info ({mem_req_u, mem_req_v, mem_req_info}), + .req_info ({mem_req_blend_u, mem_req_blend_v, mem_req_info}), .req_ready (mem_req_ready), // outputs @@ -184,13 +184,12 @@ module VX_tex_unit #( // apply sampler + wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] rsp_blend_u, rsp_blend_v; wire [`TEX_FORMAT_BITS-1:0] rsp_format; - wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] rsp_u; - wire [`NUM_THREADS-1:0][`FIXED_FRAC-1:0] rsp_v; wire [`NR_BITS-1:0] rsp_rd; wire rsp_wb; - assign {rsp_u, rsp_v, rsp_format, rsp_rd, rsp_wb} = mem_rsp_info; + assign {rsp_blend_u, rsp_blend_v, rsp_format, rsp_rd, rsp_wb} = mem_rsp_info; VX_tex_sampler #( .CORE_ID (CORE_ID) @@ -203,11 +202,11 @@ module VX_tex_unit #( .req_wid (mem_rsp_wid), .req_tmask (mem_rsp_tmask), .req_PC (mem_rsp_PC), - .req_texels (mem_rsp_data), + .req_data (mem_rsp_data), .req_filter (mem_rsp_filter), .req_format (rsp_format), - .req_u (rsp_u), - .req_v (rsp_v), + .req_blend_u(rsp_blend_u), + .req_blend_v(rsp_blend_v), .req_rd (rsp_rd), .req_wb (rsp_wb), .req_ready (mem_rsp_ready),