From 4683def6dd45c49a4d8d10e66d1f4abae8d66048 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 4 Apr 2021 07:59:43 -0700 Subject: [PATCH] timing fixes --- hw/rtl/tex_unit/VX_tex_addr.v | 74 ++++++++++++++++++++------------ hw/rtl/tex_unit/VX_tex_define.vh | 3 +- hw/rtl/tex_unit/VX_tex_lerp.v | 30 +++---------- hw/rtl/tex_unit/VX_tex_sampler.v | 54 ++++++++++++++++------- hw/rtl/tex_unit/VX_tex_unit.v | 12 +++--- hw/syn/quartus/project.tcl | 2 +- 6 files changed, 99 insertions(+), 76 deletions(-) diff --git a/hw/rtl/tex_unit/VX_tex_addr.v b/hw/rtl/tex_unit/VX_tex_addr.v index ca665f43..c1d727a7 100644 --- a/hw/rtl/tex_unit/VX_tex_addr.v +++ b/hw/rtl/tex_unit/VX_tex_addr.v @@ -27,8 +27,8 @@ module VX_tex_addr #( input wire [`TEX_ADDR_BITS-1:0] base_addr, input wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] mip_offsets, - input wire [`NUM_THREADS-1:0][`TEX_WIDTH_BITS-1:0] log_widths, - input wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] log_heights, + input wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_widths, + input wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_heights, input wire [`NUM_THREADS-1:0][31:0] coord_u, input wire [`NUM_THREADS-1:0][31:0] coord_v, @@ -50,9 +50,20 @@ module VX_tex_addr #( `UNUSED_PARAM (CORE_ID) - wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_u; - wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_v; - wire [`TEX_STRIDE_BITS-1:0] log_stride; + wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_u, clamped_v, clamped_u_s0, clamped_v_s0; + wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0; + wire [`NUM_THREADS-1:0][31:0] mip_addr, mip_addr_s0; + + wire valid_in_s0; + wire [`NW_BITS-1:0] req_wid_s0; + wire [`NUM_THREADS-1:0] req_tmask_s0; + wire [31:0] req_PC_s0; + wire [REQ_INFO_WIDTH-1:0] req_info_s0; + wire [`TEX_FILTER_BITS-1:0] filter_s0; + wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_widths_s0; + wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_heights_s0; + + wire stall_out; // stride @@ -66,9 +77,7 @@ module
VX_tex_addr #( // addressing mode for (genvar i = 0; i < `NUM_THREADS; ++i) begin - - wire [31:0] fu[1:0]; - wire [31:0] fv[1:0]; + wire [1:0][31:0] fu, fv; assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0); assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_widths[i]) : 0); @@ -107,47 +116,56 @@ module VX_tex_addr #( .coord_i (fv[1]), .coord_o (clamped_v[i][1]) ); + + assign mip_addr[i] = base_addr + 32'(mip_offsets[i]); end + + VX_pipe_register #( + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + `NUM_THREADS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)), + .RESETW (1) + ) pipe_reg0 ( + .clk (clk), + .reset (reset), + .enable (~stall_out), + .data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, req_info, log_widths, log_heights, mip_addr, clamped_u, clamped_v}), + .data_out ({valid_in_s0, req_wid_s0, req_tmask_s0, req_PC_s0, filter_s0, log_stride_s0, req_info_s0, log_widths_s0, log_heights_s0, mip_addr_s0, clamped_u_s0, clamped_v_s0}) + ); // addresses generation + wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] blend_u, blend_v; wire [`NUM_THREADS-1:0][3:0][31:0] addr; for (genvar i = 0; i < `NUM_THREADS; ++i) begin + wire [1:0][`FIXED_INT-1:0] x, y; - wire [`FIXED_FRAC-1:0] x [1:0]; - wire [`FIXED_FRAC-1:0] y [1:0]; + assign x[0] = `FIXED_INT'(clamped_u_s0[i][0] >> ((`FIXED_FRAC) - log_widths_s0[i])); + assign x[1] = `FIXED_INT'(clamped_u_s0[i][1] >> ((`FIXED_FRAC) - log_widths_s0[i])); + assign y[0] = `FIXED_INT'(clamped_v_s0[i][0] >> ((`FIXED_FRAC) - log_heights_s0[i])); + assign y[1] = `FIXED_INT'(clamped_v_s0[i][1] >> ((`FIXED_FRAC) - log_heights_s0[i])); - assign x[0] = clamped_u[i][0] >> ((`FIXED_FRAC) - log_widths[i]); - assign x[1] = clamped_u[i][1] >> ((`FIXED_FRAC) - log_widths[i]); - - assign y[0] = clamped_v[i][0] >> ((`FIXED_FRAC) - log_heights[i]); - assign y[1] = clamped_v[i][1] >> ((`FIXED_FRAC) - log_heights[i]); - - assign addr[i][0] = base_addr + 
32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[0]) << log_widths[i])) << log_stride; - assign addr[i][1] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[0]) << log_widths[i])) << log_stride; - assign addr[i][2] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[1]) << log_widths[i])) << log_stride; - assign addr[i][3] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[1]) << log_widths[i])) << log_stride; + assign addr[i][0] = mip_addr_s0[i] + (32'(x[0]) + (32'(y[0]) << log_widths_s0[i])) << log_stride_s0; + assign addr[i][1] = mip_addr_s0[i] + (32'(x[1]) + (32'(y[0]) << log_widths_s0[i])) << log_stride_s0; + assign addr[i][2] = mip_addr_s0[i] + (32'(x[0]) + (32'(y[1]) << log_widths_s0[i])) << log_stride_s0; + assign addr[i][3] = mip_addr_s0[i] + (32'(x[1]) + (32'(y[1]) << log_widths_s0[i])) << log_stride_s0; end - wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] blend_u, blend_v; - for (genvar i = 0; i < `NUM_THREADS; ++i) begin - assign blend_u[i] = clamped_u[i][0][`BLEND_FRAC-1:0]; - assign blend_v[i] = clamped_v[i][0][`BLEND_FRAC-1:0]; + assign blend_u[i] = clamped_u_s0[i][0][`BLEND_FRAC-1:0]; + assign blend_v[i] = clamped_v_s0[i][0][`BLEND_FRAC-1:0]; end - wire stall_out = rsp_valid && ~rsp_ready; + assign stall_out = rsp_valid && ~rsp_ready; VX_pipe_register #( .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (`NUM_THREADS * 4 * 32) + (2*`NUM_THREADS * `BLEND_FRAC) + REQ_INFO_WIDTH), .RESETW (1) - ) pipe_reg ( + ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, blend_u, blend_v, req_info}), - .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_stride, rsp_addr, rsp_blend_u, rsp_blend_v, rsp_info}) + .data_in ({valid_in_s0, req_wid_s0, req_tmask_s0, req_PC_s0, filter_s0, log_stride_s0, addr, blend_u, blend_v, req_info_s0}), + .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_stride, rsp_addr, 
rsp_blend_u, rsp_blend_v, rsp_info}) ); assign ready_in = ~stall_out; diff --git a/hw/rtl/tex_unit/VX_tex_define.vh b/hw/rtl/tex_unit/VX_tex_define.vh index 380ee659..e88d3fb1 100644 --- a/hw/rtl/tex_unit/VX_tex_define.vh +++ b/hw/rtl/tex_unit/VX_tex_define.vh @@ -14,8 +14,7 @@ `define TEX_ADDR_BITS 32 `define TEX_FORMAT_BITS 3 `define TEX_WRAP_BITS 2 -`define TEX_WIDTH_BITS 4 -`define TEX_HEIGHT_BITS 4 +`define TEX_DIM_BITS 4 `define TEX_FILTER_BITS 1 `define TEX_MIPOFF_BITS (2*12+1) diff --git a/hw/rtl/tex_unit/VX_tex_lerp.v b/hw/rtl/tex_unit/VX_tex_lerp.v index d2af2850..07a6afb4 100644 --- a/hw/rtl/tex_unit/VX_tex_lerp.v +++ b/hw/rtl/tex_unit/VX_tex_lerp.v @@ -7,29 +7,11 @@ module VX_tex_lerp #( input wire [31:0] in2, output wire [31:0] out ); - wire [63:0] in1_w, in2_w; - wire [63:0] lerp1, lerp2; - - `UNUSED_VAR (lerp1) - `UNUSED_VAR (lerp2) - - assign in1_w[15:00] = {8'h00, in1[07:00]}; - assign in1_w[31:16] = {8'h00, in1[15:08]}; - assign in1_w[47:32] = {8'h00, in1[23:16]}; - assign in1_w[63:48] = {8'h00, in1[31:24]}; - - assign in2_w[15:00] = {8'h00, in2[07:00]}; - assign in2_w[31:16] = {8'h00, in2[15:08]}; - assign in2_w[47:32] = {8'h00, in2[23:16]}; - assign in2_w[63:48] = {8'h00, in2[31:24]}; - - assign lerp1 = (in2_w - in1_w) * blend; - - assign lerp2 = in1_w + {8'h00,lerp1[63:56], 8'h00,lerp1[47:40], 8'h00,lerp1[31:24], 8'h00,lerp1[15:8]}; - - assign out[07:00] = lerp2[07:00]; - assign out[15:08] = lerp2[23:16]; - assign out[23:16] = lerp2[39:32]; - assign out[31:24] = lerp2[55:48]; + for (genvar i = 0; i < 4; ++i) begin + wire [8:0] m1 = (8'hff - blend); + wire [16:0] sum = in1[i*8+:8] * blend + in2[i*8+:8] * m1; + `UNUSED_VAR (sum) + assign out[i*8+:8] = sum[15:8]; + end endmodule \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_sampler.v b/hw/rtl/tex_unit/VX_tex_sampler.v index 328511d6..f912631c 100644 --- a/hw/rtl/tex_unit/VX_tex_sampler.v +++ b/hw/rtl/tex_unit/VX_tex_sampler.v @@ -32,15 +32,25 @@ module VX_tex_sampler #( ); 
`UNUSED_PARAM (CORE_ID) - - wire [`NUM_THREADS-1:0][31:0] result; + + wire [`NUM_THREADS-1:0][31:0] texel_ul, texel_uh; + wire [`NUM_THREADS-1:0][31:0] texel_ul_s0, texel_uh_s0; + wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] blend_v_qual, blend_v_s0; + wire [`NUM_THREADS-1:0][31:0] texel_v; + + wire req_valid_s0; + wire [`NW_BITS-1:0] req_wid_s0; + wire [`NUM_THREADS-1:0] req_tmask_s0; + wire [31:0] req_PC_s0; + wire [`NR_BITS-1:0] req_rd_s0; + wire req_wb_s0; wire stall_out; for (genvar i = 0; i < `NUM_THREADS; i++) begin - wire [3:0][31:0] fmt_texels; - wire [31:0] texel_ul, texel_uh, texel_v; + wire [3:0][31:0] fmt_texels; + wire [31:0] texel_ul_unqual; for (genvar j = 0; j < 4; j++) begin VX_tex_format #( @@ -57,7 +67,7 @@ module VX_tex_sampler #( .blend (req_blend_u[i]), .in1 (fmt_texels[0]), .in2 (fmt_texels[1]), - .out (texel_ul) + .out (texel_ul_unqual) ); VX_tex_lerp #( @@ -65,18 +75,32 @@ module VX_tex_sampler #( .blend (req_blend_u[i]), .in1 (fmt_texels[2]), .in2 (fmt_texels[3]), - .out (texel_uh) + .out (texel_uh[i]) ); + assign blend_v_qual[i] = req_filter ? `BLEND_FRAC'(0) : req_blend_v[i]; + assign texel_ul[i] = req_filter ? fmt_texels[0] : texel_ul_unqual; + end + + VX_pipe_register #( + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * `BLEND_FRAC) + (2 * `NUM_THREADS * 32)), + .RESETW (1) + ) pipe_reg0 ( + .clk (clk), + .reset (reset), + .enable (~stall_out), + .data_in ({req_valid, req_wid, req_tmask, req_PC, req_rd, req_wb, blend_v_qual, texel_ul, texel_uh}), + .data_out ({req_valid_s0, req_wid_s0, req_tmask_s0, req_PC_s0, req_rd_s0, req_wb_s0, blend_v_s0, texel_ul_s0, texel_uh_s0}) + ); + + for (genvar i = 0; i < `NUM_THREADS; i++) begin VX_tex_lerp #( ) tex_lerp_v ( - .blend (req_blend_v[i]), - .in1 (texel_ul), - .in2 (texel_uh), - .out (texel_v) + .blend (blend_v_s0[i]), + .in1 (texel_ul_s0[i]), + .in2 (texel_uh_s0[i]), + .out (texel_v[i]) ); - - assign result[i] = req_filter ? 
texel_v : fmt_texels[0]; end assign stall_out = rsp_valid && ~rsp_ready; @@ -84,12 +108,12 @@ module VX_tex_sampler #( VX_pipe_register #( .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), .RESETW (1) - ) pipe_reg ( + ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({req_valid, req_wid, req_tmask, req_PC, req_rd, req_wb, result}), - .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}) + .data_in ({req_valid_s0, req_wid_s0, req_tmask_s0, req_PC_s0, req_rd_s0, req_wb_s0, texel_v}), + .data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}) ); // can accept new request? diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index da52f07c..93230bad 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -25,8 +25,8 @@ module VX_tex_unit #( `UNUSED_VAR (reset) reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; - reg [`TEX_WIDTH_BITS-1:0] tex_width [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; - reg [`TEX_HEIGHT_BITS-1:0] tex_height [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; + reg [`TEX_DIM_BITS-1:0] tex_width [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; + reg [`TEX_DIM_BITS-1:0] tex_height [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0]; reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0]; @@ -58,10 +58,10 @@ module VX_tex_unit #( tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0]; end `CSR_TEX_WIDTH(i) : begin - tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0]; + tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; end `CSR_TEX_HEIGHT(i) : begin - tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0]; + tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; end default: assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0) @@ 
-74,8 +74,8 @@ module VX_tex_unit #( // mipmap attributes wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] tex_mipoffs; - wire [`NUM_THREADS-1:0][`TEX_WIDTH_BITS-1:0] tex_widths; - wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] tex_heights; + wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] tex_widths; + wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] tex_heights; for (genvar i = 0; i < `NUM_THREADS; ++i) begin wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0]; diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index fccc5439..9c9b6ca2 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -45,7 +45,7 @@ set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON #set_global_assignment -name USE_HIGH_SPEED_ADDER ON #set_global_assignment -name MUX_RESTRUCTURE ON -#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED +set_global_assignment -name OPTIMIZATION_TECHNIQUE AREA #set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" #set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS #set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0