From 20ae993e5173dd2eb5bdc2901546f67079fb75b6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 20 Mar 2021 10:50:54 -0400 Subject: [PATCH] texunit partial update --- hw/rtl/VX_csr_data.v | 20 ++-- hw/rtl/VX_csr_unit.v | 2 +- hw/rtl/VX_decode.v | 2 +- hw/rtl/VX_define.vh | 4 +- hw/rtl/VX_gpu_unit.v | 18 ++-- hw/rtl/interfaces/VX_gpu_req_if.v | 1 + hw/rtl/interfaces/VX_tex_csr_if.v | 10 +- hw/rtl/interfaces/VX_tex_req_if.v | 2 +- hw/rtl/tex_unit/VX_tex_addr_gen.v | 29 +++--- hw/rtl/tex_unit/VX_tex_clamp.v | 22 ----- hw/rtl/tex_unit/VX_tex_format.v | 2 + hw/rtl/tex_unit/VX_tex_memory.v | 22 +++-- hw/rtl/tex_unit/VX_tex_mgr.v | 19 ---- hw/rtl/tex_unit/VX_tex_pt_addr.v | 40 -------- hw/rtl/tex_unit/VX_tex_sampler.v | 62 +++++++++++- hw/rtl/tex_unit/VX_tex_unit.v | 154 +++++++++++++----------------- hw/rtl/tex_unit/VX_tex_wrap.v | 8 +- 17 files changed, 194 insertions(+), 223 deletions(-) delete mode 100644 hw/rtl/tex_unit/VX_tex_clamp.v delete mode 100644 hw/rtl/tex_unit/VX_tex_mgr.v delete mode 100644 hw/rtl/tex_unit/VX_tex_pt_addr.v diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index a8302f0f..40a042c7 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -26,7 +26,7 @@ module VX_csr_data #( input wire write_enable, input wire[`CSR_ADDR_BITS-1:0] write_addr, input wire[`NW_BITS-1:0] write_wid, - input wire[`CSR_WIDTH-1:0] write_data, + input wire[31:0] write_data, input wire busy ); @@ -63,15 +63,15 @@ module VX_csr_data #( `CSR_FRM: fcsr[write_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0]; `CSR_FCSR: fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0]; - `CSR_SATP: csr_satp <= write_data; - `CSR_MSTATUS: csr_mstatus <= write_data; - `CSR_MEDELEG: csr_medeleg <= write_data; - `CSR_MIDELEG: csr_mideleg <= write_data; - `CSR_MIE: csr_mie <= write_data; - `CSR_MTVEC: csr_mtvec <= write_data; - `CSR_MEPC: csr_mepc <= write_data; - `CSR_PMPCFG0: csr_pmpcfg[0] <= write_data; - `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data; + `CSR_SATP: csr_satp <= write_data[`CSR_WIDTH-1:0]; + `CSR_MSTATUS: csr_mstatus <= write_data[`CSR_WIDTH-1:0]; + `CSR_MEDELEG: csr_medeleg <= write_data[`CSR_WIDTH-1:0]; + `CSR_MIDELEG: csr_mideleg <= write_data[`CSR_WIDTH-1:0]; + `CSR_MIE: csr_mie <= write_data[`CSR_WIDTH-1:0]; + `CSR_MTVEC: csr_mtvec <= write_data[`CSR_WIDTH-1:0]; + `CSR_MEPC: csr_mepc <= write_data[`CSR_WIDTH-1:0]; + `CSR_PMPCFG0: csr_pmpcfg[0] <= write_data[`CSR_WIDTH-1:0]; + `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data[`CSR_WIDTH-1:0]; default: begin if (write_addr < `CSR_TEX_BEGIN(0) || write_addr > `CSR_TEX_BEGIN(`CSR_TEX_STATES)) begin diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index c6e1fbcf..19f36fda 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -76,7 +76,7 @@ module VX_csr_unit #( .write_enable (write_enable), .write_addr (csr_addr_s1), .write_wid (csr_pipe_rsp_if.wid), - .write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]), + .write_data (csr_updated_data_s1), .busy (busy) ); diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 2fc3ffcd..b44fb346 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -361,7 +361,7 @@ module VX_decode #( `ifdef EXT_TEX_ENABLE 3'h5: begin op_type = `OP_BITS'(`GPU_TEX); - op_mod = instr[26:25]; + op_mod = `MOD_BITS'(instr[26:25]); use_rd = 1; use_rs1 = 1; use_rs2 = 1; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 1bb0e1af..243a37b3 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -392,11 +392,11 @@ `define NTEX_BITS `LOG2UP(`NUM_TEX_UNITS) `define TEX_ADDR_BITS 32 -`define TEX_FMT_BITS 3 +`define TEX_FORMAT_BITS 3 `define TEX_WRAP_BITS 2 `define TEX_WIDTH_BITS 12 `define TEX_HEIGHT_BITS 12 -`define TEX_STRIDE_BITS 12 +`define TEX_STRIDE_BITS 2 `define TEX_FILTER_BITS 1 //////////////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 86d274e3..aa12002e 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -99,6 +99,8 @@ module VX_gpu_unit #( // texture `ifdef EXT_TEX_ENABLE + + `UNUSED_VAR (gpu_req_if.op_mod) VX_tex_req_if tex_req_if; VX_tex_rsp_if tex_rsp_if; @@ -144,7 +146,13 @@ module VX_gpu_unit #( assign rsp_wb = tex_rsp_if.valid && tex_rsp_if.wb; assign rsp_data = tex_rsp_if.valid ? tex_rsp_if.data : warp_ctl_data; -`else +`else + + `UNUSED_VAR (gpu_req_if.op_mod) + `UNUSED_VAR (gpu_req_if.rs2_data) + `UNUSED_VAR (gpu_req_if.rs3_data) + `UNUSED_VAR (gpu_req_if.wb) + `UNUSED_VAR (gpu_req_if.rd) assign stall_in = stall_out; assign is_warp_ctl = 1; @@ -155,13 +163,7 @@ module VX_gpu_unit #( assign rsp_PC = gpu_req_if.PC; assign rsp_rd = 0; assign rsp_wb = 0; - assign rsp_data = warp_ctl_data; - - `UNUSED_VAR (gpu_req_if.op_mod) - `UNUSED_VAR (gpu_req_if.rs2_data) - `UNUSED_VAR (gpu_req_if.rs3_data) - `UNUSED_VAR (gpu_req_if.wb) - `UNUSED_VAR (gpu_req_if.rd) + assign rsp_data = warp_ctl_data; `endif diff --git a/hw/rtl/interfaces/VX_gpu_req_if.v b/hw/rtl/interfaces/VX_gpu_req_if.v index 8ce8663d..499358d2 100644 --- a/hw/rtl/interfaces/VX_gpu_req_if.v +++ b/hw/rtl/interfaces/VX_gpu_req_if.v @@ -12,6 +12,7 @@ interface VX_gpu_req_if(); wire [31:0] PC; wire [31:0] next_PC; wire [`GPU_BITS-1:0] op_type; + wire [`MOD_BITS-1:0] op_mod; wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs2_data; wire [`NUM_THREADS-1:0][31:0] rs3_data; diff --git a/hw/rtl/interfaces/VX_tex_csr_if.v b/hw/rtl/interfaces/VX_tex_csr_if.v index 02422290..9315a59d 100644 --- a/hw/rtl/interfaces/VX_tex_csr_if.v +++ b/hw/rtl/interfaces/VX_tex_csr_if.v @@ -5,15 +5,9 @@ interface VX_tex_csr_if (); - // wire read_enable; - // wire[`CSR_ADDR_BITS-1:0] read_addr; - // wire[`NW_BITS-1:0] read_wid; - // wire[31:0] read_data; - wire write_enable; - wire[`CSR_ADDR_BITS-1:0] write_addr; - // wire[`NW_BITS-1:0] write_wid; - wire[`CSR_WIDTH-1:0] write_data; + wire [`CSR_ADDR_BITS-1:0] write_addr; + wire [31:0] write_data; endinterface diff --git a/hw/rtl/interfaces/VX_tex_req_if.v b/hw/rtl/interfaces/VX_tex_req_if.v index 48555145..d2430404 100644 --- a/hw/rtl/interfaces/VX_tex_req_if.v +++ b/hw/rtl/interfaces/VX_tex_req_if.v @@ -12,7 +12,7 @@ interface VX_tex_req_if (); wire [`NR_BITS-1:0] rd; wire wb; - wire [`NUM_THREADS-1:0][`NTEX_BITS-1:0] unit; + wire [`NTEX_BITS-1:0] unit; wire [`NUM_THREADS-1:0][31:0] u; wire [`NUM_THREADS-1:0][31:0] v; wire [`NUM_THREADS-1:0][31:0] lod; diff --git a/hw/rtl/tex_unit/VX_tex_addr_gen.v b/hw/rtl/tex_unit/VX_tex_addr_gen.v index 62ab1431..8b0cbe17 100644 --- a/hw/rtl/tex_unit/VX_tex_addr_gen.v +++ b/hw/rtl/tex_unit/VX_tex_addr_gen.v @@ -16,36 +16,43 @@ module VX_tex_addr_gen #( // inputs - output wire [REQ_TAG_WIDTH-1:0] req_tag, + input wire [`NUM_THREADS-1:0] req_tmask, + input wire [REQ_TAG_WIDTH-1:0] req_tag, + input wire [`TEX_FILTER_BITS-1:0] filter, input wire [`TEX_WRAP_BITS-1:0] wrap_u, input wire [`TEX_WRAP_BITS-1:0] wrap_v, input wire [`TEX_ADDR_BITS-1:0] base_addr, - input wire [1:0] log2_stride, + input wire [`TEX_STRIDE_BITS-1:0] log2_stride, input wire [`TEX_WIDTH_BITS-1:0] log2_width, input wire [`TEX_HEIGHT_BITS-1:0] log2_height, - input wire [3:0] lod, - - input wire [31:0] coord_u, - input wire [31:0] coord_v, + + input wire [`NUM_THREADS-1:0][31:0] coord_u, + input wire [`NUM_THREADS-1:0][31:0] coord_v, + input wire [`NUM_THREADS-1:0][31:0] lod, // outputs - output wire [3:0] mem_req_valid, + output wire mem_req_valid, + output wire [`NUM_THREADS-1:0] mem_req_tmask, + output wire [`TEX_FILTER_BITS-1:0] mem_req_filter, output wire [REQ_TAG_WIDTH-1:0] mem_req_tag, - output wire [3:0][31:0] mem_req_addr, + output wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr, input wire mem_req_ready ); - `UNUSED_VAR (filter) + `UNUSED_PARAM (CORE_ID) + + /*`UNUSED_VAR (filter) `UNUSED_VAR (lod) wire [31:0] u, y; wire [31:0] x_offset, y_offset; wire [31:0] addr0; - // addressing mode + // addressing mode + assign x_offset = u >> (5'(FRAC_BITS) - log2_width); assign y_offset = v >> (5'(FRAC_BITS) - log2_height); @@ -65,6 +72,6 @@ module VX_tex_addr_gen #( .data_out ({mem_req_valid, mem_req_addr, mem_req_tag}) ); - assign ready_in = ~stall_out; + assign ready_in = ~stall_out;*/ endmodule \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_clamp.v b/hw/rtl/tex_unit/VX_tex_clamp.v deleted file mode 100644 index 84efd086..00000000 --- a/hw/rtl/tex_unit/VX_tex_clamp.v +++ /dev/null @@ -1,22 +0,0 @@ -`include "VX_define.vh" - -module VX_tex_addr_gen #( - parameter FRAC_BITS = 20, - parameter INT_BITS = 32 - FRAC_BITS -) ( - input wire [`TEX_WRAP_BITS-1:0] wrap_i; - input wire [31:0] coord_i, - input wire [31:0] coord_o -) - - always @(*) begin - case (wrap_i) - `ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; - `ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i]; - `ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i]; - //`ALU_SLL, - default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0]; - endcase - end - -endmodule \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_format.v b/hw/rtl/tex_unit/VX_tex_format.v index 663cf8af..8e713110 100644 --- a/hw/rtl/tex_unit/VX_tex_format.v +++ b/hw/rtl/tex_unit/VX_tex_format.v @@ -3,6 +3,8 @@ module VX_tex_format #( ) ( // TODO ) + `UNUSED_PARAM (CORE_ID) + // TODO endmodule \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_memory.v b/hw/rtl/tex_unit/VX_tex_memory.v index 4b714565..26abf69e 100644 --- a/hw/rtl/tex_unit/VX_tex_memory.v +++ b/hw/rtl/tex_unit/VX_tex_memory.v @@ -1,8 +1,8 @@ `include "VX_define.vh" module VX_tex_memory #( - parameter CORE_ID = 0, - parameter TAG_IN_WIDTH = 1 + parameter CORE_ID = 0, + parameter REQ_TAG_WIDTH = 1 ) ( `SCOPE_IO_VX_lsu_unit @@ -14,21 +14,25 @@ module VX_tex_memory #( VX_dcache_core_rsp_if dcache_rsp_if, // inputs - input wire [3:0] req_valid, - input wire [3:0][31:0] req_addr, - input wire [TAG_IN_WIDTH-1:0] req_tag, + input wire req_valid, + input wire [`NUM_THREADS-1:0] req_tmask, + input wire [`TEX_FILTER_BITS-1:0] req_filter, + input wire [`NUM_THREADS-1:0][3:0][31:0] req_addr, + input wire [REQ_TAG_WIDTH-1:0] req_tag, output wire req_ready, // outputs output wire rsp_valid, - output wire [3:0][31:0] rsp_data, - output wire [TAG_IN_WIDTH-1:0] rsp_tag, + output wire [`NUM_THREADS-1:0] rsp_tmask, + output wire [`TEX_FILTER_BITS-1:0] rsp_filter, + output wire [`NUM_THREADS-1:0][3:0][31:0] rsp_data, + output wire [REQ_TAG_WIDTH-1:0] rsp_tag, input wire rsp_ready ); `UNUSED_PARAM (CORE_ID) - wire req_valid; + /*wire req_valid; wire [`NUM_THREADS-1:0] req_tmask; wire [`NUM_THREADS-1:0][31:0] req_addr; wire [`LSU_BITS-1:0] req_type; @@ -296,6 +300,6 @@ module VX_tex_memory #( $write("\n"); end end -`endif +`endif*/ endmodule diff --git a/hw/rtl/tex_unit/VX_tex_mgr.v b/hw/rtl/tex_unit/VX_tex_mgr.v deleted file mode 100644 index a7de9180..00000000 --- a/hw/rtl/tex_unit/VX_tex_mgr.v +++ /dev/null @@ -1,19 +0,0 @@ -`include "VX_platform.vh" - -module VX_tex_mgr ( - input wire clk, - input wire reset -); - - //-- - -endmodule - - - - - - - - - diff --git a/hw/rtl/tex_unit/VX_tex_pt_addr.v b/hw/rtl/tex_unit/VX_tex_pt_addr.v deleted file mode 100644 index ca0045f0..00000000 --- a/hw/rtl/tex_unit/VX_tex_pt_addr.v +++ /dev/null @@ -1,40 +0,0 @@ -`include "VX_platform.vh" -`include "VX_define.vh" - -module VX_tex_pt_addr #( - parameter FRAC_BITS = 20, - parameter INT_BITS = 32 - FRAC_BITS -) ( - input wire clk, - input wire reset, - - input wire valid_in, - output wire ready_out, - - input wire [`CSR_WIDTH-1:0] tex_addr, - input wire [`CSR_WIDTH-1:0] tex_width, - input wire [`CSR_WIDTH-1:0] tex_height, - - input wire [31:0] tex_u, - input wire [31:0] tex_v, - - output wire [31:0] pt_addr, - - output wire valid_out, - input wire ready_in -); - - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - - reg [31:0] x_offset; - reg [31:0] y_offset; - - assign x_offset = tex_u >> (32'(FRAC_BITS) - tex_width); - assign y_offset = tex_v >> (32'(FRAC_BITS) - tex_height); - assign pt_addr = (tex_addr << (32 - `CSR_WIDTH)) + x_offset + (y_offset << tex_width); - - assign valid_out = valid_in; - assign ready_out = ready_in; - -endmodule \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_sampler.v b/hw/rtl/tex_unit/VX_tex_sampler.v index 06e2fde6..60a1ba4c 100644 --- a/hw/rtl/tex_unit/VX_tex_sampler.v +++ b/hw/rtl/tex_unit/VX_tex_sampler.v @@ -4,9 +4,67 @@ module VX_tex_sampler #( parameter CORE_ID = 0 ) ( input wire clk, - input wire reset + input wire reset, + + // inputs + input wire req_valid, + input wire [`NW_BITS-1:0] req_wid, + input wire [`NUM_THREADS-1:0] req_tmask, + input wire [31:0] req_PC, + input wire [`NR_BITS-1:0] req_rd, + input wire req_wb, + input wire [`TEX_FILTER_BITS-1:0] req_filter, + input wire [`TEX_FORMAT_BITS-1:0] req_format, + input wire [`NUM_THREADS-1:0][3:0][31:0] req_texels, + output wire req_ready, + + // ouputs + output wire rsp_valid, + output wire [`NW_BITS-1:0] rsp_wid, + output wire [`NUM_THREADS-1:0] rsp_tmask, + output wire [31:0] rsp_PC, + output wire [`NR_BITS-1:0] rsp_rd, + output wire rsp_wb, + output wire [`NUM_THREADS-1:0][31:0] rsp_data, + input wire rsp_ready ); - // TODO + `UNUSED_PARAM (CORE_ID) + + /* + assign tex_req_if.ready = (& pt_addr_ready); + + assign lsu_req_if.valid = (& pt_addr_valid); + + assign lsu_req_if.wid = tex_req_if.wid; + assign lsu_req_if.tmask = tex_req_if.tmask; + assign lsu_req_if.PC = tex_req_if.PC; + assign lsu_req_if.rd = tex_req_if.rd; + assign lsu_req_if.wb = tex_req_if.wb; + assign lsu_req_if.offset = 32'h0000; + assign lsu_req_if.op_type = `OP_BITS'({1'b0, 3'b000}); //func3 for word load?? + assign lsu_req_if.store_data = {`NUM_THREADS{32'h0000}}; + + // wait buffer for fragments / replace with cache/state fragment fifo for bilerp + // no filtering for point sampling -> directly from dcache to output response + + VX_pipe_register #( + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .RESETW (1) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .enable (~stall_out), + .data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}), + .data_out ({tex_rsp_if.valid, tex_rsp_if.wid, tex_rsp_if.tmask, tex_rsp_if.PC, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.data}) + ); + + // output + assign stall_out = ~tex_rsp_if.ready && tex_rsp_if.valid; + + // can accept new request? + assign stall_in = stall_out; + + assign ld_commit_if.ready = ~stall_in;*/ endmodule \ No newline at end of file diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index 543b3f31..292fabe0 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -19,22 +19,13 @@ module VX_tex_unit #( VX_tex_rsp_if tex_rsp_if ); - localparam MEM_REQ_TAGW = `NW_BITS + 32 + 1 + `NR_BITS + `NTEX_BITS; + localparam REQ_TAG_WIDTH = `TEX_FORMAT_BITS + `NW_BITS + 32 + `NR_BITS + 1; `UNUSED_PARAM (CORE_ID) `UNUSED_VAR (reset) - wire rsp_valid; - wire [`NW_BITS-1:0] rsp_wid; - wire [`NUM_THREADS-1:0] rsp_tmask; - wire [31:0] rsp_PC; - wire [`NR_BITS-1:0] rsp_rd; - wire rsp_wb; - wire [`NUM_THREADS-1:0][31:0] rsp_data; - wire stall_in, stall_out; - reg [`TEX_ADDR_BITS-1:0] tex_addr [`NUM_TEX_UNITS-1: 0]; - reg [`TEX_FMT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1: 0]; + reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1: 0]; reg [`TEX_WIDTH_BITS-1:0] tex_width [`NUM_TEX_UNITS-1: 0]; reg [`TEX_HEIGHT_BITS-1:0] tex_height [`NUM_TEX_UNITS-1: 0]; reg [`TEX_STRIDE_BITS-1:0] tex_stride [`NUM_TEX_UNITS-1: 0]; @@ -58,14 +49,14 @@ module VX_tex_unit #( end begin if (tex_csr_if.write_enable) begin case (tex_csr_if.write_addr) - `CSR_TEX_ADDR(i) : tex_addr[i] <= tex_csr_if.write_data; - `CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data; - `CSR_TEX_WIDTH(i) : tex_width[i] <= tex_csr_if.write_data; - `CSR_TEX_HEIGHT(i) : tex_height[i] <= tex_csr_if.write_data; - `CSR_TEX_STRIDE(i) : tex_stride[i] <= tex_csr_if.write_data; - `CSR_TEX_WRAP_U(i) : tex_wrap_u[i] <= tex_csr_if.write_data; - `CSR_TEX_WRAP_V(i) : tex_wrap_v[i] <= tex_csr_if.write_data; - `CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data; + `CSR_TEX_ADDR(i) : tex_addr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0]; + `CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0]; + `CSR_TEX_WIDTH(i) : tex_width[i] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0]; + `CSR_TEX_HEIGHT(i) : tex_height[i] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0]; + `CSR_TEX_STRIDE(i) : tex_stride[i] <= tex_csr_if.write_data[`TEX_STRIDE_BITS-1:0]; + `CSR_TEX_WRAP_U(i) : tex_wrap_u[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0]; + `CSR_TEX_WRAP_V(i) : tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0]; + `CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0]; default: assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0) && tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES)); @@ -77,18 +68,23 @@ module VX_tex_unit #( // address generation - wire [3:0] mem_req_valid; - wire [3:0][31:0] mem_req_addr; - wire [TAG_IN_WIDTH-1:0] mem_req_tag; + wire mem_req_valid; + wire [`NUM_THREADS-1:0] mem_req_tmask; + wire [`TEX_FILTER_BITS-1:0] mem_req_filter; + wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr; + wire [REQ_TAG_WIDTH-1:0] mem_req_tag; wire mem_req_ready; wire mem_rsp_valid; - wire [3:0][31:0] mem_rsp_data; - wire [TAG_IN_WIDTH-1:0] mem_rsp_tag; + wire [`NUM_THREADS-1:0] mem_rsp_tmask; + wire [`TEX_FILTER_BITS-1:0] mem_rsp_filter; + wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data; + wire [REQ_TAG_WIDTH-1:0] mem_rsp_tag; wire mem_rsp_ready; VX_tex_addr_gen #( - .FRAC_BITS(20) + .FRAC_BITS (20), + .REQ_TAG_WIDTH (REQ_TAG_WIDTH) ) tex_addr_gen ( .clk (clk), .reset (reset), @@ -96,10 +92,11 @@ module VX_tex_unit #( .valid_in (tex_req_if.valid), .ready_in (tex_req_if.ready), - .req_tag ({tex_req_if.wid, tex_req_if.PC, tex_req_if.rd, tex_req_if.wb}), .filter (tex_filter[tex_req_if.unit]), - .wrap_u (tex_wrap_ufilter[tex_req_if.unit]), + .wrap_u (tex_wrap_u[tex_req_if.unit]), .wrap_v (tex_wrap_v[tex_req_if.unit]), + .req_tmask (tex_req_if.tmask), + .req_tag ({tex_format[tex_req_if.unit], tex_req_if.wid, tex_req_if.PC, tex_req_if.rd, tex_req_if.wb}), .base_addr (tex_addr[tex_req_if.unit]), .log2_stride (tex_stride[tex_req_if.unit]), @@ -111,6 +108,8 @@ module VX_tex_unit #( .lod (tex_req_if.lod), .mem_req_valid (mem_req_valid), + .mem_req_tmask (mem_req_tmask), + .mem_req_filter (mem_req_filter), .mem_req_tag (mem_req_tag), .mem_req_addr (mem_req_addr), .mem_req_ready (mem_req_ready) @@ -120,7 +119,7 @@ module VX_tex_unit #( VX_tex_memory #( .CORE_ID (CORE_ID), - .REQ_TAG_WIDTH (MEM_REQ_TAGW) + .REQ_TAG_WIDTH (REQ_TAG_WIDTH) ) tex_memory ( .clk (clk), .reset (reset), @@ -130,77 +129,60 @@ module VX_tex_unit #( .dcache_rsp_if (dcache_rsp_if), // inputs - req_valid (mem_req_valid), - req_addr (mem_req_addr), - req_tag (mem_req_tag), - req_ready (mem_req_ready), + .req_valid (mem_req_valid), + .req_tmask (mem_req_tmask), + .req_filter(mem_req_filter), + .req_addr (mem_req_addr), + .req_tag (mem_req_tag), + .req_ready (mem_req_ready), // outputs - rsp_valid (mem_rsp_valid), - rsp_texel (mem_rsp_data), - rsp_tag (mem_rsp_tag), - rsp_ready (mem_rsp_ready) + .rsp_valid (mem_rsp_valid), + .rsp_tmask (mem_rsp_tmask), + .rsp_filter(mem_rsp_filter), + .rsp_data (mem_rsp_data), + .rsp_tag (mem_rsp_tag), + .rsp_ready (mem_rsp_ready) ); // apply sampler + wire [`TEX_FORMAT_BITS-1:0] rsp_format; + wire [`NW_BITS-1:0] rsp_wid; + wire [31:0] rsp_PC; + wire [`NR_BITS-1:0] rsp_rd; + wire rsp_wb; + + assign {rsp_format, rsp_wid, rsp_PC, rsp_rd, rsp_wb} = mem_rsp_tag; + VX_tex_sampler #( .CORE_ID (CORE_ID) ) tex_sampler ( .clk (clk), - .reset (reset) + .reset (reset), // inputs - //.valid_in (mem_rsp_valid), - //.texel (mem_rsp_data), - //.req_wid (mem_rsp_tag), - //.req_PC (mem_rsp_tag), - //.format (mem_rsp_tag), - //.ready_in (mem_rsp_ready), - ); + .req_valid (mem_rsp_valid), + .req_tmask (mem_rsp_tmask), + .req_texels (mem_rsp_data), + .req_filter (mem_rsp_filter), + .req_format (rsp_format), + .req_wid (rsp_wid), + .req_PC (rsp_PC), + .req_rd (rsp_rd), + .req_wb (rsp_wb), + .req_ready (mem_rsp_ready), - assign tex_req_if.ready = (& pt_addr_ready); - - assign lsu_req_if.valid = (& pt_addr_valid); - - assign lsu_req_if.wid = tex_req_if.wid; - assign lsu_req_if.tmask = tex_req_if.tmask; - assign lsu_req_if.PC = tex_req_if.PC; - assign lsu_req_if.rd = tex_req_if.rd; - assign lsu_req_if.wb = tex_req_if.wb; - assign lsu_req_if.offset = 32'h0000; - assign lsu_req_if.op_type = `OP_BITS'({1'b0, 3'b000}); //func3 for word load?? - assign lsu_req_if.store_data = {`NUM_THREADS{32'h0000}}; - - // wait buffer for fragments / replace with cache/state fragment fifo for bilerp - // no filtering for point sampling -> directly from dcache to output response - - assign rsp_valid = ld_commit_if.valid; - assign rsp_wid = ld_commit_if.wid; - assign rsp_tmask = ld_commit_if.tmask; - assign rsp_PC = ld_commit_if.PC; - assign rsp_rd = ld_commit_if.rd; - assign rsp_wb = ld_commit_if.wb; - assign rsp_data = ld_commit_if.data; - - VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), - .RESETW (1) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .enable (~stall_out), - .data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}), - .data_out ({tex_rsp_if.valid, tex_rsp_if.wid, tex_rsp_if.tmask, tex_rsp_if.PC, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.data}) - ); - - // output - assign stall_out = ~tex_rsp_if.ready && tex_rsp_if.valid; - - // can accept new request? - assign stall_in = stall_out; - - assign ld_commit_if.ready = ~stall_in; + // outputs + .rsp_valid (tex_rsp_if.valid), + .rsp_wid (tex_rsp_if.wid), + .rsp_tmask (tex_rsp_if.tmask), + .rsp_PC (tex_rsp_if.PC), + .rsp_rd (tex_rsp_if.rd), + .rsp_wb (tex_rsp_if.wb), + .rsp_data (tex_rsp_if.data), + .rsp_ready (tex_rsp_if.ready) + ); `ifdef DBG_PRINT_TEX always @(posedge clk) begin diff --git a/hw/rtl/tex_unit/VX_tex_wrap.v b/hw/rtl/tex_unit/VX_tex_wrap.v index 01458269..3fce0543 100644 --- a/hw/rtl/tex_unit/VX_tex_wrap.v +++ b/hw/rtl/tex_unit/VX_tex_wrap.v @@ -1,7 +1,7 @@ `include "VX_define.vh" module VX_tex_wrap #( - parameter CORE_ID = 0, + parameter CORE_ID = 0, parameter FRAC_BITS = 20, parameter INT_BITS = 32 - FRAC_BITS ) ( @@ -10,7 +10,9 @@ module VX_tex_wrap #( input wire [31:0] coord_o ) - always @(*) begin + `UNUSED_PARAM (CORE_ID) + + /*always @(*) begin case (wrap_i) `ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; `ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i]; @@ -18,6 +20,6 @@ module VX_tex_wrap #( //`ALU_SLL, default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0]; endcase - end + end*/ endmodule \ No newline at end of file