diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 8a7d3657..d9d56b7e 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -120,20 +120,12 @@ case $DRIVER in ;; esac -case $APP in - basic) - APP_PATH=$VORTEX_HOME/driver/tests/basic - ;; - demo) - APP_PATH=$VORTEX_HOME/driver/tests/demo - ;; - dogfood) - APP_PATH=$VORTEX_HOME/driver/tests/dogfood - ;; - *) - APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP - ;; -esac +if [ -d "$VORTEX_HOME/driver/tests/$APP" ]; +then + APP_PATH=$VORTEX_HOME/driver/tests/$APP +else + APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP +fi CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF_FLAG" diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 47611fe5..932dac05 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -17,6 +17,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE +DBG_PRINT_FLAGS += -DDBG_PRINT_TEX DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CACHE_REQ_INFO diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index c665ffcb..06d5a93f 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -16,6 +16,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE +DBG_PRINT_FLAGS += -DDBG_PRINT_TEX DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CACHE_REQ_INFO diff --git a/driver/tests/tex_demo/demo b/driver/tests/tex_demo/demo index 922905c5..69c2c6e8 100755 Binary files a/driver/tests/tex_demo/demo and b/driver/tests/tex_demo/demo differ diff --git a/driver/tests/tex_demo/demo.cpp b/driver/tests/tex_demo/demo.cpp index 229734a5..a28d675d 100644 --- a/driver/tests/tex_demo/demo.cpp +++ b/driver/tests/tex_demo/demo.cpp @@ -141,7 +141,6 @@ int main(int argc, char *argv[]) { kernel_arg.num_tasks = 
num_tasks; kernel_arg.task_size = count; - kernel_arg.device_ptr = device; std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 9751bb48..2b6c2719 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -153,7 +153,7 @@ module VX_alu_unit #( assign mul_ready_out = !stall_out; - assign result_valid = mul_valid_out | (alu_req_if.valid && ~is_mul_op); + assign result_valid = mul_valid_out || (alu_req_if.valid && ~is_mul_op); assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid; assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask; assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC; @@ -164,7 +164,7 @@ module VX_alu_unit #( `else - assign stall_in = 0; + assign stall_in = stall_out; assign result_valid = alu_req_if.valid; assign result_wid = alu_req_if.wid; diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index fa9d2af4..2fc873fc 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -85,6 +85,10 @@ `define EXT_F_ENABLE `endif +`ifndef EXT_TEX_DISABLE +`define EXT_TEX_ENABLE +`endif + // Device identification `define VENDOR_ID 0 `define ARCHITECTURE_ID 0 diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index 05fe8250..c9ac3357 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -13,7 +13,10 @@ module VX_csr_data #( VX_cmt_to_csr_if cmt_to_csr_if, VX_fpu_to_csr_if fpu_to_csr_if, + +`ifdef EXT_TEX_ENABLE VX_tex_csr_if tex_csr_if, +`endif input wire read_enable, input wire[`CSR_ADDR_BITS-1:0] read_addr, @@ -80,10 +83,12 @@ module VX_csr_data #( end end - //write tex csrs - assign tex_csr_if.write_addr = write_addr; - assign tex_csr_if.write_data = write_data; + // TEX CSRs +`ifdef EXT_TEX_ENABLE assign tex_csr_if.write_enable = write_enable; + assign tex_csr_if.write_addr = write_addr; + assign tex_csr_if.write_data = write_data; +`endif always 
@(posedge clk) begin if (reset) begin diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 325dbdd1..c6e1fbcf 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -13,8 +13,11 @@ module VX_csr_unit #( VX_cmt_to_csr_if cmt_to_csr_if, VX_fpu_to_csr_if fpu_to_csr_if, + +`ifdef EXT_TEX_ENABLE VX_tex_csr_if tex_csr_if, - +`endif + VX_csr_io_req_if csr_io_req_if, VX_csr_io_rsp_if csr_io_rsp_if, @@ -63,7 +66,9 @@ module VX_csr_unit #( `endif .cmt_to_csr_if (cmt_to_csr_if), .fpu_to_csr_if (fpu_to_csr_if), + `ifdef EXT_TEX_ENABLE .tex_csr_if (tex_csr_if), + `endif .read_enable (csr_pipe_req_if.valid), .read_addr (csr_pipe_req_if.addr), .read_wid (csr_pipe_req_if.wid), diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 7ddc8000..d30ca443 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -358,6 +358,7 @@ module VX_decode #( use_rs2 = 1; is_wstall = 1; end + `ifdef EXT_TEX_ENABLE 3'h5: begin op_type = `OP_BITS'(`GPU_TEX); use_rd = 1; @@ -365,6 +366,7 @@ module VX_decode #( use_rs2 = 1; use_rs3 = 1; end + `endif default:; endcase end @@ -373,7 +375,7 @@ module VX_decode #( end // disable write to integer register r0 - wire use_rd_qual = use_rd && (rd_fp || (rd != 0)); + wire wb = use_rd && (rd_fp || (rd != 0)); // EX_ALU needs rs1=0 for LUI operation wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 
5'h0 : rs1; @@ -385,7 +387,7 @@ module VX_decode #( assign decode_if.ex_type = ex_type; assign decode_if.op_type = op_type; assign decode_if.op_mod = op_mod; - assign decode_if.wb = use_rd_qual; + assign decode_if.wb = wb; `ifdef EXT_F_ENABLE assign decode_if.rd = {rd_fp, rd}; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 139b2a05..e8b7668a 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -156,7 +156,6 @@ `define CSR_RW 2'h0 `define CSR_RS 2'h1 `define CSR_RC 2'h2 -`define CSR_OTHER 2'h3 `define CSR_BITS 2 `define CSR_OP(x) x[`CSR_BITS-1:0] @@ -185,7 +184,6 @@ `define GPU_JOIN 3'h3 `define GPU_BAR 3'h4 `define GPU_TEX 3'h5 -`define GPU_OTHER 3'h7 `define GPU_BITS 3 `define GPU_OP(x) x[`GPU_BITS-1:0] diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 5ac78f35..224d0731 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -45,7 +45,10 @@ module VX_execute #( output wire ebreak ); VX_fpu_to_csr_if fpu_to_csr_if(); + +`ifdef EXT_TEX_ENABLE VX_tex_csr_if tex_csr_if(); +`endif wire[`NUM_WARPS-1:0] csr_pending; wire[`NUM_WARPS-1:0] fpu_pending; @@ -84,7 +87,9 @@ module VX_execute #( `endif .cmt_to_csr_if (cmt_to_csr_if), .fpu_to_csr_if (fpu_to_csr_if), + `ifdef EXT_TEX_ENABLE .tex_csr_if (tex_csr_if), + `endif .csr_io_req_if (csr_io_req_if), .csr_io_rsp_if (csr_io_rsp_if), .csr_req_if (csr_req_if), @@ -131,9 +136,11 @@ module VX_execute #( .clk (clk), .reset (reset), .gpu_req_if (gpu_req_if), + `ifdef EXT_TEX_ENABLE + .tex_csr_if (tex_csr_if), + `endif .warp_ctl_if (warp_ctl_if), - .gpu_commit_if (gpu_commit_if), - .tex_csr_if (tex_csr_if) + .gpu_commit_if (gpu_commit_if) ); assign ebreak = alu_req_if.valid diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 9cad586d..1469423d 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -10,7 +10,10 @@ module VX_gpu_unit #( // Inputs VX_gpu_req_if gpu_req_if, + +`ifdef EXT_TEX_ENABLE VX_tex_csr_if tex_csr_if, +`endif // Outputs VX_warp_ctl_if warp_ctl_if, @@ 
-18,23 +21,30 @@ module VX_gpu_unit #( ); `UNUSED_PARAM (CORE_ID) - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) + + wire rsp_valid; + wire [`NW_BITS-1:0] rsp_wid; + wire [`NUM_THREADS-1:0] rsp_tmask; + wire [31:0] rsp_PC; + wire [`NR_BITS-1:0] rsp_rd; + wire rsp_wb; + wire [`NUM_THREADS-1:0][31:0] rsp_data; gpu_tmc_t tmc; gpu_wspawn_t wspawn; gpu_barrier_t barrier; gpu_split_t split; - VX_tex_req_if tex_req_if; - VX_tex_rsp_if tex_rsp_if; + wire [(`NUM_THREADS * 32)-1:0] warp_ctl_data; + wire is_warp_ctl; + + wire stall_in, stall_out; wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN); wire is_tmc = (gpu_req_if.op_type == `GPU_TMC); wire is_split = (gpu_req_if.op_type == `GPU_SPLIT); wire is_bar = (gpu_req_if.op_type == `GPU_BAR); - wire is_tex = (gpu_req_if.op_type == `GPU_TEX); - + // tmc wire [`NUM_THREADS-1:0] tmc_new_mask; @@ -76,10 +86,28 @@ module VX_gpu_unit #( assign barrier.valid = is_bar; assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0]; - assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1); + assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1); + + // pack warp ctl result + `IGNORE_WARNINGS_BEGIN + assign warp_ctl_data = {tmc, wspawn, barrier, split}; + `IGNORE_WARNINGS_END // texture - assign tex_req_if.valid = is_tex; + +`ifdef EXT_TEX_ENABLE + + VX_tex_req_if tex_req_if(); /* interface instances need an explicit port list */ + VX_tex_rsp_if tex_rsp_if(); + + wire is_tex = (gpu_req_if.op_type == `GPU_TEX); + + assign tex_req_if.valid = gpu_req_if.valid && is_tex; + assign tex_req_if.wid = gpu_req_if.wid; + assign tex_req_if.tmask = gpu_req_if.tmask; + assign tex_req_if.PC = gpu_req_if.PC; + assign tex_req_if.rd = gpu_req_if.rd; + assign tex_req_if.wb = gpu_req_if.wb; for (genvar i = 0; i < `NUM_THREADS; i++) begin assign tex_req_if.u[i] = gpu_req_if.rs1_data[i]; @@ -87,54 +115,78 @@ module VX_gpu_unit #( assign tex_req_if.lod_t[i] = gpu_req_if.rs3_data[i]; end - `UNUSED_VAR (tex_req_if.u) - `UNUSED_VAR (tex_req_if.v) - `UNUSED_VAR (tex_req_if.valid) - `UNUSED_VAR 
(tex_req_if.lod_t) - - VX_tex_unit #( .CORE_ID(CORE_ID) ) texture_unit ( - .clk (clk), - .reset (reset), - - .tex_req_if (tex_req_if), - .tex_csr_if (tex_csr_if), - .tex_rsp_if (tex_rsp_if) + .clk (clk), + .reset (reset), + .tex_req_if (tex_req_if), + .tex_csr_if (tex_csr_if), + .tex_rsp_if (tex_rsp_if) ); - assign gpu_req_if.valid = is_tex; - assign gpu_req_if.wb = tex_rsp_if.ready; + assign tex_rsp_if.ready = !stall_out; + + assign stall_in = (is_tex && ~tex_req_if.ready) + || (~is_tex && (tex_rsp_if.valid || stall_out)); + + assign is_warp_ctl = !(is_tex || tex_rsp_if.valid); + + assign rsp_valid = tex_rsp_if.valid || (gpu_req_if.valid && ~is_tex); + assign rsp_wid = tex_rsp_if.valid ? tex_rsp_if.wid : gpu_req_if.wid; + assign rsp_tmask = tex_rsp_if.valid ? tex_rsp_if.tmask : gpu_req_if.tmask; + assign rsp_PC = tex_rsp_if.valid ? tex_rsp_if.PC : gpu_req_if.PC; + assign rsp_rd = tex_rsp_if.rd; + assign rsp_wb = tex_rsp_if.valid && tex_rsp_if.wb; + assign rsp_data = tex_rsp_if.valid ? tex_rsp_if.data : warp_ctl_data; + +`else + + assign stall_in = stall_out; + assign is_warp_ctl = 1; + + assign rsp_valid = gpu_req_if.valid; + assign rsp_wid = gpu_req_if.wid; + assign rsp_tmask = gpu_req_if.tmask; + assign rsp_PC = gpu_req_if.PC; + assign rsp_rd = 0; + assign rsp_wb = 0; + assign rsp_data = warp_ctl_data; + + `UNUSED_VAR (gpu_req_if.rd) + `UNUSED_VAR (gpu_req_if.wb) + +`endif + + wire is_warp_ctl_r; // output - wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid; + assign stall_out = ~gpu_commit_if.ready && gpu_commit_if.valid; VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE + (`NUM_THREADS * 32)), + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1), .RESETW (1) ) pipe_reg ( .clk (clk), .reset (reset), - .enable (!stall), - .data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, tex_rsp_if.data, 
gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}), - .data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.data, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier}) + .enable (!stall_out), + .data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data, is_warp_ctl}), + .data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, gpu_commit_if.data, is_warp_ctl_r}) ); assign gpu_commit_if.eop = 1'b1; - assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready; - assign warp_ctl_if.wid = gpu_commit_if.wid; + // warp control response + + `IGNORE_WARNINGS_BEGIN + assign {warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.barrier, warp_ctl_if.split} = gpu_commit_if.data; + `IGNORE_WARNINGS_END + assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready && is_warp_ctl_r; + assign warp_ctl_if.wid = gpu_commit_if.wid; // can accept new request? 
- assign gpu_req_if.ready = ~stall; + assign gpu_req_if.ready = ~stall_in; - `SCOPE_ASSIGN (gpu_req_fire, gpu_req_if.valid && gpu_req_if.ready); - `SCOPE_ASSIGN (gpu_req_wid, gpu_req_if.wid); - `SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask); - `SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type); - `SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]); - `SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data[0]); `SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid); `SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid); `SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc); diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 7f35602b..37300527 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -195,7 +195,7 @@ module VX_issue #( $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data); end if (gpu_req_if.valid && gpu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data); end end `endif diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 5ec9c74a..ca1fbc79 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -75,10 +75,10 @@ `define UP(x) (((x) > 0) ? 
x : 1) -`define SAFE_RNG(h,l) `MAX(h,l) : l +`define SAFE_RNG(h, l) `MAX(h,l) : l -`define RTRIM(x,s) x[$bits(x)-1:($bits(x)-s)] +`define RTRIM(x, s) x[$bits(x)-1:($bits(x)-s)] -`define LTRIM(x,s) x[s-1:0] +`define LTRIM(x, s) x[s-1:0] `endif \ No newline at end of file diff --git a/hw/rtl/VX_print_instr.vh b/hw/rtl/VX_print_instr.vh index 81f63aeb..36c6dbc9 100644 --- a/hw/rtl/VX_print_instr.vh +++ b/hw/rtl/VX_print_instr.vh @@ -128,6 +128,7 @@ task print_ex_op ( `GPU_SPLIT: $write("SPLIT"); `GPU_JOIN: $write("JOIN"); `GPU_BAR: $write("BAR"); + `GPU_TEX: $write("TEX"); default: $write("?"); endcase end diff --git a/hw/rtl/interfaces/VX_tex_req_if.v b/hw/rtl/interfaces/VX_tex_req_if.v index e8290587..7d4d9af8 100644 --- a/hw/rtl/interfaces/VX_tex_req_if.v +++ b/hw/rtl/interfaces/VX_tex_req_if.v @@ -4,19 +4,17 @@ `include "VX_define.vh" interface VX_tex_req_if (); - wire valid; - wire [`NUM_THREADS-1:0][31:0] u; - wire [`NUM_THREADS-1:0][31:0] v; - wire [`NUM_THREADS-1:0][31:0] lod_t; - // wire [`NUM_THREADS-1:0][7:0] t; - // wire [`MADDRW-1:0] addr; - // wire [`MAXWTW-1:0] width; - // wire [`MAXHTW-1:0] height; - // wire [`MAXFTW-1:0] format; - // wire [`MAXFMW-1:0] filter; - // wire [`MAXAMW-1:0] clamp; - // wire [`TAGW-1:0] tag; - // wire ready; + + wire valid; + wire [`NW_BITS-1:0] wid; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; + wire [`NR_BITS-1:0] rd; + wire wb; + wire [`NUM_THREADS-1:0][31:0] u; + wire [`NUM_THREADS-1:0][31:0] v; + wire [`NUM_THREADS-1:0][31:0] lod_t; + wire ready; endinterface `endif diff --git a/hw/rtl/interfaces/VX_tex_rsp_if.v b/hw/rtl/interfaces/VX_tex_rsp_if.v index 3ca929d5..e0e3cbea 100644 --- a/hw/rtl/interfaces/VX_tex_rsp_if.v +++ b/hw/rtl/interfaces/VX_tex_rsp_if.v @@ -4,11 +4,18 @@ `include "VX_define.vh" interface VX_tex_rsp_if (); - // wire valid; - // wire [`TAGW-1:0] tag; - wire [`NUM_THREADS-1:0][31:0] data; - wire ready; + + wire valid; + wire [`NW_BITS-1:0] wid; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; + 
wire [`NR_BITS-1:0] rd; + wire wb; + wire [`NUM_THREADS-1:0][31:0] data; + wire ready; + endinterface + `endif diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index dd6ba8a3..1485e32c 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -6,56 +6,26 @@ module VX_tex_unit #( ) ( input wire clk, input wire reset, + // Inputs VX_tex_req_if tex_req_if, VX_tex_csr_if tex_csr_if, // Outputs VX_tex_rsp_if tex_rsp_if - // VX_commit_if gpu_commit_if - // // Texture Request - // input wire tex_req_valid, - // input wire [`TADDRW-1:0] tex_req_u, - // input wire [`TADDRW-1:0] tex_req_v, - // input wire [`MADDRW-1:0] tex_req_addr, - // input wire [`MAXWTW-1:0] tex_req_width, - // input wire [`MAXHTW-1:0] tex_req_height, - // input wire [`MAXFTW-1:0] tex_req_format, - // input wire [`MAXFMW-1:0] tex_req_filter, - // input wire [`MAXAMW-1:0] tex_req_clamp, - // input wire [`TAGW-1:0] tex_req_tag, - // output wire tex_req_ready, - - // // Texture Response - // output wire tex_rsp_valid, - // output wire [`TAGW-1:0] tex_rsp_tag, - // input wire [`DATAW-1:0] tex_rsp_data, - // input wire tex_rsp_ready, - - // Cache Request - // output wire [NUMCRQS-1:0] cache_req_valids, - // output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs, - // input wire cache_req_ready, - - // Cache Response - // input wire cache_rsp_valid, - // input wire [MADDRW-1:0] cache_rsp_addr, - // input wire [DATAW-1:0] cache_rsp_data, - // output wire cache_rsp_ready ); `UNUSED_PARAM (CORE_ID) `UNUSED_VAR (reset) - `UNUSED_VAR(tex_addr) - `UNUSED_VAR(tex_format) - `UNUSED_VAR(tex_width) - `UNUSED_VAR(tex_height) - `UNUSED_VAR(tex_stride) - `UNUSED_VAR(tex_wrap_u) - `UNUSED_VAR(tex_wrap_v) - `UNUSED_VAR(tex_min_filter) - `UNUSED_VAR(tex_max_filter) + wire rsp_valid; + wire [`NW_BITS-1:0] rsp_wid; + wire [`NUM_THREADS-1:0] rsp_tmask; + wire [31:0] rsp_PC; + wire [`NR_BITS-1:0] rsp_rd; + wire rsp_wb; + wire [`NUM_THREADS-1:0][31:0] rsp_data; + wire stall_in, 
stall_out; reg [`CSR_WIDTH-1:0] tex_addr [`NUM_TEX_UNITS-1: 0]; reg [`CSR_WIDTH-1:0] tex_format [`NUM_TEX_UNITS-1: 0]; @@ -67,44 +37,81 @@ module VX_tex_unit #( reg [`CSR_WIDTH-1:0] tex_min_filter [`NUM_TEX_UNITS-1: 0]; reg [`CSR_WIDTH-1:0] tex_max_filter [`NUM_TEX_UNITS-1: 0]; + `UNUSED_VAR (tex_addr) + `UNUSED_VAR (tex_format) + `UNUSED_VAR (tex_width) + `UNUSED_VAR (tex_height) + `UNUSED_VAR (tex_stride) + `UNUSED_VAR (tex_wrap_u) + `UNUSED_VAR (tex_wrap_v) + `UNUSED_VAR (tex_min_filter) + `UNUSED_VAR (tex_max_filter) + //tex csr programming, need to make make consistent with `NUM_TEX_UNITS always @(posedge clk ) begin if (tex_csr_if.write_enable) begin case (tex_csr_if.write_addr) - `CSR_TEX0_ADDR : tex_addr[0] <= tex_csr_if.write_data; - `CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data; - `CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data; - `CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data; - `CSR_TEX0_STRIDE : tex_stride[0] <= tex_csr_if.write_data; - `CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data; - `CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data; + `CSR_TEX0_ADDR : tex_addr[0] <= tex_csr_if.write_data; + `CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data; + `CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data; + `CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data; + `CSR_TEX0_STRIDE : tex_stride[0] <= tex_csr_if.write_data; + `CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data; + `CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data; `CSR_TEX0_MIN_FILTER : tex_min_filter[0] <= tex_csr_if.write_data; `CSR_TEX0_MAX_FILTER : tex_max_filter[0] <= tex_csr_if.write_data; - `CSR_TEX1_ADDR : tex_addr[1] <= tex_csr_if.write_data; - `CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data; - `CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data; - `CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data; - `CSR_TEX1_STRIDE : tex_stride[1] <= tex_csr_if.write_data; - `CSR_TEX1_WRAP_U : 
tex_wrap_u[1] <= tex_csr_if.write_data; - `CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data; + `CSR_TEX1_ADDR : tex_addr[1] <= tex_csr_if.write_data; + `CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data; + `CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data; + `CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data; + `CSR_TEX1_STRIDE : tex_stride[1] <= tex_csr_if.write_data; + `CSR_TEX1_WRAP_U : tex_wrap_u[1] <= tex_csr_if.write_data; + `CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data; `CSR_TEX1_MIN_FILTER : tex_min_filter[1] <= tex_csr_if.write_data; `CSR_TEX1_MAX_FILTER : tex_max_filter[1] <= tex_csr_if.write_data; - default: - assert(tex_csr_if.write_addr > `CSR_TEX_END || tex_csr_if.write_addr < `CSR_TEX_BEGIN) else $error("%t: invalid CSR write address: %0h", $time, tex_csr_if.write_addr); + default:; endcase end end - for (genvar i = 0; i < `NUM_THREADS; i++) begin - assign tex_rsp_if.data[i] = 32'hFAAF; - end + // texture response + `UNUSED_VAR (tex_req_if.u) + `UNUSED_VAR (tex_req_if.v) + `UNUSED_VAR (tex_req_if.lod_t) - assign tex_rsp_if.ready = 1'b1; + assign stall_in = stall_out; - `ifdef DBG_PRINT_TEX_CSRS + assign rsp_valid = tex_req_if.valid; + assign rsp_wid = tex_req_if.wid; + assign rsp_tmask = tex_req_if.tmask; + assign rsp_PC = tex_req_if.PC; + assign rsp_rd = tex_req_if.rd; + assign rsp_wb = tex_req_if.wb; + assign rsp_data = {`NUM_THREADS{32'hFAAF}}; // dummy color value + + // output + assign stall_out = ~tex_rsp_if.ready && tex_rsp_if.valid; + + VX_pipe_register #( + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .RESETW (1) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .enable (~stall_out), + .data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}), + .data_out ({tex_rsp_if.valid, tex_rsp_if.wid, tex_rsp_if.tmask, tex_rsp_if.PC, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.data}) + ); + + // can accept new request? 
+ assign tex_req_if.ready = ~stall_in; + +`ifdef DBG_PRINT_TEX always @(posedge clk) begin - if (tex_csr_if.write_addr <= `CSR_TEX_END || tex_csr_if.write_addr >= `CSR_TEX_BEGIN) begin + if (tex_csr_if.write_enable /* trace only writes inside [CSR_TEX_BEGIN, CSR_TEX_END] */ + && (tex_csr_if.write_addr <= `CSR_TEX_END + && tex_csr_if.write_addr >= `CSR_TEX_BEGIN)) begin $display("%t: core%0d-tex_csr: csr_tex0_addr, csr_data=%0h", $time, CORE_ID, tex_addr[0]); $display("%t: core%0d-tex_csr: csr_tex0_format, csr_data=%0h", $time, CORE_ID, tex_format[0]); $display("%t: core%0d-tex_csr: csr_tex0_width, csr_data=%0h", $time, CORE_ID, tex_width[0]); @@ -116,7 +123,6 @@ module VX_tex_unit #( $display("%t: core%0d-tex_csr: csr_tex0_max_filter, csr_data=%0h", $time, CORE_ID, tex_max_filter[0]); end end - `endif - +`endif endmodule \ No newline at end of file diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index d4d8033f..7003c846 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -147,18 +147,12 @@ "wsched_warp_pc": "32" }, "afu/vortex/cluster/core/pipeline/execute/gpu_unit": { - "?gpu_req_fire": 1, - "gpu_req_wid": "`NW_BITS", - "gpu_req_tmask": "`NUM_THREADS", - "gpu_req_op_type": "`GPU_BITS", - "gpu_req_rs1": "32", - "gpu_req_rs2": "32", "?gpu_rsp_valid": 1, "gpu_rsp_wid": "`NW_BITS", "gpu_rsp_tmc": "`GPU_TMC_SIZE", "gpu_rsp_wspawn": "`GPU_WSPAWN_SIZE", "gpu_rsp_split": "`GPU_SPLIT_SIZE", - "gpu_rsp_barrier": "`GPU_BARRIER_SIZE" + "gpu_rsp_barrier": "`GPU_BARRIER_SIZE" }, "afu/vortex/cluster/core/pipeline/execute/lsu_unit": { "?dcache_req_fire":"`NUM_THREADS", diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index b468ea62..82422c27 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -17,6 +17,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE +DBG_PRINT_FLAGS += -DDBG_PRINT_TEX DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CACHE_REQ_INFO