From 58a2140b92cde42f4a94709f90a3bbabb82baa05 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 15 Oct 2021 19:58:13 -0700 Subject: [PATCH] merge update --- ci/regression.sh | 13 +++++- hw/rtl/VX_commit.sv | 2 +- hw/rtl/VX_config.vh | 4 -- hw/rtl/VX_csr_data.sv | 40 +++++++++---------- hw/rtl/VX_csr_unit.sv | 3 +- hw/rtl/VX_decode.sv | 4 +- hw/rtl/VX_define.vh | 8 ++-- hw/rtl/VX_execute.sv | 10 ++--- hw/rtl/VX_gpu_unit.sv | 30 ++++++++------ hw/rtl/VX_instr_demux.sv | 7 ++-- hw/rtl/VX_writeback.sv | 24 +++++------ hw/rtl/afu/vortex_afu.sv | 11 ++--- hw/rtl/interfaces/VX_gpu_req_if.sv | 8 +++- hw/rtl/interfaces/VX_tex_csr_if.sv | 14 ++++++- hw/rtl/interfaces/VX_tex_req_if.sv | 26 ++++++++++++ hw/rtl/interfaces/VX_tex_rsp_if.sv | 22 ++++++++++ .../{VX_tex_addr.v => VX_tex_addr.sv} | 0 .../{VX_tex_format.v => VX_tex_format.sv} | 0 .../{VX_tex_lerp.v => VX_tex_lerp.sv} | 0 .../{VX_tex_memory.v => VX_tex_memory.sv} | 6 +-- .../{VX_tex_sampler.v => VX_tex_sampler.sv} | 0 .../tex_unit/{VX_tex_sat.v => VX_tex_sat.sv} | 0 .../{VX_tex_stride.v => VX_tex_stride.sv} | 0 .../{VX_tex_unit.v => VX_tex_unit.sv} | 10 ++--- .../{VX_tex_wrap.v => VX_tex_wrap.sv} | 0 hw/syn/opae/Makefile | 1 + sim/rtlsim/Makefile | 4 +- sim/vlsim/Makefile | 4 +- tests/opencl/oclprintf/Makefile | 2 +- tests/opencl/results.txt | 0 tests/regression/Makefile | 14 ++----- 31 files changed, 169 insertions(+), 98 deletions(-) rename hw/rtl/tex_unit/{VX_tex_addr.v => VX_tex_addr.sv} (100%) rename hw/rtl/tex_unit/{VX_tex_format.v => VX_tex_format.sv} (100%) rename hw/rtl/tex_unit/{VX_tex_lerp.v => VX_tex_lerp.sv} (100%) rename hw/rtl/tex_unit/{VX_tex_memory.v => VX_tex_memory.sv} (98%) rename hw/rtl/tex_unit/{VX_tex_sampler.v => VX_tex_sampler.sv} (100%) rename hw/rtl/tex_unit/{VX_tex_sat.v => VX_tex_sat.sv} (100%) rename hw/rtl/tex_unit/{VX_tex_stride.v => VX_tex_stride.sv} (100%) rename hw/rtl/tex_unit/{VX_tex_unit.v => VX_tex_unit.sv} (97%) rename hw/rtl/tex_unit/{VX_tex_wrap.v => VX_tex_wrap.sv} (100%) delete mode 100644 tests/opencl/results.txt diff --git a/ci/regression.sh b/ci/regression.sh index 24954616..546219c3 100755 --- a/ci/regression.sh +++ b/ci/regression.sh @@ -22,6 +22,15 @@ make -C tests/opencl run-simx echo "coverage tests done!" } +tex() +{ +echo "begin texture tests..." + +CONFIGS="-DEXT_TEX_ENABLE=1" ./ci/blackbox.sh --app=tex + +echo "coverage texture done!" +} + cluster() { echo "begin clustering tests..." @@ -137,13 +146,15 @@ echo "stress1 tests done!" usage() { - echo "usage: regression [-coverage] [-cluster] [-debug] [-config] [-stress[#n]] [-all] [-h|--help]" + echo "usage: regression [-coverage] [-tex] [-cluster] [-debug] [-config] [-stress[#n]] [-all] [-h|--help]" } while [ "$1" != "" ]; do case $1 in -coverage ) coverage ;; + -tex ) tex + ;; -cluster ) cluster ;; -debug ) debug diff --git a/hw/rtl/VX_commit.sv b/hw/rtl/VX_commit.sv index a8e1764b..8d25fae0 100644 --- a/hw/rtl/VX_commit.sv +++ b/hw/rtl/VX_commit.sv @@ -78,12 +78,12 @@ module VX_commit #( `ifdef EXT_F_ENABLE .fpu_commit_if (fpu_commit_if), `endif + .gpu_commit_if (gpu_commit_if), .writeback_if (writeback_if) ); // store and gpu commits don't writeback assign st_commit_if.ready = 1'b1; - assign gpu_commit_if.ready = 1'b1; `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 21191216..b52a1ab2 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -77,10 +77,6 @@ `define EXT_F_ENABLE `endif -`ifndef EXT_TEX_DISABLE -`define EXT_TEX_ENABLE -`endif - // Device identification `define VENDOR_ID 0 `define ARCHITECTURE_ID 0 diff --git a/hw/rtl/VX_csr_data.sv b/hw/rtl/VX_csr_data.sv index 53954e91..b071a347 100644 --- a/hw/rtl/VX_csr_data.sv +++ b/hw/rtl/VX_csr_data.sv @@ -18,7 +18,7 @@ module VX_csr_data #( VX_fpu_to_csr_if.slave fpu_to_csr_if, `endif `ifdef EXT_TEX_ENABLE - VX_tex_csr_if.slave tex_csr_if, + VX_tex_csr_if.master tex_csr_if, `endif input wire read_enable, @@ -49,13 +49,13 @@ module VX_csr_data #( reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr; - always @(posedge clk) begin + always @(posedge clk) begin `ifdef EXT_F_ENABLE if (reset) begin fcsr <= '0; - end + end if (fpu_to_csr_if.write_enable) begin - fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] + fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] | fpu_to_csr_if.write_fflags; end `endif @@ -64,25 +64,21 @@ module VX_csr_data #( `CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0]; `CSR_FRM: fcsr[write_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS] <= write_data[`INST_FRM_BITS-1:0]; `CSR_FCSR: fcsr[write_wid] <= write_data[`FFLAGS_BITS+`INST_FRM_BITS-1:0]; - - `CSR_SATP: csr_satp <= write_data; - - `CSR_MSTATUS: csr_mstatus <= write_data; - `CSR_MEDELEG: csr_medeleg <= write_data; - `CSR_MIDELEG: csr_mideleg <= write_data; - `CSR_MIE: csr_mie <= write_data; - `CSR_MTVEC: csr_mtvec <= write_data; - - `CSR_MEPC: csr_mepc <= write_data; - - `CSR_PMPCFG0: csr_pmpcfg[0] <= write_data; - `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data; - - default: begin - assert (write_addr >= `CSR_TEX_BEGIN(0) && write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES)) - else `ASSERT(~write_enable, ("%t: invalid CSR write address: %0h", $time, write_addr)); + `CSR_SATP: csr_satp <= write_data[`CSR_WIDTH-1:0]; + `CSR_MSTATUS: csr_mstatus <= write_data[`CSR_WIDTH-1:0]; + `CSR_MEDELEG: csr_medeleg <= write_data[`CSR_WIDTH-1:0]; + `CSR_MIDELEG: csr_mideleg <= write_data[`CSR_WIDTH-1:0]; + `CSR_MIE: csr_mie <= write_data[`CSR_WIDTH-1:0]; + `CSR_MTVEC: csr_mtvec <= write_data[`CSR_WIDTH-1:0]; + `CSR_MEPC: csr_mepc <= write_data[`CSR_WIDTH-1:0]; + `CSR_PMPCFG0: csr_pmpcfg[0] <= write_data[`CSR_WIDTH-1:0]; + `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data[`CSR_WIDTH-1:0]; + default: begin + `ASSERT(write_addr >= `CSR_TEX_BEGIN(0) + && write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES), + ("%t: invalid CSR write address: %0h", $time, write_addr)); end - endcase + endcase end end diff --git a/hw/rtl/VX_csr_unit.sv b/hw/rtl/VX_csr_unit.sv index cfa89687..0b05ca9c 100644 --- a/hw/rtl/VX_csr_unit.sv +++ b/hw/rtl/VX_csr_unit.sv @@ -21,7 +21,7 @@ module VX_csr_unit #( input wire[`NUM_WARPS-1:0] fpu_pending, `endif `ifdef EXT_TEX_ENABLE - VX_tex_csr_if.slave tex_csr_if, + VX_tex_csr_if.master tex_csr_if, `endif output wire[`NUM_WARPS-1:0] pending, @@ -49,6 +49,7 @@ module VX_csr_unit #( .fetch_to_csr_if(fetch_to_csr_if), `ifdef EXT_F_ENABLE .fpu_to_csr_if (fpu_to_csr_if), + `endif `ifdef EXT_TEX_ENABLE .tex_csr_if (tex_csr_if), `endif diff --git a/hw/rtl/VX_decode.sv b/hw/rtl/VX_decode.sv index 7d97bbcc..4ac6fe13 100644 --- a/hw/rtl/VX_decode.sv +++ b/hw/rtl/VX_decode.sv @@ -378,8 +378,8 @@ module VX_decode #( end `ifdef EXT_TEX_ENABLE 3'h5: begin - op_type = `OP_BITS'(`GPU_TEX); - op_mod = `MOD_BITS'(func2); + op_type = `INST_OP_BITS'(`INST_GPU_TEX); + op_mod = `INST_MOD_BITS'(func2); use_rd = 1; `USED_IREG (rs1); `USED_IREG (rs2); diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index a8ad5d4f..6df04d39 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -285,17 +285,17 @@ // Core request tag bits `define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE) `ifdef EXT_TEX_ENABLE -`define LSU_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE) +`define LSU_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_FLAG_BITS + `SM_ENABLE) `define TEX_TAG_ID_BITS (2) `define LSU_TEX_TAG_ID_BITS `MAX(`LSU_TAG_ID_BITS, `TEX_TAG_ID_BITS) -`define DCACHE_DCORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + `NC_FLAG_BITS) +`define DCACHE_CORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + `NC_FLAG_BITS) `define LSU_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TAG_ID_BITS) `define TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `TEX_TAG_ID_BITS) `define LSU_TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TEX_TAG_ID_BITS) `else -`define DCACHE_DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE) +`define DCACHE_CORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_FLAG_BITS + `SM_ENABLE) `endif -`define DCACHE_DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS) +`define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS) // Memory request data bits `define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8) diff --git a/hw/rtl/VX_execute.sv b/hw/rtl/VX_execute.sv index 127e3f5e..58a904db 100644 --- a/hw/rtl/VX_execute.sv +++ b/hw/rtl/VX_execute.sv @@ -103,7 +103,7 @@ module VX_execute #( .LANES (`NUM_THREADS), .DATA_SIZE (4), .TAG_IN_WIDTH (`LSU_TEX_DCACHE_TAG_BITS), - .TAG_SEL_IDX (`NC_ADDR_BITS + `SM_ENABLE) + .TAG_SEL_IDX (`NC_FLAG_BITS + `SM_ENABLE) ) tex_lsu_arb ( .clk (clk), .reset (reset), @@ -189,10 +189,7 @@ module VX_execute #( .perf_pipeline_if(perf_pipeline_if), `endif .cmt_to_csr_if (cmt_to_csr_if), - .fpu_to_csr_if (fpu_to_csr_if), - `ifdef EXT_TEX_ENABLE - .tex_csr_if (tex_csr_if), - `endif + .fetch_to_csr_if(fetch_to_csr_if), .csr_req_if (csr_req_if), .csr_commit_if (csr_commit_if), `ifdef EXT_F_ENABLE @@ -202,6 +199,9 @@ module VX_execute #( `else `UNUSED_PIN (pending), `endif + `ifdef EXT_TEX_ENABLE + .tex_csr_if (tex_csr_if), + `endif .busy (busy) ); diff --git a/hw/rtl/VX_gpu_unit.sv b/hw/rtl/VX_gpu_unit.sv index b632e722..9a02b835 100644 --- a/hw/rtl/VX_gpu_unit.sv +++ b/hw/rtl/VX_gpu_unit.sv @@ -52,8 +52,13 @@ module VX_gpu_unit #( wire is_tmc = (gpu_req_if.op_type == `INST_GPU_TMC); wire is_split = (gpu_req_if.op_type == `INST_GPU_SPLIT); wire is_bar = (gpu_req_if.op_type == `INST_GPU_BAR); + wire is_pred = (gpu_req_if.op_type == `INST_GPU_PRED); + + wire [31:0] rs1_data = gpu_req_if.rs1_data[gpu_req_if.tid]; + wire [31:0] rs2_data = gpu_req_if.rs2_data[gpu_req_if.tid]; - // tmc + wire [`NUM_THREADS-1:0] taken_tmask; + wire [`NUM_THREADS-1:0] not_taken_tmask; for (genvar i = 0; i < `NUM_THREADS; i++) begin wire taken = (gpu_req_if.rs1_data[i] != 0); @@ -70,7 +75,7 @@ module VX_gpu_unit #( // wspawn - wire [31:0] wspawn_pc = gpu_req_if.rs2_data[0]; + wire [31:0] wspawn_pc = rs2_data; wire [`NUM_WARPS-1:0] wspawn_wmask; for (genvar i = 0; i < `NUM_WARPS; i++) begin assign wspawn_wmask[i] = (i < rs1_data); @@ -90,8 +95,8 @@ module VX_gpu_unit #( // barrier assign barrier.valid = is_bar; - assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0]; - assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1); + assign barrier.id = rs1_data[`NB_BITS-1:0]; + assign barrier.size_m1 = (`NW_BITS)'(rs2_data - 1); // pack warp ctl result assign warp_ctl_data = {tmc, wspawn, split, barrier}; @@ -105,7 +110,7 @@ module VX_gpu_unit #( VX_tex_req_if tex_req_if(); VX_tex_rsp_if tex_rsp_if(); - wire is_tex = (gpu_req_if.op_type == `GPU_TEX); + wire is_tex = (gpu_req_if.op_type == `INST_GPU_TEX); assign tex_req_if.valid = gpu_req_if.valid && is_tex; assign tex_req_if.wid = gpu_req_if.wid; @@ -114,19 +119,19 @@ module VX_gpu_unit #( assign tex_req_if.rd = gpu_req_if.rd; assign tex_req_if.wb = gpu_req_if.wb; - assign tex_req_if.unit = gpu_req_if.op_mod[`NTEX_BITS-1:0]; + assign tex_req_if.unit = gpu_req_if.op_mod[`NTEX_BITS-1:0]; assign tex_req_if.coords[0] = gpu_req_if.rs1_data; assign tex_req_if.coords[1] = gpu_req_if.rs2_data; - assign tex_req_if.lod = gpu_req_if.rs3_data; + assign tex_req_if.lod = gpu_req_if.rs3_data; VX_tex_unit #( .CORE_ID(CORE_ID) ) tex_unit ( - .clk (clk), - .reset (reset), - .tex_req_if (tex_req_if), - .tex_csr_if (tex_csr_if), - .tex_rsp_if (tex_rsp_if), + .clk (clk), + .reset (reset), + .tex_req_if (tex_req_if), + .tex_csr_if (tex_csr_if), + .tex_rsp_if (tex_rsp_if), .dcache_req_if (dcache_req_if), .dcache_rsp_if (dcache_rsp_if) ); @@ -149,7 +154,6 @@ module VX_gpu_unit #( `else `UNUSED_VAR (gpu_req_if.op_mod) - `UNUSED_VAR (gpu_req_if.rs2_data) `UNUSED_VAR (gpu_req_if.rs3_data) `UNUSED_VAR (gpu_req_if.wb) `UNUSED_VAR (gpu_req_if.rd) diff --git a/hw/rtl/VX_instr_demux.sv b/hw/rtl/VX_instr_demux.sv index b761e9d9..60261245 100644 --- a/hw/rtl/VX_instr_demux.sv +++ b/hw/rtl/VX_instr_demux.sv @@ -124,18 +124,17 @@ module VX_instr_demux ( wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU); wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(ibuffer_if.op_type); - wire [31:0] gpu_rs2_data = gpr_rsp_if.rs2_data[tid]; VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)), + .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32)), .OUT_REG (1) ) gpu_buffer ( .clk (clk), .reset (reset), .valid_in (gpu_req_valid), .ready_in (gpu_req_ready), - .data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpu_rs2_data}), - .data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}), + .data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}), + .data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.op_mod, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}), .valid_out (gpu_req_if.valid), .ready_out (gpu_req_if.ready) ); diff --git a/hw/rtl/VX_writeback.sv b/hw/rtl/VX_writeback.sv index f55c8401..7c2bcfb1 100644 --- a/hw/rtl/VX_writeback.sv +++ b/hw/rtl/VX_writeback.sv @@ -10,7 +10,6 @@ module VX_writeback #( VX_commit_if.slave alu_commit_if, VX_commit_if.slave ld_commit_if, VX_commit_if.slave csr_commit_if, - VX_commit_if.slave csr_commit_if, `ifdef EXT_F_ENABLE VX_commit_if.slave fpu_commit_if, `endif @@ -50,28 +49,28 @@ module VX_writeback #( wire [NUM_RSPS-1:0] rsp_ready; wire stall; - assign rsp_valid = { + assign rsp_valid = { + `ifdef EXT_TEX_ENABLE + gpu_commit_if.valid && gpu_commit_if.wb, + `endif csr_commit_if.valid && csr_commit_if.wb, alu_commit_if.valid && alu_commit_if.wb, `ifdef EXT_F_ENABLE fpu_commit_if.valid && fpu_commit_if.wb, `endif - ld_commit_if.valid && ld_commit_if.wb, - `ifdef EXT_TEX_ENABLE - gpu_commit_if.valid && gpu_commit_if.wb, - `ifend + ld_commit_if.valid && ld_commit_if.wb }; - assign rsp_data = { + assign rsp_data = { + `ifdef EXT_TEX_ENABLE + {gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.rd, gpu_commit_if.data, gpu_commit_if.eop}, + `endif {csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.rd, csr_commit_if.data, csr_commit_if.eop}, {alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.rd, alu_commit_if.data, alu_commit_if.eop}, `ifdef EXT_F_ENABLE {fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.rd, fpu_commit_if.data, fpu_commit_if.eop}, - `endif - { ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.rd, ld_commit_if.data, ld_commit_if.eop}, - `ifdef EXT_TEX_ENABLE - {gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.rd, gpu_commit_if.data, gpu_commit_if.eop}, `endif + { ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.rd, ld_commit_if.data, ld_commit_if.eop} }; VX_stream_arbiter #( @@ -103,8 +102,7 @@ module VX_writeback #( `ifdef EXT_TEX_ENABLE assign gpu_commit_if.ready = rsp_ready[3] || ~gpu_commit_if.wb; `endif -`endif - +`endif assign stall = ~writeback_if.ready && writeback_if.valid; diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index 6d1e2488..adee880f 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -47,11 +47,12 @@ localparam CCI_ADDR_WIDTH = 32 - $clog2(CCI_DATA_SIZE); localparam AVS_RD_QUEUE_SIZE = 4; -localparam AVS_REQ_TAGW_VX_ = `VX_MEM_TAG_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(`VX_MEM_DATA_WIDTH); -localparam AVS_REQ_TAGW_VX = `MAX(`VX_MEM_TAG_WIDTH, AVS_REQ_TAGW_VX_); -localparam AVS_REQ_TAGW_CCI_ = CCI_ADDR_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(CCI_DATA_WIDTH); -localparam AVS_REQ_TAGW_CCI = `MAX(CCI_ADDR_WIDTH, AVS_REQ_TAGW_CCI_); -localparam AVS_REQ_TAGW = `MAX(AVS_REQ_TAGW_VX, AVS_REQ_TAGW_CCI); +localparam _VX_MEM_TAG_WIDTH = `VX_MEM_TAG_WIDTH; +localparam _AVS_REQ_TAGW_VX = _VX_MEM_TAG_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(`VX_MEM_DATA_WIDTH); +localparam _AVS_REQ_TAGW_VX2 = `MAX(_VX_MEM_TAG_WIDTH, _AVS_REQ_TAGW_VX); +localparam _AVS_REQ_TAGW_CCI = CCI_ADDR_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(CCI_DATA_WIDTH); +localparam _AVS_REQ_TAGW_CCI2 = `MAX(CCI_ADDR_WIDTH, _AVS_REQ_TAGW_CCI); +localparam AVS_REQ_TAGW = `MAX(_AVS_REQ_TAGW_VX2, _AVS_REQ_TAGW_CCI2); localparam CCI_RD_WINDOW_SIZE = 8; localparam CCI_RW_PENDING_SIZE= 256; diff --git a/hw/rtl/interfaces/VX_gpu_req_if.sv b/hw/rtl/interfaces/VX_gpu_req_if.sv index e3511043..50ac8c7c 100644 --- a/hw/rtl/interfaces/VX_gpu_req_if.sv +++ b/hw/rtl/interfaces/VX_gpu_req_if.sv @@ -12,9 +12,11 @@ interface VX_gpu_req_if(); wire [31:0] PC; wire [31:0] next_PC; wire [`INST_GPU_BITS-1:0] op_type; + wire [`INST_MOD_BITS-1:0] op_mod; wire [`NT_BITS-1:0] tid; wire [`NUM_THREADS-1:0][31:0] rs1_data; - wire [31:0] rs2_data; + wire [`NUM_THREADS-1:0][31:0] rs2_data; + wire [`NUM_THREADS-1:0][31:0] rs3_data; wire [`NR_BITS-1:0] rd; wire wb; @@ -27,9 +29,11 @@ interface VX_gpu_req_if(); output PC, output next_PC, output op_type, + output op_mod, output tid, output rs1_data, output rs2_data, + output rs3_data, output rd, output wb, input ready @@ -42,9 +46,11 @@ interface VX_gpu_req_if(); input PC, input next_PC, input op_type, + input op_mod, input tid, input rs1_data, input rs2_data, + input rs3_data, input rd, input wb, output ready diff --git a/hw/rtl/interfaces/VX_tex_csr_if.sv b/hw/rtl/interfaces/VX_tex_csr_if.sv index 9315a59d..a83c9479 100644 --- a/hw/rtl/interfaces/VX_tex_csr_if.sv +++ b/hw/rtl/interfaces/VX_tex_csr_if.sv @@ -7,7 +7,19 @@ interface VX_tex_csr_if (); wire write_enable; wire [`CSR_ADDR_BITS-1:0] write_addr; - wire [31:0] write_data; + wire [31:0] write_data; + + modport master ( + output write_enable, + output write_addr, + output write_data + ); + + modport slave ( + input write_enable, + input write_addr, + input write_data + ); endinterface diff --git a/hw/rtl/interfaces/VX_tex_req_if.sv b/hw/rtl/interfaces/VX_tex_req_if.sv index e00a2e0e..f1eaa1be 100644 --- a/hw/rtl/interfaces/VX_tex_req_if.sv +++ b/hw/rtl/interfaces/VX_tex_req_if.sv @@ -18,6 +18,32 @@ interface VX_tex_req_if (); wire ready; + modport master ( + output valid, + output wid, + output tmask, + output PC, + output rd, + output wb, + output unit, + output coords, + output lod, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input rd, + input wb, + input unit, + input coords, + input lod, + output ready + ); + endinterface `endif diff --git a/hw/rtl/interfaces/VX_tex_rsp_if.sv b/hw/rtl/interfaces/VX_tex_rsp_if.sv index e0e3cbea..b3dbd65d 100644 --- a/hw/rtl/interfaces/VX_tex_rsp_if.sv +++ b/hw/rtl/interfaces/VX_tex_rsp_if.sv @@ -14,6 +14,28 @@ interface VX_tex_rsp_if (); wire [`NUM_THREADS-1:0][31:0] data; wire ready; + modport master ( + output valid, + output wid, + output tmask, + output PC, + output rd, + output wb, + output data, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input rd, + input wb, + input data, + output ready + ); + endinterface `endif diff --git a/hw/rtl/tex_unit/VX_tex_addr.v b/hw/rtl/tex_unit/VX_tex_addr.sv similarity index 100% rename from hw/rtl/tex_unit/VX_tex_addr.v rename to hw/rtl/tex_unit/VX_tex_addr.sv diff --git a/hw/rtl/tex_unit/VX_tex_format.v b/hw/rtl/tex_unit/VX_tex_format.sv similarity index 100% rename from hw/rtl/tex_unit/VX_tex_format.v rename to hw/rtl/tex_unit/VX_tex_format.sv diff --git a/hw/rtl/tex_unit/VX_tex_lerp.v b/hw/rtl/tex_unit/VX_tex_lerp.sv similarity index 100% rename from hw/rtl/tex_unit/VX_tex_lerp.v rename to hw/rtl/tex_unit/VX_tex_lerp.sv diff --git a/hw/rtl/tex_unit/VX_tex_memory.v b/hw/rtl/tex_unit/VX_tex_memory.sv similarity index 98% rename from hw/rtl/tex_unit/VX_tex_memory.v rename to hw/rtl/tex_unit/VX_tex_memory.sv index a1e52beb..af335ba2 100644 --- a/hw/rtl/tex_unit/VX_tex_memory.v +++ b/hw/rtl/tex_unit/VX_tex_memory.sv @@ -70,9 +70,9 @@ module VX_tex_memory #( assign reqq_push = req_valid && req_ready; VX_fifo_queue #( - .DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFOW + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * NUM_REQS * 2) + 4), - .SIZE (`LSUQ_SIZE), - .OUTPUT_REG (1) + .DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFOW + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * NUM_REQS * 2) + 4), + .SIZE (`LSUQ_SIZE), + .OUT_REG (1) ) req_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/tex_unit/VX_tex_sampler.v b/hw/rtl/tex_unit/VX_tex_sampler.sv similarity index 100% rename from hw/rtl/tex_unit/VX_tex_sampler.v rename to hw/rtl/tex_unit/VX_tex_sampler.sv diff --git a/hw/rtl/tex_unit/VX_tex_sat.v b/hw/rtl/tex_unit/VX_tex_sat.sv similarity index 100% rename from hw/rtl/tex_unit/VX_tex_sat.v rename to hw/rtl/tex_unit/VX_tex_sat.sv diff --git a/hw/rtl/tex_unit/VX_tex_stride.v b/hw/rtl/tex_unit/VX_tex_stride.sv similarity index 100% rename from hw/rtl/tex_unit/VX_tex_stride.v rename to hw/rtl/tex_unit/VX_tex_stride.sv diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.sv similarity index 97% rename from hw/rtl/tex_unit/VX_tex_unit.v rename to hw/rtl/tex_unit/VX_tex_unit.sv index 5db12f16..dbcf5314 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.sv @@ -7,15 +7,15 @@ module VX_tex_unit #( input wire reset, // Texture unit <-> Memory Unit - VX_dcache_req_if dcache_req_if, - VX_dcache_rsp_if dcache_rsp_if, + VX_dcache_req_if.master dcache_req_if, + VX_dcache_rsp_if.slave dcache_rsp_if, // Inputs - VX_tex_req_if tex_req_if, - VX_tex_csr_if tex_csr_if, + VX_tex_req_if.slave tex_req_if, + VX_tex_csr_if.slave tex_csr_if, // Outputs - VX_tex_rsp_if tex_rsp_if + VX_tex_rsp_if.master tex_rsp_if ); localparam REQ_INFOW_S = `NR_BITS + 1 + `NW_BITS + 32; diff --git a/hw/rtl/tex_unit/VX_tex_wrap.v b/hw/rtl/tex_unit/VX_tex_wrap.sv similarity index 100% rename from hw/rtl/tex_unit/VX_tex_wrap.v rename to hw/rtl/tex_unit/VX_tex_wrap.sv diff --git a/hw/syn/opae/Makefile b/hw/syn/opae/Makefile index 58d82a5d..7c1ab010 100644 --- a/hw/syn/opae/Makefile +++ b/hw/syn/opae/Makefile @@ -20,6 +20,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_MEM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE +DBG_PRINT_FLAGS += -DDBG_PRINT_TEX DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CACHE_REQ_INFO diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 6059e711..194e6ebc 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -20,13 +20,15 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_MEM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE +DBG_PRINT_FLAGS += -DDBG_PRINT_TEX DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CACHE_REQ_INFO DBG_FLAGS += -DVCD_OUTPUT FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src -RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) +TEX_INCLUDE = -I$(RTL_DIR)/tex_unit +RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) $(TEX_INCLUDE) SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp diff --git a/sim/vlsim/Makefile b/sim/vlsim/Makefile index 7de01df9..1dcc4f64 100644 --- a/sim/vlsim/Makefile +++ b/sim/vlsim/Makefile @@ -21,6 +21,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_MEM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE +DBG_PRINT_FLAGS += -DDBG_PRINT_TEX DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CACHE_REQ_INFO @@ -30,7 +31,8 @@ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += fpga.cpp opae_sim.cpp FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src -RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) +TEX_INCLUDE = -I$(RTL_DIR)/tex_unit +RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) $(TEX_INCLUDE) RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip TOP = vortex_afu_shim diff --git a/tests/opencl/oclprintf/Makefile b/tests/opencl/oclprintf/Makefile index 92df9612..34d0146a 100644 --- a/tests/opencl/oclprintf/Makefile +++ b/tests/opencl/oclprintf/Makefile @@ -22,7 +22,7 @@ CXXFLAGS += -I$(POCL_RT_PATH)/include LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex -PROJECT = printf +PROJECT = oclprintf SRCS = main.cc diff --git a/tests/opencl/results.txt b/tests/opencl/results.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/regression/Makefile b/tests/regression/Makefile index 29f9e198..f0574ac6 100644 --- a/tests/regression/Makefile +++ b/tests/regression/Makefile @@ -8,7 +8,6 @@ all: $(MAKE) -C diverge $(MAKE) -C sort $(MAKE) -C fence - $(MAKE) -C tex $(MAKE) -C no_mf_ext $(MAKE) -C no_smem @@ -20,9 +19,8 @@ run-simx: $(MAKE) -C io_addr run-simx $(MAKE) -C printf run-simx $(MAKE) -C diverge run-simx - #$(MAKE) -C sort run-simx + $(MAKE) -C sort run-simx $(MAKE) -C fence run-simx - #$(MAKE) -C tex run-simx $(MAKE) -C no_mf_ext run-simx $(MAKE) -C no_smem run-simx @@ -34,7 +32,7 @@ run-rtlsim: $(MAKE) -C io_addr run-rtlsim $(MAKE) -C printf run-rtlsim $(MAKE) -C diverge run-rtlsim - #$(MAKE) -C sort run-rtlsim + $(MAKE) -C sort run-rtlsim $(MAKE) -C fence run-rtlsim $(MAKE) -C no_mf_ext run-rtlsim $(MAKE) -C no_smem run-rtlsim @@ -47,9 +45,8 @@ run-vlsim: $(MAKE) -C io_addr run-vlsim $(MAKE) -C printf run-vlsim $(MAKE) -C diverge run-vlsim - #$(MAKE) -C sort run-vlsim + $(MAKE) -C sort run-vlsim $(MAKE) -C fence run-vlsim - $(MAKE) -C tex run-vlsim $(MAKE) -C no_mf_ext run-vlsim $(MAKE) -C no_smem run-vlsim @@ -63,7 +60,6 @@ clean: $(MAKE) -C diverge clean $(MAKE) -C sort clean $(MAKE) -C fence clean - $(MAKE) -C tex clean $(MAKE) -C no_mf_ext clean $(MAKE) -C no_smem clean @@ -77,7 +73,5 @@ clean-all: $(MAKE) -C diverge clean-all $(MAKE) -C sort clean-all $(MAKE) -C fence clean-all - $(MAKE) -C tex clean-all $(MAKE) -C no_mf_ext clean-all - $(MAKE) -C no_smem clean-all - + $(MAKE) -C no_smem clean-all \ No newline at end of file