Texture Instruction - Fixed Color

This commit is contained in:
Krishna Yalamarthy
2021-03-12 18:33:04 -05:00
parent b8b3267757
commit f3f62e9e7b
49 changed files with 2636 additions and 63 deletions

412
hw/VX_config.h Normal file
View File

@@ -0,0 +1,412 @@
// auto-generated by gen_config.py. DO NOT EDIT
// Generated at 2021-03-12 17:51:37.263369
// Empty marker macro: defined here unless the build already supplied it.
#if !defined(VX_USER_CONFIG)
#define VX_USER_CONFIG
#endif
// auto-generated by gen_config.py. DO NOT EDIT
// Generated at 2021-03-12 17:51:37.265050
// Translated from VX_config.vh:
#ifndef VX_CONFIG
#define VX_CONFIG
// Every guarded knob in this section is only a default: define the macro
// before this point (for example with -D on the compiler command line) to
// override the value shown here.

// Machine shape: clusters, cores, warps, threads and barriers.
#if !defined(NUM_CLUSTERS)
#define NUM_CLUSTERS 1
#endif
#if !defined(NUM_CORES)
#define NUM_CORES 1
#endif
#if !defined(NUM_WARPS)
#define NUM_WARPS 4
#endif
#if !defined(NUM_THREADS)
#define NUM_THREADS 4
#endif
#if !defined(NUM_BARRIERS)
#define NUM_BARRIERS 4
#endif

// Feature switches for the L2 cache, the L3 cache and SM.
#if !defined(L2_ENABLE)
#define L2_ENABLE 0
#endif
#if !defined(L3_ENABLE)
#define L3_ENABLE 0
#endif
#if !defined(SM_ENABLE)
#define SM_ENABLE 1
#endif

// Block sizes. The L1 default scales with the thread count (4 per thread).
#if !defined(GLOBAL_BLOCK_SIZE)
#define GLOBAL_BLOCK_SIZE 64
#endif
#if !defined(L1_BLOCK_SIZE)
#define L1_BLOCK_SIZE (NUM_THREADS * 4)
#endif

// Address map. By default the shared-memory base is the same value as the
// I/O bus base, and the frame buffer default is that same address as well.
#if !defined(STARTUP_ADDR)
#define STARTUP_ADDR 0x80000000
#endif
#if !defined(IO_BUS_BASE_ADDR)
#define IO_BUS_BASE_ADDR 0xFF000000
#endif
#if !defined(SHARED_MEM_BASE_ADDR)
#define SHARED_MEM_BASE_ADDR IO_BUS_BASE_ADDR
#endif
#if !defined(SHARED_MEM_BASE_ADDR_ALIGN)
#define SHARED_MEM_BASE_ADDR_ALIGN 64
#endif
#if !defined(IO_BUS_ADDR_COUT)
#define IO_BUS_ADDR_COUT 0xFFFFFFFC
#endif

// Frame buffer placement and dimensions.
#if !defined(FRAME_BUFFER_BASE_ADDR)
#define FRAME_BUFFER_BASE_ADDR 0xFF000000
#endif
#if !defined(FRAME_BUFFER_WIDTH)
#define FRAME_BUFFER_WIDTH 1920
#endif
#if !defined(FRAME_BUFFER_HEIGHT)
#define FRAME_BUFFER_HEIGHT 1080
#endif
// Always derived as width * height; there is no guard, so it cannot be
// overridden independently of the two dimensions.
#define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT)

// ISA extensions are opt-out: EXT_x_ENABLE is defined unless the matching
// EXT_x_DISABLE macro was defined beforehand.
#if !defined(EXT_M_DISABLE)
#define EXT_M_ENABLE
#endif
#if !defined(EXT_F_DISABLE)
#define EXT_F_ENABLE
#endif

// Device identification (fixed values, not overridable).
#define VENDOR_ID 0
#define ARCHITECTURE_ID 0
#define IMPLEMENTATION_ID 0
///////////////////////////////////////////////////////////////////////////////
// Functional-unit latency defaults. Each value can be overridden by defining
// the macro before this point.
#if !defined(LATENCY_IMUL)
#define LATENCY_IMUL 3
#endif
#if !defined(LATENCY_FNCP)
#define LATENCY_FNCP 2
#endif
#if !defined(LATENCY_FMA)
#define LATENCY_FMA 4
#endif

// FDIV and FSQRT have a separate default when ALTERA_S10 is defined.
// A value supplied by the build always wins over both defaults.
#if defined(LATENCY_FDIV)
// keep the caller-supplied value
#elif defined(ALTERA_S10)
#define LATENCY_FDIV 34
#else
#define LATENCY_FDIV 15
#endif

#if defined(LATENCY_FSQRT)
// keep the caller-supplied value
#elif defined(ALTERA_S10)
#define LATENCY_FSQRT 25
#else
#define LATENCY_FSQRT 10
#endif

#if !defined(LATENCY_FDIVSQRT)
#define LATENCY_FDIVSQRT 32
#endif
#if !defined(LATENCY_FCVT)
#define LATENCY_FCVT 4
#endif
// CSR Addresses //////////////////////////////////////////////////////////////
// All values below are fixed CSR numbers (no #ifndef guard, so they cannot be
// overridden by the build). Every *_H name is the companion of the name
// without the suffix and sits exactly 0x80 above it.
// User Floating-Point CSRs
#define CSR_FFLAGS 0x001
#define CSR_FRM 0x002
#define CSR_FCSR 0x003
// Supervisor address-translation CSR (satp in the RISC-V privileged spec)
#define CSR_SATP 0x180
// Machine physical-memory-protection CSRs
#define CSR_PMPCFG0 0x3A0
#define CSR_PMPADDR0 0x3B0
// Machine trap setup / trap handling CSRs
#define CSR_MSTATUS 0x300
#define CSR_MISA 0x301
#define CSR_MEDELEG 0x302
#define CSR_MIDELEG 0x303
#define CSR_MIE 0x304
#define CSR_MTVEC 0x305
#define CSR_MEPC 0x341
// Counter/Timers (0xC00/0xC02 and their high halves 0xC80/0xC82 are the
// unprivileged cycle/instret addresses in the RISC-V spec, not machine-only)
#define CSR_CYCLE 0xC00
#define CSR_CYCLE_H 0xC80
#define CSR_INSTRET 0xC02
#define CSR_INSTRET_H 0xC82
// Machine Performance-monitoring counters
// (numbered consecutively from 0xB03; high halves from 0xB83)
// PERF: pipeline
#define CSR_MPM_IBUF_ST 0xB03
#define CSR_MPM_IBUF_ST_H 0xB83
#define CSR_MPM_SCRB_ST 0xB04
#define CSR_MPM_SCRB_ST_H 0xB84
#define CSR_MPM_ALU_ST 0xB05
#define CSR_MPM_ALU_ST_H 0xB85
#define CSR_MPM_LSU_ST 0xB06
#define CSR_MPM_LSU_ST_H 0xB86
#define CSR_MPM_CSR_ST 0xB07
#define CSR_MPM_CSR_ST_H 0xB87
#define CSR_MPM_FPU_ST 0xB08
#define CSR_MPM_FPU_ST_H 0xB88
#define CSR_MPM_GPU_ST 0xB09
#define CSR_MPM_GPU_ST_H 0xB89
// PERF: icache
#define CSR_MPM_ICACHE_READS 0xB0A // total reads
#define CSR_MPM_ICACHE_READS_H 0xB8A
#define CSR_MPM_ICACHE_MISS_R 0xB0B // total misses
#define CSR_MPM_ICACHE_MISS_R_H 0xB8B
#define CSR_MPM_ICACHE_PIPE_ST 0xB0C // pipeline stalls
#define CSR_MPM_ICACHE_PIPE_ST_H 0xB8C
#define CSR_MPM_ICACHE_CRSP_ST 0xB0D // core response stalls
#define CSR_MPM_ICACHE_CRSP_ST_H 0xB8D
// PERF: dcache
#define CSR_MPM_DCACHE_READS 0xB0E // total reads
#define CSR_MPM_DCACHE_READS_H 0xB8E
#define CSR_MPM_DCACHE_WRITES 0xB0F // total writes
#define CSR_MPM_DCACHE_WRITES_H 0xB8F
#define CSR_MPM_DCACHE_MISS_R 0xB10 // read misses
#define CSR_MPM_DCACHE_MISS_R_H 0xB90
#define CSR_MPM_DCACHE_MISS_W 0xB11 // write misses
#define CSR_MPM_DCACHE_MISS_W_H 0xB91
#define CSR_MPM_DCACHE_BANK_ST 0xB12 // bank conflicts stalls
#define CSR_MPM_DCACHE_BANK_ST_H 0xB92
#define CSR_MPM_DCACHE_MSHR_ST 0xB13 // MSHR stalls
#define CSR_MPM_DCACHE_MSHR_ST_H 0xB93
#define CSR_MPM_DCACHE_PIPE_ST 0xB14 // pipeline stalls
#define CSR_MPM_DCACHE_PIPE_ST_H 0xB94
#define CSR_MPM_DCACHE_CRSP_ST 0xB15 // core response stalls
#define CSR_MPM_DCACHE_CRSP_ST_H 0xB95
// PERF: smem
#define CSR_MPM_SMEM_READS 0xB16 // total reads
#define CSR_MPM_SMEM_READS_H 0xB96
#define CSR_MPM_SMEM_WRITES 0xB17 // total writes
#define CSR_MPM_SMEM_WRITES_H 0xB97
#define CSR_MPM_SMEM_BANK_ST 0xB18 // bank conflicts stalls
#define CSR_MPM_SMEM_BANK_ST_H 0xB98
// PERF: memory
#define CSR_MPM_DRAM_READS 0xB19 // dram reads
#define CSR_MPM_DRAM_READS_H 0xB99
#define CSR_MPM_DRAM_WRITES 0xB1A // dram writes
#define CSR_MPM_DRAM_WRITES_H 0xB9A
#define CSR_MPM_DRAM_ST 0xB1B // dram request stalls
#define CSR_MPM_DRAM_ST_H 0xB9B
#define CSR_MPM_DRAM_LAT 0xB1C // dram latency (total)
#define CSR_MPM_DRAM_LAT_H 0xB9C
// Machine Information Registers
#define CSR_MVENDORID 0xF11
#define CSR_MARCHID 0xF12
#define CSR_MIMPID 0xF13
#define CSR_MHARTID 0xF14
// User SIMT CSRs
// NOTE(review): the abbreviations are not expanded anywhere in this file;
// confirm their meaning against the runtime before relying on the names.
#define CSR_WTID 0xCC0
#define CSR_LTID 0xCC1
#define CSR_GTID 0xCC2
#define CSR_LWID 0xCC3
// CSR_GWID has no number of its own: it is an alias for CSR_MHARTID (0xF14),
// which is why 0xCC4 is unused in this sequence.
#define CSR_GWID CSR_MHARTID
#define CSR_GCID 0xCC5
// Machine SIMT CSRs
#define CSR_NT 0xFC0
#define CSR_NW 0xFC1
#define CSR_NC 0xFC2
// Pipeline Queues ////////////////////////////////////////////////////////////
// Default depths of the per-unit request queues; define either macro before
// this point to override it.

// LSU request queue depth
#if !defined(LSUQ_SIZE)
#define LSUQ_SIZE 8
#endif

// FPU request queue depth
#if !defined(FPUQ_SIZE)
#define FPUQ_SIZE 8
#endif
// Icache Configurable Knobs //////////////////////////////////////////////////
// All values are defaults that the build may override.

// Cache capacity in bytes
#if !defined(ICACHE_SIZE)
#define ICACHE_SIZE 16384
#endif

// Depth of the core request queue
#if !defined(ICREQ_SIZE)
#define ICREQ_SIZE 4
#endif

// Miss-handling register size; defaults to NUM_WARPS
#if !defined(IMSHR_SIZE)
#define IMSHR_SIZE NUM_WARPS
#endif

// Depth of the DRAM request queue
#if !defined(IDREQ_SIZE)
#define IDREQ_SIZE 4
#endif

// Depth of the DRAM response queue
#if !defined(IDRSQ_SIZE)
#define IDRSQ_SIZE 4
#endif
// Dcache Configurable Knobs //////////////////////////////////////////////////
// All values are defaults that the build may override.

// DDRSQ_SIZE below expands to MAX(...). Nothing else in this header defines
// MAX, so provide it here unless the includer already has one. The body is
// token-identical to the glibc <sys/param.h> macro so a later include of that
// header is a benign redefinition. Arguments are evaluated twice: pass only
// side-effect-free expressions.
// NOTE(review): this file is emitted by gen_config.py; the generator should
// emit this helper too, otherwise the fix is lost on regeneration.
#if !defined(MAX)
#define MAX(a,b) (((a)>(b))?(a):(b))
#endif

// Cache capacity in bytes
#if !defined(DCACHE_SIZE)
#define DCACHE_SIZE 16384
#endif

// Number of banks; defaults to NUM_THREADS
#if !defined(DNUM_BANKS)
#define DNUM_BANKS NUM_THREADS
#endif

// Number of bank ports
#if !defined(DNUM_PORTS)
#define DNUM_PORTS 1
#endif

// Depth of the core request queue
#if !defined(DCREQ_SIZE)
#define DCREQ_SIZE 4
#endif

// Miss-handling register size; defaults to LSUQ_SIZE
#if !defined(DMSHR_SIZE)
#define DMSHR_SIZE LSUQ_SIZE
#endif

// Depth of the DRAM request queue
#if !defined(DDREQ_SIZE)
#define DDREQ_SIZE 4
#endif

// Depth of the DRAM response queue: two entries per bank, never fewer than 4
#if !defined(DDRSQ_SIZE)
#define DDRSQ_SIZE MAX(4, (DNUM_BANKS * 2))
#endif
// SM Configurable Knobs //////////////////////////////////////////////////////
// All values are defaults that the build may override.

// Stack size reserved for each thread
#if !defined(STACK_SIZE)
#define STACK_SIZE 1024
#endif

// Total size: one STACK_SIZE slot for every thread of every warp
#if !defined(SMEM_SIZE)
#define SMEM_SIZE (STACK_SIZE * NUM_WARPS * NUM_THREADS)
#endif

// Number of banks; defaults to NUM_THREADS
#if !defined(SNUM_BANKS)
#define SNUM_BANKS NUM_THREADS
#endif

// Depth of the core request queue
#if !defined(SCREQ_SIZE)
#define SCREQ_SIZE 4
#endif
// L2cache Configurable Knobs /////////////////////////////////////////////////
// All values are defaults that the build may override.

// L2NUM_BANKS and L2DRSQ_SIZE below expand to MIN(...) / MAX(...). Nothing
// else in this header defines them, so provide both here unless the includer
// already has them. The bodies are token-identical to the glibc
// <sys/param.h> macros so a later include of that header is a benign
// redefinition. Arguments are evaluated twice: pass only side-effect-free
// expressions.
// NOTE(review): this file is emitted by gen_config.py; the generator should
// emit these helpers too, otherwise the fix is lost on regeneration.
#if !defined(MIN)
#define MIN(a,b) (((a)<(b))?(a):(b))
#endif
#if !defined(MAX)
#define MAX(a,b) (((a)>(b))?(a):(b))
#endif

// Cache capacity in bytes
#if !defined(L2CACHE_SIZE)
#define L2CACHE_SIZE 65536
#endif

// Number of banks: one per core, capped at 4
#if !defined(L2NUM_BANKS)
#define L2NUM_BANKS MIN(NUM_CORES, 4)
#endif

// Depth of the core request queue
#if !defined(L2CREQ_SIZE)
#define L2CREQ_SIZE 4
#endif

// Miss-handling register size
#if !defined(L2MSHR_SIZE)
#define L2MSHR_SIZE 16
#endif

// Depth of the DRAM request queue
#if !defined(L2DREQ_SIZE)
#define L2DREQ_SIZE 4
#endif

// Depth of the DRAM response queue: two entries per bank, never fewer than 4
#if !defined(L2DRSQ_SIZE)
#define L2DRSQ_SIZE MAX(4, (L2NUM_BANKS * 2))
#endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
// All values are defaults that the build may override.

// L3NUM_BANKS and L3DRSQ_SIZE below expand to MIN(...) / MAX(...). Nothing
// else in this header defines them, so provide both here unless the includer
// already has them. The bodies are token-identical to the glibc
// <sys/param.h> macros so a later include of that header is a benign
// redefinition. Arguments are evaluated twice: pass only side-effect-free
// expressions.
// NOTE(review): this file is emitted by gen_config.py; the generator should
// emit these helpers too, otherwise the fix is lost on regeneration.
#if !defined(MIN)
#define MIN(a,b) (((a)<(b))?(a):(b))
#endif
#if !defined(MAX)
#define MAX(a,b) (((a)>(b))?(a):(b))
#endif

// Cache capacity in bytes
#if !defined(L3CACHE_SIZE)
#define L3CACHE_SIZE 131072
#endif

// Number of banks: one per cluster, capped at 4
#if !defined(L3NUM_BANKS)
#define L3NUM_BANKS MIN(NUM_CLUSTERS, 4)
#endif

// Depth of the core request queue
#if !defined(L3CREQ_SIZE)
#define L3CREQ_SIZE 4
#endif

// Miss-handling register size
#if !defined(L3MSHR_SIZE)
#define L3MSHR_SIZE 16
#endif

// Depth of the DRAM request queue
#if !defined(L3DREQ_SIZE)
#define L3DREQ_SIZE 4
#endif

// Depth of the DRAM response queue: two entries per bank, never fewer than 4
#if !defined(L3DRSQ_SIZE)
#define L3DRSQ_SIZE MAX(4, (L3NUM_BANKS * 2))
#endif
#endif

View File

@@ -73,13 +73,14 @@ module VX_commit #(
.ld_commit_if (ld_commit_if),
.csr_commit_if (csr_commit_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
.writeback_if (writeback_if)
);
// store and gpu commits don't writeback
// store doesn't writeback
assign st_commit_if.ready = 1'b1;
assign gpu_commit_if.ready = 1'b1;
// assign gpu_commit_if.ready = 1'b1;
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin

View File

@@ -357,6 +357,13 @@ module VX_decode #(
use_rs2 = 1;
is_wstall = 1;
end
3'h5: begin
op_type = `OP_BITS'(`GPU_TEX);
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
use_rs3 = 1;
end
default:;
endcase
end

View File

@@ -52,6 +52,8 @@
`define INST_GPU 7'b1101011
`define INST_TEX 7'b0101011
///////////////////////////////////////////////////////////////////////////////
`define FRM_RNE 3'b000 // round to nearest even
@@ -182,6 +184,7 @@
`define GPU_SPLIT 3'h2
`define GPU_JOIN 3'h3
`define GPU_BAR 3'h4
`define GPU_TEX 3'h5
`define GPU_OTHER 3'h7
`define GPU_BITS 3
`define GPU_OP(x) x[`GPU_BITS-1:0]
@@ -381,6 +384,17 @@
`define XDRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH+`CLOG2(2))
////////////////////////// Texture Unit Configurable Knobs //////////////////////////////
`define MADDRW 8
`define MAXWTW 8
`define MAXHTW 8
`define MAXFTW 8
`define MAXFMW 8
`define MAXAMW 8
`define TAGW 8
`define DATAW 32
////////////////////////////////////////////////////////////////////////////////////////
`include "VX_types.vh"
`endif

View File

@@ -23,10 +23,14 @@ module VX_gpu_unit #(
gpu_barrier_t barrier;
gpu_split_t split;
VX_tex_req_if tex_req_if;
VX_tex_rsp_if tex_rsp_if;
wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN);
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
wire is_tex = (gpu_req_if.op_type == `GPU_TEX);
// tmc
@@ -39,7 +43,7 @@ module VX_gpu_unit #(
// wspawn
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
wire [31:0] wspawn_pc = gpu_req_if.rs2_data[0];
wire [`NUM_WARPS-1:0] wspawn_wmask;
for (genvar i = 0; i < `NUM_WARPS; i++) begin
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
@@ -69,21 +73,48 @@ module VX_gpu_unit #(
assign barrier.valid = is_bar;
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data - 1);
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1);
// texture
assign tex_req_if.valid = is_tex;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign tex_req_if.u[i] = gpu_req_if.rs1_data[i];
assign tex_req_if.v[i] = gpu_req_if.rs2_data[i];
assign tex_req_if.lod_t[i] = gpu_req_if.rs3_data[i];
end
`UNUSED_VAR (tex_req_if.u)
`UNUSED_VAR (tex_req_if.v)
`UNUSED_VAR (tex_req_if.valid)
`UNUSED_VAR (tex_req_if.lod_t)
VX_tex_unit #(
.CORE_ID(CORE_ID)
) texture_unit (
.clk (clk),
.reset (reset),
.tex_req_if (tex_req_if),
.tex_rsp_if (tex_rsp_if)
);
assign gpu_req_if.valid = is_tex;
assign gpu_req_if.wb = tex_rsp_if.ready;
// output
wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE),
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE + (`NUM_THREADS * 32)),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, tex_rsp_if.data, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.data, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
);
assign gpu_commit_if.eop = 1'b1;
@@ -99,7 +130,7 @@ module VX_gpu_unit #(
`SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask);
`SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type);
`SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]);
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data);
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data[0]);
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
`SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid);
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc);

View File

@@ -111,14 +111,14 @@ module VX_instr_demux (
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) //update number of bits
) gpu_buffer (
.clk (clk),
.reset (reset),
.valid_in (gpu_req_valid),
.ready_in (gpu_req_ready),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
.valid_out (gpu_req_if.valid),
.ready_out (gpu_req_if.ready)
);

View File

@@ -11,6 +11,7 @@ module VX_writeback #(
VX_commit_if ld_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if,
// outputs
VX_writeback_if writeback_if
@@ -19,6 +20,7 @@ module VX_writeback #(
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
wire gpu_valid = gpu_commit_if.valid && gpu_commit_if.wb;
wire wb_valid;
wire [`NW_BITS-1:0] wb_wid;
@@ -31,37 +33,44 @@ module VX_writeback #(
assign wb_valid = ld_valid |
fpu_valid |
csr_valid |
alu_valid;
alu_valid |
gpu_valid;
assign wb_wid = ld_valid ? ld_commit_if.wid :
fpu_valid ? fpu_commit_if.wid :
csr_valid ? csr_commit_if.wid :
/*alu_valid ?*/ alu_commit_if.wid;
alu_valid ? alu_commit_if.wid :
/*gpu_valid*/ gpu_commit_if.wid;
assign wb_PC = ld_valid ? ld_commit_if.PC :
fpu_valid ? fpu_commit_if.PC :
csr_valid ? csr_commit_if.PC :
/*alu_valid ?*/ alu_commit_if.PC;
alu_valid ? alu_commit_if.PC :
/*gpu_valid*/ gpu_commit_if.PC;
assign wb_tmask = ld_valid ? ld_commit_if.tmask :
fpu_valid ? fpu_commit_if.tmask :
csr_valid ? csr_commit_if.tmask :
/*alu_valid ?*/ alu_commit_if.tmask;
alu_valid ? alu_commit_if.tmask :
/*gpu_valid*/ gpu_commit_if.tmask;
assign wb_rd = ld_valid ? ld_commit_if.rd :
fpu_valid ? fpu_commit_if.rd :
csr_valid ? csr_commit_if.rd :
/*alu_valid ?*/ alu_commit_if.rd;
alu_valid ? alu_commit_if.rd :
/*gpu_valid*/ gpu_commit_if.rd;
assign wb_data = ld_valid ? ld_commit_if.data :
fpu_valid ? fpu_commit_if.data :
csr_valid ? csr_commit_if.data :
/*alu_valid ?*/ alu_commit_if.data;
alu_valid ? alu_commit_if.data :
/*gpu_valid*/ gpu_commit_if.data;
assign wb_eop = ld_valid ? ld_commit_if.eop :
fpu_valid ? fpu_commit_if.eop :
csr_valid ? csr_commit_if.eop :
/*alu_valid ?*/ alu_commit_if.eop;
alu_valid ? alu_commit_if.eop :
/*gpu_valid*/ gpu_commit_if.eop;
wire stall = ~writeback_if.ready && writeback_if.valid;
@@ -79,7 +88,9 @@ module VX_writeback #(
assign ld_commit_if.ready = !stall;
assign fpu_commit_if.ready = !stall && !ld_valid;
assign csr_commit_if.ready = !stall && !ld_valid && !fpu_valid;
assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid;
assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid;
// if not TEX instruction, no writeback and commit is ready
assign gpu_commit_if.ready = (!stall && !ld_valid && !fpu_valid && !csr_valid && !alu_valid) || !gpu_commit_if.wb ;
// special workaround to get RISC-V tests Pass/Fail status
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;

View File

@@ -13,7 +13,8 @@ interface VX_gpu_req_if();
wire [31:0] next_PC;
wire [`GPU_BITS-1:0] op_type;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
wire [`NR_BITS-1:0] rd;
wire wb;

View File

@@ -0,0 +1,24 @@
`ifndef VX_TEX_REQ_IF
`define VX_TEX_REQ_IF

`include "VX_define.vh"

// Request bundle handed to the texture unit.
interface VX_tex_req_if ();

    // Request strobe.
    wire valid;

    // Per-thread operands: `NUM_THREADS lanes of 32 bits each.
    // NOTE(review): presumably u/v are texture coordinates and lod_t selects
    // the level of detail -- confirm against the texture unit spec.
    wire [`NUM_THREADS-1:0][31:0] u, v, lod_t;

    // Signals below are placeholders that are not declared yet.
    // wire [`MADDRW-1:0] addr;
    // wire [`MAXWTW-1:0] width;
    // wire [`MAXHTW-1:0] height;
    // wire [`MAXFTW-1:0] format;
    // wire [`MAXFMW-1:0] filter;
    // wire [`MAXAMW-1:0] clamp;
    // wire [`TAGW-1:0] tag;
    // wire ready;

endinterface

`endif

View File

@@ -0,0 +1,14 @@
`ifndef VX_TEX_RSP_IF
`define VX_TEX_RSP_IF

`include "VX_define.vh"

// Response bundle produced by the texture unit.
interface VX_tex_rsp_if ();

    // Single-bit status flag accompanying the response.
    // NOTE(review): there is no separate valid signal yet (see the
    // placeholder below); confirm the intended handshake meaning of `ready`.
    wire ready;

    // Per-thread result: `NUM_THREADS lanes of 32 bits each.
    wire [`NUM_THREADS-1:0][31:0] data;

    // Signals below are placeholders that are not declared yet.
    // wire valid;
    // wire [`TAGW-1:0] tag;

endinterface

`endif

View File

@@ -1,50 +1,55 @@
`include "VX_platform.vh"
`include "VX_define.vh"
module VX_tex_unit #(
parameter TADDRW = 32,
parameter MADDRW = 32,
parameter DATAW = 32,
parameter MAXWTW = 8,
parameter MAXHTW = 8,
parameter MAXFTW = 2,
parameter MAXFMW = 1,
parameter MAXAMW = 2,
parameter TAGW = 16,
parameter NUMCRQS = 32
module VX_tex_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Inputs
VX_tex_req_if tex_req_if,
// Texture Request
input wire tex_req_valid,
input wire [TADDRW-1:0] tex_req_u,
input wire [TADDRW-1:0] tex_req_v,
input wire [MADDRW-1:0] tex_req_addr,
input wire [MAXWTW-1:0] tex_req_width,
input wire [MAXHTW-1:0] tex_req_height,
input wire [MAXFTW-1:0] tex_req_format,
input wire [MAXFMW-1:0] tex_req_filter,
input wire [MAXAMW-1:0] tex_req_clamp,
input wire [TAGW-1:0] tex_req_tag,
output wire tex_req_ready,
// Outputs
VX_tex_rsp_if tex_rsp_if
// VX_commit_if gpu_commit_if
// // Texture Request
// input wire tex_req_valid,
// input wire [`TADDRW-1:0] tex_req_u,
// input wire [`TADDRW-1:0] tex_req_v,
// input wire [`MADDRW-1:0] tex_req_addr,
// input wire [`MAXWTW-1:0] tex_req_width,
// input wire [`MAXHTW-1:0] tex_req_height,
// input wire [`MAXFTW-1:0] tex_req_format,
// input wire [`MAXFMW-1:0] tex_req_filter,
// input wire [`MAXAMW-1:0] tex_req_clamp,
// input wire [`TAGW-1:0] tex_req_tag,
// output wire tex_req_ready,
// Texture Response
output wire tex_rsp_valid,
output wire [TAGW-1:0] tex_rsp_tag,
input wire [DATAW-1:0] tex_rsp_data,
input wire tex_rsp_ready,
// // Texture Response
// output wire tex_rsp_valid,
// output wire [`TAGW-1:0] tex_rsp_tag,
// input wire [`DATAW-1:0] tex_rsp_data,
// input wire tex_rsp_ready,
// Cache Request
output wire [NUMCRQS-1:0] cache_req_valids,
output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
input wire cache_req_ready,
// output wire [NUMCRQS-1:0] cache_req_valids,
// output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
// input wire cache_req_ready,
// Cache Response
input wire cache_rsp_valid,
input wire [MADDRW-1:0] cache_rsp_addr,
input wire [DATAW-1:0] cache_rsp_data,
output wire cache_rsp_ready
// input wire cache_rsp_valid,
// input wire [MADDRW-1:0] cache_rsp_addr,
// input wire [DATAW-1:0] cache_rsp_data,
// output wire cache_rsp_ready
);
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign tex_rsp_if.data[i] = 32'hFAAF;
end
assign tex_rsp_if.ready = 1'b1;
endmodule

View File

@@ -36,8 +36,9 @@ RTL_DIR=../rtl
DPI_DIR=../dpi
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE)
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) $(TEX_INCLUDE)
SRCS = simulator.cpp testbench.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp