Texture Instruction - Fixed Color
This commit is contained in:
412
hw/VX_config.h
Normal file
412
hw/VX_config.h
Normal file
@@ -0,0 +1,412 @@
|
||||
// auto-generated by gen_config.py. DO NOT EDIT
|
||||
// Generated at 2021-03-12 17:51:37.263369
|
||||
|
||||
#ifndef VX_USER_CONFIG
|
||||
#define VX_USER_CONFIG
|
||||
|
||||
|
||||
#endif
|
||||
// auto-generated by gen_config.py. DO NOT EDIT
|
||||
// Generated at 2021-03-12 17:51:37.265050
|
||||
|
||||
// Translated from VX_config.vh:
|
||||
|
||||
#ifndef VX_CONFIG
|
||||
#define VX_CONFIG
|
||||
|
||||
|
||||
|
||||
#ifndef NUM_CLUSTERS
|
||||
#define NUM_CLUSTERS 1
|
||||
#endif
|
||||
|
||||
#ifndef NUM_CORES
|
||||
#define NUM_CORES 1
|
||||
#endif
|
||||
|
||||
#ifndef NUM_WARPS
|
||||
#define NUM_WARPS 4
|
||||
#endif
|
||||
|
||||
#ifndef NUM_THREADS
|
||||
#define NUM_THREADS 4
|
||||
#endif
|
||||
|
||||
#ifndef NUM_BARRIERS
|
||||
#define NUM_BARRIERS 4
|
||||
#endif
|
||||
|
||||
#ifndef L2_ENABLE
|
||||
#define L2_ENABLE 0
|
||||
#endif
|
||||
|
||||
#ifndef L3_ENABLE
|
||||
#define L3_ENABLE 0
|
||||
#endif
|
||||
|
||||
#ifndef SM_ENABLE
|
||||
#define SM_ENABLE 1
|
||||
#endif
|
||||
|
||||
#ifndef GLOBAL_BLOCK_SIZE
|
||||
#define GLOBAL_BLOCK_SIZE 64
|
||||
#endif
|
||||
|
||||
#ifndef L1_BLOCK_SIZE
|
||||
#define L1_BLOCK_SIZE (NUM_THREADS * 4)
|
||||
#endif
|
||||
|
||||
#ifndef STARTUP_ADDR
|
||||
#define STARTUP_ADDR 0x80000000
|
||||
#endif
|
||||
|
||||
#ifndef IO_BUS_BASE_ADDR
|
||||
#define IO_BUS_BASE_ADDR 0xFF000000
|
||||
#endif
|
||||
|
||||
#ifndef SHARED_MEM_BASE_ADDR
|
||||
#define SHARED_MEM_BASE_ADDR IO_BUS_BASE_ADDR
|
||||
#endif
|
||||
|
||||
#ifndef SHARED_MEM_BASE_ADDR_ALIGN
|
||||
#define SHARED_MEM_BASE_ADDR_ALIGN 64
|
||||
#endif
|
||||
|
||||
#ifndef IO_BUS_ADDR_COUT
|
||||
#define IO_BUS_ADDR_COUT 0xFFFFFFFC
|
||||
#endif
|
||||
|
||||
#ifndef FRAME_BUFFER_BASE_ADDR
|
||||
#define FRAME_BUFFER_BASE_ADDR 0xFF000000
|
||||
#endif
|
||||
|
||||
#ifndef FRAME_BUFFER_WIDTH
|
||||
#define FRAME_BUFFER_WIDTH 1920
|
||||
#endif
|
||||
|
||||
#ifndef FRAME_BUFFER_HEIGHT
|
||||
#define FRAME_BUFFER_HEIGHT 1080
|
||||
#endif
|
||||
|
||||
#define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT)
|
||||
|
||||
#ifndef EXT_M_DISABLE
|
||||
#define EXT_M_ENABLE
|
||||
#endif
|
||||
|
||||
#ifndef EXT_F_DISABLE
|
||||
#define EXT_F_ENABLE
|
||||
#endif
|
||||
|
||||
// Device identification
|
||||
#define VENDOR_ID 0
|
||||
#define ARCHITECTURE_ID 0
|
||||
#define IMPLEMENTATION_ID 0
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef LATENCY_IMUL
|
||||
#define LATENCY_IMUL 3
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FNCP
|
||||
#define LATENCY_FNCP 2
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FMA
|
||||
#define LATENCY_FMA 4
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FDIV
|
||||
#ifdef ALTERA_S10
|
||||
#define LATENCY_FDIV 34
|
||||
#else
|
||||
#define LATENCY_FDIV 15
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FSQRT
|
||||
#ifdef ALTERA_S10
|
||||
#define LATENCY_FSQRT 25
|
||||
#else
|
||||
#define LATENCY_FSQRT 10
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FDIVSQRT
|
||||
#define LATENCY_FDIVSQRT 32
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FCVT
|
||||
#define LATENCY_FCVT 4
|
||||
#endif
|
||||
|
||||
// CSR Addresses //////////////////////////////////////////////////////////////
|
||||
|
||||
// User Floating-Point CSRs
|
||||
#define CSR_FFLAGS 0x001
|
||||
#define CSR_FRM 0x002
|
||||
#define CSR_FCSR 0x003
|
||||
|
||||
#define CSR_SATP 0x180
|
||||
|
||||
#define CSR_PMPCFG0 0x3A0
|
||||
#define CSR_PMPADDR0 0x3B0
|
||||
|
||||
#define CSR_MSTATUS 0x300
|
||||
#define CSR_MISA 0x301
|
||||
#define CSR_MEDELEG 0x302
|
||||
#define CSR_MIDELEG 0x303
|
||||
#define CSR_MIE 0x304
|
||||
#define CSR_MTVEC 0x305
|
||||
|
||||
#define CSR_MEPC 0x341
|
||||
|
||||
// User-level Counter/Timers (unprivileged read-only CSRs, 0xC00 range)
|
||||
#define CSR_CYCLE 0xC00
|
||||
#define CSR_CYCLE_H 0xC80
|
||||
#define CSR_INSTRET 0xC02
|
||||
#define CSR_INSTRET_H 0xC82
|
||||
|
||||
// Machine Performance-monitoring counters
|
||||
// PERF: pipeline
|
||||
#define CSR_MPM_IBUF_ST 0xB03
|
||||
#define CSR_MPM_IBUF_ST_H 0xB83
|
||||
#define CSR_MPM_SCRB_ST 0xB04
|
||||
#define CSR_MPM_SCRB_ST_H 0xB84
|
||||
#define CSR_MPM_ALU_ST 0xB05
|
||||
#define CSR_MPM_ALU_ST_H 0xB85
|
||||
#define CSR_MPM_LSU_ST 0xB06
|
||||
#define CSR_MPM_LSU_ST_H 0xB86
|
||||
#define CSR_MPM_CSR_ST 0xB07
|
||||
#define CSR_MPM_CSR_ST_H 0xB87
|
||||
#define CSR_MPM_FPU_ST 0xB08
|
||||
#define CSR_MPM_FPU_ST_H 0xB88
|
||||
#define CSR_MPM_GPU_ST 0xB09
|
||||
#define CSR_MPM_GPU_ST_H 0xB89
|
||||
// PERF: icache
|
||||
#define CSR_MPM_ICACHE_READS 0xB0A // total reads
|
||||
#define CSR_MPM_ICACHE_READS_H 0xB8A
|
||||
#define CSR_MPM_ICACHE_MISS_R 0xB0B // total misses
|
||||
#define CSR_MPM_ICACHE_MISS_R_H 0xB8B
|
||||
#define CSR_MPM_ICACHE_PIPE_ST 0xB0C // pipeline stalls
|
||||
#define CSR_MPM_ICACHE_PIPE_ST_H 0xB8C
|
||||
#define CSR_MPM_ICACHE_CRSP_ST 0xB0D // core response stalls
|
||||
#define CSR_MPM_ICACHE_CRSP_ST_H 0xB8D
|
||||
// PERF: dcache
|
||||
#define CSR_MPM_DCACHE_READS 0xB0E // total reads
|
||||
#define CSR_MPM_DCACHE_READS_H 0xB8E
|
||||
#define CSR_MPM_DCACHE_WRITES 0xB0F // total writes
|
||||
#define CSR_MPM_DCACHE_WRITES_H 0xB8F
|
||||
#define CSR_MPM_DCACHE_MISS_R 0xB10 // read misses
|
||||
#define CSR_MPM_DCACHE_MISS_R_H 0xB90
|
||||
#define CSR_MPM_DCACHE_MISS_W 0xB11 // write misses
|
||||
#define CSR_MPM_DCACHE_MISS_W_H 0xB91
|
||||
#define CSR_MPM_DCACHE_BANK_ST 0xB12 // bank conflicts stalls
|
||||
#define CSR_MPM_DCACHE_BANK_ST_H 0xB92
|
||||
#define CSR_MPM_DCACHE_MSHR_ST 0xB13 // MSHR stalls
|
||||
#define CSR_MPM_DCACHE_MSHR_ST_H 0xB93
|
||||
#define CSR_MPM_DCACHE_PIPE_ST 0xB14 // pipeline stalls
|
||||
#define CSR_MPM_DCACHE_PIPE_ST_H 0xB94
|
||||
#define CSR_MPM_DCACHE_CRSP_ST 0xB15 // core response stalls
|
||||
#define CSR_MPM_DCACHE_CRSP_ST_H 0xB95
|
||||
// PERF: smem
|
||||
#define CSR_MPM_SMEM_READS 0xB16 // total reads
|
||||
#define CSR_MPM_SMEM_READS_H 0xB96
|
||||
#define CSR_MPM_SMEM_WRITES 0xB17 // total writes
|
||||
#define CSR_MPM_SMEM_WRITES_H 0xB97
|
||||
#define CSR_MPM_SMEM_BANK_ST 0xB18 // bank conflicts stalls
|
||||
#define CSR_MPM_SMEM_BANK_ST_H 0xB98
|
||||
// PERF: memory
|
||||
#define CSR_MPM_DRAM_READS 0xB19 // dram reads
|
||||
#define CSR_MPM_DRAM_READS_H 0xB99
|
||||
#define CSR_MPM_DRAM_WRITES 0xB1A // dram writes
|
||||
#define CSR_MPM_DRAM_WRITES_H 0xB9A
|
||||
#define CSR_MPM_DRAM_ST 0xB1B // dram request stalls
|
||||
#define CSR_MPM_DRAM_ST_H 0xB9B
|
||||
#define CSR_MPM_DRAM_LAT 0xB1C // dram latency (total)
|
||||
#define CSR_MPM_DRAM_LAT_H 0xB9C
|
||||
|
||||
// Machine Information Registers
|
||||
#define CSR_MVENDORID 0xF11
|
||||
#define CSR_MARCHID 0xF12
|
||||
#define CSR_MIMPID 0xF13
|
||||
#define CSR_MHARTID 0xF14
|
||||
|
||||
// User SIMT CSRs
|
||||
#define CSR_WTID 0xCC0
|
||||
#define CSR_LTID 0xCC1
|
||||
#define CSR_GTID 0xCC2
|
||||
#define CSR_LWID 0xCC3
|
||||
#define CSR_GWID CSR_MHARTID
|
||||
#define CSR_GCID 0xCC5
|
||||
|
||||
// Machine SIMT CSRs
|
||||
#define CSR_NT 0xFC0
|
||||
#define CSR_NW 0xFC1
|
||||
#define CSR_NC 0xFC2
|
||||
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
// Size of LSU Request Queue
|
||||
#ifndef LSUQ_SIZE
|
||||
#define LSUQ_SIZE 8
|
||||
#endif
|
||||
|
||||
// Size of FPU Request Queue
|
||||
#ifndef FPUQ_SIZE
|
||||
#define FPUQ_SIZE 8
|
||||
#endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
#ifndef ICACHE_SIZE
|
||||
#define ICACHE_SIZE 16384
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef ICREQ_SIZE
|
||||
#define ICREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
#ifndef IMSHR_SIZE
|
||||
#define IMSHR_SIZE NUM_WARPS
|
||||
#endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
#ifndef IDREQ_SIZE
|
||||
#define IDREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
#ifndef IDRSQ_SIZE
|
||||
#define IDRSQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Dcache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
#ifndef DCACHE_SIZE
|
||||
#define DCACHE_SIZE 16384
|
||||
#endif
|
||||
|
||||
// Number of banks
|
||||
#ifndef DNUM_BANKS
|
||||
#define DNUM_BANKS NUM_THREADS
|
||||
#endif
|
||||
|
||||
// Number of bank ports
|
||||
#ifndef DNUM_PORTS
|
||||
#define DNUM_PORTS 1
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef DCREQ_SIZE
|
||||
#define DCREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
#ifndef DMSHR_SIZE
|
||||
#define DMSHR_SIZE LSUQ_SIZE
|
||||
#endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
#ifndef DDREQ_SIZE
|
||||
#define DDREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Fallback MIN/MAX helpers.
// Several defaults in this header (DDRSQ_SIZE, L2NUM_BANKS, L2DRSQ_SIZE,
// L3NUM_BANKS, L3DRSQ_SIZE) expand to MAX(...) or MIN(...), but the header
// never defines either macro, so expanding one of those defaults from C/C++
// fails to build unless some other header happens to provide them.
// The guards let any pre-existing definition win.
// Each argument is evaluated twice: pass side-effect-free expressions only.
// NOTE(review): this file is produced by gen_config.py; mirror this change
// in the generator so it survives regeneration.
#ifndef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif

#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif

// DRAM Response Queue Size: two entries per dcache bank, never fewer than 4
#ifndef DDRSQ_SIZE
#define DDRSQ_SIZE MAX(4, (DNUM_BANKS * 2))
#endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
||||
// per thread stack size
|
||||
#ifndef STACK_SIZE
|
||||
#define STACK_SIZE 1024
|
||||
#endif
|
||||
|
||||
// Size of shared memory in bytes
|
||||
#ifndef SMEM_SIZE
|
||||
#define SMEM_SIZE (STACK_SIZE * NUM_WARPS * NUM_THREADS)
|
||||
#endif
|
||||
|
||||
// Number of banks
|
||||
#ifndef SNUM_BANKS
|
||||
#define SNUM_BANKS NUM_THREADS
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef SCREQ_SIZE
|
||||
#define SCREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// L2cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
#ifndef L2CACHE_SIZE
|
||||
#define L2CACHE_SIZE 65536
|
||||
#endif
|
||||
|
||||
// Number of banks
|
||||
#ifndef L2NUM_BANKS
|
||||
#define L2NUM_BANKS MIN(NUM_CORES, 4)
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef L2CREQ_SIZE
|
||||
#define L2CREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
#ifndef L2MSHR_SIZE
|
||||
#define L2MSHR_SIZE 16
|
||||
#endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
#ifndef L2DREQ_SIZE
|
||||
#define L2DREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
#ifndef L2DRSQ_SIZE
|
||||
#define L2DRSQ_SIZE MAX(4, (L2NUM_BANKS * 2))
|
||||
#endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
#ifndef L3CACHE_SIZE
|
||||
#define L3CACHE_SIZE 131072
|
||||
#endif
|
||||
|
||||
// Number of banks
|
||||
#ifndef L3NUM_BANKS
|
||||
#define L3NUM_BANKS MIN(NUM_CLUSTERS, 4)
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef L3CREQ_SIZE
|
||||
#define L3CREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
#ifndef L3MSHR_SIZE
|
||||
#define L3MSHR_SIZE 16
|
||||
#endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
#ifndef L3DREQ_SIZE
|
||||
#define L3DREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
#ifndef L3DRSQ_SIZE
|
||||
#define L3DRSQ_SIZE MAX(4, (L3NUM_BANKS * 2))
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -73,13 +73,14 @@ module VX_commit #(
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if)
|
||||
);
|
||||
|
||||
// store and gpu commits don't writeback
|
||||
// store doesn't writeback
|
||||
assign st_commit_if.ready = 1'b1;
|
||||
assign gpu_commit_if.ready = 1'b1;
|
||||
// assign gpu_commit_if.ready = 1'b1;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
|
||||
@@ -357,6 +357,13 @@ module VX_decode #(
|
||||
use_rs2 = 1;
|
||||
is_wstall = 1;
|
||||
end
|
||||
3'h5: begin
|
||||
op_type = `OP_BITS'(`GPU_TEX);
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_rs3 = 1;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
@@ -52,6 +52,8 @@
|
||||
|
||||
`define INST_GPU 7'b1101011
|
||||
|
||||
`define INST_TEX 7'b0101011
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
@@ -182,6 +184,7 @@
|
||||
`define GPU_SPLIT 3'h2
|
||||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_TEX 3'h5
|
||||
`define GPU_OTHER 3'h7
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
@@ -381,6 +384,17 @@
|
||||
|
||||
`define XDRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH+`CLOG2(2))
|
||||
|
||||
////////////////////////// Texture Unit Configurable Knobs //////////////////////////////
|
||||
`define MADDRW 8
|
||||
`define MAXWTW 8
|
||||
`define MAXHTW 8
|
||||
`define MAXFTW 8
|
||||
`define MAXFMW 8
|
||||
`define MAXAMW 8
|
||||
`define TAGW 8
|
||||
`define DATAW 32
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "VX_types.vh"
|
||||
|
||||
`endif
|
||||
|
||||
@@ -23,10 +23,14 @@ module VX_gpu_unit #(
|
||||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
|
||||
VX_tex_req_if tex_req_if;
|
||||
VX_tex_rsp_if tex_rsp_if;
|
||||
|
||||
wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
|
||||
wire is_tex = (gpu_req_if.op_type == `GPU_TEX);
|
||||
|
||||
// tmc
|
||||
|
||||
@@ -39,7 +43,7 @@ module VX_gpu_unit #(
|
||||
|
||||
// wspawn
|
||||
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data[0];
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
@@ -69,21 +73,48 @@ module VX_gpu_unit #(
|
||||
|
||||
assign barrier.valid = is_bar;
|
||||
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data - 1);
|
||||
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1);
|
||||
|
||||
// texture
|
||||
assign tex_req_if.valid = is_tex;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tex_req_if.u[i] = gpu_req_if.rs1_data[i];
|
||||
assign tex_req_if.v[i] = gpu_req_if.rs2_data[i];
|
||||
assign tex_req_if.lod_t[i] = gpu_req_if.rs3_data[i];
|
||||
end
|
||||
|
||||
`UNUSED_VAR (tex_req_if.u)
|
||||
`UNUSED_VAR (tex_req_if.v)
|
||||
`UNUSED_VAR (tex_req_if.valid)
|
||||
`UNUSED_VAR (tex_req_if.lod_t)
|
||||
|
||||
|
||||
VX_tex_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) texture_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.tex_req_if (tex_req_if),
|
||||
.tex_rsp_if (tex_rsp_if)
|
||||
);
|
||||
|
||||
assign gpu_req_if.valid = is_tex;
|
||||
assign gpu_req_if.wb = tex_rsp_if.ready;
|
||||
|
||||
// output
|
||||
|
||||
wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE),
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall),
|
||||
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, tex_rsp_if.data, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.data, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
);
|
||||
|
||||
assign gpu_commit_if.eop = 1'b1;
|
||||
@@ -99,7 +130,7 @@ module VX_gpu_unit #(
|
||||
`SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask);
|
||||
`SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type);
|
||||
`SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]);
|
||||
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data);
|
||||
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data[0]);
|
||||
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc);
|
||||
|
||||
@@ -111,14 +111,14 @@ module VX_instr_demux (
|
||||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) //update number of bits
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
||||
@@ -11,6 +11,7 @@ module VX_writeback #(
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if writeback_if
|
||||
@@ -19,6 +20,7 @@ module VX_writeback #(
|
||||
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
|
||||
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
|
||||
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
|
||||
wire gpu_valid = gpu_commit_if.valid && gpu_commit_if.wb;
|
||||
|
||||
wire wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_wid;
|
||||
@@ -31,37 +33,44 @@ module VX_writeback #(
|
||||
assign wb_valid = ld_valid |
|
||||
fpu_valid |
|
||||
csr_valid |
|
||||
alu_valid;
|
||||
alu_valid |
|
||||
gpu_valid;
|
||||
|
||||
assign wb_wid = ld_valid ? ld_commit_if.wid :
|
||||
fpu_valid ? fpu_commit_if.wid :
|
||||
csr_valid ? csr_commit_if.wid :
|
||||
/*alu_valid ?*/ alu_commit_if.wid;
|
||||
alu_valid ? alu_commit_if.wid :
|
||||
/*gpu_valid*/ gpu_commit_if.wid;
|
||||
|
||||
assign wb_PC = ld_valid ? ld_commit_if.PC :
|
||||
fpu_valid ? fpu_commit_if.PC :
|
||||
csr_valid ? csr_commit_if.PC :
|
||||
/*alu_valid ?*/ alu_commit_if.PC;
|
||||
|
||||
alu_valid ? alu_commit_if.PC :
|
||||
/*gpu_valid*/ gpu_commit_if.PC;
|
||||
|
||||
assign wb_tmask = ld_valid ? ld_commit_if.tmask :
|
||||
fpu_valid ? fpu_commit_if.tmask :
|
||||
csr_valid ? csr_commit_if.tmask :
|
||||
/*alu_valid ?*/ alu_commit_if.tmask;
|
||||
alu_valid ? alu_commit_if.tmask :
|
||||
/*gpu_valid*/ gpu_commit_if.tmask;
|
||||
|
||||
assign wb_rd = ld_valid ? ld_commit_if.rd :
|
||||
fpu_valid ? fpu_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
/*alu_valid ?*/ alu_commit_if.rd;
|
||||
alu_valid ? alu_commit_if.rd :
|
||||
/*gpu_valid*/ gpu_commit_if.rd;
|
||||
|
||||
assign wb_data = ld_valid ? ld_commit_if.data :
|
||||
fpu_valid ? fpu_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
/*alu_valid ?*/ alu_commit_if.data;
|
||||
alu_valid ? alu_commit_if.data :
|
||||
/*gpu_valid*/ gpu_commit_if.data;
|
||||
|
||||
assign wb_eop = ld_valid ? ld_commit_if.eop :
|
||||
fpu_valid ? fpu_commit_if.eop :
|
||||
csr_valid ? csr_commit_if.eop :
|
||||
/*alu_valid ?*/ alu_commit_if.eop;
|
||||
alu_valid ? alu_commit_if.eop :
|
||||
/*gpu_valid*/ gpu_commit_if.eop;
|
||||
|
||||
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
@@ -79,7 +88,9 @@ module VX_writeback #(
|
||||
assign ld_commit_if.ready = !stall;
|
||||
assign fpu_commit_if.ready = !stall && !ld_valid;
|
||||
assign csr_commit_if.ready = !stall && !ld_valid && !fpu_valid;
|
||||
assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid;
|
||||
assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid;
|
||||
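// GPU commits that do not write back (wb == 0) are always accepted; a GPU commit that writes back (TEX) has the lowest priority and is accepted only when writeback is not stalled and no ld/fpu/csr/alu commit is valid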
|
||||
assign gpu_commit_if.ready = (!stall && !ld_valid && !fpu_valid && !csr_valid && !alu_valid) || !gpu_commit_if.wb ;
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||
|
||||
@@ -13,7 +13,8 @@ interface VX_gpu_req_if();
|
||||
wire [31:0] next_PC;
|
||||
wire [`GPU_BITS-1:0] op_type;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
|
||||
24
hw/rtl/interfaces/VX_tex_req_if.v
Normal file
24
hw/rtl/interfaces/VX_tex_req_if.v
Normal file
@@ -0,0 +1,24 @@
|
||||
// Include guard for the texture request interface.
`ifndef VX_TEX_REQ_IF
|
||||
`define VX_TEX_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Texture request interface.
// Carries the per-thread operands of a GPU_TEX instruction from VX_gpu_unit
// to VX_tex_unit. No modports are declared, so direction is by convention:
// VX_gpu_unit drives every signal below.
interface VX_tex_req_if ();
|
||||
// Driven by VX_gpu_unit as (op_type == `GPU_TEX).
wire valid;
|
||||
// One 32-bit word per thread, copied from rs1_data.
// Presumably the texture u coordinate; numeric format is not defined here.
wire [`NUM_THREADS-1:0][31:0] u;
|
||||
// One 32-bit word per thread, copied from rs2_data.
// Presumably the texture v coordinate; numeric format is not defined here.
wire [`NUM_THREADS-1:0][31:0] v;
|
||||
// One 32-bit word per thread, copied from rs3_data.
// NOTE(review): name suggests level-of-detail / texture selector - confirm.
wire [`NUM_THREADS-1:0][31:0] lod_t;
|
||||
// The signals below are disabled in this revision (texture state, tag and
// back-pressure are not part of the request yet).
// wire [`MADDRW-1:0] addr;
|
||||
// wire [`MAXWTW-1:0] width;
|
||||
// wire [`MAXHTW-1:0] height;
|
||||
// wire [`MAXFTW-1:0] format;
|
||||
// wire [`MAXFMW-1:0] filter;
|
||||
// wire [`MAXAMW-1:0] clamp;
|
||||
// wire [`TAGW-1:0] tag;
|
||||
// wire ready;
|
||||
|
||||
endinterface
|
||||
`endif
|
||||
|
||||
|
||||
|
||||
14
hw/rtl/interfaces/VX_tex_rsp_if.v
Normal file
14
hw/rtl/interfaces/VX_tex_rsp_if.v
Normal file
@@ -0,0 +1,14 @@
|
||||
// Include guard for the texture response interface.
`ifndef VX_TEX_RSP_IF
|
||||
`define VX_TEX_RSP_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Texture response interface.
// Returns the per-thread result of a texture request from VX_tex_unit to
// VX_gpu_unit. No modports are declared; VX_tex_unit drives both signals.
interface VX_tex_rsp_if ();
|
||||
// Disabled in this revision: no separate valid strobe or request tag.
// wire valid;
|
||||
// wire [`TAGW-1:0] tag;
|
||||
// One 32-bit result word per thread. VX_gpu_unit forwards it through its
// pipe register into gpu_commit_if.data.
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
// Driven by VX_tex_unit; VX_gpu_unit assigns it to gpu_req_if.wb.
// NOTE(review): despite the name this is used as a "result available"
// flag, not as consumer back-pressure - confirm intended handshake.
wire ready;
|
||||
endinterface
|
||||
`endif
|
||||
|
||||
|
||||
@@ -1,50 +1,55 @@
|
||||
`include "VX_platform.vh"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_tex_unit #(
|
||||
parameter TADDRW = 32,
|
||||
parameter MADDRW = 32,
|
||||
parameter DATAW = 32,
|
||||
parameter MAXWTW = 8,
|
||||
parameter MAXHTW = 8,
|
||||
parameter MAXFTW = 2,
|
||||
parameter MAXFMW = 1,
|
||||
parameter MAXAMW = 2,
|
||||
parameter TAGW = 16,
|
||||
|
||||
parameter NUMCRQS = 32
|
||||
module VX_tex_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// Inputs
|
||||
VX_tex_req_if tex_req_if,
|
||||
|
||||
// Texture Request
|
||||
input wire tex_req_valid,
|
||||
input wire [TADDRW-1:0] tex_req_u,
|
||||
input wire [TADDRW-1:0] tex_req_v,
|
||||
input wire [MADDRW-1:0] tex_req_addr,
|
||||
input wire [MAXWTW-1:0] tex_req_width,
|
||||
input wire [MAXHTW-1:0] tex_req_height,
|
||||
input wire [MAXFTW-1:0] tex_req_format,
|
||||
input wire [MAXFMW-1:0] tex_req_filter,
|
||||
input wire [MAXAMW-1:0] tex_req_clamp,
|
||||
input wire [TAGW-1:0] tex_req_tag,
|
||||
output wire tex_req_ready,
|
||||
// Outputs
|
||||
VX_tex_rsp_if tex_rsp_if
|
||||
// VX_commit_if gpu_commit_if
|
||||
// // Texture Request
|
||||
// input wire tex_req_valid,
|
||||
// input wire [`TADDRW-1:0] tex_req_u,
|
||||
// input wire [`TADDRW-1:0] tex_req_v,
|
||||
// input wire [`MADDRW-1:0] tex_req_addr,
|
||||
// input wire [`MAXWTW-1:0] tex_req_width,
|
||||
// input wire [`MAXHTW-1:0] tex_req_height,
|
||||
// input wire [`MAXFTW-1:0] tex_req_format,
|
||||
// input wire [`MAXFMW-1:0] tex_req_filter,
|
||||
// input wire [`MAXAMW-1:0] tex_req_clamp,
|
||||
// input wire [`TAGW-1:0] tex_req_tag,
|
||||
// output wire tex_req_ready,
|
||||
|
||||
// Texture Response
|
||||
output wire tex_rsp_valid,
|
||||
output wire [TAGW-1:0] tex_rsp_tag,
|
||||
input wire [DATAW-1:0] tex_rsp_data,
|
||||
input wire tex_rsp_ready,
|
||||
// // Texture Response
|
||||
// output wire tex_rsp_valid,
|
||||
// output wire [`TAGW-1:0] tex_rsp_tag,
|
||||
// input wire [`DATAW-1:0] tex_rsp_data,
|
||||
// input wire tex_rsp_ready,
|
||||
|
||||
// Cache Request
|
||||
output wire [NUMCRQS-1:0] cache_req_valids,
|
||||
output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
|
||||
input wire cache_req_ready,
|
||||
// output wire [NUMCRQS-1:0] cache_req_valids,
|
||||
// output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
|
||||
// input wire cache_req_ready,
|
||||
|
||||
// Cache Response
|
||||
input wire cache_rsp_valid,
|
||||
input wire [MADDRW-1:0] cache_rsp_addr,
|
||||
input wire [DATAW-1:0] cache_rsp_data,
|
||||
output wire cache_rsp_ready
|
||||
// input wire cache_rsp_valid,
|
||||
// input wire [MADDRW-1:0] cache_rsp_addr,
|
||||
// input wire [DATAW-1:0] cache_rsp_data,
|
||||
// output wire cache_rsp_ready
|
||||
);
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tex_rsp_if.data[i] = 32'hFAAF;
|
||||
end
|
||||
|
||||
assign tex_rsp_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
||||
@@ -36,8 +36,9 @@ RTL_DIR=../rtl
|
||||
DPI_DIR=../dpi
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE)
|
||||
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
|
||||
SRCS = simulator.cpp testbench.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
||||
|
||||
Reference in New Issue
Block a user