Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit d47cccc157
1300 changed files with 247321 additions and 311189 deletions

View File

@@ -1,7 +1,49 @@
`ifndef VX_CONFIG
`define VX_CONFIG
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`ifndef XLEN
`ifndef VX_CONFIG_VH
`define VX_CONFIG_VH
`ifndef MIN
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
`endif
`ifndef MAX
`define MAX(x, y) (((x) > (y)) ? (x) : (y))
`endif
`ifndef CLAMP
`define CLAMP(x, lo, hi) (((x) > (hi)) ? (hi) : (((x) < (lo)) ? (lo) : (x)))
`endif
`ifndef UP
`define UP(x) (((x) != 0) ? (x) : 1)
`endif
///////////////////////////////////////////////////////////////////////////////
// 32 bit XLEN as default.
`ifndef XLEN_32
`ifndef XLEN_64
`define XLEN_32
`endif
`endif
`ifdef XLEN_64
`define XLEN 64
`endif
`ifdef XLEN_32
`define XLEN 32
`endif
@@ -25,54 +67,127 @@
`define NUM_BARRIERS 4
`endif
`ifndef L2_ENABLE
`define L2_ENABLE 0
`ifndef SOCKET_SIZE
`define SOCKET_SIZE `MIN(4, `NUM_CORES)
`endif
`ifndef L3_ENABLE
`define L3_ENABLE 0
`ifdef L2_ENABLE
`define L2_ENABLED 1
`else
`define L2_ENABLED 0
`endif
`ifndef SM_ENABLE
`define SM_ENABLE 1
`ifdef L3_ENABLE
`define L3_ENABLED 1
`else
`define L3_ENABLED 0
`endif
`ifdef L1_DISABLE
`define ICACHE_DISABLE
`define DCACHE_DISABLE
`endif
`ifndef MEM_BLOCK_SIZE
`define MEM_BLOCK_SIZE 64
`endif
`ifndef L1_BLOCK_SIZE
`define L1_BLOCK_SIZE ((`L2_ENABLE || `L3_ENABLE) ? 16 : `MEM_BLOCK_SIZE)
`ifndef MEM_ADDR_WIDTH
`ifdef XLEN_64
`define MEM_ADDR_WIDTH 48
`else
`define MEM_ADDR_WIDTH 32
`endif
`endif
`ifndef L1_LINE_SIZE
`ifdef L1_DISABLE
`define L1_LINE_SIZE ((`L2_ENABLED || `L3_ENABLED) ? 4 : `MEM_BLOCK_SIZE)
`else
`define L1_LINE_SIZE ((`L2_ENABLED || `L3_ENABLED) ? 16 : `MEM_BLOCK_SIZE)
`endif
`endif
`ifdef XLEN_64
`ifndef STARTUP_ADDR
`define STARTUP_ADDR 64'h180000000
`endif
`ifndef STACK_BASE_ADDR
`define STACK_BASE_ADDR 64'h1FF000000
`endif
`else
`ifndef STARTUP_ADDR
`define STARTUP_ADDR 32'h80000000
`endif
`ifndef IO_BASE_ADDR
`define IO_BASE_ADDR 32'hFF000000
`ifndef STACK_BASE_ADDR
`define STACK_BASE_ADDR 32'hFF000000
`endif
`ifndef IO_ADDR_SIZE
`define IO_ADDR_SIZE (32'hFFFFFFFF - `IO_BASE_ADDR + 1)
`endif
`ifndef IO_COUT_ADDR
`define IO_COUT_ADDR (32'hFFFFFFFF - `MEM_BLOCK_SIZE + 1)
`endif
`ifndef IO_COUT_SIZE
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
`endif
`ifndef IO_CSR_ADDR
`define IO_CSR_ADDR `IO_BASE_ADDR
`endif
`ifndef SMEM_BASE_ADDR
`define SMEM_BASE_ADDR `IO_BASE_ADDR
`define SMEM_BASE_ADDR `STACK_BASE_ADDR
`endif
`ifndef SMEM_LOG_SIZE
`define SMEM_LOG_SIZE 14
`endif
`ifndef IO_BASE_ADDR
`define IO_BASE_ADDR (`SMEM_BASE_ADDR + (1 << `SMEM_LOG_SIZE))
`endif
`ifndef IO_COUT_ADDR
`define IO_COUT_ADDR `IO_BASE_ADDR
`endif
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
`ifndef IO_CSR_ADDR
`define IO_CSR_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
`endif
`define IO_CSR_SIZE (4 * 64 * `NUM_CORES * `NUM_CLUSTERS)
`ifndef STACK_LOG2_SIZE
`define STACK_LOG2_SIZE 13
`endif
`define STACK_SIZE (1 << `STACK_LOG2_SIZE)
`define RESET_DELAY 8
`ifndef STALL_TIMEOUT
`define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
`endif
`ifndef FPU_FPNEW
`ifndef FPU_DSP
`ifndef FPU_DPI
`ifdef SYNTHESIS
`define FPU_DSP
`else
`define FPU_DPI
`endif
`endif
`endif
`endif
`ifndef SYNTHESIS
`ifndef DPI_DISABLE
`define IMUL_DPI
`define IDIV_DPI
`endif
`endif
`ifndef DEBUG_LEVEL
`define DEBUG_LEVEL 3
`endif
// ISA Extensions /////////////////////////////////////////////////////////////
`ifndef EXT_M_DISABLE
`define EXT_M_ENABLE
`endif
@@ -81,230 +196,278 @@
`define EXT_F_ENABLE
`endif
// Device identification
`ifdef EXT_D_ENABLE
`define FLEN_64
`else
`define FLEN_32
`endif
`ifdef FLEN_64
`define FLEN 64
`endif
`ifdef FLEN_32
`define FLEN 32
`endif
`ifdef XLEN_64
`ifdef FLEN_32
`define FPU_RV64F
`endif
`endif
`define ISA_STD_A 0
`define ISA_STD_C 2
`define ISA_STD_D 3
`define ISA_STD_E 4
`define ISA_STD_F 5
`define ISA_STD_H 7
`define ISA_STD_I 8
`define ISA_STD_N 13
`define ISA_STD_Q 16
`define ISA_STD_S 18
`define ISA_STD_U 20
`define ISA_EXT_TEX 0
`define ISA_EXT_RASTER 1
`define ISA_EXT_ROP 2
`ifdef EXT_A_ENABLE
`define EXT_A_ENABLED 1
`else
`define EXT_A_ENABLED 0
`endif
`ifdef EXT_C_ENABLE
`define EXT_C_ENABLED 1
`else
`define EXT_C_ENABLED 0
`endif
`ifdef EXT_D_ENABLE
`define EXT_D_ENABLED 1
`else
`define EXT_D_ENABLED 0
`endif
`ifdef EXT_F_ENABLE
`define EXT_F_ENABLED 1
`else
`define EXT_F_ENABLED 0
`endif
`ifdef EXT_M_ENABLE
`define EXT_M_ENABLED 1
`else
`define EXT_M_ENABLED 0
`endif
`define ISA_X_ENABLED 0
`define MISA_EXT 0
`define MISA_STD (`EXT_A_ENABLED << 0) /* A - Atomic Instructions extension */ \
| (0 << 1) /* B - Tentatively reserved for Bit operations extension */ \
| (`EXT_C_ENABLED << 2) /* C - Compressed extension */ \
| (`EXT_D_ENABLED << 3) /* D - Double precsision floating-point extension */ \
| (0 << 4) /* E - RV32E base ISA */ \
| (`EXT_F_ENABLED << 5) /* F - Single precsision floating-point extension */ \
| (0 << 6) /* G - Additional standard extensions present */ \
| (0 << 7) /* H - Hypervisor mode implemented */ \
| (1 << 8) /* I - RV32I/64I/128I base ISA */ \
| (0 << 9) /* J - Reserved */ \
| (0 << 10) /* K - Reserved */ \
| (0 << 11) /* L - Tentatively reserved for Bit operations extension */ \
| (`EXT_M_ENABLED << 12) /* M - Integer Multiply/Divide extension */ \
| (0 << 13) /* N - User level interrupts supported */ \
| (0 << 14) /* O - Reserved */ \
| (0 << 15) /* P - Tentatively reserved for Packed-SIMD extension */ \
| (0 << 16) /* Q - Quad-precision floating-point extension */ \
| (0 << 17) /* R - Reserved */ \
| (0 << 18) /* S - Supervisor mode implemented */ \
| (0 << 19) /* T - Tentatively reserved for Transactional Memory extension */ \
| (1 << 20) /* U - User mode implemented */ \
| (0 << 21) /* V - Tentatively reserved for Vector extension */ \
| (0 << 22) /* W - Reserved */ \
| (`ISA_X_ENABLED << 23) /* X - Non-standard extensions present */ \
| (0 << 24) /* Y - Reserved */ \
| (0 << 25) /* Z - Reserved */
// Device identification //////////////////////////////////////////////////////
`define VENDOR_ID 0
`define ARCHITECTURE_ID 0
`define IMPLEMENTATION_ID 0
///////////////////////////////////////////////////////////////////////////////
// Pipeline Configuration /////////////////////////////////////////////////////
`ifndef LATENCY_IMUL
`define LATENCY_IMUL 3
// Issue width
`ifndef ISSUE_WIDTH
`define ISSUE_WIDTH `MIN(`NUM_WARPS, 4)
`endif
`ifndef LATENCY_FNCP
`define LATENCY_FNCP 2
// Number of ALU units
`ifndef NUM_ALU_LANES
`define NUM_ALU_LANES `UP(`NUM_THREADS / 2)
`endif
`ifndef NUM_ALU_BLOCKS
`define NUM_ALU_BLOCKS `UP(`ISSUE_WIDTH / 1)
`endif
`ifndef LATENCY_FMA
`define LATENCY_FMA 4
// Number of FPU units
`ifndef NUM_FPU_LANES
`define NUM_FPU_LANES `UP(`NUM_THREADS / 2)
`endif
`ifndef NUM_FPU_BLOCKS
`define NUM_FPU_BLOCKS `UP(`ISSUE_WIDTH / 1)
`endif
`ifndef LATENCY_FDIV
`ifdef ALTERA_S10
`define LATENCY_FDIV 34
`else
`define LATENCY_FDIV 15
`endif
// Number of LSU units
`ifndef NUM_LSU_LANES
`define NUM_LSU_LANES `MIN(`NUM_THREADS, 4)
`endif
`ifndef LATENCY_FSQRT
`ifdef ALTERA_S10
`define LATENCY_FSQRT 25
`else
`define LATENCY_FSQRT 10
// Number of SFU units
`ifndef NUM_SFU_LANES
`define NUM_SFU_LANES `MIN(`NUM_THREADS, 4)
`endif
`endif
`ifndef LATENCY_FDIVSQRT
`define LATENCY_FDIVSQRT 32
`endif
`ifndef LATENCY_FCVT
`define LATENCY_FCVT 5
`endif
`define RESET_DELAY 6
// CSR Addresses //////////////////////////////////////////////////////////////
// User Floating-Point CSRs
`define CSR_FFLAGS 12'h001
`define CSR_FRM 12'h002
`define CSR_FCSR 12'h003
`define CSR_SATP 12'h180
`define CSR_PMPCFG0 12'h3A0
`define CSR_PMPADDR0 12'h3B0
`define CSR_MSTATUS 12'h300
`define CSR_MISA 12'h301
`define CSR_MEDELEG 12'h302
`define CSR_MIDELEG 12'h303
`define CSR_MIE 12'h304
`define CSR_MTVEC 12'h305
`define CSR_MEPC 12'h341
// Machine Performance-monitoring counters
`define CSR_MPM_BASE 12'hB00
`define CSR_MPM_BASE_H 12'hB80
// PERF: pipeline
`define CSR_MCYCLE 12'hB00
`define CSR_MCYCLE_H 12'hB80
`define CSR_MPM_RESERVED 12'hB01
`define CSR_MPM_RESERVED_H 12'hB81
`define CSR_MINSTRET 12'hB02
`define CSR_MINSTRET_H 12'hB82
`define CSR_MPM_IBUF_ST 12'hB03
`define CSR_MPM_IBUF_ST_H 12'hB83
`define CSR_MPM_SCRB_ST 12'hB04
`define CSR_MPM_SCRB_ST_H 12'hB84
`define CSR_MPM_ALU_ST 12'hB05
`define CSR_MPM_ALU_ST_H 12'hB85
`define CSR_MPM_LSU_ST 12'hB06
`define CSR_MPM_LSU_ST_H 12'hB86
`define CSR_MPM_CSR_ST 12'hB07
`define CSR_MPM_CSR_ST_H 12'hB87
`define CSR_MPM_FPU_ST 12'hB08
`define CSR_MPM_FPU_ST_H 12'hB88
`define CSR_MPM_GPU_ST 12'hB09
`define CSR_MPM_GPU_ST_H 12'hB89
// PERF: decode
`define CSR_MPM_LOADS 12'hB0A
`define CSR_MPM_LOADS_H 12'hB8A
`define CSR_MPM_STORES 12'hB0B
`define CSR_MPM_STORES_H 12'hB8B
`define CSR_MPM_BRANCHES 12'hB0C
`define CSR_MPM_BRANCHES_H 12'hB8C
// PERF: icache
`define CSR_MPM_ICACHE_READS 12'hB0D // total reads
`define CSR_MPM_ICACHE_READS_H 12'hB8D
`define CSR_MPM_ICACHE_MISS_R 12'hB0E // read misses
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8E
// PERF: dcache
`define CSR_MPM_DCACHE_READS 12'hB0F // total reads
`define CSR_MPM_DCACHE_READS_H 12'hB8F
`define CSR_MPM_DCACHE_WRITES 12'hB10 // total writes
`define CSR_MPM_DCACHE_WRITES_H 12'hB90
`define CSR_MPM_DCACHE_MISS_R 12'hB11 // read misses
`define CSR_MPM_DCACHE_MISS_R_H 12'hB91
`define CSR_MPM_DCACHE_MISS_W 12'hB12 // write misses
`define CSR_MPM_DCACHE_MISS_W_H 12'hB92
`define CSR_MPM_DCACHE_BANK_ST 12'hB13 // bank conflicts
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB93
`define CSR_MPM_DCACHE_MSHR_ST 12'hB14 // MSHR stalls
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB94
// PERF: smem
`define CSR_MPM_SMEM_READS 12'hB15 // total reads
`define CSR_MPM_SMEM_READS_H 12'hB95
`define CSR_MPM_SMEM_WRITES 12'hB16 // total writes
`define CSR_MPM_SMEM_WRITES_H 12'hB96
`define CSR_MPM_SMEM_BANK_ST 12'hB17 // bank conflicts
`define CSR_MPM_SMEM_BANK_ST_H 12'hB97
// PERF: memory
`define CSR_MPM_MEM_READS 12'hB18 // memory reads
`define CSR_MPM_MEM_READS_H 12'hB98
`define CSR_MPM_MEM_WRITES 12'hB19 // memory writes
`define CSR_MPM_MEM_WRITES_H 12'hB99
`define CSR_MPM_MEM_LAT 12'hB1A // memory latency
`define CSR_MPM_MEM_LAT_H 12'hB9A
// PERF: texunit
`define CSR_MPM_TEX_READS 12'hB1B // texture accesses
`define CSR_MPM_TEX_READS_H 12'hB9B
`define CSR_MPM_TEX_LAT 12'hB1C // texture latency
`define CSR_MPM_TEX_LAT_H 12'hB9C
// Machine Information Registers
`define CSR_MVENDORID 12'hF11
`define CSR_MARCHID 12'hF12
`define CSR_MIMPID 12'hF13
`define CSR_MHARTID 12'hF14
// User SIMT CSRs
`define CSR_WTID 12'hCC0
`define CSR_LTID 12'hCC1
`define CSR_GTID 12'hCC2
`define CSR_LWID 12'hCC3
`define CSR_GWID `CSR_MHARTID
`define CSR_GCID 12'hCC5
`define CSR_TMASK 12'hCC4
// Machine SIMT CSRs
`define CSR_NT 12'hFC0
`define CSR_NW 12'hFC1
`define CSR_NC 12'hFC2
////////// Texture Units //////////////////////////////////////////////////////
`define NUM_TEX_UNITS 2
`define TEX_SUBPIXEL_BITS 8
`define TEX_DIM_BITS 15
`define TEX_LOD_MAX `TEX_DIM_BITS
`define TEX_LOD_BITS 4
`define TEX_FXD_BITS 32
`define TEX_FXD_FRAC (`TEX_DIM_BITS+`TEX_SUBPIXEL_BITS)
`define TEX_STATE_ADDR 0
`define TEX_STATE_WIDTH 1
`define TEX_STATE_HEIGHT 2
`define TEX_STATE_FORMAT 3
`define TEX_STATE_FILTER 4
`define TEX_STATE_WRAPU 5
`define TEX_STATE_WRAPV 6
`define TEX_STATE_MIPOFF(lod) (7+(lod))
`define NUM_TEX_STATES (`TEX_STATE_MIPOFF(`TEX_LOD_MAX)+1)
`define CSR_TEX_UNIT 12'hFD0
`define CSR_TEX_STATE_BEGIN 12'hFD1
`define CSR_TEX_ADDR (`CSR_TEX_STATE_BEGIN+`TEX_STATE_ADDR)
`define CSR_TEX_WIDTH (`CSR_TEX_STATE_BEGIN+`TEX_STATE_WIDTH)
`define CSR_TEX_HEIGHT (`CSR_TEX_STATE_BEGIN+`TEX_STATE_HEIGHT)
`define CSR_TEX_FORMAT (`CSR_TEX_STATE_BEGIN+`TEX_STATE_FORMAT)
`define CSR_TEX_FILTER (`CSR_TEX_STATE_BEGIN+`TEX_STATE_FILTER)
`define CSR_TEX_WRAPU (`CSR_TEX_STATE_BEGIN+`TEX_STATE_WRAPU)
`define CSR_TEX_WRAPV (`CSR_TEX_STATE_BEGIN+`TEX_STATE_WRAPV)
`define CSR_TEX_MIPOFF(lod) (`CSR_TEX_STATE_BEGIN+`TEX_STATE_MIPOFF(lod))
`define CSR_TEX_STATE_END (`CSR_TEX_STATE_BEGIN+`NUM_TEX_STATES)
`define CSR_TEX_STATE(addr) ((addr) - `CSR_TEX_STATE_BEGIN)
// Pipeline Queues ////////////////////////////////////////////////////////////
// Size of Instruction Buffer
`ifndef IBUF_SIZE
`define IBUF_SIZE 2
`define IBUF_SIZE (2 * (`NUM_WARPS / `ISSUE_WIDTH))
`endif
// Size of LSU Request Queue
`ifndef LSUQ_SIZE
`define LSUQ_SIZE (`NUM_WARPS * 2)
`define LSUQ_SIZE (2 * (`NUM_THREADS / `NUM_LSU_LANES))
`endif
// LSU Duplicate Address Check
`ifdef LSU_DUP
`define LSU_DUP_ENABLED 1
`else
`define LSU_DUP_ENABLED 0
`endif
`ifdef GBAR_ENABLE
`define GBAR_ENABLED 1
`else
`define GBAR_ENABLED 0
`endif
`ifndef LATENCY_IMUL
`ifdef VIVADO
`define LATENCY_IMUL 4
`endif
`ifdef QUARTUS
`define LATENCY_IMUL 3
`endif
`ifndef LATENCY_IMUL
`define LATENCY_IMUL 4
`endif
`endif
// Floating-Point Units ///////////////////////////////////////////////////////
// Size of FPU Request Queue
`ifndef FPUQ_SIZE
`define FPUQ_SIZE 8
`ifndef FPU_REQ_QUEUE_SIZE
`define FPU_REQ_QUEUE_SIZE (2 * (`NUM_THREADS / `NUM_FPU_LANES))
`endif
// Texture Unit Request Queue
`ifndef TEXQ_SIZE
`define TEXQ_SIZE (`NUM_WARPS * 2)
// FNCP Latency
`ifndef LATENCY_FNCP
`define LATENCY_FNCP 2
`endif
// FMA Latency
`ifndef LATENCY_FMA
`ifdef FPU_DPI
`define LATENCY_FMA 4
`endif
`ifdef FPU_FPNEW
`define LATENCY_FMA 4
`endif
`ifdef FPU_DSP
`ifdef QUARTUS
`define LATENCY_FMA 4
`endif
`ifdef VIVADO
`define LATENCY_FMA 16
`endif
`ifndef LATENCY_FMA
`define LATENCY_FMA 4
`endif
`endif
`endif
// FDIV Latency
`ifndef LATENCY_FDIV
`ifdef FPU_DPI
`define LATENCY_FDIV 15
`endif
`ifdef FPU_FPNEW
`define LATENCY_FDIV 16
`endif
`ifdef FPU_DSP
`ifdef QUARTUS
`define LATENCY_FDIV 15
`endif
`ifdef VIVADO
`define LATENCY_FDIV 28
`endif
`ifndef LATENCY_FDIV
`define LATENCY_FDIV 16
`endif
`endif
`endif
// FSQRT Latency
`ifndef LATENCY_FSQRT
`ifdef FPU_DPI
`define LATENCY_FSQRT 10
`endif
`ifdef FPU_FPNEW
`define LATENCY_FSQRT 16
`endif
`ifdef FPU_DSP
`ifdef QUARTUS
`define LATENCY_FSQRT 10
`endif
`ifdef VIVADO
`define LATENCY_FSQRT 28
`endif
`ifndef LATENCY_FSQRT
`define LATENCY_FSQRT 16
`endif
`endif
`endif
// FCVT Latency
`ifndef LATENCY_FCVT
`define LATENCY_FCVT 5
`endif
// Icache Configurable Knobs //////////////////////////////////////////////////
// Size of cache in bytes
`ifndef ICACHE_SIZE
`define ICACHE_SIZE 16384
// Cache Enable
`ifndef ICACHE_DISABLE
`define ICACHE_ENABLE
`endif
`ifdef ICACHE_ENABLE
`define ICACHE_ENABLED 1
`else
`define ICACHE_ENABLED 0
`define NUM_ICACHES 0
`endif
// Core Request Queue Size
`ifndef ICACHE_CREQ_SIZE
`define ICACHE_CREQ_SIZE 0
// Number of Cache Units
`ifndef NUM_ICACHES
`define NUM_ICACHES `UP(`NUM_CORES / 4)
`endif
// Cache Size
`ifndef ICACHE_SIZE
`define ICACHE_SIZE 16384
`endif
// Core Response Queue Size
@@ -314,7 +477,7 @@
// Miss Handling Register Size
`ifndef ICACHE_MSHR_SIZE
`define ICACHE_MSHR_SIZE `NUM_WARPS
`define ICACHE_MSHR_SIZE 16
`endif
// Memory Request Queue Size
@@ -327,26 +490,38 @@
`define ICACHE_MRSQ_SIZE 0
`endif
// Number of Associative Ways
`ifndef ICACHE_NUM_WAYS
`define ICACHE_NUM_WAYS 2
`endif
// Dcache Configurable Knobs //////////////////////////////////////////////////
// Size of cache in bytes
// Cache Enable
`ifndef DCACHE_DISABLE
`define DCACHE_ENABLE
`endif
`ifdef DCACHE_ENABLE
`define DCACHE_ENABLED 1
`else
`define DCACHE_ENABLED 0
`define NUM_DCACHES 0
`define DCACHE_NUM_BANKS 1
`endif
// Number of Cache Units
`ifndef NUM_DCACHES
`define NUM_DCACHES `UP(`NUM_CORES / 4)
`endif
// Cache Size
`ifndef DCACHE_SIZE
`define DCACHE_SIZE 16384
`endif
// Number of banks
// Number of Banks
`ifndef DCACHE_NUM_BANKS
`define DCACHE_NUM_BANKS `NUM_THREADS
`endif
// Number of ports per bank
`ifndef DCACHE_NUM_PORTS
`define DCACHE_NUM_PORTS 1
`endif
// Core Request Queue Size
`ifndef DCACHE_CREQ_SIZE
`define DCACHE_CREQ_SIZE 0
`define DCACHE_NUM_BANKS (`NUM_LSU_LANES)
`endif
// Core Response Queue Size
@@ -356,7 +531,7 @@
// Miss Handling Register Size
`ifndef DCACHE_MSHR_SIZE
`define DCACHE_MSHR_SIZE `LSUQ_SIZE
`define DCACHE_MSHR_SIZE 16
`endif
// Memory Request Queue Size
@@ -369,54 +544,42 @@
`define DCACHE_MRSQ_SIZE 0
`endif
// Number of Associative Ways
`ifndef DCACHE_NUM_WAYS
`define DCACHE_NUM_WAYS 2
`endif
// SM Configurable Knobs //////////////////////////////////////////////////////
// per thread stack size
`ifndef STACK_LOG2_SIZE
`define STACK_LOG2_SIZE 10
`ifndef SM_DISABLE
`define SM_ENABLE
`endif
`define STACK_SIZE (1 << `STACK_LOG2_SIZE)
// Size of cache in bytes
`ifndef SMEM_SIZE
`define SMEM_SIZE (`STACK_SIZE * `NUM_WARPS * `NUM_THREADS)
`ifdef SM_ENABLE
`define SM_ENABLED 1
`else
`define SM_ENABLED 0
`define SMEM_NUM_BANKS 1
`endif
// Number of banks
// Number of Banks
`ifndef SMEM_NUM_BANKS
`define SMEM_NUM_BANKS `NUM_THREADS
`endif
// Core Request Queue Size
`ifndef SMEM_CREQ_SIZE
`define SMEM_CREQ_SIZE 2
`endif
// Core Response Queue Size
`ifndef SMEM_CRSQ_SIZE
`define SMEM_CRSQ_SIZE 2
`define SMEM_NUM_BANKS (`NUM_LSU_LANES)
`endif
// L2cache Configurable Knobs /////////////////////////////////////////////////
// Size of cache in bytes
// Cache Size
`ifndef L2_CACHE_SIZE
`define L2_CACHE_SIZE 131072
`ifdef ALTERA_S10
`define L2_CACHE_SIZE 2097152
`else
`define L2_CACHE_SIZE 1048576
`endif
`endif
// Number of banks
// Number of Banks
`ifndef L2_NUM_BANKS
`define L2_NUM_BANKS ((`NUM_CORES < 4) ? `NUM_CORES : 4)
`endif
// Number of ports per bank
`ifndef L2_NUM_PORTS
`define L2_NUM_PORTS 1
`endif
// Core Request Queue Size
`ifndef L2_CREQ_SIZE
`define L2_CREQ_SIZE 0
`define L2_NUM_BANKS 2
`endif
// Core Response Queue Size
@@ -439,26 +602,25 @@
`define L2_MRSQ_SIZE 0
`endif
// Number of Associative Ways
`ifndef L2_NUM_WAYS
`define L2_NUM_WAYS 4
`endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
// Size of cache in bytes
// Cache Size
`ifndef L3_CACHE_SIZE
`ifdef ALTERA_S10
`define L3_CACHE_SIZE 2097152
`else
`define L3_CACHE_SIZE 1048576
`endif
`endif
// Number of banks
// Number of Banks
`ifndef L3_NUM_BANKS
`define L3_NUM_BANKS ((`NUM_CLUSTERS < 4) ? `NUM_CORES : 4)
`endif
// Number of ports per bank
`ifndef L3_NUM_PORTS
`define L3_NUM_PORTS 1
`endif
// Core Request Queue Size
`ifndef L3_CREQ_SIZE
`define L3_CREQ_SIZE 0
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS)
`endif
// Core Response Queue Size
@@ -481,4 +643,9 @@
`define L3_MRSQ_SIZE 0
`endif
`endif
// Number of Associative Ways
`ifndef L3_NUM_WAYS
`define L3_NUM_WAYS 4
`endif
`endif // VX_CONFIG_VH