From 96a03a3edf4bb40d5f1c664685827a33f05a1304 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 20 Mar 2021 19:25:11 -0400 Subject: [PATCH] minor update --- hw/VX_config.h | 432 ----------------------------------------------- simX/execute.cpp | 77 ++++----- 2 files changed, 36 insertions(+), 473 deletions(-) delete mode 100644 hw/VX_config.h diff --git a/hw/VX_config.h b/hw/VX_config.h deleted file mode 100644 index c730b02c..00000000 --- a/hw/VX_config.h +++ /dev/null @@ -1,432 +0,0 @@ -// auto-generated by gen_config.py. DO NOT EDIT -// Generated at 2021-03-20 18:29:13.211392 - -#ifndef VX_USER_CONFIG -#define VX_USER_CONFIG - - -#endif -// auto-generated by gen_config.py. DO NOT EDIT -// Generated at 2021-03-20 18:29:13.214396 - -// Translated from VX_config.vh: - -#ifndef VX_CONFIG -#define VX_CONFIG - - - -#ifndef NUM_CLUSTERS -#define NUM_CLUSTERS 1 -#endif - -#ifndef NUM_CORES -#define NUM_CORES 1 -#endif - -#ifndef NUM_WARPS -#define NUM_WARPS 4 -#endif - -#ifndef NUM_THREADS -#define NUM_THREADS 4 -#endif - -#ifndef NUM_BARRIERS -#define NUM_BARRIERS 4 -#endif - -#ifndef L2_ENABLE -#define L2_ENABLE 0 -#endif - -#ifndef L3_ENABLE -#define L3_ENABLE 0 -#endif - -#ifndef SM_ENABLE -#define SM_ENABLE 1 -#endif - -#ifndef GLOBAL_BLOCK_SIZE -#define GLOBAL_BLOCK_SIZE 64 -#endif - -#ifndef L1_BLOCK_SIZE -#define L1_BLOCK_SIZE (NUM_THREADS * 4) -#endif - -#ifndef STARTUP_ADDR -#define STARTUP_ADDR 0x80000000 -#endif - -#ifndef IO_BUS_BASE_ADDR -#define IO_BUS_BASE_ADDR 0xFF000000 -#endif - -#ifndef SHARED_MEM_BASE_ADDR -#define SHARED_MEM_BASE_ADDR IO_BUS_BASE_ADDR -#endif - -#ifndef SHARED_MEM_BASE_ADDR_ALIGN -#define SHARED_MEM_BASE_ADDR_ALIGN 64 -#endif - -#ifndef IO_BUS_ADDR_COUT -#define IO_BUS_ADDR_COUT 0xFFFFFFFC -#endif - -#ifndef FRAME_BUFFER_BASE_ADDR -#define FRAME_BUFFER_BASE_ADDR 0xFF000000 -#endif - -#ifndef FRAME_BUFFER_WIDTH -#define FRAME_BUFFER_WIDTH 1920 -#endif - -#ifndef FRAME_BUFFER_HEIGHT -#define FRAME_BUFFER_HEIGHT 1080 -#endif - -#define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT) - -#ifndef EXT_M_DISABLE -#define EXT_M_ENABLE -#endif - -#ifndef EXT_F_DISABLE -#define EXT_F_ENABLE -#endif - -#ifndef EXT_TEX_DISABLE -#define EXT_TEX_ENABLE -#endif - -// Device identification -#define VENDOR_ID 0 -#define ARCHITECTURE_ID 0 -#define IMPLEMENTATION_ID 0 - -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LATENCY_IMUL -#define LATENCY_IMUL 3 -#endif - -#ifndef LATENCY_FNCP -#define LATENCY_FNCP 2 -#endif - -#ifndef LATENCY_FMA -#define LATENCY_FMA 4 -#endif - -#ifndef LATENCY_FDIV -#ifdef ALTERA_S10 -#define LATENCY_FDIV 34 -#else -#define LATENCY_FDIV 15 -#endif -#endif - -#ifndef LATENCY_FSQRT -#ifdef ALTERA_S10 -#define LATENCY_FSQRT 25 -#else -#define LATENCY_FSQRT 10 -#endif -#endif - -#ifndef LATENCY_FDIVSQRT -#define LATENCY_FDIVSQRT 32 -#endif - -#ifndef LATENCY_FCVT -#define LATENCY_FCVT 4 -#endif - -// CSR Addresses ////////////////////////////////////////////////////////////// - -// User Floating-Point CSRs -#define CSR_FFLAGS 0x001 -#define CSR_FRM 0x002 -#define CSR_FCSR 0x003 - -#define CSR_SATP 0x180 - -#define CSR_PMPCFG0 0x3A0 -#define CSR_PMPADDR0 0x3B0 - -#define CSR_MSTATUS 0x300 -#define CSR_MISA 0x301 -#define CSR_MEDELEG 0x302 -#define CSR_MIDELEG 0x303 -#define CSR_MIE 0x304 -#define CSR_MTVEC 0x305 - -#define CSR_MEPC 0x341 - -// Machine Counter/Timers -#define CSR_CYCLE 0xC00 -#define CSR_CYCLE_H 0xC80 -#define CSR_INSTRET 0xC02 -#define CSR_INSTRET_H 0xC82 - -// Machine Performance-monitoring counters -// PERF: pipeline -#define CSR_MPM_IBUF_ST 0xB03 -#define CSR_MPM_IBUF_ST_H 0xB83 -#define CSR_MPM_SCRB_ST 0xB04 -#define CSR_MPM_SCRB_ST_H 0xB84 -#define CSR_MPM_ALU_ST 0xB05 -#define CSR_MPM_ALU_ST_H 0xB85 -#define CSR_MPM_LSU_ST 0xB06 -#define CSR_MPM_LSU_ST_H 0xB86 -#define CSR_MPM_CSR_ST 0xB07 -#define CSR_MPM_CSR_ST_H 0xB87 -#define CSR_MPM_FPU_ST 0xB08 -#define CSR_MPM_FPU_ST_H 0xB88 -#define CSR_MPM_GPU_ST 0xB09 -#define CSR_MPM_GPU_ST_H 0xB89 -// PERF: icache -#define CSR_MPM_ICACHE_READS 0xB0A // total reads -#define CSR_MPM_ICACHE_READS_H 0xB8A -#define CSR_MPM_ICACHE_MISS_R 0xB0B // total misses -#define CSR_MPM_ICACHE_MISS_R_H 0xB8B -#define CSR_MPM_ICACHE_PIPE_ST 0xB0C // pipeline stalls -#define CSR_MPM_ICACHE_PIPE_ST_H 0xB8C -#define CSR_MPM_ICACHE_CRSP_ST 0xB0D // core response stalls -#define CSR_MPM_ICACHE_CRSP_ST_H 0xB8D -// PERF: dcache -#define CSR_MPM_DCACHE_READS 0xB0E // total reads -#define CSR_MPM_DCACHE_READS_H 0xB8E -#define CSR_MPM_DCACHE_WRITES 0xB0F // total writes -#define CSR_MPM_DCACHE_WRITES_H 0xB8F -#define CSR_MPM_DCACHE_MISS_R 0xB10 // read misses -#define CSR_MPM_DCACHE_MISS_R_H 0xB90 -#define CSR_MPM_DCACHE_MISS_W 0xB11 // write misses -#define CSR_MPM_DCACHE_MISS_W_H 0xB91 -#define CSR_MPM_DCACHE_BANK_ST 0xB12 // bank conflicts stalls -#define CSR_MPM_DCACHE_BANK_ST_H 0xB92 -#define CSR_MPM_DCACHE_MSHR_ST 0xB13 // MSHR stalls -#define CSR_MPM_DCACHE_MSHR_ST_H 0xB93 -#define CSR_MPM_DCACHE_PIPE_ST 0xB14 // pipeline stalls -#define CSR_MPM_DCACHE_PIPE_ST_H 0xB94 -#define CSR_MPM_DCACHE_CRSP_ST 0xB15 // core response stalls -#define CSR_MPM_DCACHE_CRSP_ST_H 0xB95 -// PERF: smem -#define CSR_MPM_SMEM_READS 0xB16 // total reads -#define CSR_MPM_SMEM_READS_H 0xB96 -#define CSR_MPM_SMEM_WRITES 0xB17 // total writes -#define CSR_MPM_SMEM_WRITES_H 0xB97 -#define CSR_MPM_SMEM_BANK_ST 0xB18 // bank conflicts stalls -#define CSR_MPM_SMEM_BANK_ST_H 0xB98 -// PERF: memory -#define CSR_MPM_DRAM_READS 0xB19 // dram reads -#define CSR_MPM_DRAM_READS_H 0xB99 -#define CSR_MPM_DRAM_WRITES 0xB1A // dram writes -#define CSR_MPM_DRAM_WRITES_H 0xB9A -#define CSR_MPM_DRAM_ST 0xB1B // dram request stalls -#define CSR_MPM_DRAM_ST_H 0xB9B -#define CSR_MPM_DRAM_LAT 0xB1C // dram latency (total) -#define CSR_MPM_DRAM_LAT_H 0xB9C - -// Machine Information Registers -#define CSR_MVENDORID 0xF11 -#define CSR_MARCHID 0xF12 -#define CSR_MIMPID 0xF13 -#define CSR_MHARTID 0xF14 - -// User SIMT CSRs -#define CSR_WTID 0xCC0 -#define CSR_LTID 0xCC1 -#define CSR_GTID 0xCC2 -#define CSR_LWID 0xCC3 -#define CSR_GWID CSR_MHARTID -#define CSR_GCID 0xCC5 - -// Machine SIMT CSRs -#define CSR_NT 0xFC0 -#define CSR_NW 0xFC1 -#define CSR_NC 0xFC2 - -////////// Texture Units ////////////////////////////////////////////////////// - -#define NUM_TEX_UNITS 2 - -#define CSR_TEX_STATES 8 -#define CSR_TEX_BEGIN(x) (0xFD0 + (x) * CSR_TEX_STATES) - -#define CSR_TEX_ADDR(x) (CSR_TEX_BEGIN(x) + 0x00) -#define CSR_TEX_FORMAT(x) (CSR_TEX_BEGIN(x) + 0x01) -#define CSR_TEX_WIDTH(x) (CSR_TEX_BEGIN(x) + 0x02) -#define CSR_TEX_HEIGHT(x) (CSR_TEX_BEGIN(x) + 0x03) -#define CSR_TEX_STRIDE(x) (CSR_TEX_BEGIN(x) + 0x04) -#define CSR_TEX_WRAP_U(x) (CSR_TEX_BEGIN(x) + 0x05) -#define CSR_TEX_WRAP_V(x) (CSR_TEX_BEGIN(x) + 0x06) -#define CSR_TEX_FILTER(x) (CSR_TEX_BEGIN(x) + 0x07) - -// Pipeline Queues //////////////////////////////////////////////////////////// - -// Size of LSU Request Queue -#ifndef LSUQ_SIZE -#define LSUQ_SIZE 8 -#endif - -// Size of FPU Request Queue -#ifndef FPUQ_SIZE -#define FPUQ_SIZE 8 -#endif - -// Icache Configurable Knobs ////////////////////////////////////////////////// - -// Size of cache in bytes -#ifndef ICACHE_SIZE -#define ICACHE_SIZE 16384 -#endif - -// Core Request Queue Size -#ifndef ICREQ_SIZE -#define ICREQ_SIZE 4 -#endif - -// Miss Handling Register Size -#ifndef IMSHR_SIZE -#define IMSHR_SIZE NUM_WARPS -#endif - -// DRAM Request Queue Size -#ifndef IDREQ_SIZE -#define IDREQ_SIZE 4 -#endif - -// DRAM Response Queue Size -#ifndef IDRSQ_SIZE -#define IDRSQ_SIZE 4 -#endif - -// Dcache Configurable Knobs ////////////////////////////////////////////////// - -// Size of cache in bytes -#ifndef DCACHE_SIZE -#define DCACHE_SIZE 16384 -#endif - -// Number of banks -#ifndef DNUM_BANKS -#define DNUM_BANKS NUM_THREADS -#endif - -// Number of bank ports -#ifndef DNUM_PORTS -#define DNUM_PORTS 1 -#endif - -// Core Request Queue Size -#ifndef DCREQ_SIZE -#define DCREQ_SIZE 4 -#endif - -// Miss Handling Register Size -#ifndef DMSHR_SIZE -#define DMSHR_SIZE LSUQ_SIZE -#endif - -// DRAM Request Queue Size -#ifndef DDREQ_SIZE -#define DDREQ_SIZE 4 -#endif - -// DRAM Response Queue Size -#ifndef DDRSQ_SIZE -#define DDRSQ_SIZE MAX(4, (DNUM_BANKS * 2)) -#endif - -// SM Configurable Knobs ////////////////////////////////////////////////////// - -// per thread stack size -#ifndef STACK_SIZE -#define STACK_SIZE 1024 -#endif - -// Size of cache in bytes -#ifndef SMEM_SIZE -#define SMEM_SIZE (STACK_SIZE * NUM_WARPS * NUM_THREADS) -#endif - -// Number of banks -#ifndef SNUM_BANKS -#define SNUM_BANKS NUM_THREADS -#endif - -// Core Request Queue Size -#ifndef SCREQ_SIZE -#define SCREQ_SIZE 4 -#endif - -// L2cache Configurable Knobs ///////////////////////////////////////////////// - -// Size of cache in bytes -#ifndef L2CACHE_SIZE -#define L2CACHE_SIZE 65536 -#endif - -// Number of banks -#ifndef L2NUM_BANKS -#define L2NUM_BANKS MIN(NUM_CORES, 4) -#endif - -// Core Request Queue Size -#ifndef L2CREQ_SIZE -#define L2CREQ_SIZE 4 -#endif - -// Miss Handling Register Size -#ifndef L2MSHR_SIZE -#define L2MSHR_SIZE 16 -#endif - -// DRAM Request Queue Size -#ifndef L2DREQ_SIZE -#define L2DREQ_SIZE 4 -#endif - -// DRAM Response Queue Size -#ifndef L2DRSQ_SIZE -#define L2DRSQ_SIZE MAX(4, (L2NUM_BANKS * 2)) -#endif - -// L3cache Configurable Knobs ///////////////////////////////////////////////// - -// Size of cache in bytes -#ifndef L3CACHE_SIZE -#define L3CACHE_SIZE 131072 -#endif - -// Number of banks -#ifndef L3NUM_BANKS -#define L3NUM_BANKS MIN(NUM_CLUSTERS, 4) -#endif - -// Core Request Queue Size -#ifndef L3CREQ_SIZE -#define L3CREQ_SIZE 4 -#endif - -// Miss Handling Register Size -#ifndef L3MSHR_SIZE -#define L3MSHR_SIZE 16 -#endif - -// DRAM Request Queue Size -#ifndef L3DREQ_SIZE -#define L3DREQ_SIZE 4 -#endif - -// DRAM Response Queue Size -#ifndef L3DRSQ_SIZE -#define L3DRSQ_SIZE MAX(4, (L3NUM_BANKS * 2)) -#endif - -#endif - diff --git a/simX/execute.cpp b/simX/execute.cpp index e8cef37c..43d07fbd 100644 --- a/simX/execute.cpp +++ b/simX/execute.cpp @@ -776,51 +776,46 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) { case FMSUB: case FMNMADD: case FMNMSUB: { - // select FP format - if (core_->get_csr(CSR_FPMODE, t, id_) == 1) { - // CODE - } else { - // multiplicands are infinity and zero, them set FCSR - if (fpBinIsZero(rsdata[0]) || fpBinIsZero(rsdata[1]) || fpBinIsInf(rsdata[0]) || fpBinIsInf(rsdata[1])) { + // multiplicands are infinity and zero, them set FCSR + if (fpBinIsZero(rsdata[0]) || fpBinIsZero(rsdata[1]) || fpBinIsInf(rsdata[0]) || fpBinIsInf(rsdata[1])) { + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit + } + if (fpBinIsNan(rsdata[0]) || fpBinIsNan(rsdata[1]) || fpBinIsNan(rsdata[2])) { + // if one of op is NaN, if addend is not quiet NaN, them set FCSR + if ((fpBinIsNan(rsdata[0])==2) | (fpBinIsNan(rsdata[1])==2) | (fpBinIsNan(rsdata[1])==2)) { core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit - core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit } - if (fpBinIsNan(rsdata[0]) || fpBinIsNan(rsdata[1]) || fpBinIsNan(rsdata[2])) { - // if one of op is NaN, if addend is not quiet NaN, them set FCSR - if ((fpBinIsNan(rsdata[0])==2) | (fpBinIsNan(rsdata[1])==2) | (fpBinIsNan(rsdata[1])==2)) { - core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit - core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit - } - rddata = 0x7fc00000; // canonical(quiet) NaN - } else { - float rs1 = intregToFloat(rsdata[0]); - float rs2 = intregToFloat(rsdata[1]); - float rs3 = intregToFloat(rsdata[2]); - float fpDest(0.0); - feclearexcept(FE_ALL_EXCEPT); - switch (opcode) { - case FMADD: - // rd = (rs1*rs2)+rs3 - fpDest = (rs1 * rs2) + rs3; break; - case FMSUB: - // rd = (rs1*rs2)-rs3 - fpDest = (rs1 * rs2) - rs3; break; - case FMNMADD: - // rd = -(rs1*rs2)+rs3 - fpDest = -1*(rs1 * rs2) - rs3; break; - case FMNMSUB: - // rd = -(rs1*rs2)-rs3 - fpDest = -1*(rs1 * rs2) + rs3; break; - default: - std::abort(); - break; - } + rddata = 0x7fc00000; // canonical(quiet) NaN + } else { + float rs1 = intregToFloat(rsdata[0]); + float rs2 = intregToFloat(rsdata[1]); + float rs3 = intregToFloat(rsdata[2]); + float fpDest(0.0); + feclearexcept(FE_ALL_EXCEPT); + switch (opcode) { + case FMADD: + // rd = (rs1*rs2)+rs3 + fpDest = (rs1 * rs2) + rs3; break; + case FMSUB: + // rd = (rs1*rs2)-rs3 + fpDest = (rs1 * rs2) - rs3; break; + case FMNMADD: + // rd = -(rs1*rs2)+rs3 + fpDest = -1*(rs1 * rs2) - rs3; break; + case FMNMSUB: + // rd = -(rs1*rs2)-rs3 + fpDest = -1*(rs1 * rs2) + rs3; break; + default: + std::abort(); + break; + } - // update fcsrs - update_fcrs(core_, t, id_); + // update fcsrs + update_fcrs(core_, t, id_); - rddata = floatToBin(fpDest); - } + rddata = floatToBin(fpDest); } } break;