From a10d6fed47de1289a025974e614dcc3acc00f04c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 24 Sep 2020 18:29:09 -0700 Subject: [PATCH 01/19] update Makefile --- benchmarks/opencl/bfs/Makefile | 4 ++-- benchmarks/opencl/convolution/Makefile | 4 ++-- benchmarks/opencl/guassian/Makefile | 4 ++-- benchmarks/opencl/kmeans/Makefile | 4 ++-- benchmarks/opencl/nearn/Makefile | 4 ++-- benchmarks/opencl/saxpy/Makefile | 4 ++-- benchmarks/opencl/sfilter/Makefile | 4 ++-- benchmarks/opencl/sgemm/Makefile | 4 ++-- benchmarks/opencl/transpose/Makefile | 4 ++-- benchmarks/opencl/vecadd/Makefile | 4 ++-- driver/tests/basic/Makefile | 2 +- driver/tests/demo/Makefile | 2 +- driver/tests/dogfood/Makefile | 2 +- miscs/rvvector/vector_test/Makefile | 2 +- runtime/Makefile | 2 +- runtime/tests/dev/Makefile | 2 +- runtime/tests/hello/Makefile | 2 +- runtime/tests/nlTest/Makefile | 2 +- runtime/tests/simple/Makefile | 2 +- runtime/tests/vecadd/Makefile | 2 +- 20 files changed, 30 insertions(+), 30 deletions(-) diff --git a/benchmarks/opencl/bfs/Makefile b/benchmarks/opencl/bfs/Makefile index d62da96a..f11896b3 100644 --- a/benchmarks/opencl/bfs/Makefile +++ b/benchmarks/opencl/bfs/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath ../compiler) diff --git a/benchmarks/opencl/convolution/Makefile b/benchmarks/opencl/convolution/Makefile index 3138e01a..1ec76131 100644 --- a/benchmarks/opencl/convolution/Makefile +++ b/benchmarks/opencl/convolution/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath 
../compiler) diff --git a/benchmarks/opencl/guassian/Makefile b/benchmarks/opencl/guassian/Makefile index a716b790..365430a6 100644 --- a/benchmarks/opencl/guassian/Makefile +++ b/benchmarks/opencl/guassian/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath ../compiler) diff --git a/benchmarks/opencl/kmeans/Makefile b/benchmarks/opencl/kmeans/Makefile index f8636dbb..2faf1b5c 100644 --- a/benchmarks/opencl/kmeans/Makefile +++ b/benchmarks/opencl/kmeans/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath ../compiler) diff --git a/benchmarks/opencl/nearn/Makefile b/benchmarks/opencl/nearn/Makefile index 3024f31d..86b5090d 100644 --- a/benchmarks/opencl/nearn/Makefile +++ b/benchmarks/opencl/nearn/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath ../compiler) diff --git a/benchmarks/opencl/saxpy/Makefile b/benchmarks/opencl/saxpy/Makefile index 6f725541..a35ebb9e 100644 --- a/benchmarks/opencl/saxpy/Makefile +++ b/benchmarks/opencl/saxpy/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath ../compiler) 
diff --git a/benchmarks/opencl/sfilter/Makefile b/benchmarks/opencl/sfilter/Makefile index 3a7f86e6..4dcba1f9 100644 --- a/benchmarks/opencl/sfilter/Makefile +++ b/benchmarks/opencl/sfilter/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath ../compiler) diff --git a/benchmarks/opencl/sgemm/Makefile b/benchmarks/opencl/sgemm/Makefile index 0f789a28..1c5c63e8 100644 --- a/benchmarks/opencl/sgemm/Makefile +++ b/benchmarks/opencl/sgemm/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath ../compiler) diff --git a/benchmarks/opencl/transpose/Makefile b/benchmarks/opencl/transpose/Makefile index d1d8e0fe..deeef42c 100644 --- a/benchmarks/opencl/transpose/Makefile +++ b/benchmarks/opencl/transpose/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath ../compiler) diff --git a/benchmarks/opencl/vecadd/Makefile b/benchmarks/opencl/vecadd/Makefile index bc025b21..4008e857 100644 --- a/benchmarks/opencl/vecadd/Makefile +++ b/benchmarks/opencl/vecadd/Makefile @@ -1,5 +1,5 @@ -LLVM_HOME ?= ~/dev/llvm-project/drops -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +LLVM_HOME ?= /opt/llvm-project/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf POCL_CC_PATH ?= $(realpath ../compiler) 
diff --git a/driver/tests/basic/Makefile b/driver/tests/basic/Makefile index 2edd71cf..60b05034 100644 --- a/driver/tests/basic/Makefile +++ b/driver/tests/basic/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops) +RISCV_TOOLCHAIN_PATH ?= $(wildcard /opt/riscv-gnu-toolchain/drops) VORTEX_RT_PATH ?= $(wildcard ../../../runtime) OPTS ?= -n256 diff --git a/driver/tests/demo/Makefile b/driver/tests/demo/Makefile index 348e5d11..e8ad3569 100644 --- a/driver/tests/demo/Makefile +++ b/driver/tests/demo/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops) +RISCV_TOOLCHAIN_PATH ?= $(wildcard /opt/riscv-gnu-toolchain/drops) VORTEX_RT_PATH ?= $(wildcard ../../../runtime) OPTS ?= -n64 diff --git a/driver/tests/dogfood/Makefile b/driver/tests/dogfood/Makefile index b1fcb076..82941be7 100644 --- a/driver/tests/dogfood/Makefile +++ b/driver/tests/dogfood/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops) +RISCV_TOOLCHAIN_PATH ?= $(wildcard /opt/riscv-gnu-toolchain/drops) VORTEX_RT_PATH ?= $(wildcard ../../../runtime) OPTS ?= -n64 diff --git a/miscs/rvvector/vector_test/Makefile b/miscs/rvvector/vector_test/Makefile index 8613dcc8..acf92cea 100644 --- a/miscs/rvvector/vector_test/Makefile +++ b/miscs/rvvector/vector_test/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops VORTEX_RT_PATH ?= $(wildcard ../..) 
CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc diff --git a/runtime/Makefile b/runtime/Makefile index 41127208..6b6dba74 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc AR = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc-ar diff --git a/runtime/tests/dev/Makefile b/runtime/tests/dev/Makefile index f0f4719b..7900af8f 100644 --- a/runtime/tests/dev/Makefile +++ b/runtime/tests/dev/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops VORTEX_RT_PATH ?= $(wildcard ../..) CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc diff --git a/runtime/tests/hello/Makefile b/runtime/tests/hello/Makefile index 01b2edd4..b7457b16 100644 --- a/runtime/tests/hello/Makefile +++ b/runtime/tests/hello/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops VORTEX_RT_PATH ?= $(wildcard ../..) CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc diff --git a/runtime/tests/nlTest/Makefile b/runtime/tests/nlTest/Makefile index bd863989..22b21318 100644 --- a/runtime/tests/nlTest/Makefile +++ b/runtime/tests/nlTest/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops VORTEX_RT_PATH ?= $(wildcard ../..) CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc diff --git a/runtime/tests/simple/Makefile b/runtime/tests/simple/Makefile index b862c64c..b0fa94f2 100644 --- a/runtime/tests/simple/Makefile +++ b/runtime/tests/simple/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops VORTEX_RT_PATH ?= $(wildcard ../..) 
CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc diff --git a/runtime/tests/vecadd/Makefile b/runtime/tests/vecadd/Makefile index b6906eeb..dcc235e0 100644 --- a/runtime/tests/vecadd/Makefile +++ b/runtime/tests/vecadd/Makefile @@ -1,4 +1,4 @@ -RISCV_TOOLCHAIN_PATH ?= ~/dev/riscv-gnu-toolchain/drops +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain/drops VORTEX_RT_PATH ?= $(wildcard ../..) CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc From 4e1007e5b236c86a356797a06d472c39bfd20663 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 3 Oct 2020 18:53:21 -0400 Subject: [PATCH 02/19] scope refactoring --- driver/opae/Makefile | 11 +- driver/opae/vlsim/Makefile | 24 +- driver/opae/vlsim/opae_sim.cpp | 4 + driver/opae/vortex.cpp | 4 +- driver/opae/vx_scope.cpp | 137 +------ driver/rtlsim/Makefile | 2 + driver/tests/basic/kernel.bin | Bin 6548 -> 6548 bytes driver/tests/basic/kernel.dump | 11 +- driver/tests/basic/kernel.elf | Bin 8712 -> 8716 bytes hw/Makefile | 2 +- hw/opae/vortex_afu.sv | 90 ++--- hw/rtl/VX_cluster.v | 27 +- hw/rtl/VX_core.v | 8 +- hw/rtl/VX_define.vh | 8 +- hw/rtl/VX_mem_unit.v | 10 +- hw/rtl/VX_platform.vh | 2 +- hw/rtl/VX_scope.vh | 467 ++++----------------- hw/rtl/Vortex.v | 44 +- hw/rtl/cache/VX_bank.v | 2 +- hw/rtl/cache/VX_cache.v | 6 +- hw/rtl/cache/VX_cache_config.vh | 1 - hw/rtl/cache/VX_cache_miss_resrv.v | 2 +- hw/rtl/cache/VX_snp_forwarder.v | 2 +- hw/rtl/libs/VX_generic_queue.v | 2 +- hw/scripts/gen_synth_configs.py | 48 --- hw/scripts/scope.json | 161 ++++++++ hw/scripts/scope.py | 630 +++++++++++++++++++++++++++++ hw/simulate/Makefile | 2 + 28 files changed, 1014 insertions(+), 693 deletions(-) delete mode 100755 hw/scripts/gen_synth_configs.py create mode 100644 hw/scripts/scope.json create mode 100755 hw/scripts/scope.py diff --git a/driver/opae/Makefile b/driver/opae/Makefile index a2ebdd05..66517af2 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -1,11 +1,14 @@ OPAE_HOME ?= /tools/opae/1.4.0 
+#CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw LDFLAGS += -L$(OPAE_HOME)/lib +SCOPE=1 + # stack execution protection LDFLAGS +=-z noexecstack @@ -22,7 +25,11 @@ CXXFLAGS += -fPIC CXXFLAGS += -DDUMP_PERF_STATS # Enable scope analyzer -#CXXFLAGS += -DSCOPE +# Enable scope analyzer +ifdef SCOPE + CXXFLAGS += -DSCOPE + SET_SCOPE = SCOPE=1 +endif LDFLAGS += -shared @@ -64,7 +71,7 @@ vlsim: $(SRCS) opae-vlsim $(CXX) $(CXXFLAGS) -L./vlsim -DUSE_VLSIM $(SRCS) $(LDFLAGS) $(VLSIM_LIBS) -o $(PROJECT_VLSIM) opae-vlsim: - $(MAKE) -C vlsim + $(SET_SCOPE) $(MAKE) -C vlsim vortex.o: vortex.cpp $(CXX) $(CXXFLAGS) -c vortex.cpp -o $@ diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 0cae2dfd..dba311b1 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -12,6 +12,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE +DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO +DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CORE_REQ_INFO @@ -21,7 +23,8 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -#DEBUG=1 +DEBUG=1 +#SCOPE=1 CFLAGS += -fPIC @@ -34,7 +37,7 @@ LDFLAGS += -shared -pthread TOP = vortex_afu_shim -RTL_DIR = ../../../hw/rtl +RTL_DIR=../../../hw/rtl SRCS = fpga.cpp opae_sim.cpp SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp @@ -60,20 +63,27 @@ else CFLAGS += -DNDEBUG endif +# Enable scope analyzer +ifdef SCOPE + VL_FLAGS += -DSCOPE + CFLAGS += -DSCOPE + SCOPE_CFG = scope +endif + VL_FLAGS += -DNOPAE CFLAGS += -DNOPAE -# Enable scope analyzer -#VL_FLAGS += -DSCOPE -#CFLAGS += -DSCOPE - RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip PROJECT = 
libopae-c-vlsim.so all: $(PROJECT) + +# generate scope data +scope: ../../../hw/scripts/scope.json + ../../../hw/scripts/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc ../scope-defs.h -vl ../../../hw/rtl/scope-defs.vh ../../../hw/scripts/scope.json -$(PROJECT): $(SRCS) +$(PROJECT): $(SRCS) $(SCOPE_CFG) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index dcfbd9f2..9374c7ab 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -135,6 +135,10 @@ void opae_sim::step() { this->sRxPort_bus(); this->sTxPort_bus(); this->avs_bus(); + +#ifndef NDEBUG + fflush(stdout); +#endif } void opae_sim::eval() { diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 41e69baa..6ae578e6 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -509,11 +509,11 @@ extern int vx_start(vx_device_h hdevice) { // start execution CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN)); -#ifdef SCOPE +/*#ifdef SCOPE sleep(15); vx_scope_stop(device->fpga, 0); exit(0); -#endif +#endif*/ return 0; } diff --git a/driver/opae/vx_scope.cpp b/driver/opae/vx_scope.cpp index 41709e37..7fb29c9e 100644 --- a/driver/opae/vx_scope.cpp +++ b/driver/opae/vx_scope.cpp @@ -14,6 +14,9 @@ #include #include "vx_scope.h" #include "vortex_afu.h" +#include "scope-defs.h" + +#define SCOPE_FRAME_WIDTH 1768 #define CHECK_RES(_expr) \ do { \ @@ -28,132 +31,6 @@ #define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4) #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) -struct scope_signal_t { - int width; - const char* name; -}; - -constexpr int ilog2(int n) { - return (n > 1) ? 
1 + ilog2(n >> 1) : 0; -} - -static constexpr int NW_BITS = ilog2(NUM_WARPS); - -#ifdef EXT_F_ENABLE -static constexpr int NR_BITS = ilog2(64); -#else -static constexpr int NR_BITS = ilog2(32); -#endif - -static constexpr int EX_BITS = 3; -static constexpr int OP_BITS = 4; -static constexpr int MOD_BITS = 3; - -static constexpr int ICORE_TAG_WIDTH = NW_BITS; -static constexpr int DCORE_TAG_WIDTH = ilog2(LSUQ_SIZE); - -static constexpr scope_signal_t scope_signals[] = { - - { 32, "dram_req_addr" }, - { 1, "dram_req_rw" }, - { 16, "dram_req_byteen" }, - { 128, "dram_req_data" }, - { 29, "dram_req_tag" }, - { 128, "dram_rsp_data" }, - { 29, "dram_rsp_tag" }, - - { 32, "snp_req_addr" }, - { 1, "snp_req_invalidate" }, - { 16, "snp_req_tag" }, - { 16, "snp_rsp_tag" }, - - { NW_BITS, "icache_req_wid" }, - { 32, "icache_req_addr" }, - { ICORE_TAG_WIDTH, "icache_req_tag" }, - { 32, "icache_rsp_data" }, - { ICORE_TAG_WIDTH, "icache_rsp_tag" }, - - { NW_BITS, "dcache_req_wid" }, - { 32, "dcache_req_pc" }, - { NUM_THREADS * 32, "dcache_req_addr" }, - { 1, "dcache_req_rw" }, - { NUM_THREADS * 4, "dcache_req_byteen" }, - { NUM_THREADS * 32, "dcache_req_data" }, - { DCORE_TAG_WIDTH, "dcache_req_tag" }, - { NUM_THREADS * 32, "dcache_rsp_data" }, - { DCORE_TAG_WIDTH, "dcache_rsp_tag" }, - - { NW_BITS, "issue_wid" }, - { NUM_THREADS, "issue_tmask" }, - { 32, "issue_pc" }, - { EX_BITS, "issue_ex_type" }, - { OP_BITS, "issue_op_type" }, - { MOD_BITS, "issue_op_mod" }, - { 1, "issue_wb" }, - { NR_BITS, "issue_rd" }, - { NR_BITS, "issue_rs1" }, - { NR_BITS, "issue_rs2" }, - { NR_BITS, "issue_rs3" }, - { 32, "issue_imm" }, - { 1, "issue_rs1_is_pc" }, - { 1, "issue_rs2_is_imm" }, - - { NW_BITS, "gpr_rsp_wid" }, - { 32, "gpr_rsp_pc" }, - { NUM_THREADS * 32, "gpr_rsp_a" }, - { NUM_THREADS * 32, "gpr_rsp_b" }, - { NUM_THREADS * 32, "gpr_rsp_c" }, - - { NW_BITS, "writeback_wid" }, - { 32, "writeback_pc" }, - { NR_BITS, "writeback_rd" }, - { NUM_THREADS * 32, "writeback_data" }, - - { 32, 
"bank_addr_st0" }, - { 32, "bank_addr_st1" }, - { 32, "bank_addr_st2" }, - { 1, "scope_bank_is_mrvq_st1" }, - { 1, "scope_bank_miss_st1" }, - { 1, "scope_bank_dirty_st1" }, - { 1, "scope_bank_force_miss_st1" }, - - /////////////////////////////////////////////////////////////////////////// - - { 1, "dram_req_valid" }, - { 1, "dram_req_ready" }, - { 1, "dram_rsp_valid" }, - { 1, "dram_rsp_ready" }, - - { 1, "snp_req_valid" }, - { 1, "snp_req_ready" }, - { 1, "snp_rsp_valid" }, - { 1, "snp_rsp_ready" }, - - { 1, "icache_req_valid" }, - { 1, "icache_req_ready" }, - { 1, "icache_rsp_valid" }, - { 1, "icache_rsp_ready" }, - - { NUM_THREADS, "dcache_req_valid" }, - { 1, "dcache_req_ready" }, - { NUM_THREADS, "dcache_rsp_valid" }, - { 1, "dcache_rsp_ready" }, - - { 1, "bank_valid_st0" }, - { 1, "bank_valid_st1" }, - { 1, "bank_valid_st2" }, - { 1, "bank_stall_pipe" }, - - { 1, "issue_valid" }, - { 1, "issue_ready" }, - { 1, "gpr_rsp_valid" }, - { 1, "writeback_valid" }, - { 1, "scoreboard_delay" }, - { 1, "gpr_delay" }, - { 1, "execute_delay" }, - { 1, "busy" }, -}; - static constexpr int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); constexpr int calcFrameWidth(int index = 0) { @@ -161,7 +38,6 @@ constexpr int calcFrameWidth(int index = 0) { } static constexpr int fwidth = calcFrameWidth(); -static_assert(fwidth == 1766, "invalid size"); int vx_scope_start(fpga_handle hfpga, uint64_t delay) { if (nullptr == hfpga) @@ -190,9 +66,14 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { std::ofstream ofs("vx_scope.vcd"); + ofs << "$version Generated by Vortex Scope $end" << std::endl; ofs << "$timescale 1 ns $end" << std::endl; ofs << "$var reg 1 0 clk $end" << std::endl; + for (int i = 0; i < num_signals; ++i) { + ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end" << std::endl; + } + ofs << "enddefinitions $end" << std::endl; uint64_t frame_width, max_frames, data_valid; @@ -237,7 +118,7 @@ int 
vx_scope_stop(fpga_handle hfpga, uint64_t delay) { ofs << "b1 0" << std::endl; uint64_t delta; - fpga_result res = fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta); + auto res = fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta); assert(res == FPGA_OK); while (delta != 0) { diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 1f0c36cf..4db05ba8 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -12,6 +12,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE +DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO +DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CORE_REQ_INFO diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin index 87fd4c9319cce2796c7028281b4b3d3c2aa0a983..21381a65b9ac4d50d31905f6789533a8d835e678 100755 GIT binary patch delta 20 acmbPYJjHmzJ0=CDi68QS delta 20 acmbPYJjHmzJ0^#Z6F=ku$;}CjCK3Q+LkPkE diff --git a/driver/tests/basic/kernel.dump b/driver/tests/basic/kernel.dump index f5e2da2a..76eccc8c 100644 --- a/driver/tests/basic/kernel.dump +++ b/driver/tests/basic/kernel.dump @@ -74,7 +74,7 @@ Disassembly of section .text: 800000e0: 0005006b 0x5006b 800000e4: 00002197 auipc gp,0x2 800000e8: c8418193 addi gp,gp,-892 # 80001d68 <__global_pointer$> -800000ec: f14025f3 csrr a1,mhartid +800000ec: 022025f3 csrr a1,0x22 800000f0: 00a59593 slli a1,a1,0xa 800000f4: 02002673 csrr a2,0x20 800000f8: 00261613 slli a2,a2,0x2 @@ -122,7 +122,7 @@ Disassembly of section .text: 80000158: 00008067 ret 8000015c : -8000015c: f1402573 csrr a0,mhartid +8000015c: 02202573 csrr a0,0x22 80000160: 00008067 ret 80000164 : @@ -458,13 +458,12 @@ Disassembly of section .comment: Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2041 jal 80 <_start-0x7fffff80> + 0: 2541 jal 680 <_start-0x7ffff980> 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu 
zero,a6,14 <_start-0x7fffffec> - c: 0016 c.slli zero,0x5 - e: 0000 unimp + c: 0000001b 0x1b 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 14: 3376 fld ft6,376(sp) @@ -473,4 +472,4 @@ Disassembly of section .riscv.attributes: 1a: 5f30 lw a2,120(a4) 1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdc5e> 1e: 3070 fld fa2,224(s0) - ... + 20: 665f 7032 0030 0x307032665f diff --git a/driver/tests/basic/kernel.elf b/driver/tests/basic/kernel.elf index 90887d0ed7a47f9ad422bd270035d015443a3730..31205987b97b455373ea6360078436172b98db46 100755 GIT binary patch delta 100 zcmeBh>2aB$z#5{!z`(Ro^`rok0@LIV0(n4cbAq57hl(l#14B_}adH_0qco7oBEVWy xW^9ycRA3OF3nJ2h#KcC)&G#kevNNhpu2ocKJTrNrqB`T8$tQv2x5*zB#Q_`p9L4|u delta 99 zcmeBi>2R5#z#5>yz`(Fk^`rok!^g=V1oD8?<^(}E4tbDxQD$*+83UsjkjWyzT2y9i ylxb985T6SqCN@fKek?hcol$XeqoOk7iOEY9)fs0@KC7tC`bnOFA$sye1z`Z6YaLMl diff --git a/hw/Makefile b/hw/Makefile index e4e79593..635b4d15 100644 --- a/hw/Makefile +++ b/hw/Makefile @@ -5,5 +5,5 @@ build_config: $(MAKE) -C simulate clean: - rm ./rtl/VX_user_config.vh ./VX_config.h + rm -f ./rtl/VX_user_config.vh ./VX_config.h $(MAKE) -C simulate clean \ No newline at end of file diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 23ce2ea9..36cf7ca2 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -178,7 +178,7 @@ logic [31:0] cmd_csr_wdata; t_ccip_c0_ReqMmioHdr mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); `IGNORE_WARNINGS_END -`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, "Oops!") +`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!")) t_if_ccip_c2_Tx mmio_tx; assign af2cp_sTxPort.c2 = mmio_tx; @@ -221,54 +221,54 @@ begin MMIO_IO_ADDR: begin cmd_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_IO_ADDR: 0x%0h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data)); + $display("%t: MMIO_IO_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, 
t_ccip_clAddr'(cp2af_sRxPort.c0.data)); `endif end MMIO_MEM_ADDR: begin cmd_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data); `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_MEM_ADDR: 0x%0h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data)); + $display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_local_mem_addr'(cp2af_sRxPort.c0.data)); `endif end MMIO_DATA_SIZE: begin cmd_data_size <= $bits(cmd_data_size)'(cp2af_sRxPort.c0.data); `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_DATA_SIZE: %0d", $time, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data)); + $display("%t: MMIO_DATA_SIZE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data)); `endif end MMIO_CMD_TYPE: begin `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CMD_TYPE: %0d", $time, $bits(cmd_type)'(cp2af_sRxPort.c0.data)); + $display("%t: MMIO_CMD_TYPE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_type)'(cp2af_sRxPort.c0.data)); `endif end `ifdef SCOPE MMIO_SCOPE_WRITE: begin `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_SCOPE_WRITE: %0h", $time, 64'(cp2af_sRxPort.c0.data)); + $display("%t: MMIO_SCOPE_WRITE: addr=%0h, data=%0h", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)); `endif end `endif MMIO_CSR_CORE: begin cmd_csr_core <= $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data); `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_CORE: %0h", $time, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data)); + $display("%t: MMIO_CSR_CORE: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data)); `endif end MMIO_CSR_ADDR: begin cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data); `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_ADDR: %0h", $time, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data)); + $display("%t: MMIO_CSR_ADDR: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data)); `endif end MMIO_CSR_DATA: begin cmd_csr_wdata <= $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data); `ifdef DBG_PRINT_OPAE - $display("%t: 
MMIO_CSR_DATA: %0h", $time, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); + $display("%t: MMIO_CSR_DATA: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); `endif end default: begin `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_WR: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); + $display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); `endif end endcase @@ -297,27 +297,27 @@ begin mmio_tx.data <= 64'(state); `ifdef DBG_PRINT_OPAE if (state != state_t'(mmio_tx.data)) begin - $display("%t: MMIO_STATUS: state=%0d", $time, state); + $display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state); end `endif end MMIO_CSR_READ: begin mmio_tx.data <= 64'(cmd_csr_rdata); `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_READ: data=%0h", $time, cmd_csr_rdata); + $display("%t: MMIO_CSR_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_csr_rdata); `endif end `ifdef SCOPE MMIO_SCOPE_READ: begin mmio_tx.data <= cmd_scope_rdata; `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_SCOPE_READ: data=%0h", $time, cmd_scope_rdata); + $display("%t: MMIO_SCOPE_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_scope_rdata); `endif end `endif default: begin `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_RD: addr=%0h", $time, mmio_hdr.address); + $display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address); `endif end endcase @@ -946,11 +946,15 @@ end assign cmd_run_done = !vx_busy; Vortex #() vortex ( - `SCOPE_SIGNALS_ISTAGE_BIND - `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_CACHE_BIND - `SCOPE_SIGNALS_ISSUE_BIND - `SCOPE_SIGNALS_EXECUTE_BIND + `SCOPE_SIGNALS_ISTAGE_TOP_BIND + `SCOPE_SIGNALS_LSU_TOP_BIND + `SCOPE_SIGNALS_BANK_L3_TOP_BIND + `SCOPE_SIGNALS_BANK_L2_TOP_BIND + `SCOPE_SIGNALS_BANK_L1D_TOP_BIND + `SCOPE_SIGNALS_BANK_L1I_TOP_BIND + `SCOPE_SIGNALS_BANK_L1S_TOP_BIND + `SCOPE_SIGNALS_ISSUE_TOP_BIND + 
`SCOPE_SIGNALS_EXECUTE_TOP_BIND .clk (clk), .reset (reset | vx_reset), @@ -1026,10 +1030,7 @@ end `ifdef SCOPE -localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}); -localparam SCOPE_SR_DEPTH = 2; - -`STATIC_ASSERT(SCOPE_DATAW == 1766, "invalid size") +localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST}); `SCOPE_ASSIGN (scope_dram_req_valid, vx_dram_req_valid); `SCOPE_ASSIGN (scope_dram_req_addr, {vx_dram_req_addr, 4'b0}); @@ -1060,45 +1061,10 @@ localparam SCOPE_SR_DEPTH = 2; `SCOPE_ASSIGN (scope_busy, vx_busy); -wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready) - || (scope_icache_rsp_valid && scope_icache_rsp_ready) - || ((| scope_dcache_req_valid) && scope_dcache_req_ready) - || ((| scope_dcache_rsp_valid) && scope_dcache_rsp_ready) - || (scope_dram_req_valid && scope_dram_req_ready) - || (scope_dram_rsp_valid && scope_dram_rsp_ready) - || (scope_snp_req_valid && scope_snp_req_ready) - || (scope_snp_rsp_valid && scope_snp_rsp_ready) - || (scope_issue_valid && scope_issue_ready) - || scope_gpr_rsp_valid - || scope_bank_valid_st0 - || scope_bank_valid_st1 - || scope_bank_valid_st2 - || scope_bank_stall_pipe - || scope_scoreboard_delay - || scope_gpr_delay - || scope_execute_delay - || scope_busy; +wire scope_changed = `SCOPE_TRIGGERS; wire scope_start = vx_reset; -wire [SCOPE_DATAW+1:0] scope_data_in_st[SCOPE_SR_DEPTH-1:0]; -wire [SCOPE_DATAW+1:0] scope_data_in_ste; -assign scope_data_in_st[0] = {`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST, scope_changed, scope_start}; -assign scope_data_in_ste = scope_data_in_st[SCOPE_SR_DEPTH-1]; - -for (genvar i = 1; i < SCOPE_SR_DEPTH; i++) begin - VX_generic_register #( - .N (SCOPE_DATAW+2) - ) scope_sr ( - .clk (clk), - .reset (reset), - .stall (0), - .flush (0), - .in (scope_data_in_st[i-1]), - .out (scope_data_in_st[i]) - ); -end - VX_scope #( .DATAW (SCOPE_DATAW), .BUSW (64), @@ -1107,10 +1073,10 @@ VX_scope #( ) scope ( .clk (clk), 
.reset (reset), - .start (scope_data_in_ste[0]), + .start (scope_start), .stop (0), - .changed (scope_data_in_ste[1]), - .data_in (scope_data_in_ste[SCOPE_DATAW+1:2]), + .changed (scope_changed), + .data_in ({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST}), .bus_in (cmd_scope_wdata), .bus_out (cmd_scope_rdata), .bus_read (cmd_scope_read), diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 47505cbc..d3b75b68 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -3,11 +3,14 @@ module VX_cluster #( parameter CLUSTER_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_IO - `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_CACHE_IO - `SCOPE_SIGNALS_ISSUE_IO - `SCOPE_SIGNALS_EXECUTE_IO + `SCOPE_SIGNALS_ISTAGE_CLUSTER_IO + `SCOPE_SIGNALS_LSU_CLUSTER_IO + `SCOPE_SIGNALS_BANK_L2_CLUSTER_IO + `SCOPE_SIGNALS_BANK_L1D_CLUSTER_IO + `SCOPE_SIGNALS_BANK_L1I_CLUSTER_IO + `SCOPE_SIGNALS_BANK_L1S_CLUSTER_IO + `SCOPE_SIGNALS_ISSUE_CLUSTER_IO + `SCOPE_SIGNALS_EXECUTE_CLUSTER_IO // Clock input wire clk, @@ -138,11 +141,13 @@ module VX_cluster #( VX_core #( .CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) ) core ( - `SCOPE_SIGNALS_ISTAGE_BIND - `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_CACHE_BIND - `SCOPE_SIGNALS_ISSUE_BIND - `SCOPE_SIGNALS_EXECUTE_BIND + `SCOPE_SIGNALS_ISTAGE_SELECT(i) + `SCOPE_SIGNALS_LSU_SELECT(i) + `SCOPE_SIGNALS_BANK_L1D_CORE_SELECT(i) + `SCOPE_SIGNALS_BANK_L1I_CORE_SELECT(i) + `SCOPE_SIGNALS_BANK_L1S_CORE_SELECT(i) + `SCOPE_SIGNALS_ISSUE_SELECT(i) + `SCOPE_SIGNALS_EXECUTE_SELECT(i) .clk (clk), .reset (reset), @@ -380,7 +385,7 @@ module VX_cluster #( .SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH), .SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH) ) l2cache ( - `SCOPE_SIGNALS_CACHE_UNBIND + `SCOPE_SIGNALS_BANK_L2_CACHE_BIND .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index f9582daf..91e0a6ac 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -5,7 +5,9 @@ module VX_core #( ) ( `SCOPE_SIGNALS_ISTAGE_IO `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_CACHE_IO + 
`SCOPE_SIGNALS_BANK_L1D_CORE_IO + `SCOPE_SIGNALS_BANK_L1I_CORE_IO + `SCOPE_SIGNALS_BANK_L1S_CORE_IO `SCOPE_SIGNALS_ISSUE_IO `SCOPE_SIGNALS_EXECUTE_IO @@ -258,7 +260,9 @@ module VX_core #( VX_mem_unit #( .CORE_ID(CORE_ID) ) mem_unit ( - `SCOPE_SIGNALS_CACHE_BIND + `SCOPE_SIGNALS_BANK_L1D_CORE_BIND + `SCOPE_SIGNALS_BANK_L1I_CORE_BIND + `SCOPE_SIGNALS_BANK_L1S_CORE_BIND .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index dcb7e8b7..a7a2e0ef 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -248,7 +248,7 @@ ////////////////////////// Dcache Configurable Knobs ////////////////////////// // Cache ID -`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0) +`define DCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0 // TAG sharing enable `define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE) @@ -277,7 +277,7 @@ ////////////////////////// Icache Configurable Knobs ////////////////////////// // Cache ID -`define ICACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 1) +`define ICACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1 // Core request address bits `define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE)) @@ -309,7 +309,7 @@ ////////////////////////// SM Configurable Knobs ////////////////////////////// // Cache ID -`define SCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 2) +`define SCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2 // Number of Word requests per cycle {1, 2, 4, 8, ...} `define SNUM_REQUESTS `NUM_THREADS @@ -326,7 +326,7 @@ ////////////////////////// L2cache Configurable Knobs ///////////////////////// // Cache ID -`define L2CACHE_ID (`L3_ENABLE ? 
1 : 0) +`define L2CACHE_ID 32'(`L3_ENABLE) + CLUSTER_ID // Core request tag bits `define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES)) diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 5d5c7928..9cd29a1a 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -3,7 +3,9 @@ module VX_mem_unit # ( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_CACHE_IO + `SCOPE_SIGNALS_BANK_L1D_CORE_IO + `SCOPE_SIGNALS_BANK_L1I_CORE_IO + `SCOPE_SIGNALS_BANK_L1S_CORE_IO input wire clk, input wire reset, @@ -77,7 +79,7 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) ) smem ( - `SCOPE_SIGNALS_CACHE_UNBIND + `SCOPE_SIGNALS_BANK_L1S_CACHE_BIND .clk (clk), .reset (reset), @@ -159,7 +161,7 @@ module VX_mem_unit # ( .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), .SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH) ) dcache ( - `SCOPE_SIGNALS_CACHE_BIND + `SCOPE_SIGNALS_BANK_L1D_CACHE_BIND .clk (clk), .reset (reset), @@ -240,7 +242,7 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) ) icache ( - `SCOPE_SIGNALS_CACHE_UNBIND + `SCOPE_SIGNALS_BANK_L1I_CACHE_BIND .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 3f158408..5c0c4e63 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -41,7 +41,7 @@ `define STATIC_ASSERT(cond, msg) \ generate \ - if (!(cond)) $error(msg); \ + if (!(cond)) $error msg; \ endgenerate `define ENABLE_TRACING /* verilator tracing_on */ diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index fa595c56..15b6fb1a 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -1,400 +1,89 @@ + `ifndef VX_SCOPE `define VX_SCOPE `ifdef SCOPE -`define SCOPE_SIGNALS_DATA_LIST \ - scope_dram_req_addr, \ - scope_dram_req_rw, \ - scope_dram_req_byteen, \ - scope_dram_req_data, \ - scope_dram_req_tag, \ - scope_dram_rsp_data, \ - scope_dram_rsp_tag, \ - scope_snp_req_addr, \ - scope_snp_req_invalidate, \ - scope_snp_req_tag, \ 
- scope_snp_rsp_tag, \ - scope_icache_req_wid, \ - scope_icache_req_addr, \ - scope_icache_req_tag, \ - scope_icache_rsp_data, \ - scope_icache_rsp_tag, \ - scope_dcache_req_wid, \ - scope_dcache_req_pc, \ - scope_dcache_req_addr, \ - scope_dcache_req_rw, \ - scope_dcache_req_byteen, \ - scope_dcache_req_data, \ - scope_dcache_req_tag, \ - scope_dcache_rsp_data, \ - scope_dcache_rsp_tag, \ - scope_issue_wid, \ - scope_issue_tmask, \ - scope_issue_pc, \ - scope_issue_ex_type, \ - scope_issue_op_type, \ - scope_issue_op_mod, \ - scope_issue_wb, \ - scope_issue_rd, \ - scope_issue_rs1, \ - scope_issue_rs2, \ - scope_issue_rs3, \ - scope_issue_imm, \ - scope_issue_rs1_is_pc, \ - scope_issue_rs2_is_imm, \ - scope_gpr_rsp_wid, \ - scope_gpr_rsp_pc, \ - scope_gpr_rsp_a, \ - scope_gpr_rsp_b, \ - scope_gpr_rsp_c, \ - scope_writeback_wid, \ - scope_writeback_pc, \ - scope_writeback_rd, \ - scope_writeback_data, \ - scope_bank_addr_st0, \ - scope_bank_addr_st1, \ - scope_bank_addr_st2, \ - scope_bank_is_mrvq_st1, \ - scope_bank_miss_st1, \ - scope_bank_dirty_st1, \ - scope_bank_force_miss_st1, - - `define SCOPE_SIGNALS_UPD_LIST \ - scope_dram_req_valid, \ - scope_dram_req_ready, \ - scope_dram_rsp_valid, \ - scope_dram_rsp_ready, \ - scope_snp_req_valid, \ - scope_snp_req_ready, \ - scope_snp_rsp_valid, \ - scope_snp_rsp_ready, \ - scope_icache_req_valid, \ - scope_icache_req_ready, \ - scope_icache_rsp_valid, \ - scope_icache_rsp_ready, \ - scope_dcache_req_valid, \ - scope_dcache_req_ready, \ - scope_dcache_rsp_valid, \ - scope_dcache_rsp_ready, \ - scope_bank_valid_st0, \ - scope_bank_valid_st1, \ - scope_bank_valid_st2, \ - scope_bank_stall_pipe, \ - scope_issue_valid, \ - scope_issue_ready, \ - scope_gpr_rsp_valid, \ - scope_writeback_valid, \ - scope_scoreboard_delay, \ - scope_gpr_delay, \ - scope_execute_delay, \ - scope_busy +`include "scope-defs.vh" - `define SCOPE_SIGNALS_DECL \ - wire scope_dram_req_valid; \ - wire [31:0] scope_dram_req_addr; \ - wire 
scope_dram_req_rw; \ - wire [15:0] scope_dram_req_byteen; \ - wire [127:0] scope_dram_req_data; \ - wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \ - wire scope_dram_req_ready; \ - wire scope_dram_rsp_valid; \ - wire [127:0] scope_dram_rsp_data; \ - wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \ - wire scope_dram_rsp_ready; \ - wire scope_snp_req_valid; \ - wire [31:0] scope_snp_req_addr; \ - wire scope_snp_req_invalidate; \ - wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \ - wire scope_snp_req_ready; \ - wire scope_snp_rsp_valid; \ - wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \ - wire scope_icache_req_valid; \ - wire [`NW_BITS-1:0] scope_icache_req_wid; \ - wire [31:0] scope_icache_req_addr; \ - wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag; \ - wire scope_icache_req_ready; \ - wire scope_icache_rsp_valid; \ - wire [31:0] scope_icache_rsp_data; \ - wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag; \ - wire scope_icache_rsp_ready; \ - wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \ - wire [`NW_BITS-1:0] scope_dcache_req_wid; \ - wire [31:0] scope_dcache_req_pc; \ - wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr; \ - wire scope_dcache_req_rw; \ - wire [`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen; \ - wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data; \ - wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag; \ - wire scope_dcache_req_ready; \ - wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \ - wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data; \ - wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag; \ - wire scope_dcache_rsp_ready; \ - wire scope_snp_rsp_ready; \ - wire [`NW_BITS-1:0] scope_issue_wid; \ - wire [`NUM_THREADS-1:0] scope_issue_tmask; \ - wire [31:0] scope_issue_pc; \ - wire [`EX_BITS-1:0] scope_issue_ex_type; \ - wire [`OP_BITS-1:0] scope_issue_op_type; \ - wire [`MOD_BITS-1:0] scope_issue_op_mod; \ - wire scope_issue_wb; \ - wire [`NR_BITS-1:0] scope_issue_rd; \ - wire [`NR_BITS-1:0] scope_issue_rs1; \ - wire 
[`NR_BITS-1:0] scope_issue_rs2; \ - wire [`NR_BITS-1:0] scope_issue_rs3; \ - wire [31:0] scope_issue_imm; \ - wire scope_issue_rs1_is_pc; \ - wire scope_issue_rs2_is_imm; \ - wire scope_gpr_rsp_valid; \ - wire [`NW_BITS-1:0] scope_gpr_rsp_wid; \ - wire [31:0] scope_gpr_rsp_pc; \ - wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a; \ - wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b; \ - wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c; \ - wire scope_writeback_valid; \ - wire [`NW_BITS-1:0] scope_writeback_wid; \ - wire [31:0] scope_writeback_pc; \ - wire [`NR_BITS-1:0] scope_writeback_rd; \ - wire [`NUM_THREADS-1:0][31:0] scope_writeback_data; \ - wire scope_bank_valid_st0; \ - wire scope_bank_valid_st1; \ - wire scope_bank_valid_st2; \ - wire [31:0] scope_bank_addr_st0; \ - wire [31:0] scope_bank_addr_st1; \ - wire [31:0] scope_bank_addr_st2; \ - wire scope_bank_is_mrvq_st1; \ - wire scope_bank_miss_st1; \ - wire scope_bank_dirty_st1; \ - wire scope_bank_force_miss_st1; \ - wire scope_bank_stall_pipe; \ - wire scope_issue_valid; \ - wire scope_issue_ready; \ - wire scope_scoreboard_delay; \ - wire scope_gpr_delay; \ - wire scope_execute_delay; \ - wire scope_busy; +`define SCOPE_ASSIGN(d,s) assign d = s - `define SCOPE_SIGNALS_ISTAGE_IO \ - output wire scope_icache_req_valid, \ - output wire [`NW_BITS-1:0] scope_icache_req_wid, \ - output wire [31:0] scope_icache_req_addr, \ - output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag, \ - output wire scope_icache_req_ready, \ - output wire scope_icache_rsp_valid, \ - output wire [31:0] scope_icache_rsp_data, \ - output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag, \ - output wire scope_icache_rsp_ready, - - `define SCOPE_SIGNALS_LSU_IO \ - output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \ - output wire [`NW_BITS-1:0] scope_dcache_req_wid, \ - output wire [31:0] scope_dcache_req_pc, \ - output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr, \ - output wire scope_dcache_req_rw, \ - output wire 
[`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen, \ - output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data, \ - output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag, \ - output wire scope_dcache_req_ready, \ - output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \ - output wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data, \ - output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag, \ - output wire scope_dcache_rsp_ready, - - `define SCOPE_SIGNALS_CACHE_IO \ - output wire scope_bank_valid_st0, \ - output wire scope_bank_valid_st1, \ - output wire scope_bank_valid_st2, \ - output wire [31:0] scope_bank_addr_st0, \ - output wire [31:0] scope_bank_addr_st1, \ - output wire [31:0] scope_bank_addr_st2, \ - output wire scope_bank_is_mrvq_st1, \ - output wire scope_bank_miss_st1, \ - output wire scope_bank_dirty_st1, \ - output wire scope_bank_force_miss_st1, \ - output wire scope_bank_stall_pipe, - - `define SCOPE_SIGNALS_ISSUE_IO \ - output wire scope_issue_valid, \ - output wire [`NW_BITS-1:0] scope_issue_wid, \ - output wire [`NUM_THREADS-1:0] scope_issue_tmask, \ - output wire [31:0] scope_issue_pc, \ - output wire [`EX_BITS-1:0] scope_issue_ex_type, \ - output wire [`OP_BITS-1:0] scope_issue_op_type, \ - output wire [`MOD_BITS-1:0] scope_issue_op_mod, \ - output wire scope_issue_wb, \ - output wire [`NR_BITS-1:0] scope_issue_rd, \ - output wire [`NR_BITS-1:0] scope_issue_rs1, \ - output wire [`NR_BITS-1:0] scope_issue_rs2, \ - output wire [`NR_BITS-1:0] scope_issue_rs3, \ - output wire [31:0] scope_issue_imm, \ - output wire scope_issue_rs1_is_pc, \ - output wire scope_issue_rs2_is_imm, \ - output wire scope_writeback_valid, \ - output wire scope_gpr_rsp_valid, \ - output wire [`NW_BITS-1:0] scope_gpr_rsp_wid, \ - output wire [31:0] scope_gpr_rsp_pc, \ - output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a, \ - output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b, \ - output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c, \ - output wire 
[`NW_BITS-1:0] scope_writeback_wid, \ - output wire [31:0] scope_writeback_pc, \ - output wire [`NR_BITS-1:0] scope_writeback_rd, \ - output wire [`NUM_THREADS-1:0][31:0] scope_writeback_data, \ - output wire scope_issue_ready, \ - output wire scope_scoreboard_delay, \ - output wire scope_gpr_delay, \ - output wire scope_execute_delay, - - `define SCOPE_SIGNALS_EXECUTE_IO - - `define SCOPE_SIGNALS_ISTAGE_BIND \ - .scope_icache_req_valid (scope_icache_req_valid), \ - .scope_icache_req_wid (scope_icache_req_wid), \ - .scope_icache_req_addr (scope_icache_req_addr), \ - .scope_icache_req_tag (scope_icache_req_tag), \ - .scope_icache_req_ready (scope_icache_req_ready), \ - .scope_icache_rsp_valid (scope_icache_rsp_valid), \ - .scope_icache_rsp_data (scope_icache_rsp_data), \ - .scope_icache_rsp_tag (scope_icache_rsp_tag), \ - .scope_icache_rsp_ready (scope_icache_rsp_ready), - - `define SCOPE_SIGNALS_LSU_BIND \ - .scope_dcache_req_valid (scope_dcache_req_valid), \ - .scope_dcache_req_wid (scope_dcache_req_wid), \ - .scope_dcache_req_pc (scope_dcache_req_pc), \ - .scope_dcache_req_addr (scope_dcache_req_addr), \ - .scope_dcache_req_rw (scope_dcache_req_rw), \ - .scope_dcache_req_byteen(scope_dcache_req_byteen), \ - .scope_dcache_req_data (scope_dcache_req_data), \ - .scope_dcache_req_tag (scope_dcache_req_tag), \ - .scope_dcache_req_ready (scope_dcache_req_ready), \ - .scope_dcache_rsp_valid (scope_dcache_rsp_valid), \ - .scope_dcache_rsp_data (scope_dcache_rsp_data), \ - .scope_dcache_rsp_tag (scope_dcache_rsp_tag), \ - .scope_dcache_rsp_ready (scope_dcache_rsp_ready), - - `define SCOPE_SIGNALS_CACHE_BIND \ - .scope_bank_valid_st0 (scope_bank_valid_st0), \ - .scope_bank_valid_st1 (scope_bank_valid_st1), \ - .scope_bank_valid_st2 (scope_bank_valid_st2), \ - .scope_bank_addr_st0 (scope_bank_addr_st0), \ - .scope_bank_addr_st1 (scope_bank_addr_st1), \ - .scope_bank_addr_st2 (scope_bank_addr_st2), \ - .scope_bank_is_mrvq_st1 (scope_bank_is_mrvq_st1), \ - 
.scope_bank_miss_st1 (scope_bank_miss_st1), \ - .scope_bank_dirty_st1 (scope_bank_dirty_st1), \ - .scope_bank_force_miss_st1(scope_bank_force_miss_st1), \ - .scope_bank_stall_pipe (scope_bank_stall_pipe), - - `define SCOPE_SIGNALS_CACHE_UNBIND \ - /* verilator lint_off PINCONNECTEMPTY */ \ - .scope_bank_valid_st0 (), \ - .scope_bank_valid_st1 (), \ - .scope_bank_valid_st2 (), \ - .scope_bank_addr_st0 (), \ - .scope_bank_addr_st1 (), \ - .scope_bank_addr_st2 (), \ - .scope_bank_is_mrvq_st1 (), \ - .scope_bank_miss_st1 (), \ - .scope_bank_dirty_st1 (), \ - .scope_bank_force_miss_st1 (), \ - .scope_bank_stall_pipe (), \ - /* verilator lint_on PINCONNECTEMPTY */ - - `define SCOPE_SIGNALS_CACHE_BANK_SELECT \ - /* verilator lint_off UNUSED */ \ - wire [NUM_BANKS-1:0] scope_per_bank_valid_st0; \ - wire [NUM_BANKS-1:0] scope_per_bank_valid_st1; \ - wire [NUM_BANKS-1:0] scope_per_bank_valid_st2; \ - wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st0; \ - wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st1; \ - wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st2; \ - wire [NUM_BANKS-1:0] scope_per_bank_is_mrvq_st1; \ - wire [NUM_BANKS-1:0] scope_per_bank_miss_st1; \ - wire [NUM_BANKS-1:0] scope_per_bank_dirty_st1; \ - wire [NUM_BANKS-1:0] scope_per_bank_force_miss_st1; \ - wire [NUM_BANKS-1:0] scope_per_bank_stall_pipe; \ - /* verilator lint_on UNUSED */ \ - assign scope_bank_valid_st0 = scope_per_bank_valid_st0[0]; \ - assign scope_bank_valid_st1 = scope_per_bank_valid_st1[0]; \ - assign scope_bank_valid_st2 = scope_per_bank_valid_st2[0]; \ - assign scope_bank_addr_st0 = scope_per_bank_addr_st0[0]; \ - assign scope_bank_addr_st1 = scope_per_bank_addr_st1[0]; \ - assign scope_bank_addr_st2 = scope_per_bank_addr_st2[0]; \ - assign scope_bank_is_mrvq_st1 = scope_per_bank_is_mrvq_st1[0]; \ - assign scope_bank_miss_st1 = scope_per_bank_miss_st1[0]; \ - assign scope_bank_dirty_st1 = scope_per_bank_dirty_st1[0]; \ - assign scope_bank_force_miss_st1 = 
scope_per_bank_force_miss_st1[0]; \ - assign scope_bank_stall_pipe = scope_per_bank_stall_pipe[0]; - - `define SCOPE_SIGNALS_CACHE_BANK_BIND \ - .scope_bank_valid_st0 (scope_per_bank_valid_st0[i]), \ - .scope_bank_valid_st1 (scope_per_bank_valid_st1[i]), \ - .scope_bank_valid_st2 (scope_per_bank_valid_st2[i]), \ - .scope_bank_addr_st0 (scope_per_bank_addr_st0[i]), \ - .scope_bank_addr_st1 (scope_per_bank_addr_st1[i]), \ - .scope_bank_addr_st2 (scope_per_bank_addr_st2[i]), \ - .scope_bank_is_mrvq_st1 (scope_per_bank_is_mrvq_st1[i]), \ - .scope_bank_miss_st1 (scope_per_bank_miss_st1[i]), \ - .scope_bank_dirty_st1 (scope_per_bank_dirty_st1[i]), \ - .scope_bank_force_miss_st1 (scope_per_bank_force_miss_st1[i]), \ - .scope_bank_stall_pipe (scope_per_bank_stall_pipe[i]), - - `define SCOPE_SIGNALS_ISSUE_BIND \ - .scope_issue_valid (scope_issue_valid), \ - .scope_issue_wid (scope_issue_wid), \ - .scope_issue_tmask (scope_issue_tmask), \ - .scope_issue_pc (scope_issue_pc), \ - .scope_issue_ex_type (scope_issue_ex_type), \ - .scope_issue_op_type (scope_issue_op_type), \ - .scope_issue_op_mod (scope_issue_op_mod), \ - .scope_issue_wb (scope_issue_wb), \ - .scope_issue_rd (scope_issue_rd), \ - .scope_issue_rs1 (scope_issue_rs1), \ - .scope_issue_rs2 (scope_issue_rs2), \ - .scope_issue_rs3 (scope_issue_rs3), \ - .scope_issue_imm (scope_issue_imm), \ - .scope_issue_rs1_is_pc (scope_issue_rs1_is_pc), \ - .scope_issue_rs2_is_imm (scope_issue_rs2_is_imm), \ - .scope_writeback_valid (scope_writeback_valid), \ - .scope_writeback_wid (scope_writeback_wid), \ - .scope_writeback_pc (scope_writeback_pc), \ - .scope_writeback_rd (scope_writeback_rd), \ - .scope_writeback_data (scope_writeback_data), \ - .scope_issue_ready (scope_issue_ready), \ - .scope_gpr_rsp_valid (scope_gpr_rsp_valid), \ - .scope_gpr_rsp_wid (scope_gpr_rsp_wid), \ - .scope_gpr_rsp_pc (scope_gpr_rsp_pc), \ - .scope_gpr_rsp_a (scope_gpr_rsp_a), \ - .scope_gpr_rsp_b (scope_gpr_rsp_b), \ - .scope_gpr_rsp_c 
(scope_gpr_rsp_c), \ - .scope_scoreboard_delay (scope_scoreboard_delay), \ - .scope_gpr_delay (scope_gpr_delay), \ - .scope_execute_delay (scope_execute_delay), \ - - `define SCOPE_SIGNALS_EXECUTE_BIND - - `define SCOPE_ASSIGN(d,s) assign d = s `else - `define SCOPE_SIGNALS_ISTAGE_IO - `define SCOPE_SIGNALS_LSU_IO - `define SCOPE_SIGNALS_CACHE_IO - `define SCOPE_SIGNALS_ISSUE_IO - `define SCOPE_SIGNALS_EXECUTE_IO - `define SCOPE_SIGNALS_ISTAGE_BIND - `define SCOPE_SIGNALS_LSU_BIND - `define SCOPE_SIGNALS_CACHE_BIND - `define SCOPE_SIGNALS_ISSUE_BIND - `define SCOPE_SIGNALS_EXECUTE_BIND - - `define SCOPE_SIGNALS_CACHE_UNBIND - `define SCOPE_SIGNALS_CACHE_BANK_SELECT - `define SCOPE_SIGNALS_CACHE_BANK_BIND - - `define SCOPE_ASSIGN(d,s) +`define SCOPE_SIGNALS_ISTAGE_TOP_IO +`define SCOPE_SIGNALS_ISTAGE_TOP_BIND +`define SCOPE_SIGNALS_ISTAGE_CLUSTER_IO +`define SCOPE_SIGNALS_ISTAGE_CLUSTER_BIND +`define SCOPE_SIGNALS_ISTAGE_IO +`define SCOPE_SIGNALS_ISTAGE_BIND +`define SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(__i__) +`define SCOPE_SIGNALS_ISTAGE_SELECT(__i__) +`define SCOPE_SIGNALS_LSU_TOP_IO +`define SCOPE_SIGNALS_LSU_TOP_BIND +`define SCOPE_SIGNALS_LSU_CLUSTER_IO +`define SCOPE_SIGNALS_LSU_CLUSTER_BIND +`define SCOPE_SIGNALS_LSU_IO +`define SCOPE_SIGNALS_LSU_BIND +`define SCOPE_SIGNALS_LSU_CLUSTER_SELECT(__i__) +`define SCOPE_SIGNALS_LSU_SELECT(__i__) +`define SCOPE_SIGNALS_ISSUE_TOP_IO +`define SCOPE_SIGNALS_ISSUE_TOP_BIND +`define SCOPE_SIGNALS_ISSUE_CLUSTER_IO +`define SCOPE_SIGNALS_ISSUE_CLUSTER_BIND +`define SCOPE_SIGNALS_ISSUE_IO +`define SCOPE_SIGNALS_ISSUE_BIND +`define SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(__i__) +`define SCOPE_SIGNALS_ISSUE_SELECT(__i__) +`define SCOPE_SIGNALS_EXECUTE_TOP_IO +`define SCOPE_SIGNALS_EXECUTE_TOP_BIND +`define SCOPE_SIGNALS_EXECUTE_CLUSTER_IO +`define SCOPE_SIGNALS_EXECUTE_CLUSTER_BIND +`define SCOPE_SIGNALS_EXECUTE_IO +`define SCOPE_SIGNALS_EXECUTE_BIND +`define SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(__i__) +`define 
SCOPE_SIGNALS_EXECUTE_SELECT(__i__) +`define SCOPE_SIGNALS_BANK_L3_TOP_IO +`define SCOPE_SIGNALS_BANK_L3_TOP_BIND +`define SCOPE_SIGNALS_BANK_L2_TOP_IO +`define SCOPE_SIGNALS_BANK_L2_TOP_BIND +`define SCOPE_SIGNALS_BANK_L1D_TOP_IO +`define SCOPE_SIGNALS_BANK_L1D_TOP_BIND +`define SCOPE_SIGNALS_BANK_L1I_TOP_IO +`define SCOPE_SIGNALS_BANK_L1I_TOP_BIND +`define SCOPE_SIGNALS_BANK_L1S_TOP_IO +`define SCOPE_SIGNALS_BANK_L1S_TOP_BIND +`define SCOPE_SIGNALS_BANK_L2_CLUSTER_IO +`define SCOPE_SIGNALS_BANK_L2_CLUSTER_BIND +`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_IO +`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_BIND +`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_IO +`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_BIND +`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_IO +`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_BIND +`define SCOPE_SIGNALS_BANK_L1D_CORE_IO +`define SCOPE_SIGNALS_BANK_L1D_CORE_BIND +`define SCOPE_SIGNALS_BANK_L1I_CORE_IO +`define SCOPE_SIGNALS_BANK_L1I_CORE_BIND +`define SCOPE_SIGNALS_BANK_L1S_CORE_IO +`define SCOPE_SIGNALS_BANK_L1S_CORE_BIND +`define SCOPE_SIGNALS_BANK_CACHE_IO +`define SCOPE_SIGNALS_BANK_CACHE_BIND +`define SCOPE_SIGNALS_BANK_IO +`define SCOPE_SIGNALS_BANK_BIND +`define SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(__i__) +`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(__i__) +`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(__i__) +`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(__i__) +`define SCOPE_SIGNALS_BANK_L1D_CORE_SELECT(__i__) +`define SCOPE_SIGNALS_BANK_L1I_CORE_SELECT(__i__) +`define SCOPE_SIGNALS_BANK_L1S_CORE_SELECT(__i__) +`define SCOPE_SIGNALS_BANK_L3_CACHE_BIND +`define SCOPE_SIGNALS_BANK_L2_CACHE_BIND +`define SCOPE_SIGNALS_BANK_L1D_CACHE_BIND +`define SCOPE_SIGNALS_BANK_L1I_CACHE_BIND +`define SCOPE_SIGNALS_BANK_L1S_CACHE_BIND +`define SCOPE_SIGNALS_BANK_SELECT(__i__) +`define SCOPE_ASSIGN(d,s) + `endif - -// VX_SCOPE `endif \ No newline at end of file diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index aa328828..82f36353 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v 
@@ -1,11 +1,15 @@ `include "VX_define.vh" module Vortex ( - `SCOPE_SIGNALS_ISTAGE_IO - `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_CACHE_IO - `SCOPE_SIGNALS_ISSUE_IO - `SCOPE_SIGNALS_EXECUTE_IO + `SCOPE_SIGNALS_ISTAGE_TOP_IO + `SCOPE_SIGNALS_LSU_TOP_IO + `SCOPE_SIGNALS_BANK_L3_TOP_IO + `SCOPE_SIGNALS_BANK_L2_TOP_IO + `SCOPE_SIGNALS_BANK_L1D_TOP_IO + `SCOPE_SIGNALS_BANK_L1I_TOP_IO + `SCOPE_SIGNALS_BANK_L1S_TOP_IO + `SCOPE_SIGNALS_ISSUE_TOP_IO + `SCOPE_SIGNALS_EXECUTE_TOP_IO // Clock input wire clk, @@ -75,11 +79,14 @@ module Vortex ( VX_cluster #( .CLUSTER_ID(0) ) cluster ( - `SCOPE_SIGNALS_ISTAGE_BIND - `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_CACHE_BIND - `SCOPE_SIGNALS_ISSUE_BIND - `SCOPE_SIGNALS_EXECUTE_BIND + `SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(0) + `SCOPE_SIGNALS_LSU_CLUSTER_SELECT(0) + `SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(0) + `SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(0) + `SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(0) + `SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(0) + `SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(0) + `SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(0) .clk (clk), .reset (reset), @@ -193,11 +200,14 @@ module Vortex ( VX_cluster #( .CLUSTER_ID(i) ) cluster ( - `SCOPE_SIGNALS_ISTAGE_BIND - `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_CACHE_BIND - `SCOPE_SIGNALS_ISSUE_BIND - `SCOPE_SIGNALS_EXECUTE_BIND + `SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(i) + `SCOPE_SIGNALS_LSU_CLUSTER_SELECT(i) + `SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(i) + `SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(i) + `SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(i) + `SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(i) + `SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(i) + `SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(i) .clk (clk), .reset (reset), @@ -384,7 +394,7 @@ module Vortex ( assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready); VX_cache #( - .CACHE_ID (0), + .CACHE_ID (`L3CACHE_ID), .CACHE_SIZE (`L3CACHE_SIZE), .BANK_LINE_SIZE (`L3BANK_LINE_SIZE), .NUM_BANKS (`L3NUM_BANKS), @@ -407,7 +417,7 @@ module Vortex ( .SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH), 
.SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH) ) l3cache ( - `SCOPE_SIGNALS_CACHE_UNBIND + `SCOPE_SIGNALS_BANK_L3_CACHE_BIND .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 21125640..e953b651 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -50,7 +50,7 @@ module VX_bank #( // Snooping request tag width parameter SNP_REQ_TAG_WIDTH = 0 ) ( - `SCOPE_SIGNALS_CACHE_IO + `SCOPE_SIGNALS_BANK_IO input wire clk, input wire reset, diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index b2dfe0e5..c3189499 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -59,7 +59,7 @@ module VX_cache #( // Snooping forward tag width parameter SNP_FWD_TAG_WIDTH = 1 ) ( - `SCOPE_SIGNALS_CACHE_IO + `SCOPE_SIGNALS_BANK_CACHE_IO input wire clk, input wire reset, @@ -162,8 +162,6 @@ module VX_cache #( wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag; wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready; - `SCOPE_SIGNALS_CACHE_BANK_SELECT - wire snp_req_valid_qual; wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual; wire snp_req_invalidate_qual; @@ -367,7 +365,7 @@ module VX_cache #( .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) ) bank ( - `SCOPE_SIGNALS_CACHE_BANK_BIND + `SCOPE_SIGNALS_BANK_SELECT(i) .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index 1237c1c6..b8fe1d46 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -2,7 +2,6 @@ `define VX_CACHE_CONFIG `include "VX_platform.vh" -`include "VX_scope.vh" `ifdef DBG_CORE_REQ_INFO `include "VX_define.vh" diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 61c4bcfb..6ed5b02c 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -66,7 +66,7 @@ module VX_cache_miss_resrv #( reg [`LOG2UP(MRVQ_SIZE+1)-1:0] size; - `STATIC_ASSERT(MRVQ_SIZE > 5, "invalid size") + 
`STATIC_ASSERT(MRVQ_SIZE > 5, ("invalid size")) assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE)); assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index d1a7433a..bffc4679 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -37,7 +37,7 @@ module VX_snp_forwarder #( input wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdin_tag, output wire [NUM_REQUESTS-1:0] snp_fwdin_ready ); - `STATIC_ASSERT(NUM_REQUESTS > 1, "invalid value") + `STATIC_ASSERT(NUM_REQUESTS > 1, ("invalid value")) reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0]; diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 4164fdad..6dab29f9 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -17,7 +17,7 @@ module VX_generic_queue #( output wire full, output wire [SIZEW-1:0] size ); - `STATIC_ASSERT(`ISPOW2(SIZE), "must be 0 or power of 2!") + `STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!")) reg [SIZEW-1:0] size_r; wire reading; diff --git a/hw/scripts/gen_synth_configs.py b/hw/scripts/gen_synth_configs.py deleted file mode 100755 index 0b42d88a..00000000 --- a/hw/scripts/gen_synth_configs.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -import os -import glob - -config_location = 'configs' - -name_template = '{clusters}cl-{cores}c-{warps}w-{threads}t-{l2}Kl2-{dcache}Kd-{icache}Ki{name_suffix}.sh' - -template = """ - -export V_NT={threads} -export V_NW={warps} -export V_NUM_CORES_PER_CLUSTER={cores} -export V_NUM_CLUSTERS={clusters} -export V_DCACHE_SIZE_BYTES={dcachek} -export V_ICACHE_SIZE_BYTES={icachek} - -# L2 Cache size -export V_L2CACHE_SIZE_BYTES={l2k} - -{codegen} - -""" - -# cluster, cores, warps, threads, l2, dcache, icache -configs = [ - (1, 2, 8, 4, 8, 4, 1), - (1, 2, 8, 8, 8, 4, 1), - (1, 2, 8, 8, 16, 8, 1), - - (1, 4, 8, 8, 
16, 4, 1), - (1, 4, 8, 8, 16, 8, 1), - (1, 4, 16, 8, 16, 8, 1), - - (2, 4, 8, 4, 8, 4, 1), - (2, 4, 8, 8, 16, 8, 1), -] - -files = glob.glob(config_location + '/*.sh') -for f in files: - os.remove(f) - -for clusters, cores, warps, threads, l2, dcache, icache in configs: - l2k, dcachek, icachek = 1024 * l2, 1024 * dcache, 1024 * icache - name_suffix = '' - with open(config_location + '/' + name_template.format(**locals()), 'w') as f: - codegen = '' - f.write(template.format(**locals())) diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json new file mode 100644 index 00000000..9be503bb --- /dev/null +++ b/hw/scripts/scope.json @@ -0,0 +1,161 @@ +{ + "version": 1, + "includes":[ + "../rtl/VX_config.vh", + "../rtl/VX_platform.vh", + "../rtl/VX_define.vh", + "../rtl/cache/VX_cache_config.vh" + ], + "parameters": { + "L3_ENABLE": "`L3_ENABLE", + "L2_ENABLE": "`L2_ENABLE", + "NUM_CLUSTERS": "`NUM_CLUSTERS", + "NUM_CORES": "`NUM_CORES", + "DNUM_BANKS": "`DNUM_BANKS", + "INUM_BANKS": "`INUM_BANKS", + "SNUM_BANKS": "`SNUM_BANKS", + "L2NUM_BANKS": "`L2NUM_BANKS", + "L3NUM_BANKS": "`L3NUM_BANKS" + }, + "taps": { + "top::SCOPE_SIGNALS_AFU": { + "!scope_dram_req_valid": 1, + "scope_dram_req_addr": 32, + "scope_dram_req_rw": 1, + "scope_dram_req_byteen": "`VX_DRAM_BYTEEN_WIDTH", + "scope_dram_req_data": "`VX_DRAM_LINE_WIDTH", + "scope_dram_req_tag": "`VX_DRAM_TAG_WIDTH", + "!scope_dram_req_ready": 1, + "!scope_dram_rsp_valid": 1, + "scope_dram_rsp_data": 128, + "scope_dram_rsp_tag": "`VX_DRAM_TAG_WIDTH", + "!scope_dram_rsp_ready": 1, + "!scope_snp_req_valid": 1, + "scope_snp_req_addr": 32, + "scope_snp_req_invalidate": 1, + "scope_snp_req_tag": "`VX_SNP_TAG_WIDTH", + "!scope_snp_req_ready": 1, + "!scope_snp_rsp_valid": 1, + "scope_snp_rsp_tag": "`VX_SNP_TAG_WIDTH", + "!scope_snp_rsp_ready": 1, + "scope_busy": 1 + }, + "core::SCOPE_SIGNALS_ISTAGE": { + "!scope_icache_req_valid": 1, + "scope_icache_req_wid": "`NW_BITS", + "scope_icache_req_addr": 32, + "scope_icache_req_tag": 
"`ICORE_TAG_ID_BITS", + "!scope_icache_req_ready": 1, + "!scope_icache_rsp_valid": 1, + "scope_icache_rsp_data": 32, + "scope_icache_rsp_tag": "`ICORE_TAG_ID_BITS", + "!scope_icache_rsp_ready": 1 + }, + "core::SCOPE_SIGNALS_LSU": { + "!scope_dcache_req_valid": "`NUM_THREADS", + "scope_dcache_req_wid": "`NW_BITS", + "scope_dcache_req_pc": 32, + "scope_dcache_req_addr": "`NUM_THREADS * 32", + "scope_dcache_req_rw": 1, + "scope_dcache_req_byteen": "`NUM_THREADS * 4", + "scope_dcache_req_data": "`NUM_THREADS * 32", + "scope_dcache_req_tag": "`DCORE_TAG_ID_BITS", + "!scope_dcache_req_ready": 1, + "!scope_dcache_rsp_valid": "`NUM_THREADS", + "scope_dcache_rsp_data": "`NUM_THREADS * 32", + "scope_dcache_rsp_tag": "`DCORE_TAG_ID_BITS", + "!scope_dcache_rsp_ready": 1 + }, + "core::SCOPE_SIGNALS_ISSUE": { + "!scope_issue_valid": 1, + "scope_issue_wid": "`NW_BITS", + "scope_issue_tmask": "`NUM_THREADS", + "scope_issue_pc": 32, + "scope_issue_ex_type": "`EX_BITS", + "scope_issue_op_type": "`OP_BITS", + "scope_issue_op_mod": "`MOD_BITS", + "scope_issue_wb": 1, + "scope_issue_rd": "`NR_BITS", + "scope_issue_rs1": "`NR_BITS", + "scope_issue_rs2": "`NR_BITS", + "scope_issue_rs3": "`NR_BITS", + "scope_issue_imm": 32, + "scope_issue_rs1_is_pc": 1, + "scope_issue_rs2_is_imm": 1, + "!scope_issue_ready": 1, + "!scope_gpr_rsp_valid": 1, + "scope_gpr_rsp_wid": "`NW_BITS", + "scope_gpr_rsp_pc": 32, + "scope_gpr_rsp_a": "`NUM_THREADS * 32", + "scope_gpr_rsp_b": "`NUM_THREADS * 32", + "scope_gpr_rsp_c": "`NUM_THREADS * 32", + "!scope_gpr_delay": 1, + "!scope_writeback_valid": 1, + "scope_writeback_wid": "`NW_BITS", + "scope_writeback_pc": 32, + "scope_writeback_rd": "`NR_BITS", + "scope_writeback_data": "`NUM_THREADS * 32", + "!scope_scoreboard_delay": 1, + "!scope_execute_delay": 1 + }, + "core::SCOPE_SIGNALS_EXECUTE": {}, + "bank::SCOPE_SIGNALS_BANK": { + "!scope_bank_valid_st0": 1, + "!scope_bank_valid_st1": 1, + "!scope_bank_valid_st2": 1, + "scope_bank_addr_st0": 32, + 
"scope_bank_addr_st1": 32, + "scope_bank_addr_st2": 32, + "scope_bank_is_mrvq_st1": 1, + "scope_bank_miss_st1": 1, + "scope_bank_dirty_st1": 1, + "!scope_bank_force_miss_st1": 1, + "!scope_bank_stall_pipe": 1 + } + }, + "triggers": [ + ["scope_dram_req_valid", "scope_dram_req_ready"], + ["scope_dram_rsp_valid", "scope_dram_rsp_ready"], + ["scope_snp_req_valid", "scope_snp_req_ready"], + ["scope_snp_rsp_valid", "scope_snp_rsp_ready"], + + ["scope_icache_req_valid_top", "scope_icache_req_ready_top"], + ["scope_icache_rsp_valid_top", "scope_icache_rsp_ready_top"], + ["scope_dcache_req_valid_top", "scope_dcache_req_ready_top"], + ["scope_dcache_rsp_valid_top", "scope_dcache_rsp_ready_top"], + + ["scope_bank_valid_st0_l3_top"], + ["scope_bank_valid_st1_l3_top"], + ["scope_bank_valid_st2_l3_top"], + ["scope_bank_stall_pipe_l3_top"], + + ["scope_bank_valid_st0_l2_top"], + ["scope_bank_valid_st1_l2_top"], + ["scope_bank_valid_st2_l2_top"], + ["scope_bank_stall_pipe_l2_top"], + + ["scope_bank_valid_st0_l1d_top"], + ["scope_bank_valid_st1_l1d_top"], + ["scope_bank_valid_st2_l1d_top"], + ["scope_bank_stall_pipe_l1d_top"], + + ["scope_bank_valid_st0_l1i_top"], + ["scope_bank_valid_st1_l1i_top"], + ["scope_bank_valid_st2_l1i_top"], + ["scope_bank_stall_pipe_l1i_top"], + + ["scope_bank_valid_st0_l1s_top"], + ["scope_bank_valid_st1_l1s_top"], + ["scope_bank_valid_st2_l1s_top"], + ["scope_bank_stall_pipe_l1s_top"], + + ["scope_issue_valid_top", "scope_issue_ready_top"], + ["scope_gpr_rsp_valid_top"], + ["scope_scoreboard_delay_top"], + ["scope_gpr_delay_top"], + ["scope_execute_delay_top"], + + ["scope_busy"] + ] + } + \ No newline at end of file diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py new file mode 100755 index 00000000..fcac3d34 --- /dev/null +++ b/hw/scripts/scope.py @@ -0,0 +1,630 @@ +#!/usr/bin/env python3 +import os +import re +import json +import argparse +import math + +vl_include_re = re.compile(r"^\s*`include\s+\"(.+)\"") +vl_define_re = 
re.compile(r"^\s*`define\s+(\w+)(\([\w\s,]*\))?(.*)") +vl_ifdef_re = re.compile(r"^\s*`(ifdef|ifndef|elsif)\s+(\w+)\s*$") +vl_endif_re = re.compile(r"^\s*`(endif|else)\s*$") +vl_expand_re = re.compile(r"`([0-9a-zA-Z_]+)") + +parameters = [] +exclude_files = [] +include_dirs = [] +macros = [] +br_stack = [] + +def parse_func_args(text): + args = [] + arg = '' + l = len(text) + if text[0] != '(': + raise Exception("missing leading parenthesis: " + text) + paren = 1 + for i in range(1, l): + c = text[i] + + if c == '(': + paren += 1 + elif c == ')': + if paren == 0: + raise Exception("mismatched parenthesis: (" + i + ") " + text) + paren -= 1 + if paren == 0: + l = i + break + + if c == ',' and paren == 1: + if arg.strip(): + args.append(arg) + arg = '' + else: + arg += c + + if paren != 0: + raise Exception("missing closing parenthesis: " + text) + + if arg.strip(): + args.append(arg) + + return (args, l) + +def resolve_include_path(filename, parent_dir): + if os.path.basename(filename) in exclude_files: + return None + if os.path.isfile(filename): + return os.path.abspath(filename) + search_dirs = include_dirs + if parent_dir: + search_dirs.append(parent_dir) + for dir in search_dirs: + filepath = os.path.join(dir, filename) + if os.path.isfile(filepath): + return os.path.abspath(filepath) + raise Exception("couldn't find include file: " + filename) + +def remove_comments(text): + text = re.sub(re.compile("/\*.*?\*/",re.DOTALL ), "", text) # multiline + text = re.sub(re.compile("//.*?\n" ), "\n", text) # singleline + return text + +def add_macro(name, args, value): + macro = (name, args, value) + macros.append(macro) + if not args is None: + print("*** token: " + name + "(", end='') + for i in range(len(args)): + if i > 0: + print(', ', end='') + print(args[i], end='') + print(")=" + value) + else: + print("*** token: " + name + "=" + value) + +def find_macro(name): + for macro in macros: + if macro[0] == name: + return macro + return None + +def expand_text(text): 
+ + class DoRepl(object): + def __init__(self): + self.expanded = False + self.has_func = False + def __call__(self, match): + name = match.group(1) + macro = find_macro(name) + if macro: + if not macro[1] is None: + self.has_func = True + else: + self.expanded = True + return macro[2] + return "`" + name + + class DoRepl2(object): + def __init__(self, args, f_args): + map = {} + for i in range(len(args)): + map[args[i]] = f_args[i] + self.map = map + def __call__(self, match): + for key in match.groups(): + return self.map[key] + return group + + def repl_func_macro(text): + expanded = False + match = re.search(vl_expand_re, text) + if match: + name = match.group(1) + macro = find_macro(name) + if macro: + args = macro[1] + value = macro[2] + if not args is None: + str_args = text[match.end():].strip() + f_args = parse_func_args(str_args) + if len(args) == 0: + if len(f_args[0]) != 0: + raise Exception("invalid argments for macro '" + name + "': value=" + text) + else: + if len(args) != len(f_args[0]): + raise Exception("mismatch number of argments for macro '" + name + "': actual=" + len(f_args[0]) + ", expected=" + len(args)) + + pattern = "(? 
0: + pattern += "|" + pattern += args[i] + pattern += ")(?![0-9a-zA-Z_])" + + dorepl = DoRepl2(args, f_args[0]) + value = re.sub(pattern, dorepl, value) + + str_head = text[0:match.start()] + str_tail = text[match.end() + f_args[1]+1:] + text = str_head + value + str_tail + expanded = True + if expanded: + return text + return None + + changed = False + iter = 0 + + while True: + if iter > 99: + raise Exception("Macro recursion!") + has_func = False + while True: + do_repl = DoRepl() + new_text = re.sub(vl_expand_re, do_repl, text) + has_func = do_repl.has_func + if not do_repl.expanded: + break + text = new_text + changed = True + if not has_func: + break + expanded = repl_func_macro(text) + if not expanded: + break + text = expanded + changed = True + iter += 1 + + if changed: + return text + return None + +def parse_include(filename, nesting): + if nesting > 99: + raise Exception("include recursion!") + print("*** parsing '" + filename + "'...") + content = None + with open(filename, "r") as f: + content = f.read() + # remove comments + content = remove_comments(content) + # parse content + prev_line = None + for line in content.splitlines(False): + # skip empty lines + if re.match(re.compile(r'^\s*$'), line): + continue + # merge multi-line lines + if line.endswith('\\'): + if prev_line: + prev_line += line[:len(line) - 1] + else: + prev_line = line[:len(line) - 1] + continue + if prev_line: + line = prev_line + line + prev_line = None + # parse ifdef + m = re.match(vl_ifdef_re, line) + if m: + key = m.group(1) + cond = m.group(2) + taken = find_macro(cond) is not None + if key == 'ifndef': + taken = not taken + elif key == '"elsif': + br_stack.pop() + br_stack.append(taken) + print("*** " + key + "(" + cond + ") => " + str(taken)) + continue + # parse endif + m = re.match(vl_endif_re, line) + if m: + key = m.group(1) + top = br_stack.pop() + if key == 'else': + br_stack.append(not top) + print("*** " + key) + continue + # skip disabled blocks + if not 
all(br_stack): + continue + + # parse include + m = re.match(vl_include_re, line) + if m: + include = m.group(1) + include = resolve_include_path(include, os.path.dirname(filename)) + if include: + parse_include(include, nesting + 1) + continue + # parse define + m = re.match(vl_define_re, line) + if m: + name = m.group(1) + args = m.group(2) + if args: + args = args[1:len(args)-1].strip() + if args != '': + args = args.split(',') + for i in range(len(args)): + args[i] = args[i].strip() + else: + args = [] + value = m.group(3) + add_macro(name, args, value.strip()) + continue + +def parse_includes(includes): + # change current directory to include directory + old_dir = os.getcwd() + script_dir = os.path.dirname(os.path.realpath(__file__)) + os.chdir(script_dir) + + for include in includes: + parse_include(include, 0) + + # restore current directory + os.chdir(old_dir) + +def load_include_dirs(dirs): + for dir in dirs: + print("*** include dir: " + dir) + include_dirs.append(dir) + +def load_defines(defines): + for define in defines: + key_value = define.split('=', 2) + name = key_value[0] + value = '' + if len(key_value) == 2: + value = key_value[1] + add_macro(name, None, value) + +def load_config(filename): + with open(filename, "r") as f: + config = json.load(f) + print("condfig=", config) + return config + +def gen_cc_header(file, ports): + + header = ''' +#pragma once\n +struct scope_signal_t { + int width; + const char* name; +};\n +inline constexpr int __clog2(int n) { return (n > 1) ? 
1 + __clog2((n + 1) >> 1) : 0; }\n +static constexpr scope_signal_t scope_signals[] = {''' + + footer = "};" + + def eval_macro(text): + expanded = expand_text(text) + if expanded: + text = expanded + text = text.replace('$clog2', '__clog2') + return text + + def asize_name(asize): + def Q(arr, ss, asize, idx, N): + for i in range(asize[idx]): + tmp = ss + "_" + str(i) + if (idx + 1) < N: + Q(arr, tmp, asize, idx + 1, N) + else: + arr.append(tmp) + + l = len(asize) + if l == 0: + return [""] + arr = [] + Q(arr, "", asize, 0, l) + return arr + + with open(file, 'w') as f: + print(header, file=f) + i = 0 + for port in ports: + name = port[0] + size = eval_macro(str(port[1])) + for ss in asize_name(port[2]): + if i > 0: + print(",", file=f) + print("\t{" + size + ", \"" + name + ss + "\"}", file=f, end='') + i += 1 + print("", file=f) + print(footer, file=f) + +def gen_vl_header(file, taps, triggers): + + header = ''' +`ifndef VX_SCOPE_DEFS +`define VX_SCOPE_DEFS +''' + footer = '`endif' + + def signal_size(size, asize): + str_asize = "" + for s in asize: + if type(s) == int: + str_asize += "[" + str(s-1) + ":0]" + else: + str_asize += "[" + str(s) + "-1:0]" + + if type(size) == int: + size1 = (size-1) + if size1 != 0: + return str_asize + "[" + str(size1) + ":0]" + else: + return str_asize + else: + return str_asize + "[(" + size + ")-1:0]" + + def generate_ports(tclass, tap, ports, new_taps): + + def emit_io(tap, ports, prefix, asize, return_list, new_taps, is_enabled): + stap = tap + "_IO" + new_taps.append(stap) + print("`define " + stap + " \\", file=f) + if is_enabled: + for key in ports: + size = ports[key] + name = key + is_trigger = False + if name[0] == '!': + name = name[1:] + is_trigger = True + if not return_list is None: + return_list.append((name + prefix, size, asize, is_trigger)) + print("\toutput wire" + signal_size(size, asize) + " " + name + prefix + ", \\", file=f) + print("", file=f) + emit_bind(tap, ports, prefix, prefix, new_taps, is_enabled) + 
+ def emit_bind(tap, ports, from_prefix, to_prefix, new_taps, is_enabled): + stap = tap + "_BIND" + new_taps.append(stap) + print("`define " + stap + " \\", file=f) + for key in ports: + name = key + if name[0] == '!': + name = name[1:] + if is_enabled: + print("\t." + name + to_prefix + " (" + name + from_prefix + "), \\", file=f) + else: + if (from_prefix != to_prefix): + print("\t`UNUSED_PIN (" + name + to_prefix + "), \\", file=f) + print("", file=f) + + def emit_select(tap, ports, from_prefix, to_prefix, new_taps, is_enabled): + stap = tap + "_SELECT(__i__)" + new_taps.append(stap) + print("`define " + stap + " \\", file=f) + if is_enabled: + for key in ports: + name = key + if name[0] == '!': + name = name[1:] + print("\t." + name + to_prefix + " (" + name + from_prefix + "[__i__]), \\", file=f) + print("", file=f) + + def do_top(tap, ports, new_taps): + out_ports = [] + for p in ports: + name = p + is_trigger = False + if name[0] == '!': + name = name[1:] + is_trigger = True + out_ports.append((name, ports[p], [], is_trigger)) + return out_ports + + def do_core(tap, ports, new_taps): + out_ports = [] + nclusters = parameters["NUM_CLUSTERS"] + ncores = parameters["NUM_CORES"] + emit_io(tap + "_TOP", ports, "_top", [nclusters, ncores], out_ports, new_taps, True) + emit_io(tap + "_CLUSTER", ports, "_cluster", [ncores], None, new_taps, True) + emit_io(tap + "", ports, "", [], None, new_taps, True) + emit_select(tap + "_CLUSTER", ports, "_top", "_cluster", new_taps, True) + emit_select(tap + "", ports, "_cluster", "", new_taps, True) + return out_ports + + def do_bank(tap, ports, new_taps): + out_ports = [] + + nclusters = parameters["NUM_CLUSTERS"] + ncores = parameters["NUM_CORES"] + has_l3 = (parameters["L3_ENABLE"] != 0) + has_l2 = (parameters["L2_ENABLE"] != 0) + + emit_io(tap + "_L3_TOP", ports, "_l3_cache", [parameters["L3NUM_BANKS"]], out_ports, new_taps, has_l3) + emit_io(tap + "_L2_TOP", ports, "_l2_top", [nclusters, parameters["L2NUM_BANKS"]], 
out_ports, new_taps, has_l2) + emit_io(tap + "_L1D_TOP", ports, "_l1d_top", [nclusters, ncores, parameters["DNUM_BANKS"]], out_ports, new_taps, True) + emit_io(tap + "_L1I_TOP", ports, "_l1i_top", [nclusters, ncores, parameters["INUM_BANKS"]], out_ports, new_taps, True) + emit_io(tap + "_L1S_TOP", ports, "_l1s_top", [nclusters, ncores, parameters["SNUM_BANKS"]], out_ports, new_taps, True) + + emit_io(tap + "_L2_CLUSTER", ports, "_l2_cache", [parameters["L2NUM_BANKS"]], None, new_taps, has_l2) + emit_io(tap + "_L1D_CLUSTER", ports, "_l1d_cluster", [ncores, parameters["DNUM_BANKS"]], None, new_taps, True) + emit_io(tap + "_L1I_CLUSTER", ports, "_l1i_cluster", [ncores, parameters["INUM_BANKS"]], None, new_taps, True) + emit_io(tap + "_L1S_CLUSTER", ports, "_l1s_cluster", [ncores, parameters["SNUM_BANKS"]], None, new_taps, True) + + emit_io(tap + "_L1D_CORE", ports, "_l1d_cache", [parameters["DNUM_BANKS"]], None, new_taps, True) + emit_io(tap + "_L1I_CORE", ports, "_l1i_cache", [parameters["INUM_BANKS"]], None, new_taps, True) + emit_io(tap + "_L1S_CORE", ports, "_l1s_cache", [parameters["SNUM_BANKS"]], None, new_taps, True) + + emit_io(tap + "_CACHE", ports, "_cache", ["NUM_BANKS"], None, new_taps, True) + emit_io(tap + "", ports, "", [], None, new_taps, True) + + emit_select(tap + "_L2_CLUSTER", ports, "_l2_top", "_l2_cache", new_taps, has_l2) + emit_select(tap + "_L1D_CLUSTER", ports, "_l1d_top", "_l1d_cluster", new_taps, True) + emit_select(tap + "_L1I_CLUSTER", ports, "_l1i_top", "_l1i_cluster", new_taps, True) + emit_select(tap + "_L1S_CLUSTER", ports, "_l1s_top", "_l1s_cluster", new_taps, True) + + emit_select(tap + "_L1D_CORE", ports, "_l1d_cluster", "_l1d_cache", new_taps, True) + emit_select(tap + "_L1I_CORE", ports, "_l1i_cluster", "_l1i_cache", new_taps, True) + emit_select(tap + "_L1S_CORE", ports, "_l1s_cluster", "_l1s_cache", new_taps, True) + + emit_bind(tap + "_L3_CACHE", ports, "_l3_cache", "_cache", new_taps, has_l3) + emit_bind(tap + "_L2_CACHE", 
ports, "_l2_cache", "_cache", new_taps, has_l2) + emit_bind(tap + "_L1D_CACHE", ports, "_l1d_cache", "_cache", new_taps, True) + emit_bind(tap + "_L1I_CACHE", ports, "_l1i_cache", "_cache", new_taps, True) + emit_bind(tap + "_L1S_CACHE", ports, "_l1s_cache", "_cache", new_taps, True) + + emit_select(tap + "", ports, "_cache", "", new_taps, True) + + return out_ports + + callbacks = { + "top": do_top, + "core": do_core, + "bank": do_bank + } + + return callbacks[tclass](tap, ports, new_taps) + + def trigger_size(name, ports): + for port in ports: + if port[0] == name: + return (port[1], port[2]) + return None + + def trigger_prefices(asize): + def Q(arr, ss, asize, idx, N): + for i in range(asize[idx]): + tmp = ss + '[' + str(i) + ']' + if (idx + 1) < N: + Q(arr, tmp, asize, idx + 1, N) + else: + arr.append(tmp) + + l = len(asize) + if l == 0: + return [""] + arr = [] + Q(arr, "", asize, 0, l) + return arr + + def trigger_name(name, size): + if type(size) == int: + size1 = (size-1) + if size1 != 0: + return "(| " + name + ")" + else: + return name + else: + return "(| " + name + ")" + + with open(file, 'w') as f: + print(header, file=f) + + all_ports = [] + new_taps = [] + + for key in taps: + [tclass, tap] = key.split('::') + ports = generate_ports(tclass, tap, taps[key], new_taps) + for port in ports: + all_ports.append(port) + + print("`define SCOPE_SIGNALS_DECL \\", file=f) + i = 0 + for port in all_ports: + if i > 0: + print(" \\", file=f) + print("\twire" + signal_size(port[1], port[2]) + " " + port[0] + ";", file=f, end='') + i += 1 + print("", file=f) + print("", file=f) + + print("`define SCOPE_SIGNALS_DATA_LIST \\", file=f) + i = 0 + for port in all_ports: + if port[3]: + continue + if i > 0: + print(", \\", file=f) + print("\t" + port[0], file=f, end='') + i += 1 + print("", file=f) + print("", file=f) + + print("`define SCOPE_SIGNALS_UPD_LIST \\", file=f) + i = 0 + for port in all_ports: + if not port[3]: + continue + if i > 0: + print(", \\", file=f) + 
print("\t" + port[0], file=f, end='') + i += 1 + print("", file=f) + print("", file=f) + + print("`define SCOPE_TRIGGERS \\", file=f) + i = 0 + for trigger in triggers: + arr = trigger_size(trigger[0], all_ports) + if arr is None: + continue + [size, asize] = arr + for prefix in trigger_prefices(asize): + if i > 0: + print(" | \\", file=f) + print("\t(", file=f, end='') + for j in range(len(trigger)): + if j > 0: + print(" && ", file=f, end='') + print(trigger_name(trigger[j] + prefix, size), file=f, end='') + print(")", file=f, end='') + i += 1 + print("", file=f) + print("", file=f) + + print(footer, file=f) + + return all_ports + +def main(): + parser = argparse.ArgumentParser(description='Scope headers generator.') + parser.add_argument('-vl', nargs='?', default='scope-defs.vh', metavar='file', help='Output Verilog header') + parser.add_argument('-cc', nargs='?', default='scope-defs.h', metavar='file', help='Output C++ header') + parser.add_argument('-D', nargs='?', action='append', metavar='macro[=value]', help='define macro') + parser.add_argument('-I', nargs='?', action='append', metavar='', help='include directory') + parser.add_argument('config', help='Json config file') + args = parser.parse_args() + print("args=", args) + + global parameters + global exclude_files + global include_dirs + global macros + global br_stack + + if args.I: + load_include_dirs(args.I) + + if args.D: + load_defines(args.D) + + config = load_config(args.config) + + exclude_files.append(os.path.basename(args.vl)) + + if "includes" in config: + parse_includes(config["includes"]) + + parameters = config["parameters"] + for key in parameters: + parameters[key] = int(eval(expand_text(str(parameters[key])))) + + ports = gen_vl_header(args.vl, config["taps"], config["triggers"]) + gen_cc_header(args.cc, ports) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index 2b78f017..20e7e85b 100644 --- 
a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -13,6 +13,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE +DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO +DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CORE_REQ_INFO From d2ab8d3cc6aa73f191a6e5b6c67a504f0d0cf28a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carter=20Ren=C3=A9=20Montgomery?= Date: Mon, 5 Oct 2020 14:49:47 -0400 Subject: [PATCH 03/19] Added comments to prep for cache presentation --- hw/rtl/cache/VX_bank.v | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 21125640..05f58ebe 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -263,7 +263,8 @@ module VX_bank #( `DEBUG_BEGIN wire going_to_write_st1; `DEBUG_END - + + //determines if the if it is time to pop a req from the queues wire mrvq_pop_unqual = mrvq_valid_st0; wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty; wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1 && !is_fill_st1; @@ -297,15 +298,19 @@ module VX_bank #( wire snp_invalidate_st1; wire is_mrvq_st1; - assign qual_is_fill_st0 = dfpq_pop_unqual; + //why is the signal prefixed with qual? + assign qual_is_fill_st0 = dfpq_pop_unqual; //dram is filling a request - assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; + assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped + //decides which request to deal with. Priority: 1) DRAM fill, 2) Miss reserve 3) Core req 4) Snp req assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 : mrvq_pop_unqual ? mrvq_addr_st0 : reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : snrq_pop_unqual ? snrq_addr_st0 : 0; + + //Word select does ? 
Does this just pick a specific word from the line instead of the whole line? if (`WORD_SELECT_WIDTH != 0) begin assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] : mrvq_pop_unqual ? mrvq_wsel_st0 : @@ -315,8 +320,10 @@ module VX_bank #( assign qual_wsel_st0 = 0; end + //if you are filling from dram then that is the write data? What about core? What is 57? assign qual_writedata_st0 = dfpq_pop_unqual ? dfpq_filldata_st0 : 57; + // assign qual_inst_meta_st0 = mrvq_pop_unqual ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_rw_st0, mrvq_byteen_st0, mrvq_tid_st0} : reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_rw_st0, reqq_req_byteen_st0, reqq_req_tid_st0} : snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} : @@ -327,6 +334,7 @@ module VX_bank #( (reqq_pop_unqual && reqq_req_rw_st0) ? 1 : 0; + //snp signals check to see if the miss reserve as a snp in it first. assign qual_is_snp_st0 = mrvq_pop_unqual ? mrvq_is_snp_st0 : snrq_pop_unqual ? 
1 : 0; From 309dd48fc680b6baa6c375004ec55b5868406333 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 6 Oct 2020 03:59:27 -0400 Subject: [PATCH 04/19] scope bug fixes --- driver/opae/vlsim/Makefile | 10 ++-- driver/opae/vx_scope.cpp | 114 +++++++++++++++++++++---------------- hw/opae/vortex_afu.sv | 18 ++++-- hw/rtl/VX_platform.vh | 6 +- hw/rtl/VX_scope.vh | 5 +- hw/rtl/libs/VX_scope.v | 56 +++++++++++------- hw/scripts/scope.json | 37 +----------- hw/scripts/scope.py | 112 ++++++++++++++++++++---------------- 8 files changed, 191 insertions(+), 167 deletions(-) diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index dba311b1..947e5698 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -20,11 +20,11 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 DEBUG=1 -#SCOPE=1 +SCOPE=1 CFLAGS += -fPIC @@ -45,7 +45,7 @@ SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) -VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS) +VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS) VL_FLAGS += -Wno-DECLFILENAME VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += verilator.vlt @@ -88,4 +88,4 @@ $(PROJECT): $(SRCS) $(SCOPE_CFG) OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk clean: - rm -rf $(PROJECT) obj_dir + rm -rf $(PROJECT) obj_dir 
../scope-defs.h ../../../hw/rtl/scope-defs.vh diff --git a/driver/opae/vx_scope.cpp b/driver/opae/vx_scope.cpp index 7fb29c9e..dca8311f 100644 --- a/driver/opae/vx_scope.cpp +++ b/driver/opae/vx_scope.cpp @@ -31,6 +31,14 @@ #define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4) #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) +#define CMD_GET_VALID 0 +#define CMD_GET_DATA 1 +#define CMD_GET_WIDTH 2 +#define CMD_GET_COUNT 3 +#define CMD_SET_DELAY 4 +#define CMD_SET_STOP 5 +#define CMD_GET_OFFSET 6 + static constexpr int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); constexpr int calcFrameWidth(int index = 0) { @@ -39,13 +47,24 @@ constexpr int calcFrameWidth(int index = 0) { static constexpr int fwidth = calcFrameWidth(); +uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) { + while (delta != 0) { + ofs << '#' << timestamp++ << std::endl; + ofs << "b0 0" << std::endl; + ofs << '#' << timestamp++ << std::endl; + ofs << "b1 0" << std::endl; + --delta; + } + return timestamp; +} + int vx_scope_start(fpga_handle hfpga, uint64_t delay) { if (nullptr == hfpga) return -1; if (delay != uint64_t(-1)) { // set start delay - uint64_t cmd_delay = ((delay << 3) | 4); + uint64_t cmd_delay = ((delay << 3) | CMD_SET_DELAY); CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay)); std::cout << "scope start delay: " << delay << std::endl; } @@ -59,7 +78,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { if (delay != uint64_t(-1)) { // stop recording - uint64_t cmd_stop = ((delay << 3) | 5); + uint64_t cmd_stop = ((delay << 3) | CMD_SET_STOP); CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop)); std::cout << "scope stop delay: " << delay << std::endl; } @@ -68,18 +87,25 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { ofs << "$version Generated by Vortex Scope $end" << std::endl; ofs << "$timescale 1 ns $end" << std::endl; + ofs << "$scope module TOP $end" << std::endl; ofs << "$var reg 1 0 
clk $end" << std::endl; for (int i = 0; i < num_signals; ++i) { ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end" << std::endl; } + ofs << "$upscope $end" << std::endl; ofs << "enddefinitions $end" << std::endl; - - uint64_t frame_width, max_frames, data_valid; + + uint64_t frame_width, max_frames, data_valid, offset, delta; + uint64_t timestamp = 0; + uint64_t frame_offset = 0; + uint64_t frame_no = 0; + int signal_id = 0; + int signal_offset = 0; // wait for recording to terminate - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0)); + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID)); do { CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid)); if (data_valid) @@ -89,60 +115,45 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { std::cout << "scope trace dump begin..." << std::endl; - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 2)); + // get frame width + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH)); CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width)); - std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl; - - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 3)); - CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames)); - std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl; - - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 1)); + std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl; if (fwidth != (int)frame_width) { std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" 
<< std::endl; std::abort(); } + + // get max frames + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_COUNT)); + CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames)); + std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl; + + // get offset + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_OFFSET)); + CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &offset)); + + // get data + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA)); + + // print clock header + CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta)); + timestamp = print_clock(ofs, offset + delta + 2, timestamp); + signal_id = num_signals; + std::vector signal_data(frame_width+1); - - uint64_t frame_offset = 0; - uint64_t frame_no = 0; - uint64_t timestamp = 0; - int signal_id = 0; - int signal_offset = 0; - - auto print_header = [&] () { - ofs << '#' << timestamp++ << std::endl; - ofs << "b0 0" << std::endl; - ofs << '#' << timestamp++ << std::endl; - ofs << "b1 0" << std::endl; - - uint64_t delta; - auto res = fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta); - assert(res == FPGA_OK); - - while (delta != 0) { - ofs << '#' << timestamp++ << std::endl; - ofs << "b0 0" << std::endl; - ofs << '#' << timestamp++ << std::endl; - ofs << "b1 0" << std::endl; - --delta; - } - - signal_id = num_signals; - }; - - print_header(); do { if (frame_no == (max_frames-1)) { // verify last frame is valid - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0)); + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID)); CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid)); assert(data_valid == 1); - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 1)); + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA)); } + // read next data words uint64_t word; CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word)); @@ -166,17 +177,24 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { 
assert(0 == signal_offset); frame_offset = 0; ++frame_no; - if (frame_no != max_frames) { - print_header(); - } + + if (frame_no != max_frames) { + // print clock header + CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta)); + timestamp = print_clock(ofs, delta + 1, timestamp); + signal_id = num_signals; + //std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl; + } } + } while ((frame_offset % 64) != 0); + } while (frame_no != max_frames); std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl; // verify data not valid - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, 0)); + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID)); CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid)); assert(data_valid == 0); diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 36cf7ca2..ff122e99 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -200,6 +200,10 @@ wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_s wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 
3'(cp2af_sRxPort.c0.data) : 3'h0; +`ifdef SCOPE +reg scope_start; +`endif + always_ff @(posedge clk) begin if (reset) begin @@ -209,14 +213,18 @@ begin cmd_io_addr <= 0; cmd_mem_addr <= 0; cmd_data_size <= 0; + `ifdef SCOPE + scope_start <= 0; + `endif end else begin - mmio_tx.mmioRdValid <= 0; - // serve MMIO write request if (cp2af_sRxPort.c0.mmioWrValid) begin + `ifdef SCOPE + scope_start <= 1; + `endif case (mmio_hdr.address) MMIO_IO_ADDR: begin cmd_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); @@ -1030,7 +1038,7 @@ end `ifdef SCOPE -localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST}); +`SCOPE_ASSIGN (scope_reset, vx_reset); `SCOPE_ASSIGN (scope_dram_req_valid, vx_dram_req_valid); `SCOPE_ASSIGN (scope_dram_req_addr, {vx_dram_req_addr, 4'b0}); @@ -1063,10 +1071,8 @@ localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST wire scope_changed = `SCOPE_TRIGGERS; -wire scope_start = vx_reset; - VX_scope #( - .DATAW (SCOPE_DATAW), + .DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST})), .BUSW (64), .SIZE (4096), .UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST})) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 5c0c4e63..a1818d62 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -22,14 +22,16 @@ /* verilator lint_off WIDTH */ \ /* verilator lint_off UNOPTFLAT */ \ /* verilator lint_off UNDRIVEN */ \ - /* verilator lint_off DECLFILENAME */ + /* verilator lint_off DECLFILENAME */ \ + /* verilator lint_off IMPLICIT */ `define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \ /* verilator lint_on PINCONNECTEMPTY */ \ /* verilator lint_on WIDTH */ \ /* verilator lint_on UNOPTFLAT */ \ /* verilator lint_on UNDRIVEN */ \ - /* verilator lint_on DECLFILENAME */ + /* verilator lint_on DECLFILENAME */ \ + /* verilator lint_on IMPLICIT */ `define UNUSED_VAR(x) always @(x) begin end diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 15b6fb1a..4292bb8f 100644 --- 
a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -6,7 +6,10 @@ `include "scope-defs.vh" -`define SCOPE_ASSIGN(d,s) assign d = s +`define SCOPE_ASSIGN(d,s) \ + `IGNORE_WARNINGS_BEGIN \ + assign d = s \ + `IGNORE_WARNINGS_END `else diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 2b924776..19f385c3 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -18,7 +18,7 @@ module VX_scope #( input wire bus_write, input wire bus_read ); - localparam DELTA_ENABLE = (UPDW != 0); + localparam UPDW_ENABLE = (UPDW != 0); localparam MAX_DELTA = (2 ** DELTAW) - 1; localparam CMD_GET_VALID = 3'd0; @@ -27,19 +27,21 @@ module VX_scope #( localparam CMD_GET_COUNT = 3'd3; localparam CMD_SET_DELAY = 3'd4; localparam CMD_SET_STOP = 3'd5; - localparam CMD_RESERVED1 = 3'd6; + localparam CMD_GET_OFFSET= 3'd6; localparam CMD_RESERVED2 = 3'd7; - localparam GET_VALID = 2'd0; - localparam GET_DATA = 2'd1; - localparam GET_WIDTH = 2'd2; - localparam GET_COUNT = 2'd3; + localparam GET_VALID = 3'd0; + localparam GET_DATA = 3'd1; + localparam GET_WIDTH = 3'd2; + localparam GET_COUNT = 3'd3; + localparam GET_OFFSET = 3'd6; reg [DATAW-1:0] data_store [SIZE-1:0]; reg [DELTAW-1:0] delta_store [SIZE-1:0]; reg [UPDW-1:0] prev_trigger_id; reg [DELTAW-1:0] delta; reg [BUSW-1:0] bus_out_r; + reg [63:0] timestamp, start_time; reg [`CLOG2(SIZE)-1:0] raddr, waddr, waddr_end; @@ -49,8 +51,7 @@ module VX_scope #( reg [BUSW-3:0] delay_val, delay_cntr; - reg [1:0] out_cmd; - + reg [2:0] get_cmd; wire [2:0] cmd_type; wire [BUSW-4:0] cmd_data; assign {cmd_data, cmd_type} = bus_in; @@ -59,7 +60,7 @@ module VX_scope #( always @(posedge clk) begin if (reset) begin - out_cmd <= $bits(out_cmd)'(CMD_GET_VALID); + get_cmd <= $bits(get_cmd)'(CMD_GET_VALID); raddr <= 0; waddr <= 0; waddr_end <= $bits(waddr)'(SIZE-1); @@ -74,13 +75,18 @@ module VX_scope #( read_offset <= 0; read_delta <= 0; data_valid <= 0; + timestamp <= 0; end else begin + + timestamp <= timestamp + 1; + if (bus_write) begin 
case (cmd_type) CMD_GET_VALID, CMD_GET_DATA, CMD_GET_WIDTH, - CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type); + CMD_GET_OFFSET, + CMD_GET_COUNT: get_cmd <= $bits(get_cmd)'(cmd_type); CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data); CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data); default:; @@ -92,8 +98,10 @@ module VX_scope #( delta_flush <= 1; if (0 == delay_val) begin start_wait <= 0; - recording <= 1; - delay_cntr <= 0; + recording <= 1; + delta <= 0; + delay_cntr <= 0; + start_time <= timestamp; end else begin start_wait <= 1; recording <= 0; @@ -106,16 +114,18 @@ module VX_scope #( if (1 == delay_cntr) begin start_wait <= 0; recording <= 1; + delta <= 0; + start_time <= timestamp; end end if (recording) begin - if (DELTA_ENABLE) begin + if (UPDW_ENABLE) begin if (delta_flush || changed || (trigger_id != prev_trigger_id)) begin - data_store[waddr] <= data_in; delta_store[waddr] <= delta; + data_store[waddr] <= data_in; waddr <= waddr + 1; delta <= 0; delta_flush <= 0; @@ -125,7 +135,8 @@ module VX_scope #( end prev_trigger_id <= trigger_id; end else begin - data_store[waddr] <= data_in; + delta_store[waddr] <= 0; + data_store[waddr] <= data_in; waddr <= waddr + 1; end @@ -134,12 +145,12 @@ module VX_scope #( waddr <= waddr; // keep last address recording <= 0; data_valid <= 1; - read_delta <= DELTA_ENABLE; + read_delta <= 1; end end if (bus_read - && (out_cmd == GET_DATA) + && (get_cmd == GET_DATA) && data_valid) begin if (read_delta) begin read_delta <= 0; @@ -150,14 +161,14 @@ module VX_scope #( end else begin raddr <= raddr + 1; read_offset <= 0; - read_delta <= DELTA_ENABLE; + read_delta <= 1; if (raddr == waddr) begin data_valid <= 0; end end end else begin raddr <= raddr + 1; - read_delta <= DELTA_ENABLE; + read_delta <= 1; if (raddr == waddr) begin data_valid <= 0; end @@ -168,11 +179,14 @@ module VX_scope #( end always @(*) begin - case (out_cmd) + case (get_cmd) GET_VALID : bus_out_r = BUSW'(data_valid); GET_WIDTH : bus_out_r = 
BUSW'(DATAW); GET_COUNT : bus_out_r = BUSW'(waddr) + BUSW'(1); + GET_OFFSET: bus_out_r = BUSW'(start_time); + /* verilator lint_off WIDTH */ GET_DATA : bus_out_r = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset); + /* verilator lint_on WIDTH */ default : bus_out_r = 0; endcase end @@ -182,7 +196,7 @@ module VX_scope #( `ifdef DBG_PRINT_SCOPE always @(posedge clk) begin if (bus_read) begin - $display("%t: scope-read: cmd=%0d, out=%0h, addr=%0d", $time, out_cmd, bus_out, raddr); + $display("%t: scope-read: cmd=%0d, addr=%0d, value=%0h", $time, get_cmd, raddr, bus_out); end if (bus_write) begin $display("%t: scope-write: cmd=%0d, value=%0d", $time, cmd_type, cmd_data); diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index 9be503bb..1001e32c 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -27,7 +27,7 @@ "scope_dram_req_tag": "`VX_DRAM_TAG_WIDTH", "!scope_dram_req_ready": 1, "!scope_dram_rsp_valid": 1, - "scope_dram_rsp_data": 128, + "scope_dram_rsp_data": "`VX_DRAM_LINE_WIDTH", "scope_dram_rsp_tag": "`VX_DRAM_TAG_WIDTH", "!scope_dram_rsp_ready": 1, "!scope_snp_req_valid": 1, @@ -83,7 +83,6 @@ "scope_issue_rs1_is_pc": 1, "scope_issue_rs2_is_imm": 1, "!scope_issue_ready": 1, - "!scope_gpr_rsp_valid": 1, "scope_gpr_rsp_wid": "`NW_BITS", "scope_gpr_rsp_pc": 32, "scope_gpr_rsp_a": "`NUM_THREADS * 32", @@ -121,41 +120,11 @@ ["scope_icache_req_valid_top", "scope_icache_req_ready_top"], ["scope_icache_rsp_valid_top", "scope_icache_rsp_ready_top"], + ["scope_dcache_req_valid_top", "scope_dcache_req_ready_top"], ["scope_dcache_rsp_valid_top", "scope_dcache_rsp_ready_top"], - ["scope_bank_valid_st0_l3_top"], - ["scope_bank_valid_st1_l3_top"], - ["scope_bank_valid_st2_l3_top"], - ["scope_bank_stall_pipe_l3_top"], - - ["scope_bank_valid_st0_l2_top"], - ["scope_bank_valid_st1_l2_top"], - ["scope_bank_valid_st2_l2_top"], - ["scope_bank_stall_pipe_l2_top"], - - ["scope_bank_valid_st0_l1d_top"], - 
["scope_bank_valid_st1_l1d_top"], - ["scope_bank_valid_st2_l1d_top"], - ["scope_bank_stall_pipe_l1d_top"], - - ["scope_bank_valid_st0_l1i_top"], - ["scope_bank_valid_st1_l1i_top"], - ["scope_bank_valid_st2_l1i_top"], - ["scope_bank_stall_pipe_l1i_top"], - - ["scope_bank_valid_st0_l1s_top"], - ["scope_bank_valid_st1_l1s_top"], - ["scope_bank_valid_st2_l1s_top"], - ["scope_bank_stall_pipe_l1s_top"], - - ["scope_issue_valid_top", "scope_issue_ready_top"], - ["scope_gpr_rsp_valid_top"], - ["scope_scoreboard_delay_top"], - ["scope_gpr_delay_top"], - ["scope_execute_delay_top"], - - ["scope_busy"] + ["scope_issue_valid_top", "scope_issue_ready_top"] ] } \ No newline at end of file diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index fcac3d34..c360398c 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -291,56 +291,6 @@ def load_config(filename): print("condfig=", config) return config -def gen_cc_header(file, ports): - - header = ''' -#pragma once\n -struct scope_signal_t { - int width; - const char* name; -};\n -inline constexpr int __clog2(int n) { return (n > 1) ? 
1 + __clog2((n + 1) >> 1) : 0; }\n -static constexpr scope_signal_t scope_signals[] = {''' - - footer = "};" - - def eval_macro(text): - expanded = expand_text(text) - if expanded: - text = expanded - text = text.replace('$clog2', '__clog2') - return text - - def asize_name(asize): - def Q(arr, ss, asize, idx, N): - for i in range(asize[idx]): - tmp = ss + "_" + str(i) - if (idx + 1) < N: - Q(arr, tmp, asize, idx + 1, N) - else: - arr.append(tmp) - - l = len(asize) - if l == 0: - return [""] - arr = [] - Q(arr, "", asize, 0, l) - return arr - - with open(file, 'w') as f: - print(header, file=f) - i = 0 - for port in ports: - name = port[0] - size = eval_macro(str(port[1])) - for ss in asize_name(port[2]): - if i > 0: - print(",", file=f) - print("\t{" + size + ", \"" + name + ss + "\"}", file=f, end='') - i += 1 - print("", file=f) - print(footer, file=f) - def gen_vl_header(file, taps, triggers): header = ''' @@ -590,6 +540,68 @@ def gen_vl_header(file, taps, triggers): return all_ports +def gen_cc_header(file, ports): + + header = ''' +#pragma once\n +struct scope_signal_t { + int width; + const char* name; +};\n +inline constexpr int __clog2(int n) { return (n > 1) ? 
1 + __clog2((n + 1) >> 1) : 0; }\n +static constexpr scope_signal_t scope_signals[] = {''' + + footer = "};" + + def eval_macro(text): + expanded = expand_text(text) + if expanded: + text = expanded + text = text.replace('$clog2', '__clog2') + return text + + def asize_name(asize): + def Q(arr, ss, asize, idx, N): + for i in range(asize[idx]): + tmp = ss + "_" + str(i) + if (idx + 1) < N: + Q(arr, tmp, asize, idx + 1, N) + else: + arr.append(tmp) + + l = len(asize) + if l == 0: + return [""] + arr = [] + Q(arr, "", asize, 0, l) + return arr + + with open(file, 'w') as f: + print(header, file=f) + i = 0 + for port in ports: + if port[3]: + continue + name = port[0] + size = eval_macro(str(port[1])) + for ss in asize_name(port[2]): + if i > 0: + print(",", file=f) + print("\t{" + size + ", \"" + name + ss + "\"}", file=f, end='') + i += 1 + for port in ports: + if not port[3]: + continue + name = port[0] + size = eval_macro(str(port[1])) + for ss in asize_name(port[2]): + if i > 0: + print(",", file=f) + print("\t{" + size + ", \"" + name + ss + "\"}", file=f, end='') + i += 1 + print("", file=f) + print(footer, file=f) + def main(): parser = argparse.ArgumentParser(description='Scope headers generator.') parser.add_argument('-vl', nargs='?', default='scope-defs.vh', metavar='file', help='Output Verilog header') From 1f4af4777cc512713241d11239d56420ef581e4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carter=20Ren=C3=A9=20Montgomery?= Date: Tue, 6 Oct 2020 14:35:46 -0400 Subject: [PATCH 05/19] Comments --- hw/rtl/cache/VX_bank.v | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 05f58ebe..f4fc7df5 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -265,6 +265,7 @@ module VX_bank #( `DEBUG_END //determines if the if it is time to pop a req from the queues + //unqual - the req does NOT qualify for execution in the bank. 
wire mrvq_pop_unqual = mrvq_valid_st0; wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty; wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1 && !is_fill_st1; @@ -274,7 +275,8 @@ module VX_bank #( assign dfpq_pop = dfpq_pop_unqual && !stall_bank_pipe; assign reqq_pop = reqq_pop_unqual && !stall_bank_pipe; assign snrq_pop = snrq_pop_unqual && !stall_bank_pipe; - + + //signals to progress to the next stage wire qual_is_fill_st0; wire qual_valid_st0; wire [`LINE_ADDR_WIDTH-1:0] qual_addr_st0; @@ -287,7 +289,8 @@ module VX_bank #( wire qual_going_to_write_st0; wire qual_is_snp_st0; wire qual_snp_invalidate_st0; - + + //signals to be *used* in the next stage wire valid_st1; wire [`LINE_ADDR_WIDTH-1:0] addr_st1; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1; @@ -298,7 +301,7 @@ module VX_bank #( wire snp_invalidate_st1; wire is_mrvq_st1; - //why is the signal prefixed with qual? + //Determine which req will progress to the next stage assign qual_is_fill_st0 = dfpq_pop_unqual; //dram is filling a request assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped @@ -323,12 +326,13 @@ module VX_bank #( //if you are filling from dram then that is the write data? What about core? What is 57? assign qual_writedata_st0 = dfpq_pop_unqual ? dfpq_filldata_st0 : 57; - // + //note that this is stored even if a DRAM fill is processed assign qual_inst_meta_st0 = mrvq_pop_unqual ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_rw_st0, mrvq_byteen_st0, mrvq_tid_st0} : reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_rw_st0, reqq_req_byteen_st0, reqq_req_tid_st0} : snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} : 0; - + + assign qual_going_to_write_st0 = dfpq_pop_unqual ? 1 : (mrvq_pop_unqual && mrvq_rw_st0) ? 1 : (reqq_pop_unqual && reqq_req_rw_st0) ? 
1 : @@ -338,15 +342,16 @@ module VX_bank #( assign qual_is_snp_st0 = mrvq_pop_unqual ? mrvq_is_snp_st0 : snrq_pop_unqual ? 1 : 0; - + //if we are popping from the miss reserve then assign to the mrvq invalidate. If not and popping from the snoop queue use the snoop invalidate. Else this is 0 assign qual_snp_invalidate_st0 = mrvq_pop_unqual ? mrvq_snp_invalidate_st0 : snrq_pop_unqual ? snrq_invalidate_st0 : 0; - + //choose which word of the lien is being written to assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 : reqq_pop_unqual ? reqq_req_writeword_st0 : 0; + assign qual_is_mrvq_st0 = mrvq_pop_unqual; `ifdef DBG_CORE_REQ_INFO From a83048b3bd952447399e0343a7f2c41bf98f6d97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carter=20Ren=C3=A9=20Montgomery?= Date: Tue, 6 Oct 2020 14:50:56 -0400 Subject: [PATCH 06/19] Comments --- hw/rtl/cache/VX_tag_data_store.v | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/cache/VX_tag_data_store.v b/hw/rtl/cache/VX_tag_data_store.v index e0f356cc..e33fc33f 100644 --- a/hw/rtl/cache/VX_tag_data_store.v +++ b/hw/rtl/cache/VX_tag_data_store.v @@ -6,7 +6,7 @@ module VX_tag_data_store #( // Size of line inside a bank in bytes parameter BANK_LINE_SIZE = 0, // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 0, + parameter NUM_BANKS = 0, //unused parameter? 
// Size of a word in bytes parameter WORD_SIZE = 0 ) ( @@ -80,4 +80,4 @@ module VX_tag_data_store #( end end -endmodule \ No newline at end of file +endmodule From 32da50816f658827c6dd36c9c6dec96c0f7234bf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Oct 2020 23:26:02 -0400 Subject: [PATCH 07/19] scope refactoring: adding modules definitions to VCD trace --- driver/opae/Makefile | 18 +- driver/opae/vlsim/Makefile | 6 +- driver/opae/vlsim/vortex_afu_shim.sv | 2 +- driver/opae/vortex.cpp | 8 +- driver/opae/vx_scope.cpp | 67 ++- driver/opae/vx_scope.h | 2 + hw/opae/README | 3 +- hw/opae/ccip_std_afu.sv | 2 +- hw/opae/vortex_afu.qsf | 2 +- hw/opae/vortex_afu.sv | 58 +- hw/rtl/VX_cluster.v | 19 +- hw/rtl/VX_core.v | 17 +- hw/rtl/VX_csr_unit.v | 6 +- hw/rtl/VX_execute.v | 6 +- hw/rtl/VX_fetch.v | 6 +- hw/rtl/VX_gpr_fp_ctrl.v | 16 +- hw/rtl/VX_gpr_ram.v | 10 +- hw/rtl/VX_gpr_stage.v | 9 +- hw/rtl/VX_gpu_unit.v | 16 + hw/rtl/VX_ibuffer.v | 78 +-- hw/rtl/VX_icache_stage.v | 4 +- hw/rtl/VX_ipdom_stack.v | 41 +- hw/rtl/VX_issue.v | 2 +- hw/rtl/VX_lsu_unit.v | 2 +- hw/rtl/VX_mem_unit.v | 34 +- hw/rtl/VX_pipeline.v | 13 +- hw/rtl/VX_platform.vh | 2 +- hw/rtl/VX_scope.vh | 145 +++-- hw/rtl/VX_types.vh | 8 + hw/rtl/VX_warp_sched.v | 9 + hw/rtl/VX_writeback.v | 16 +- hw/rtl/Vortex.v | 30 +- hw/rtl/cache/VX_bank.v | 32 +- hw/rtl/cache/VX_cache.v | 8 +- hw/rtl/cache/VX_cache_core_rsp_merge.v | 2 +- hw/rtl/cache/VX_cache_miss_resrv.v | 12 +- hw/rtl/libs/VX_generic_queue.v | 137 +++-- hw/rtl/libs/VX_index_queue.v | 6 +- hw/rtl/libs/VX_scope.v | 6 +- hw/scripts/scope.json | 301 ++++++---- hw/scripts/scope.py | 732 +++++++++++++++---------- hw/syn/quartus/top/Makefile | 2 +- hw/syn/yosys/synth.ys | 117 ++-- 43 files changed, 1162 insertions(+), 850 deletions(-) diff --git a/driver/opae/Makefile b/driver/opae/Makefile index 66517af2..09ab5d79 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -24,13 +24,6 @@ CXXFLAGS += -fPIC # Dump perf stats CXXFLAGS += 
-DDUMP_PERF_STATS -# Enable scope analyzer -# Enable scope analyzer -ifdef SCOPE - CXXFLAGS += -DSCOPE - SET_SCOPE = SCOPE=1 -endif - LDFLAGS += -shared FPGA_LIBS += -luuid -lopae-c @@ -53,7 +46,14 @@ PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so AFU_JSON_INFO = vortex_afu.h -SRCS = vortex.cpp vx_scope.cpp ../common/vx_utils.cpp +SRCS = vortex.cpp ../common/vx_utils.cpp + +# Enable scope analyzer +ifdef SCOPE + CXXFLAGS += -DSCOPE + SRCS += vx_scope.cpp + SET_SCOPE = SCOPE=1 +endif all: vlsim @@ -64,7 +64,7 @@ json: ../../hw/opae/vortex_afu.json fpga: $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT) -ase: $(SRCS) $(ASE_DIR) +asesim: $(SRCS) $(ASE_DIR) $(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE) vlsim: $(SRCS) opae-vlsim diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 947e5698..f6d26d1c 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -20,10 +20,10 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 +CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DEBUG=1 +#DEBUG=1 SCOPE=1 CFLAGS += -fPIC diff --git a/driver/opae/vlsim/vortex_afu_shim.sv b/driver/opae/vlsim/vortex_afu_shim.sv index 4977979a..cf5735e4 100644 --- a/driver/opae/vlsim/vortex_afu_shim.sv +++ b/driver/opae/vlsim/vortex_afu_shim.sv @@ -87,7 +87,7 @@ t_if_ccip_Tx af2cp_sTxPort; vortex_afu #( .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS) -) vortex_afu ( +) afu ( .clk(clk), .reset(reset), .cp2af_sRxPort(cp2af_sRxPort), diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 6ae578e6..703aca7c 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -509,12 +509,6 @@ extern int vx_start(vx_device_h hdevice) { // start execution 
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN)); -/*#ifdef SCOPE - sleep(15); - vx_scope_stop(device->fpga, 0); - exit(0); -#endif*/ - return 0; } @@ -547,7 +541,7 @@ extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* valu // Ensure ready for new command if (vx_ready_wait(hdevice, -1) != 0) - return -1; + return -1; // write CSR value CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id)); diff --git a/driver/opae/vx_scope.cpp b/driver/opae/vx_scope.cpp index dca8311f..536fe25f 100644 --- a/driver/opae/vx_scope.cpp +++ b/driver/opae/vx_scope.cpp @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include #ifdef USE_VLSIM #include "vlsim/fpga.h" @@ -39,14 +42,30 @@ #define CMD_SET_STOP 5 #define CMD_GET_OFFSET 6 -static constexpr int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); +static constexpr int num_modules = sizeof(scope_modules) / sizeof(scope_module_t); + +static constexpr int num_signals = sizeof(scope_taps) / sizeof(scope_tap_t); constexpr int calcFrameWidth(int index = 0) { - return (index < num_signals) ? (scope_signals[index].width + calcFrameWidth(index + 1)) : 0; + return (index < num_signals) ? 
(scope_taps[index].width + calcFrameWidth(index + 1)) : 0; } static constexpr int fwidth = calcFrameWidth(); +#ifdef HANG_TIMEOUT +static std::thread g_timeout_thread; +static std::mutex g_timeout_mutex; + +static void timeout_callback(fpga_handle fpga) { + std::this_thread::sleep_for(std::chrono::seconds{60}); + if (!g_timeout_mutex.try_lock()) + return; + vx_scope_stop(fpga, HANG_TIMEOUT); + fpgaClose(fpga); + exit(0); +} +#endif + uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) { while (delta != 0) { ofs << '#' << timestamp++ << std::endl; @@ -58,6 +77,27 @@ uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) { return timestamp; } +void dump_taps(std::ofstream& ofs, int module) { + int i = 1; + for (auto& tap : scope_taps) { + if (tap.module != module) + continue; + ofs << "$var reg " << tap.width << " " << i << " " << tap.name << " $end" << std::endl; + i += 1; + } +} + +void dump_module(std::ofstream& ofs, int parent) { + for (auto& module : scope_modules) { + if (module.parent != parent) + continue; + ofs << "$scope module " << module.name << " $end" << std::endl; + dump_module(ofs, module.index); + dump_taps(ofs, module.index); + ofs << "$upscope $end" << std::endl; + } +} + int vx_scope_start(fpga_handle hfpga, uint64_t delay) { if (nullptr == hfpga) return -1; @@ -69,10 +109,20 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) { std::cout << "scope start delay: " << delay << std::endl; } +#ifdef HANG_TIMEOUT + g_timeout_thread = std::thread(timeout_callback, hfpga); + g_timeout_thread.detach(); +#endif + return 0; } int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { +#ifdef HANG_TIMEOUT + if (!g_timeout_mutex.try_lock()) + return 0; +#endif + if (nullptr == hfpga) return -1; @@ -89,11 +139,8 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { ofs << "$timescale 1 ns $end" << std::endl; ofs << "$scope module TOP $end" << std::endl; ofs << "$var reg 1 0 clk $end" << std::endl; - - for 
(int i = 0; i < num_signals; ++i) { - ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end" << std::endl; - } - + dump_module(ofs, -1); + dump_taps(ofs, -1); ofs << "$upscope $end" << std::endl; ofs << "enddefinitions $end" << std::endl; @@ -158,7 +205,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word)); do { - int signal_width = scope_signals[signal_id-1].width; + int signal_width = scope_taps[signal_id-1].width; int word_offset = frame_offset % 64; signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0'; @@ -183,7 +230,9 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta)); timestamp = print_clock(ofs, delta + 1, timestamp); signal_id = num_signals; - //std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl; + if (0 == (frame_no % 100)) { + std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl; + } } } diff --git a/driver/opae/vx_scope.h b/driver/opae/vx_scope.h index f2d5518e..2bb09c4a 100644 --- a/driver/opae/vx_scope.h +++ b/driver/opae/vx_scope.h @@ -1,5 +1,7 @@ #pragma once +#define HANG_TIMEOUT 60 + int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1); int vx_scope_stop(fpga_handle hfpga, uint64_t delay = -1); \ No newline at end of file diff --git a/hw/opae/README b/hw/opae/README index 303f48df..5765123b 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -60,7 +60,7 @@ qsub-sim make ase # tests -./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256 +./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -t1 -n1 ./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16 ./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n1 -s4 -e4 ./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd @@ -81,6 +81,7 @@ tar -zcvf run.log.tar.gz run.log tar -cvjf 
vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd tar -zcvf run.log.tar.gz build_ase_1c/work/run.log +tar -zcvf vx_scope.vcd.tar.gz vx_scope.vcd # decompress VCD trace tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz diff --git a/hw/opae/ccip_std_afu.sv b/hw/opae/ccip_std_afu.sv index f56bb80f..1590e82f 100644 --- a/hw/opae/ccip_std_afu.sv +++ b/hw/opae/ccip_std_afu.sv @@ -104,7 +104,7 @@ module ccip_std_afu #( vortex_afu #( .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS) - ) vortex_afu_inst ( + ) afu ( .clk (clk), .reset (reset_T1), diff --git a/hw/opae/vortex_afu.qsf b/hw/opae/vortex_afu.qsf index 96b1c98b..1356ecb4 100644 --- a/hw/opae/vortex_afu.qsf +++ b/hw/opae/vortex_afu.qsf @@ -1,7 +1,7 @@ # Analysis & Synthesis Assignments set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 -# set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON +set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS OFF set_global_assignment -name VERILOG_MACRO QUARTUS set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index ff122e99..3faca262 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -93,7 +93,7 @@ typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data; state_t state; `ifdef SCOPE -`SCOPE_SIGNALS_DECL +`SCOPE_DECL_SIGNALS `endif // Vortex ports /////////////////////////////////////////////////////////////// @@ -511,8 +511,8 @@ assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && !avs_waitrequest; assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready; assign avs_pending_reads_next = avs_pending_reads - + (((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 : - (~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? 
-1 : 0); + + $bits(avs_pending_reads)'(((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 : + (~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0); if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin assign vx_dram_req_offset = ((DRAM_LINE_LW)'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0])) << VX_DRAM_LINE_LW; @@ -573,8 +573,8 @@ begin end if (cci_dram_rd_req_fire) begin - cci_dram_rd_req_addr <= cci_dram_rd_req_addr + 1; - cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - 1; + cci_dram_rd_req_addr <= cci_dram_rd_req_addr + DRAM_ADDR_WIDTH'(1); + cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE $display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (cci_dram_rd_req_ctr - 1), avs_pending_reads_next); `endif @@ -582,7 +582,7 @@ begin if (cci_dram_wr_req_fire) begin cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); - cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + 1; + cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE $display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1)); `endif @@ -683,15 +683,15 @@ end assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull; assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; -assign cci_rd_req_ctr_next = cci_rd_req_ctr + (cci_rd_req_fire ? 1 : 0); +assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0); assign cci_rdq_pop = cci_dram_wr_req_fire; assign cci_rdq_push = cci_rd_rsp_fire; assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)}; assign cci_pending_reads_next = cci_pending_reads - + ((cci_rd_req_fire && !cci_rdq_pop) ? 
1 : - (!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); + + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 : + (!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait; @@ -734,7 +734,7 @@ begin end if (cci_rd_rsp_fire) begin - cci_rd_rsp_ctr <= cci_rd_rsp_ctr + 1; + cci_rd_rsp_ctr <= cci_rd_rsp_ctr + t_cci_rdq_tag'(1); if (cci_rd_rsp_ctr == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin cci_rd_req_wait <= 0; // restart new request batch end @@ -787,8 +787,8 @@ assign cci_wr_req_fire = af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull; assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; assign cci_pending_writes_next = cci_pending_writes - + ((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 : - (!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0); + + $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 : + (!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0); assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); @@ -817,8 +817,8 @@ begin if (cci_wr_req_fire) begin assert(cci_wr_req_ctr != 0); - cci_wr_req_addr <= cci_wr_req_addr + 1; - cci_wr_req_ctr <= cci_wr_req_ctr - 1; + cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); + cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next); `endif @@ -854,8 +854,8 @@ end assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready; assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready; -assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + 1) : snp_req_ctr; -assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - 1) : snp_rsp_ctr; +assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + `VX_DRAM_ADDR_WIDTH'(1)) : snp_req_ctr; +assign snp_rsp_ctr_next = vx_snp_rsp_fire ? 
(snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'(1)) : snp_rsp_ctr; assign cmd_clflush_done = (0 == snp_rsp_ctr); @@ -894,7 +894,7 @@ begin if (vx_snp_req_fire) begin assert(snp_req_ctr < snp_req_size); - vx_snp_req_addr <= vx_snp_req_addr + 1; + vx_snp_req_addr <= vx_snp_req_addr + `VX_DRAM_ADDR_WIDTH'(1); vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next); snp_req_ctr <= snp_req_ctr_next; `ifdef DBG_PRINT_OPAE @@ -954,15 +954,7 @@ end assign cmd_run_done = !vx_busy; Vortex #() vortex ( - `SCOPE_SIGNALS_ISTAGE_TOP_BIND - `SCOPE_SIGNALS_LSU_TOP_BIND - `SCOPE_SIGNALS_BANK_L3_TOP_BIND - `SCOPE_SIGNALS_BANK_L2_TOP_BIND - `SCOPE_SIGNALS_BANK_L1D_TOP_BIND - `SCOPE_SIGNALS_BANK_L1I_TOP_BIND - `SCOPE_SIGNALS_BANK_L1S_TOP_BIND - `SCOPE_SIGNALS_ISSUE_TOP_BIND - `SCOPE_SIGNALS_EXECUTE_TOP_BIND + `SCOPE_BIND_vortex_afu_vortex() .clk (clk), .reset (reset | vx_reset), @@ -1001,10 +993,10 @@ Vortex #() vortex ( `UNUSED_PIN (io_req_addr), `UNUSED_PIN (io_req_data), `UNUSED_PIN (io_req_tag), - .io_req_ready (1), + .io_req_ready (1'b1), // I/O response - .io_rsp_valid (0), + .io_rsp_valid (1'b0), .io_rsp_data (0), .io_rsp_tag (0), `UNUSED_PIN (io_rsp_ready), @@ -1069,20 +1061,20 @@ end `SCOPE_ASSIGN (scope_busy, vx_busy); -wire scope_changed = `SCOPE_TRIGGERS; +wire scope_changed = `SCOPE_TRIGGER; VX_scope #( - .DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST})), + .DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})), .BUSW (64), .SIZE (4096), - .UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST})) + .UPDW ($bits({`SCOPE_UPDATE_LIST})) ) scope ( .clk (clk), .reset (reset), .start (scope_start), - .stop (0), + .stop (1'b0), .changed (scope_changed), - .data_in ({`SCOPE_SIGNALS_DATA_LIST,`SCOPE_SIGNALS_UPD_LIST}), + .data_in ({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST}), .bus_in (cmd_scope_wdata), .bus_out (cmd_scope_rdata), .bus_read (cmd_scope_read), diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index d3b75b68..5f69a8e2 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ 
-3,14 +3,7 @@ module VX_cluster #( parameter CLUSTER_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_CLUSTER_IO - `SCOPE_SIGNALS_LSU_CLUSTER_IO - `SCOPE_SIGNALS_BANK_L2_CLUSTER_IO - `SCOPE_SIGNALS_BANK_L1D_CLUSTER_IO - `SCOPE_SIGNALS_BANK_L1I_CLUSTER_IO - `SCOPE_SIGNALS_BANK_L1S_CLUSTER_IO - `SCOPE_SIGNALS_ISSUE_CLUSTER_IO - `SCOPE_SIGNALS_EXECUTE_CLUSTER_IO + `SCOPE_IO_VX_cluster // Clock input wire clk, @@ -141,13 +134,7 @@ module VX_cluster #( VX_core #( .CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) ) core ( - `SCOPE_SIGNALS_ISTAGE_SELECT(i) - `SCOPE_SIGNALS_LSU_SELECT(i) - `SCOPE_SIGNALS_BANK_L1D_CORE_SELECT(i) - `SCOPE_SIGNALS_BANK_L1I_CORE_SELECT(i) - `SCOPE_SIGNALS_BANK_L1S_CORE_SELECT(i) - `SCOPE_SIGNALS_ISSUE_SELECT(i) - `SCOPE_SIGNALS_EXECUTE_SELECT(i) + `SCOPE_BIND_VX_cluster_core(i) .clk (clk), .reset (reset), @@ -385,7 +372,7 @@ module VX_cluster #( .SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH), .SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH) ) l2cache ( - `SCOPE_SIGNALS_BANK_L2_CACHE_BIND + `SCOPE_BIND_VX_cluster_l2cache() .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index 91e0a6ac..f47eabd8 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -3,13 +3,7 @@ module VX_core #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_IO - `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_BANK_L1D_CORE_IO - `SCOPE_SIGNALS_BANK_L1I_CORE_IO - `SCOPE_SIGNALS_BANK_L1S_CORE_IO - `SCOPE_SIGNALS_ISSUE_IO - `SCOPE_SIGNALS_EXECUTE_IO + `SCOPE_IO_VX_core // Clock input wire clk, @@ -181,10 +175,7 @@ module VX_core #( VX_pipeline #( .CORE_ID(CORE_ID) ) pipeline ( - `SCOPE_SIGNALS_ISTAGE_BIND - `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_ISSUE_BIND - `SCOPE_SIGNALS_EXECUTE_BIND + `SCOPE_BIND_VX_core_pipeline() .clk(clk), .reset(reset), @@ -260,9 +251,7 @@ module VX_core #( VX_mem_unit #( .CORE_ID(CORE_ID) ) mem_unit ( - `SCOPE_SIGNALS_BANK_L1D_CORE_BIND - `SCOPE_SIGNALS_BANK_L1I_CORE_BIND - `SCOPE_SIGNALS_BANK_L1S_CORE_BIND + `SCOPE_BIND_VX_core_mem_unit() .clk (clk), .reset (reset), diff 
--git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 36ba7baf..40b76d83 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -7,7 +7,7 @@ module VX_csr_unit #( input wire reset, VX_cmt_to_csr_if cmt_to_csr_if, - VX_csr_to_issue_if csr_to_issue_if, + VX_csr_to_issue_if csr_to_issue_if, VX_csr_io_req_if csr_io_req_if, VX_csr_io_rsp_if csr_io_rsp_if, @@ -15,8 +15,8 @@ module VX_csr_unit #( VX_csr_req_if csr_req_if, VX_exu_to_cmt_if csr_commit_if ); - VX_csr_req_if csr_pipe_req_if(); - VX_exu_to_cmt_if csr_pipe_rsp_if(); + VX_csr_req_if csr_pipe_req_if(); + VX_exu_to_cmt_if csr_pipe_rsp_if(); wire select_io_req = csr_io_req_if.valid; wire select_io_rsp; diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index f542d3ce..e56e583c 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -3,8 +3,7 @@ module VX_execute #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_EXECUTE_IO + `SCOPE_IO_VX_execute input wire clk, input wire reset, @@ -55,7 +54,7 @@ module VX_execute #( VX_lsu_unit #( .CORE_ID(CORE_ID) ) lsu_unit ( - `SCOPE_SIGNALS_LSU_BIND + `SCOPE_BIND_VX_execute_lsu_unit() .clk (clk), .reset (reset), .dcache_req_if (dcache_req_if), @@ -122,6 +121,7 @@ module VX_execute #( VX_gpu_unit #( .CORE_ID(CORE_ID) ) gpu_unit ( + `SCOPE_BIND_VX_execute_gpu_unit() .clk (clk), .reset (reset), .gpu_req_if (gpu_req_if), diff --git a/hw/rtl/VX_fetch.v b/hw/rtl/VX_fetch.v index cf0c2e45..1d304ffc 100644 --- a/hw/rtl/VX_fetch.v +++ b/hw/rtl/VX_fetch.v @@ -3,7 +3,7 @@ module VX_fetch #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_IO + `SCOPE_IO_VX_fetch input wire clk, input wire reset, @@ -29,6 +29,8 @@ module VX_fetch #( VX_warp_sched #( .CORE_ID(CORE_ID) ) warp_sched ( + `SCOPE_BIND_VX_fetch_warp_sched() + .clk (clk), .reset (reset), .warp_ctl_if (warp_ctl_if), @@ -43,7 +45,7 @@ module VX_fetch #( VX_icache_stage #( .CORE_ID(CORE_ID) ) icache_stage ( - `SCOPE_SIGNALS_ISTAGE_BIND + `SCOPE_BIND_VX_fetch_icache_stage() .clk (clk), 
.reset (reset), diff --git a/hw/rtl/VX_gpr_fp_ctrl.v b/hw/rtl/VX_gpr_fp_ctrl.v index 1d7224ab..c40df875 100644 --- a/hw/rtl/VX_gpr_fp_ctrl.v +++ b/hw/rtl/VX_gpr_fp_ctrl.v @@ -41,24 +41,20 @@ module VX_gpr_fp_ctrl ( read_rs1 <= 1; end - rsp_valid <= gpr_req_if.valid; - rsp_wid <= gpr_req_if.wid; - rsp_pc <= gpr_req_if.PC; + rsp_valid <= gpr_req_if.valid; + rsp_wid <= gpr_req_if.wid; + rsp_pc <= gpr_req_if.PC; if (read_rs1) begin - rsp_rs1_data <= rs1_data; + rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data; end - rsp_rs2_data <= rs2_data; - rsp_rs3_data <= rs1_data; + rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data; + rsp_rs3_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data; assert(read_rs1 || rsp_wid == gpr_req_if.wid); end end - always @(posedge clk) begin - - end - // outputs wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_req_if.rs1 : gpr_req_if.rs3; assign raddr1 = {gpr_req_if.wid, rs1}; diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index 8f1b4483..352a17e0 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -12,15 +12,7 @@ module VX_gpr_ram ( ); `ifndef ASIC - reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0]; - - initial begin // initialize ram: set r0 = 0 - for (integer j = 0; j < `NUM_WARPS; j++) begin - for (integer i = 0; i < `NUM_REGS; i++) begin - ram[j * `NUM_REGS + i] = (i == 0) ? 
{`NUM_THREADS{32'h0}} : {`NUM_THREADS{32'hx}}; - end - end - end + reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0]; always @(posedge clk) begin for (integer i = 0; i < `NUM_THREADS; i++) begin diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 129da4c0..23d9db16 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -15,9 +15,8 @@ module VX_gpr_stage #( ); `UNUSED_VAR (reset) - wire [`NUM_THREADS-1:0][31:0] rs1_data; - wire [`NUM_THREADS-1:0][31:0] rs2_data; - wire [`NW_BITS+`NR_BITS-1:0] raddr1; + wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data; + wire [`NW_BITS+`NR_BITS-1:0] raddr1; VX_gpr_ram gpr_ram ( .clk (clk), @@ -57,8 +56,8 @@ module VX_gpr_stage #( rsp_valid <= gpr_req_if.valid; rsp_wid <= gpr_req_if.wid; rsp_pc <= gpr_req_if.PC; - rsp_rs1_data <= rs1_data; - rsp_rs2_data <= rs2_data; + rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data; + rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data; end end diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index ac6550a3..ffad1717 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -3,6 +3,8 @@ module VX_gpu_unit #( parameter CORE_ID = 0 ) ( + `SCOPE_IO_VX_gpu_unit + input wire clk, input wire reset, @@ -88,4 +90,18 @@ module VX_gpu_unit #( // can accept new request? 
assign gpu_req_if.ready = gpu_commit_if.ready; + `SCOPE_ASSIGN (scope_gpu_req_valid, gpu_req_if.valid); + `SCOPE_ASSIGN (scope_gpu_req_wid, gpu_req_if.wid); + `SCOPE_ASSIGN (scope_gpu_req_tmask, gpu_req_if.tmask); + `SCOPE_ASSIGN (scope_gpu_req_op_type, gpu_req_if.op_type); + `SCOPE_ASSIGN (scope_gpu_req_rs1, gpu_req_if.rs1_data[0]); + `SCOPE_ASSIGN (scope_gpu_req_rs2, gpu_req_if.rs2_data); + `SCOPE_ASSIGN (scope_gpu_req_ready, gpu_req_if.ready); + `SCOPE_ASSIGN (scope_gpu_rsp_valid, warp_ctl_if.valid); + `SCOPE_ASSIGN (scope_gpu_rsp_wid, warp_ctl_if.wid); + `SCOPE_ASSIGN (scope_gpu_rsp_tmc, warp_ctl_if.tmc); + `SCOPE_ASSIGN (scope_gpu_rsp_wspawn, warp_ctl_if.wspawn); + `SCOPE_ASSIGN (scope_gpu_rsp_split, warp_ctl_if.split); + `SCOPE_ASSIGN (scope_gpu_rsp_barrier, warp_ctl_if.barrier); + endmodule \ No newline at end of file diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 3f891a45..8c712eff 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -20,16 +20,13 @@ module VX_ibuffer #( localparam ADDRW = $clog2(SIZE); localparam NWARPSW = $clog2(`NUM_WARPS+1); - `USE_FAST_BRAM reg [DATAW-1:0] entries [`NUM_WARPS-1:0][SIZE-1:0]; - reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0]; - reg [ADDRW:0] rd_ptr_r [`NUM_WARPS-1:0]; - reg [ADDRW:0] wr_ptr_r [`NUM_WARPS-1:0]; - wire [`NUM_WARPS-1:0] q_full; wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size; wire [DATAW-1:0] q_data_in; wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev; + reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out; + reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0]; wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready; wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready; @@ -39,41 +36,50 @@ module VX_ibuffer #( wire writing = enq_fire && (i == ibuf_enq_if.wid); wire reading = deq_fire && (i == ibuf_deq_if.wid); - wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[i][ADDRW-1:0]; - wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[i][ADDRW-1:0]; - + wire is_slot0 = ((0 == size_r[i]) || ((1 == size_r[i]) && reading)); + + wire push = writing && 
!is_slot0; + wire pop = reading && (size_r[i] != 1); + + VX_generic_queue #( + .DATAW(DATAW), + .SIZE(SIZE) + ) queue ( + .clk (clk), + .reset (reset), + .push (push), + .data_in (q_data_in), + .pop (pop), + .data_out (q_data_prev[i]), + `UNUSED_PIN (empty), + `UNUSED_PIN (full), + `UNUSED_PIN (size) + ); + + always @(posedge clk) begin + if (writing && is_slot0) begin + q_data_out[i] <= q_data_in; + end + if (pop) begin + q_data_out[i] <= q_data_prev[i]; + end + end + always @(posedge clk) begin if (reset) begin - rd_ptr_r[i] <= 0; - wr_ptr_r[i] <= 0; - size_r[i] <= 0; + size_r[i] <= 0; end else begin - if (writing) begin - if ((0 == size_r[i]) || ((1 == size_r[i]) && reading)) begin - q_data_out[i] <= q_data_in; - end else begin - entries[i][wr_ptr_a] <= q_data_in; - wr_ptr_r[i] <= wr_ptr_r[i] + ADDRW'(1); - end - if (!reading) begin - size_r[i] <= size_r[i] + SIZEW'(1); - end + if (writing && !reading) begin + size_r[i] <= size_r[i] + SIZEW'(1); end - if (reading) begin - if (size_r[i] != 1) begin - q_data_out[i] <= q_data_prev[i]; - rd_ptr_r[i] <= rd_ptr_r[i] + ADDRW'(1); - end - if (!writing) begin - size_r[i] <= size_r[i] - SIZEW'(1); - end + if (reading && !writing) begin + size_r[i] <= size_r[i] - SIZEW'(1); end end - end + end - assign q_data_prev[i] = entries[i][rd_ptr_a]; - assign q_full[i] = (size_r[i] == SIZE); - assign q_size[i] = size_r[i]; + assign q_full[i] = (size_r[i] == SIZE); + assign q_size[i] = size_r[i]; end /////////////////////////////////////////////////////////////////////////// @@ -144,9 +150,9 @@ module VX_ibuffer #( schedule_table[deq_wid_n] <= 0; end - deq_valid <= deq_valid_n; - deq_wid <= deq_wid_n; - deq_instr <= deq_instr_n; + deq_valid <= deq_valid_n; + deq_wid <= deq_wid_n; + deq_instr <= deq_instr_n; if (warp_added && !warp_removed) begin num_warps <= num_warps + NWARPSW'(1); diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 1bb61f92..e0bf94af 100644 --- a/hw/rtl/VX_icache_stage.v +++ 
b/hw/rtl/VX_icache_stage.v @@ -3,7 +3,7 @@ module VX_icache_stage #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_IO + `SCOPE_IO_VX_icache_stage input wire clk, input wire reset, @@ -30,7 +30,7 @@ module VX_icache_stage #( always @(posedge clk) begin if (icache_req_fire) begin - rsp_PC_buf[req_tag] <= ifetch_req_if.PC; + rsp_PC_buf[req_tag] <= ifetch_req_if.PC; rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask; end end diff --git a/hw/rtl/VX_ipdom_stack.v b/hw/rtl/VX_ipdom_stack.v index f388d3d0..e00097ae 100644 --- a/hw/rtl/VX_ipdom_stack.v +++ b/hw/rtl/VX_ipdom_stack.v @@ -1,4 +1,3 @@ - `include "VX_platform.vh" module VX_ipdom_stack #( @@ -17,33 +16,55 @@ module VX_ipdom_stack #( ); localparam STACK_SIZE = 2 ** DEPTH; - `USE_FAST_BRAM reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; - `USE_FAST_BRAM reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; - `USE_FAST_BRAM reg is_part [0:STACK_SIZE-1]; + reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; + reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; + reg is_part [0:STACK_SIZE-1]; reg [DEPTH-1:0] rd_ptr, wr_ptr; + reg [WIDTH - 1:0] d1, d2; + reg p; + always @(posedge clk) begin if (reset) begin + rd_ptr <= 0; wr_ptr <= 0; end else begin if (push) begin - stack_1[wr_ptr] <= q1; - stack_2[wr_ptr] <= q2; - is_part[wr_ptr] <= 0; rd_ptr <= wr_ptr; wr_ptr <= wr_ptr + DEPTH'(1); end else if (pop) begin wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]); rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]); - is_part[rd_ptr] <= 1; end end end - assign d = is_part[rd_ptr] ? stack_1[rd_ptr] : stack_2[rd_ptr]; + always @(posedge clk) begin + if (push) begin + stack_1[wr_ptr] <= q1; + end + end + assign d1 = stack_1[rd_ptr]; - assign empty = (0 == wr_ptr); + always @(posedge clk) begin + if (push) begin + stack_2[wr_ptr] <= q2; + end + end + assign d2 = stack_2[rd_ptr]; + + always @(posedge clk) begin + if (push) begin + is_part[wr_ptr] <= 0; + end else if (pop) begin + is_part[rd_ptr] <= 1; + end + end + assign p = is_part[rd_ptr]; + + assign d = p ? 
d1 : d2; + assign empty = ~(| wr_ptr); assign full = ((STACK_SIZE-1) == wr_ptr); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 1c1e4f8a..46e0388f 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -3,7 +3,7 @@ module VX_issue #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISSUE_IO + `SCOPE_IO_VX_issue input wire clk, input wire reset, diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 797b30cd..52646138 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -3,7 +3,7 @@ module VX_lsu_unit #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_LSU_IO + `SCOPE_IO_VX_lsu_unit input wire clk, input wire reset, diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 9cd29a1a..ae8fddde 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -3,9 +3,7 @@ module VX_mem_unit # ( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_BANK_L1D_CORE_IO - `SCOPE_SIGNALS_BANK_L1I_CORE_IO - `SCOPE_SIGNALS_BANK_L1S_CORE_IO + `SCOPE_IO_VX_mem_unit input wire clk, input wire reset, @@ -79,7 +77,7 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) ) smem ( - `SCOPE_SIGNALS_BANK_L1S_CACHE_BIND + `SCOPE_BIND_VX_mem_unit_smem() .clk (clk), .reset (reset), @@ -106,7 +104,7 @@ module VX_mem_unit # ( `UNUSED_PIN (dram_req_addr), `UNUSED_PIN (dram_req_data), `UNUSED_PIN (dram_req_tag), - .dram_req_ready (0), + .dram_req_ready (1'b0), // DRAM response .dram_rsp_valid (0), @@ -115,7 +113,7 @@ module VX_mem_unit # ( `UNUSED_PIN (dram_rsp_ready), // Snoop request - .snp_req_valid (0), + .snp_req_valid (1'b0), .snp_req_addr (0), .snp_req_invalidate (0), .snp_req_tag (0), @@ -124,17 +122,17 @@ module VX_mem_unit # ( // Snoop response `UNUSED_PIN (snp_rsp_valid), `UNUSED_PIN (snp_rsp_tag), - .snp_rsp_ready (0), + .snp_rsp_ready (1'b0), // Snoop forward out `UNUSED_PIN (snp_fwdout_valid), `UNUSED_PIN (snp_fwdout_addr), `UNUSED_PIN (snp_fwdout_invalidate), `UNUSED_PIN 
(snp_fwdout_tag), - .snp_fwdout_ready (0), + .snp_fwdout_ready (1'b0), // Snoop forward in - .snp_fwdin_valid (0), + .snp_fwdin_valid (1'b0), .snp_fwdin_tag (0), `UNUSED_PIN (snp_fwdin_ready) ); @@ -161,7 +159,7 @@ module VX_mem_unit # ( .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), .SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH) ) dcache ( - `SCOPE_SIGNALS_BANK_L1D_CACHE_BIND + `SCOPE_BIND_VX_mem_unit_dcache() .clk (clk), .reset (reset), @@ -213,10 +211,10 @@ module VX_mem_unit # ( `UNUSED_PIN (snp_fwdout_addr), `UNUSED_PIN (snp_fwdout_invalidate), `UNUSED_PIN (snp_fwdout_tag), - .snp_fwdout_ready (0), + .snp_fwdout_ready (1'b0), // Snoop forward in - .snp_fwdin_valid (0), + .snp_fwdin_valid (1'b0), .snp_fwdin_tag (0), `UNUSED_PIN (snp_fwdin_ready) ); @@ -242,7 +240,7 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) ) icache ( - `SCOPE_SIGNALS_BANK_L1I_CACHE_BIND + `SCOPE_BIND_VX_mem_unit_icache() .clk (clk), .reset (reset), @@ -278,26 +276,26 @@ module VX_mem_unit # ( .dram_rsp_ready (icache_dram_rsp_if.ready), // Snoop request - .snp_req_valid (0), + .snp_req_valid (1'b0), .snp_req_addr (0), - .snp_req_invalidate (0), + .snp_req_invalidate (1'b0), .snp_req_tag (0), `UNUSED_PIN (snp_req_ready), // Snoop response `UNUSED_PIN (snp_rsp_valid), `UNUSED_PIN (snp_rsp_tag), - .snp_rsp_ready (0), + .snp_rsp_ready (1'b0), // Snoop forward out `UNUSED_PIN (snp_fwdout_valid), `UNUSED_PIN (snp_fwdout_addr), `UNUSED_PIN (snp_fwdout_invalidate), `UNUSED_PIN (snp_fwdout_tag), - .snp_fwdout_ready (0), + .snp_fwdout_ready (1'b0), // Snoop forward in - .snp_fwdin_valid (0), + .snp_fwdin_valid (1'b0), .snp_fwdin_tag (0), `UNUSED_PIN (snp_fwdin_ready) ); diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index c2629eec..86cd7003 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -3,10 +3,7 @@ module VX_pipeline #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_ISTAGE_IO - `SCOPE_SIGNALS_LSU_IO - `SCOPE_SIGNALS_ISSUE_IO - 
`SCOPE_SIGNALS_EXECUTE_IO + `SCOPE_IO_VX_pipeline // Clock input wire clk, @@ -126,7 +123,7 @@ module VX_pipeline #( VX_fetch #( .CORE_ID(CORE_ID) ) fetch ( - `SCOPE_SIGNALS_ISTAGE_BIND + `SCOPE_BIND_VX_pipeline_fetch() .clk (clk), .reset (reset), .icache_req_if (core_icache_req_if), @@ -153,7 +150,7 @@ module VX_pipeline #( VX_issue #( .CORE_ID(CORE_ID) ) issue ( - `SCOPE_SIGNALS_ISSUE_BIND + `SCOPE_BIND_VX_pipeline_issue() .clk (clk), .reset (reset), @@ -173,8 +170,8 @@ module VX_pipeline #( VX_execute #( .CORE_ID(CORE_ID) ) execute ( - `SCOPE_SIGNALS_LSU_BIND - `SCOPE_SIGNALS_EXECUTE_BIND + `SCOPE_BIND_VX_pipeline_execute() + .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index a1818d62..a377c461 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -52,7 +52,7 @@ /////////////////////////////////////////////////////////////////////////////// `define USE_FAST_BRAM (* syn_ramstyle = "mlab" *) -`define RELAX_BRAM_RW (* syn_ramstyle = "no_rw_check" *) +`define RELAXED_RW_BRAM (* syn_ramstyle = "no_rw_check" *) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 4292bb8f..2c007e33 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -1,4 +1,3 @@ - `ifndef VX_SCOPE `define VX_SCOPE @@ -6,86 +5,76 @@ `include "scope-defs.vh" -`define SCOPE_ASSIGN(d,s) \ - `IGNORE_WARNINGS_BEGIN \ - assign d = s \ - `IGNORE_WARNINGS_END +`define SCOPE_ASSIGN(d,s) assign d = s `else -`define SCOPE_SIGNALS_ISTAGE_TOP_IO -`define SCOPE_SIGNALS_ISTAGE_TOP_BIND -`define SCOPE_SIGNALS_ISTAGE_CLUSTER_IO -`define SCOPE_SIGNALS_ISTAGE_CLUSTER_BIND -`define SCOPE_SIGNALS_ISTAGE_IO -`define SCOPE_SIGNALS_ISTAGE_BIND -`define SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_ISTAGE_SELECT(__i__) -`define SCOPE_SIGNALS_LSU_TOP_IO -`define SCOPE_SIGNALS_LSU_TOP_BIND -`define SCOPE_SIGNALS_LSU_CLUSTER_IO -`define 
SCOPE_SIGNALS_LSU_CLUSTER_BIND -`define SCOPE_SIGNALS_LSU_IO -`define SCOPE_SIGNALS_LSU_BIND -`define SCOPE_SIGNALS_LSU_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_LSU_SELECT(__i__) -`define SCOPE_SIGNALS_ISSUE_TOP_IO -`define SCOPE_SIGNALS_ISSUE_TOP_BIND -`define SCOPE_SIGNALS_ISSUE_CLUSTER_IO -`define SCOPE_SIGNALS_ISSUE_CLUSTER_BIND -`define SCOPE_SIGNALS_ISSUE_IO -`define SCOPE_SIGNALS_ISSUE_BIND -`define SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_ISSUE_SELECT(__i__) -`define SCOPE_SIGNALS_EXECUTE_TOP_IO -`define SCOPE_SIGNALS_EXECUTE_TOP_BIND -`define SCOPE_SIGNALS_EXECUTE_CLUSTER_IO -`define SCOPE_SIGNALS_EXECUTE_CLUSTER_BIND -`define SCOPE_SIGNALS_EXECUTE_IO -`define SCOPE_SIGNALS_EXECUTE_BIND -`define SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_EXECUTE_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L3_TOP_IO -`define SCOPE_SIGNALS_BANK_L3_TOP_BIND -`define SCOPE_SIGNALS_BANK_L2_TOP_IO -`define SCOPE_SIGNALS_BANK_L2_TOP_BIND -`define SCOPE_SIGNALS_BANK_L1D_TOP_IO -`define SCOPE_SIGNALS_BANK_L1D_TOP_BIND -`define SCOPE_SIGNALS_BANK_L1I_TOP_IO -`define SCOPE_SIGNALS_BANK_L1I_TOP_BIND -`define SCOPE_SIGNALS_BANK_L1S_TOP_IO -`define SCOPE_SIGNALS_BANK_L1S_TOP_BIND -`define SCOPE_SIGNALS_BANK_L2_CLUSTER_IO -`define SCOPE_SIGNALS_BANK_L2_CLUSTER_BIND -`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_IO -`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_BIND -`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_IO -`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_BIND -`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_IO -`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_BIND -`define SCOPE_SIGNALS_BANK_L1D_CORE_IO -`define SCOPE_SIGNALS_BANK_L1D_CORE_BIND -`define SCOPE_SIGNALS_BANK_L1I_CORE_IO -`define SCOPE_SIGNALS_BANK_L1I_CORE_BIND -`define SCOPE_SIGNALS_BANK_L1S_CORE_IO -`define SCOPE_SIGNALS_BANK_L1S_CORE_BIND -`define SCOPE_SIGNALS_BANK_CACHE_IO -`define SCOPE_SIGNALS_BANK_CACHE_BIND -`define SCOPE_SIGNALS_BANK_IO -`define SCOPE_SIGNALS_BANK_BIND -`define 
SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1D_CORE_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1I_CORE_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L1S_CORE_SELECT(__i__) -`define SCOPE_SIGNALS_BANK_L3_CACHE_BIND -`define SCOPE_SIGNALS_BANK_L2_CACHE_BIND -`define SCOPE_SIGNALS_BANK_L1D_CACHE_BIND -`define SCOPE_SIGNALS_BANK_L1I_CACHE_BIND -`define SCOPE_SIGNALS_BANK_L1S_CACHE_BIND -`define SCOPE_SIGNALS_BANK_SELECT(__i__) +`define SCOPE_IO_vortex_afu + +`define SCOPE_IO_VX_icache_stage + +`define SCOPE_IO_VX_fetch + +`define SCOPE_BIND_VX_fetch_icache_stage() + +`define SCOPE_IO_VX_pipeline + +`define SCOPE_BIND_VX_pipeline_fetch() + +`define SCOPE_IO_VX_core + +`define SCOPE_BIND_VX_core_pipeline() + +`define SCOPE_IO_VX_cluster + +`define SCOPE_BIND_VX_cluster_core(__i__) + +`define SCOPE_IO_Vortex + +`define SCOPE_BIND_Vortex_cluster(__i__) + +`define SCOPE_BIND_vortex_afu_vortex() + +`define SCOPE_IO_VX_lsu_unit + +`define SCOPE_IO_VX_execute + +`define SCOPE_BIND_VX_execute_lsu_unit() + +`define SCOPE_BIND_VX_pipeline_execute() + +`define SCOPE_IO_VX_issue + +`define SCOPE_BIND_VX_pipeline_issue() + +`define SCOPE_IO_VX_bank + +`define SCOPE_IO_VX_cache + +`define SCOPE_BIND_VX_cache_bank(__i__) + +`define SCOPE_BIND_Vortex_l3cache() + +`define SCOPE_BIND_VX_cluster_l2cache() + +`define SCOPE_IO_VX_mem_unit + +`define SCOPE_BIND_VX_mem_unit_dcache() + +`define SCOPE_BIND_VX_core_mem_unit() + +`define SCOPE_BIND_VX_mem_unit_icache() + +`define SCOPE_BIND_VX_mem_unit_smem() + +`define SCOPE_DECL_SIGNALS + +`define SCOPE_DATA_LIST + +`define SCOPE_UPDATE_LIST + +`define SCOPE_TRIGGER + `define SCOPE_ASSIGN(d,s) `endif diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index dd7d22b5..26d051ae 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -28,12 +28,16 @@ typedef 
struct packed { logic [`NUM_THREADS-1:0] tmask; } gpu_tmc_t; +`define GPU_TMC_SIZE (1+`NUM_THREADS) + typedef struct packed { logic valid; logic [`NUM_WARPS-1:0] wmask; logic [31:0] pc; } gpu_wspawn_t; +`define GPU_WSPAWN_SIZE (1+`NUM_WARPS+32) + typedef struct packed { logic valid; logic diverged; @@ -42,10 +46,14 @@ typedef struct packed { logic [31:0] pc; } gpu_split_t; +`define GPU_SPLIT_SIZE (1+1+`NUM_THREADS+`NUM_THREADS+32) + typedef struct packed { logic valid; logic [`NB_BITS-1:0] id; logic [`NW_BITS-1:0] size_m1; } gpu_barrier_t; +`define GPU_BARRIER_SIZE (1+`NB_BITS+`NW_BITS) + `endif \ No newline at end of file diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 2f60776c..2938c60b 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -3,6 +3,8 @@ module VX_warp_sched #( parameter CORE_ID = 0 ) ( + `SCOPE_IO_VX_warp_sched + input wire clk, input wire reset, @@ -248,4 +250,11 @@ module VX_warp_sched #( assign busy = (active_warps != 0); + `SCOPE_ASSIGN (scope_wsched_scheduled_warp, scheduled_warp); + `SCOPE_ASSIGN (scope_wsched_active_warps, active_warps); + `SCOPE_ASSIGN (scope_wsched_schedule_table, schedule_table); + `SCOPE_ASSIGN (scope_wsched_schedule_ready, schedule_ready); + `SCOPE_ASSIGN (scope_wsched_warp_to_schedule, warp_to_schedule); + `SCOPE_ASSIGN (scope_wsched_warp_pc, warp_pc); + endmodule \ No newline at end of file diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 58e01f55..772ac3c0 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -25,6 +25,7 @@ module VX_writeback #( wire wb_valid; wire [`NW_BITS-1:0] wb_wid; + wire [31:0] wb_PC; wire [`NUM_THREADS-1:0] wb_tmask; wire [`NR_BITS-1:0] wb_rd; wire [`NUM_THREADS-1:0][31:0] wb_data; @@ -42,6 +43,13 @@ module VX_writeback #( mul_valid ? mul_commit_if.wid : fpu_valid ? fpu_commit_if.wid : 0; + + assign wb_PC = alu_valid ? alu_commit_if.PC : + lsu_valid ? lsu_commit_if.PC : + csr_valid ? csr_commit_if.PC : + mul_valid ? 
mul_commit_if.PC : + fpu_valid ? fpu_commit_if.PC : + 0; assign wb_tmask = alu_valid ? alu_commit_if.tmask : lsu_valid ? lsu_commit_if.tmask : @@ -68,16 +76,16 @@ module VX_writeback #( wire stall = 0/*~writeback_if.ready && writeback_if.valid*/; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)) + .N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)) ) wb_reg ( .clk (clk), .reset (reset), .stall (stall), .flush (1'b0), - .in ({wb_valid, wb_wid, wb_tmask, wb_rd, wb_data}), - .out ({writeback_if.valid, writeback_if.wid, writeback_if.tmask, writeback_if.rd, writeback_if.data}) + .in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}), + .out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data}) ); - + assign alu_commit_if.ready = !stall; assign lsu_commit_if.ready = !stall && !alu_valid; assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid; diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 82f36353..5e422ed5 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -1,15 +1,7 @@ `include "VX_define.vh" module Vortex ( - `SCOPE_SIGNALS_ISTAGE_TOP_IO - `SCOPE_SIGNALS_LSU_TOP_IO - `SCOPE_SIGNALS_BANK_L3_TOP_IO - `SCOPE_SIGNALS_BANK_L2_TOP_IO - `SCOPE_SIGNALS_BANK_L1D_TOP_IO - `SCOPE_SIGNALS_BANK_L1I_TOP_IO - `SCOPE_SIGNALS_BANK_L1S_TOP_IO - `SCOPE_SIGNALS_ISSUE_TOP_IO - `SCOPE_SIGNALS_EXECUTE_TOP_IO + `SCOPE_IO_Vortex // Clock input wire clk, @@ -79,14 +71,7 @@ module Vortex ( VX_cluster #( .CLUSTER_ID(0) ) cluster ( - `SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_LSU_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(0) - `SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(0) + `SCOPE_BIND_Vortex_cluster(0) .clk (clk), .reset (reset), @@ -200,14 +185,7 @@ module Vortex ( 
VX_cluster #( .CLUSTER_ID(i) ) cluster ( - `SCOPE_SIGNALS_ISTAGE_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_LSU_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_BANK_L2_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_BANK_L1D_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_BANK_L1I_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_BANK_L1S_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_ISSUE_CLUSTER_SELECT(i) - `SCOPE_SIGNALS_EXECUTE_CLUSTER_SELECT(i) + `SCOPE_BIND_Vortex_cluster(i) .clk (clk), .reset (reset), @@ -417,7 +395,7 @@ module Vortex ( .SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH), .SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH) ) l3cache ( - `SCOPE_SIGNALS_BANK_L3_CACHE_BIND + `SCOPE_BIND_Vortex_l3cache() .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index e953b651..625c0e53 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -50,7 +50,7 @@ module VX_bank #( // Snooping request tag width parameter SNP_REQ_TAG_WIDTH = 0 ) ( - `SCOPE_SIGNALS_BANK_IO + `SCOPE_IO_VX_bank input wire clk, input wire reset, @@ -143,7 +143,7 @@ module VX_bank #( ) snp_req_queue ( .clk (clk), .reset (reset), - .push (snp_req_valid), + .push (snp_req_valid && snp_req_ready), .data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}), .pop (snrq_pop), .data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}), @@ -166,7 +166,7 @@ module VX_bank #( ) dfp_queue ( .clk (clk), .reset (reset), - .push (dram_fill_rsp_valid), + .push (dram_fill_rsp_valid && dram_fill_rsp_ready), .data_in ({dram_fill_rsp_addr, dram_fill_rsp_data}), .pop (dfpq_pop), .data_out({dfpq_addr_st0, dfpq_filldata_st0}), @@ -353,7 +353,7 @@ module VX_bank #( .clk (clk), .reset (reset), .stall (stall_bank_pipe), - .flush (0), + .flush (1'b0), .in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), .out ({is_mrvq_st1 , is_snp_st1, snp_invalidate_st1, going_to_write_st1, valid_st1, addr_st1, 
wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) ); @@ -480,7 +480,7 @@ module VX_bank #( .clk (clk), .reset (reset), .stall (stall_bank_pipe), - .flush (0), + .flush (1'b0), .in ({mrvq_recover_ready_state_st1, is_mrvq_st1_st2, mrvq_init_ready_state_st1, snp_to_mrvq_st1, is_snp_st1, snp_invalidate_st1, fill_saw_dirty_st1, is_fill_st1, qual_valid_st1_2, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}), .out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2}) ); @@ -722,18 +722,18 @@ module VX_bank #( end `endif -`SCOPE_ASSIGN (scope_bank_valid_st0, qual_valid_st0); -`SCOPE_ASSIGN (scope_bank_valid_st1, valid_st1); -`SCOPE_ASSIGN (scope_bank_valid_st2, valid_st2); +`SCOPE_ASSIGN (scope_valid_st0, qual_valid_st0); +`SCOPE_ASSIGN (scope_valid_st1, valid_st1); +`SCOPE_ASSIGN (scope_valid_st2, valid_st2); -`SCOPE_ASSIGN (scope_bank_is_mrvq_st1, is_mrvq_st1); -`SCOPE_ASSIGN (scope_bank_miss_st1, miss_st1); -`SCOPE_ASSIGN (scope_bank_dirty_st1, dirty_st1); -`SCOPE_ASSIGN (scope_bank_force_miss_st1, force_request_miss_st1); -`SCOPE_ASSIGN (scope_bank_stall_pipe, stall_bank_pipe); +`SCOPE_ASSIGN (scope_is_mrvq_st1, is_mrvq_st1); +`SCOPE_ASSIGN (scope_miss_st1, miss_st1); +`SCOPE_ASSIGN (scope_dirty_st1, dirty_st1); +`SCOPE_ASSIGN (scope_force_miss_st1, force_request_miss_st1); +`SCOPE_ASSIGN (scope_stall_pipe, stall_bank_pipe); -`SCOPE_ASSIGN (scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID)); -`SCOPE_ASSIGN (scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); -`SCOPE_ASSIGN (scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); +`SCOPE_ASSIGN (scope_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, 
BANK_ID)); +`SCOPE_ASSIGN (scope_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); +`SCOPE_ASSIGN (scope_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); endmodule diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index c3189499..869c32bf 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -51,15 +51,15 @@ module VX_cache #( parameter DRAM_TAG_WIDTH = 28, // Number of snoop forwarding requests - parameter NUM_SNP_REQUESTS = 2, + parameter NUM_SNP_REQUESTS = 1, // Snooping request tag width - parameter SNP_REQ_TAG_WIDTH = 28, + parameter SNP_REQ_TAG_WIDTH = 1, // Snooping forward tag width parameter SNP_FWD_TAG_WIDTH = 1 ) ( - `SCOPE_SIGNALS_BANK_CACHE_IO + `SCOPE_IO_VX_cache input wire clk, input wire reset, @@ -365,7 +365,7 @@ module VX_cache #( .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) ) bank ( - `SCOPE_SIGNALS_BANK_SELECT(i) + `SCOPE_BIND_VX_cache_bank(i) .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 7cf8b1c8..7cd20e43 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -91,7 +91,7 @@ module VX_cache_core_rsp_merge #( .clk (clk), .reset (reset), .stall (stall), - .flush (0), + .flush (1'b0), .in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}), .out ({core_rsp_valid, core_rsp_data, core_rsp_tag}) ); diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 6ed5b02c..74745ceb 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -125,12 +125,12 @@ module VX_cache_miss_resrv #( ready_table[enqueue_index] <= mrvq_init_ready_state; addr_table[enqueue_index] <= miss_add_addr; metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}; - tail_ptr <= tail_ptr + 1; + tail_ptr <= 
tail_ptr + $bits(tail_ptr)'(1); end else if (increment_head) begin valid_table[head_ptr] <= 0; - head_ptr <= head_ptr + 1; + head_ptr <= head_ptr + $bits(head_ptr)'(1); end else if (recover_state) begin - schedule_ptr <= schedule_ptr - 1; + schedule_ptr <= schedule_ptr - $bits(schedule_ptr)'(1); end // update entry as 'ready' during DRAM fill response @@ -140,15 +140,15 @@ module VX_cache_miss_resrv #( if (mrvq_pop) begin ready_table[dequeue_index] <= 0; - schedule_ptr <= schedule_ptr + 1; + schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1); end if (!(mrvq_push && increment_head)) begin if (mrvq_push) begin - size <= size + 1; + size <= size + $bits(size)'(1); end if (increment_head) begin - size <= size - 1; + size <= size - $bits(size)'(1); end end end diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 6dab29f9..a14f4ec9 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -19,15 +19,14 @@ module VX_generic_queue #( ); `STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!")) - reg [SIZEW-1:0] size_r; - wire reading; - wire writing; - - assign reading = pop && !empty; - assign writing = push && !full; + always @(*) begin + assert(!pop || !empty); + assert(!push || !full); + end if (SIZE == 1) begin // (SIZE == 1) + reg [SIZEW-1:0] size_r; reg [DATAW-1:0] head_r; always @(posedge clk) begin @@ -35,12 +34,12 @@ module VX_generic_queue #( head_r <= 0; size_r <= 0; end else begin - if (writing && !reading) begin + if (push && !pop) begin size_r <= 1; - end else if (reading && !writing) begin + end else if (pop && !push) begin size_r <= 0; end - if (writing) begin + if (push) begin head_r <= data_in; end end @@ -52,11 +51,59 @@ module VX_generic_queue #( assign size = size_r; end else begin // (SIZE > 1) + + `ifdef QUARTUS + + scfifo scfifo_component ( + .clock (clk), + .data (data_in), + .rdreq (pop), + .wrreq (push), + .empty (empty), + .full (full), + .q (data_out), + .sclr (reset), + .usedw (), 
+ .aclr (), + .almost_empty (), + .almost_full (), + .eccstatus () + ); + + defparam + scfifo_component.lpm_type = "scfifo", + scfifo_component.intended_device_family = "Arria 10", + scfifo_component.lpm_numwords = SIZE, + scfifo_component.lpm_width = DATAW, + scfifo_component.lpm_widthu = $clog2(SIZE), + scfifo_component.lpm_showahead = "ON", + scfifo_component.add_ram_output_register = (BUFFERED ? "ON" : "ON"), + scfifo_component.use_eab = "ON"; + + reg [SIZEW-1:0] size_r; + + always @(posedge clk) begin + if (reset) begin + size_r <= 0; + end else begin + if (push && !pop) begin + size_r <= size_r + SIZEW'(1); + end + if (pop && !push) begin + size_r <= size_r - SIZEW'(1); + end + end + end + + assign size = size_r; + + `else `USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0]; - if (0 == BUFFERED) begin + if (0 == BUFFERED) begin + reg [SIZEW-1:0] size_r; reg [ADDRW:0] rd_ptr_r; reg [ADDRW:0] wr_ptr_r; @@ -69,30 +116,35 @@ module VX_generic_queue #( wr_ptr_r <= 0; size_r <= 0; end else begin - if (writing) begin - data[wr_ptr_a] <= data_in; - wr_ptr_r <= wr_ptr_r + 1; - if (!reading) begin - size_r <= size_r + 1; + if (push) begin + wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1); + if (!pop) begin + size_r <= size_r + SIZEW'(1); end end - - if (reading) begin - rd_ptr_r <= rd_ptr_r + 1; - if (!writing) begin - size_r <= size_r - 1; + if (pop) begin + rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1); + if (!push) begin + size_r <= size_r - SIZEW'(1); end end end + end + + always @(posedge clk) begin + if (push) begin + data[wr_ptr_a] <= data_in; + end end - assign data_out = data[rd_ptr_a]; + assign data_out = data[rd_ptr_a]; assign empty = (wr_ptr_r == rd_ptr_r); assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]); - assign size = size_r; + assign size = size_r; end else begin + reg [SIZEW-1:0] size_r; reg [DATAW-1:0] head_r; reg [DATAW-1:0] curr_r; reg [ADDRW-1:0] wr_ptr_r; @@ -105,7 +157,6 @@ module VX_generic_queue #( always @(posedge clk) begin if (reset) 
begin size_r <= 0; - head_r <= 0; curr_r <= 0; wr_ptr_r <= 0; rd_ptr_r <= 0; @@ -113,43 +164,50 @@ module VX_generic_queue #( empty_r <= 1; full_r <= 0; end else begin - if (writing) begin - data[wr_ptr_r] <= data_in; - wr_ptr_r <= wr_ptr_r + 1; + if (push) begin + wr_ptr_r <= wr_ptr_r + ADDRW'(1); - if (!reading) begin + if (!pop) begin empty_r <= 0; - if (size_r == ($bits(size_r)'(SIZE-1))) begin + if (size_r == SIZEW'(SIZE-1)) begin full_r <= 1; end - size_r <= size_r + 1; + size_r <= size_r + SIZEW'(1); end end - if (reading) begin + if (pop) begin rd_ptr_r <= rd_ptr_next_r; if (SIZE > 2) begin - rd_ptr_next_r <= rd_ptr_r + $bits(rd_ptr_r)'(2); + rd_ptr_next_r <= rd_ptr_r + ADDRW'(2); end else begin // (SIZE == 2); rd_ptr_next_r <= ~rd_ptr_next_r; end - if (!writing) begin - if (size_r == 1) begin + if (!push) begin + if (size_r == SIZEW'(1)) begin assert(rd_ptr_next_r == wr_ptr_r); empty_r <= 1; end; full_r <= 0; - size_r <= size_r - 1; + size_r <= size_r - SIZEW'(1); end end - bypass_r <= writing - && (empty_r || ((1 == size_r) && reading)); // empty or about to go empty - + bypass_r <= push && (empty_r || ((size_r == SIZEW'(1)) && pop)); curr_r <= data_in; - head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r]; + end + end + + always @(posedge clk) begin + if (reset) begin + head_r <= 0; + end else begin + if (push) begin + data[wr_ptr_r] <= data_in; + end + head_r <= data[pop ? 
rd_ptr_next_r : rd_ptr_r]; end end @@ -158,6 +216,9 @@ module VX_generic_queue #( assign full = full_r; assign size = size_r; end + + `endif + end endmodule diff --git a/hw/rtl/libs/VX_index_queue.v b/hw/rtl/libs/VX_index_queue.v index bee8ccb9..b40aa2a0 100644 --- a/hw/rtl/libs/VX_index_queue.v +++ b/hw/rtl/libs/VX_index_queue.v @@ -28,9 +28,13 @@ module VX_index_queue #( assign empty = (wr_ptr == rd_ptr); assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SIZE)] != rd_ptr[`LOG2UP(SIZE)]); - assign enqueue = push && !full; + assign enqueue = push; assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid + always @(*) begin + assert(!push || !full); + end + always @(posedge clk) begin if (reset) begin rd_ptr <= 0; diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 19f385c3..9490d6b3 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -126,11 +126,11 @@ module VX_scope #( || (trigger_id != prev_trigger_id)) begin delta_store[waddr] <= delta; data_store[waddr] <= data_in; - waddr <= waddr + 1; + waddr <= waddr + $bits(waddr)'(1); delta <= 0; delta_flush <= 0; end else begin - delta <= delta + 1; + delta <= delta + DELTAW'(1); delta_flush <= (delta == (MAX_DELTA-1)); end prev_trigger_id <= trigger_id; @@ -159,7 +159,7 @@ module VX_scope #( if (read_offset < $bits(read_offset)'(DATAW-BUSW)) begin read_offset <= read_offset + $bits(read_offset)'(BUSW); end else begin - raddr <= raddr + 1; + raddr <= raddr + $bits(raddr)'(1); read_offset <= 0; read_delta <= 1; if (raddr == waddr) begin diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index 1001e32c..8b0ae92b 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -6,125 +6,196 @@ "../rtl/VX_define.vh", "../rtl/cache/VX_cache_config.vh" ], - "parameters": { - "L3_ENABLE": "`L3_ENABLE", - "L2_ENABLE": "`L2_ENABLE", - "NUM_CLUSTERS": "`NUM_CLUSTERS", - "NUM_CORES": "`NUM_CORES", - "DNUM_BANKS": "`DNUM_BANKS", - "INUM_BANKS": "`INUM_BANKS", - "SNUM_BANKS": 
"`SNUM_BANKS", - "L2NUM_BANKS": "`L2NUM_BANKS", - "L3NUM_BANKS": "`L3NUM_BANKS" + "modules": { + "*": { + "enabled": "(`NUM_CLUSTERS > 0)", + "submodules": { + "afu": {"type":"vortex_afu"} + } + }, + "vortex_afu": { + "submodules": { + "vortex": {"type":"Vortex"} + } + }, + "Vortex": { + "submodules": { + "cluster": {"type":"VX_cluster", "count":"`NUM_CLUSTERS"}, + "l3cache": {"type":"VX_cache", "enabled":"`L3_ENABLE", "params":{"NUM_BANKS":"`L3NUM_BANKS"}} + } + }, + "VX_cluster": { + "submodules": { + "core": {"type":"VX_core", "count":"`NUM_CORES"}, + "l2cache": {"type":"VX_cache", "enabled":"`L2_ENABLE", "params":{"NUM_BANKS":"`L2NUM_BANKS"}} + } + }, + "VX_core": { + "submodules": { + "pipeline": {"type":"VX_pipeline", "enabled":false}, + "mem_unit": {"type":"VX_mem_unit", "enabled":true} + } + }, + "VX_pipeline": { + "submodules": { + "fetch": {"type":"VX_fetch", "enabled":true}, + "decode": {"type":"VX_decode", "enabled":true}, + "issue": {"type":"VX_issue", "enabled":true}, + "execute": {"type":"VX_execute", "enabled":true}, + "commit": {"type":"VX_commit", "enabled":true} + } + }, + "VX_fetch": { + "submodules": { + "warp_sched": {"type":"VX_warp_sched"}, + "icache_stage": {"type":"VX_icache_stage"} + } + }, + "VX_warp_sched": {}, + "VX_icache_stage": {}, + "VX_decode": {}, + "VX_issue": {}, + "VX_execute": { + "submodules": { + "lsu_unit": {"type":"VX_lsu_unit"}, + "gpu_unit": {"type":"VX_gpu_unit"} + } + }, + "VX_commit": {}, + "VX_lsu_unit": {}, + "VX_gpu_unit": {}, + "VX_mem_unit": { + "submodules": { + "smem": {"type":"VX_cache", "params":{"NUM_BANKS":"`SNUM_BANKS"}}, + "dcache": {"type":"VX_cache", "params":{"NUM_BANKS":"`DNUM_BANKS"}}, + "icache": {"type":"VX_cache", "params":{"NUM_BANKS":"`INUM_BANKS"}} + } + }, + "VX_cache": { + "submodules": { + "bank": {"type":"VX_bank", "count":"NUM_BANKS"} + } + }, + "VX_bank": {} }, - "taps": { - "top::SCOPE_SIGNALS_AFU": { - "!scope_dram_req_valid": 1, - "scope_dram_req_addr": 32, - "scope_dram_req_rw": 1, - 
"scope_dram_req_byteen": "`VX_DRAM_BYTEEN_WIDTH", - "scope_dram_req_data": "`VX_DRAM_LINE_WIDTH", - "scope_dram_req_tag": "`VX_DRAM_TAG_WIDTH", - "!scope_dram_req_ready": 1, - "!scope_dram_rsp_valid": 1, - "scope_dram_rsp_data": "`VX_DRAM_LINE_WIDTH", - "scope_dram_rsp_tag": "`VX_DRAM_TAG_WIDTH", - "!scope_dram_rsp_ready": 1, - "!scope_snp_req_valid": 1, - "scope_snp_req_addr": 32, - "scope_snp_req_invalidate": 1, - "scope_snp_req_tag": "`VX_SNP_TAG_WIDTH", - "!scope_snp_req_ready": 1, - "!scope_snp_rsp_valid": 1, - "scope_snp_rsp_tag": "`VX_SNP_TAG_WIDTH", - "!scope_snp_rsp_ready": 1, - "scope_busy": 1 + "taps": { + "afu": { + "!reset": 1, + "?dram_req_valid": 1, + "dram_req_addr": 32, + "dram_req_rw": 1, + "dram_req_byteen":"`VX_DRAM_BYTEEN_WIDTH", + "dram_req_data":"`VX_DRAM_LINE_WIDTH", + "dram_req_tag":"`VX_DRAM_TAG_WIDTH", + "?dram_req_ready": 1, + "?dram_rsp_valid": 1, + "dram_rsp_data":"`VX_DRAM_LINE_WIDTH", + "dram_rsp_tag":"`VX_DRAM_TAG_WIDTH", + "?dram_rsp_ready": 1, + "?snp_req_valid": 1, + "snp_req_addr": 32, + "snp_req_invalidate": 1, + "snp_req_tag":"`VX_SNP_TAG_WIDTH", + "?snp_req_ready": 1, + "?snp_rsp_valid": 1, + "snp_rsp_tag":"`VX_SNP_TAG_WIDTH", + "?snp_rsp_ready": 1, + "busy": 1 }, - "core::SCOPE_SIGNALS_ISTAGE": { - "!scope_icache_req_valid": 1, - "scope_icache_req_wid": "`NW_BITS", - "scope_icache_req_addr": 32, - "scope_icache_req_tag": "`ICORE_TAG_ID_BITS", - "!scope_icache_req_ready": 1, - "!scope_icache_rsp_valid": 1, - "scope_icache_rsp_data": 32, - "scope_icache_rsp_tag": "`ICORE_TAG_ID_BITS", - "!scope_icache_rsp_ready": 1 + "afu/vortex/cluster/core/pipeline/fetch/icache_stage": { + "?icache_req_valid": 1, + "icache_req_wid":"`NW_BITS", + "icache_req_addr": 32, + "icache_req_tag":"`ICORE_TAG_ID_BITS", + "?icache_req_ready": 1, + "?icache_rsp_valid": 1, + "icache_rsp_data": 32, + "icache_rsp_tag":"`ICORE_TAG_ID_BITS", + "?icache_rsp_ready": 1 }, - "core::SCOPE_SIGNALS_LSU": { - "!scope_dcache_req_valid": "`NUM_THREADS", - 
"scope_dcache_req_wid": "`NW_BITS", - "scope_dcache_req_pc": 32, - "scope_dcache_req_addr": "`NUM_THREADS * 32", - "scope_dcache_req_rw": 1, - "scope_dcache_req_byteen": "`NUM_THREADS * 4", - "scope_dcache_req_data": "`NUM_THREADS * 32", - "scope_dcache_req_tag": "`DCORE_TAG_ID_BITS", - "!scope_dcache_req_ready": 1, - "!scope_dcache_rsp_valid": "`NUM_THREADS", - "scope_dcache_rsp_data": "`NUM_THREADS * 32", - "scope_dcache_rsp_tag": "`DCORE_TAG_ID_BITS", - "!scope_dcache_rsp_ready": 1 + "afu/vortex/cluster/core/pipeline/fetch/warp_sched": { + "?wsched_scheduled_warp": 1, + "wsched_active_warps": "`NUM_WARPS", + "wsched_schedule_table": "`NUM_WARPS", + "wsched_schedule_ready": "`NUM_WARPS", + "wsched_warp_to_schedule": "`NW_BITS", + "wsched_warp_pc": "32" }, - "core::SCOPE_SIGNALS_ISSUE": { - "!scope_issue_valid": 1, - "scope_issue_wid": "`NW_BITS", - "scope_issue_tmask": "`NUM_THREADS", - "scope_issue_pc": 32, - "scope_issue_ex_type": "`EX_BITS", - "scope_issue_op_type": "`OP_BITS", - "scope_issue_op_mod": "`MOD_BITS", - "scope_issue_wb": 1, - "scope_issue_rd": "`NR_BITS", - "scope_issue_rs1": "`NR_BITS", - "scope_issue_rs2": "`NR_BITS", - "scope_issue_rs3": "`NR_BITS", - "scope_issue_imm": 32, - "scope_issue_rs1_is_pc": 1, - "scope_issue_rs2_is_imm": 1, - "!scope_issue_ready": 1, - "scope_gpr_rsp_wid": "`NW_BITS", - "scope_gpr_rsp_pc": 32, - "scope_gpr_rsp_a": "`NUM_THREADS * 32", - "scope_gpr_rsp_b": "`NUM_THREADS * 32", - "scope_gpr_rsp_c": "`NUM_THREADS * 32", - "!scope_gpr_delay": 1, - "!scope_writeback_valid": 1, - "scope_writeback_wid": "`NW_BITS", - "scope_writeback_pc": 32, - "scope_writeback_rd": "`NR_BITS", - "scope_writeback_data": "`NUM_THREADS * 32", - "!scope_scoreboard_delay": 1, - "!scope_execute_delay": 1 - }, - "core::SCOPE_SIGNALS_EXECUTE": {}, - "bank::SCOPE_SIGNALS_BANK": { - "!scope_bank_valid_st0": 1, - "!scope_bank_valid_st1": 1, - "!scope_bank_valid_st2": 1, - "scope_bank_addr_st0": 32, - "scope_bank_addr_st1": 32, - "scope_bank_addr_st2": 
32, - "scope_bank_is_mrvq_st1": 1, - "scope_bank_miss_st1": 1, - "scope_bank_dirty_st1": 1, - "!scope_bank_force_miss_st1": 1, - "!scope_bank_stall_pipe": 1 + "afu/vortex/cluster/core/pipeline/execute/gpu_unit": { + "?gpu_req_valid": 1, + "gpu_req_wid": "`NW_BITS", + "gpu_req_tmask": "`NUM_THREADS", + "gpu_req_op_type": "`GPU_BITS", + "gpu_req_rs1": "32", + "gpu_req_rs2": "32", + "?gpu_req_ready": 1, + "?gpu_rsp_valid": 1, + "gpu_rsp_wid": "`NW_BITS", + "gpu_rsp_tmc": "`GPU_TMC_SIZE", + "gpu_rsp_wspawn": "`GPU_WSPAWN_SIZE", + "gpu_rsp_split": "`GPU_SPLIT_SIZE", + "gpu_rsp_barrier": "`GPU_BARRIER_SIZE" + }, + "afu/vortex/cluster/core/pipeline/execute/lsu_unit": { + "?dcache_req_valid":"`NUM_THREADS", + "dcache_req_wid":"`NW_BITS", + "dcache_req_pc": 32, + "dcache_req_addr":"`NUM_THREADS * 32", + "dcache_req_rw": 1, + "dcache_req_byteen":"`NUM_THREADS * 4", + "dcache_req_data": "`NUM_THREADS * 32", + "dcache_req_tag":"`DCORE_TAG_ID_BITS", + "?dcache_req_ready": 1, + "?dcache_rsp_valid":"`NUM_THREADS", + "dcache_rsp_data":"`NUM_THREADS * 32", + "dcache_rsp_tag":"`DCORE_TAG_ID_BITS", + "?dcache_rsp_ready": 1 + }, + "afu/vortex/cluster/core/pipeline/issue": { + "?issue_valid": 1, + "issue_wid":"`NW_BITS", + "issue_tmask":"`NUM_THREADS", + "issue_pc": 32, + "issue_ex_type":"`EX_BITS", + "issue_op_type":"`OP_BITS", + "issue_op_mod":"`MOD_BITS", + "issue_wb": 1, + "issue_rd":"`NR_BITS", + "issue_rs1":"`NR_BITS", + "issue_rs2":"`NR_BITS", + "issue_rs3":"`NR_BITS", + "issue_imm": 32, + "issue_rs1_is_pc": 1, + "issue_rs2_is_imm": 1, + "?issue_ready": 1, + "?gpr_rsp_valid": 1, + "gpr_rsp_wid":"`NW_BITS", + "gpr_rsp_pc": 32, + "gpr_rsp_a":"`NUM_THREADS * 32", + "gpr_rsp_b":"`NUM_THREADS * 32", + "gpr_rsp_c":"`NUM_THREADS * 32", + "!gpr_delay": 1, + "?writeback_valid": 1, + "writeback_wid":"`NW_BITS", + "writeback_pc": 32, + "writeback_rd":"`NR_BITS", + "writeback_data":"`NUM_THREADS * 32", + "!scoreboard_delay": 1, + "!execute_delay": 1 + }, + "afu/vortex/l3cache/bank, 
afu/vortex/cluster/l2cache/bank, afu/vortex/cluster/core/mem_unit/dcache/bank, afu/vortex/cluster/core/mem_unit/icache/bank, afu/vortex/cluster/core/mem_unit/smem/bank": { + "?valid_st0": 1, + "?valid_st1": 1, + "?valid_st2": 1, + "addr_st0": 32, + "addr_st1": 32, + "addr_st2": 32, + "is_mrvq_st1": 1, + "miss_st1": 1, + "dirty_st1": 1, + "!force_miss_st1": 1, + "!stall_pipe": 1 } - }, - "triggers": [ - ["scope_dram_req_valid", "scope_dram_req_ready"], - ["scope_dram_rsp_valid", "scope_dram_rsp_ready"], - ["scope_snp_req_valid", "scope_snp_req_ready"], - ["scope_snp_rsp_valid", "scope_snp_rsp_ready"], - - ["scope_icache_req_valid_top", "scope_icache_req_ready_top"], - ["scope_icache_rsp_valid_top", "scope_icache_rsp_ready_top"], - - ["scope_dcache_req_valid_top", "scope_dcache_req_ready_top"], - ["scope_dcache_rsp_valid_top", "scope_dcache_rsp_ready_top"], - - ["scope_issue_valid_top", "scope_issue_ready_top"] - ] + } } \ No newline at end of file diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index c360398c..9bedc02c 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -11,12 +11,89 @@ vl_ifdef_re = re.compile(r"^\s*`(ifdef|ifndef|elsif)\s+(\w+)\s*$") vl_endif_re = re.compile(r"^\s*`(endif|else)\s*$") vl_expand_re = re.compile(r"`([0-9a-zA-Z_]+)") -parameters = [] exclude_files = [] include_dirs = [] macros = [] br_stack = [] +def translate_ternary(text): + + def skip_space(text, i, ln, step): + while (i >= 0) and (i < ln): + c = text[i] + if not c.isspace(): + break + i += step + return i + + def skip_expr(text, i, ln, step): + paren = 0 + checkparen = True + while (i >= 0) and (i < ln): + c = text[i] + if checkparen and (((step < 0) and (c == ')')) or ((step > 0) and (c == '('))): + paren += 1 + elif checkparen and (((step < 0) and (c == '(')) or ((step > 0) and (c == ')'))): + if (0 == paren): + break + paren -= 1 + if (0 == paren): + i = skip_space(text, i + step, ln, step) + checkparen = False + continue + elif (0 == paren) and not 
(c.isalnum() or (c == '_')): + break + i += step + return (i - step) + + def parse_ternary(text): + ternary = None + ln = len(text) + for i in range(1, ln): + c = text[i] + if not (c == '?'): + continue + # parse condition expression + i0 = skip_space(text, i - 1, ln, -1) + if (i < 0): + raise Exception("invalid condition expression") + i1 = skip_expr(text, i0, ln, -1) + if (i1 > i0): + raise Exception("invalid condition expression") + # parse true expression + i2 = skip_space(text, i + 1, ln, 1) + if (i2 >= ln): + raise Exception("invalid true expression") + i3 = skip_expr(text, i2, ln, 1) + if (i3 < i2): + raise Exception("invalid true expression") + # parse colon + i4 = skip_space(text, i3 + 1, ln, 1) + if (i4 >= ln): + raise Exception("invalid colon") + if not (text[i4] == ':'): + raise Exception("missing colon") + # parse false expression + i5 = skip_space(text, i4 + 1, ln, 1) + if (i5 >= ln): + raise Exception("invalid false expression") + i6 = skip_expr(text, i5, ln, 1) + if (i6 < i5): + raise Exception("invalid false expression") + ternary = (i0, i1, i2, i3, i5, i6) + break + return ternary + + while True: + pos = parse_ternary(text) + if pos is None: + break + # convert to python ternary + newText = text[:pos[1]] + text[pos[2]:pos[3]+1] + " if " + text[pos[1]:pos[0]+1] + " else " + text[pos[4]:pos[5]+1] + text[pos[5]+1:] + text = newText + + return text + def parse_func_args(text): args = [] arg = '' @@ -26,7 +103,6 @@ def parse_func_args(text): paren = 1 for i in range(1, l): c = text[i] - if c == '(': paren += 1 elif c == ')': @@ -36,17 +112,14 @@ def parse_func_args(text): if paren == 0: l = i break - if c == ',' and paren == 1: if arg.strip(): args.append(arg) arg = '' else: arg += c - if paren != 0: raise Exception("missing closing parenthesis: " + text) - if arg.strip(): args.append(arg) @@ -90,9 +163,29 @@ def find_macro(name): return macro return None -def expand_text(text): +def expand_text(text, params): - class DoRepl(object): + def 
re_pattern_args(args): + p = "(? 0: + p += "|" + p += arg + i += 1 + p += ")(?![0-9a-zA-Z_])" + return p + + class DoReplParam(object): + def __init__(self, params): + self.params = params + self.expanded = False + def __call__(self, match): + name = match.group(1) + self.expanded = True + return self.params[name] + + class DoReplMacro(object): def __init__(self): self.expanded = False self.has_func = False @@ -107,17 +200,6 @@ def expand_text(text): return macro[2] return "`" + name - class DoRepl2(object): - def __init__(self, args, f_args): - map = {} - for i in range(len(args)): - map[args[i]] = f_args[i] - self.map = map - def __call__(self, match): - for key in match.groups(): - return self.map[key] - return group - def repl_func_macro(text): expanded = False match = re.search(vl_expand_re, text) @@ -137,14 +219,11 @@ def expand_text(text): if len(args) != len(f_args[0]): raise Exception("mismatch number of argments for macro '" + name + "': actual=" + len(f_args[0]) + ", expected=" + len(args)) - pattern = "(? 
0: - pattern += "|" - pattern += args[i] - pattern += ")(?![0-9a-zA-Z_])" - - dorepl = DoRepl2(args, f_args[0]) + params[args[i]] = f_args[0][i] + dorepl = DoReplParam(params) value = re.sub(pattern, dorepl, value) str_head = text[0:match.start()] @@ -163,10 +242,18 @@ def expand_text(text): raise Exception("Macro recursion!") has_func = False while True: - do_repl = DoRepl() + params_updated = False + if not params is None: + do_repl = DoReplParam(params) + pattern = re_pattern_args(params) + new_text = re.sub(pattern, do_repl, text) + if do_repl.expanded: + text = new_text + params_updated = True + do_repl = DoReplMacro() new_text = re.sub(vl_expand_re, do_repl, text) - has_func = do_repl.has_func - if not do_repl.expanded: + has_func = do_repl.has_func + if not (params_updated or do_repl.expanded): break text = new_text changed = True @@ -291,7 +378,28 @@ def load_config(filename): print("condfig=", config) return config -def gen_vl_header(file, taps, triggers): +def eval_node(text, params): + def clog2(x): + l2 = math.log2(x) + cl = math.ceil(l2) + return int(cl) + + if not type(text) == str: + return text + + expanded = expand_text(text, params) + if expanded: + text = expanded + + try: + __text = text.replace('$clog2', '__clog2') + __text = translate_ternary(__text) + e = eval(__text, {'__clog2': clog2}) + return e + except (NameError, SyntaxError): + return text + +def gen_vl_header(file, modules, taps): header = ''' `ifndef VX_SCOPE_DEFS @@ -299,238 +407,274 @@ def gen_vl_header(file, taps, triggers): ''' footer = '`endif' - def signal_size(size, asize): - str_asize = "" - for s in asize: - if type(s) == int: - str_asize += "[" + str(s-1) + ":0]" - else: - str_asize += "[" + str(s) + "-1:0]" - + def signal_size(size, mn): if type(size) == int: - size1 = (size-1) - if size1 != 0: - return str_asize + "[" + str(size1) + ":0]" + if (size != mn): + return "[" + str(size-1) + ":0]" else: - return str_asize + return "" else: - return str_asize + "[(" + size + 
")-1:0]" + return "[" + size + "-1:0]" - def generate_ports(tclass, tap, ports, new_taps): + def create_signal(key, ports): + if not key in ports: + ports[key] = [] + return ports[key] - def emit_io(tap, ports, prefix, asize, return_list, new_taps, is_enabled): - stap = tap + "_IO" - new_taps.append(stap) - print("`define " + stap + " \\", file=f) - if is_enabled: - for key in ports: - size = ports[key] - name = key - is_trigger = False - if name[0] == '!': - name = name[1:] - is_trigger = True - if not return_list is None: - return_list.append((name + prefix, size, asize, is_trigger)) - print("\toutput wire" + signal_size(size, asize) + " " + name + prefix + ", \\", file=f) - print("", file=f) - emit_bind(tap, ports, prefix, prefix, new_taps, is_enabled) - - def emit_bind(tap, ports, from_prefix, to_prefix, new_taps, is_enabled): - stap = tap + "_BIND" - new_taps.append(stap) - print("`define " + stap + " \\", file=f) - for key in ports: - name = key - if name[0] == '!': - name = name[1:] - if is_enabled: - print("\t." + name + to_prefix + " (" + name + from_prefix + "), \\", file=f) - else: - if (from_prefix != to_prefix): - print("\t`UNUSED_PIN (" + name + to_prefix + "), \\", file=f) - print("", file=f) - - def emit_select(tap, ports, from_prefix, to_prefix, new_taps, is_enabled): - stap = tap + "_SELECT(__i__)" - new_taps.append(stap) - print("`define " + stap + " \\", file=f) - if is_enabled: - for key in ports: - name = key - if name[0] == '!': - name = name[1:] - print("\t." 
+ name + to_prefix + " (" + name + from_prefix + "[__i__]), \\", file=f) - print("", file=f) - - def do_top(tap, ports, new_taps): - out_ports = [] - for p in ports: - name = p - is_trigger = False - if name[0] == '!': - name = name[1:] - is_trigger = True - out_ports.append((name, ports[p], [], is_trigger)) - return out_ports - - def do_core(tap, ports, new_taps): - out_ports = [] - nclusters = parameters["NUM_CLUSTERS"] - ncores = parameters["NUM_CORES"] - emit_io(tap + "_TOP", ports, "_top", [nclusters, ncores], out_ports, new_taps, True) - emit_io(tap + "_CLUSTER", ports, "_cluster", [ncores], None, new_taps, True) - emit_io(tap + "", ports, "", [], None, new_taps, True) - emit_select(tap + "_CLUSTER", ports, "_top", "_cluster", new_taps, True) - emit_select(tap + "", ports, "_cluster", "", new_taps, True) - return out_ports - - def do_bank(tap, ports, new_taps): - out_ports = [] - - nclusters = parameters["NUM_CLUSTERS"] - ncores = parameters["NUM_CORES"] - has_l3 = (parameters["L3_ENABLE"] != 0) - has_l2 = (parameters["L2_ENABLE"] != 0) - - emit_io(tap + "_L3_TOP", ports, "_l3_cache", [parameters["L3NUM_BANKS"]], out_ports, new_taps, has_l3) - emit_io(tap + "_L2_TOP", ports, "_l2_top", [nclusters, parameters["L2NUM_BANKS"]], out_ports, new_taps, has_l2) - emit_io(tap + "_L1D_TOP", ports, "_l1d_top", [nclusters, ncores, parameters["DNUM_BANKS"]], out_ports, new_taps, True) - emit_io(tap + "_L1I_TOP", ports, "_l1i_top", [nclusters, ncores, parameters["INUM_BANKS"]], out_ports, new_taps, True) - emit_io(tap + "_L1S_TOP", ports, "_l1s_top", [nclusters, ncores, parameters["SNUM_BANKS"]], out_ports, new_taps, True) - - emit_io(tap + "_L2_CLUSTER", ports, "_l2_cache", [parameters["L2NUM_BANKS"]], None, new_taps, has_l2) - emit_io(tap + "_L1D_CLUSTER", ports, "_l1d_cluster", [ncores, parameters["DNUM_BANKS"]], None, new_taps, True) - emit_io(tap + "_L1I_CLUSTER", ports, "_l1i_cluster", [ncores, parameters["INUM_BANKS"]], None, new_taps, True) - emit_io(tap + 
"_L1S_CLUSTER", ports, "_l1s_cluster", [ncores, parameters["SNUM_BANKS"]], None, new_taps, True) - - emit_io(tap + "_L1D_CORE", ports, "_l1d_cache", [parameters["DNUM_BANKS"]], None, new_taps, True) - emit_io(tap + "_L1I_CORE", ports, "_l1i_cache", [parameters["INUM_BANKS"]], None, new_taps, True) - emit_io(tap + "_L1S_CORE", ports, "_l1s_cache", [parameters["SNUM_BANKS"]], None, new_taps, True) - - emit_io(tap + "_CACHE", ports, "_cache", ["NUM_BANKS"], None, new_taps, True) - emit_io(tap + "", ports, "", [], None, new_taps, True) - - emit_select(tap + "_L2_CLUSTER", ports, "_l2_top", "_l2_cache", new_taps, has_l2) - emit_select(tap + "_L1D_CLUSTER", ports, "_l1d_top", "_l1d_cluster", new_taps, True) - emit_select(tap + "_L1I_CLUSTER", ports, "_l1i_top", "_l1i_cluster", new_taps, True) - emit_select(tap + "_L1S_CLUSTER", ports, "_l1s_top", "_l1s_cluster", new_taps, True) - - emit_select(tap + "_L1D_CORE", ports, "_l1d_cluster", "_l1d_cache", new_taps, True) - emit_select(tap + "_L1I_CORE", ports, "_l1i_cluster", "_l1i_cache", new_taps, True) - emit_select(tap + "_L1S_CORE", ports, "_l1s_cluster", "_l1s_cache", new_taps, True) - - emit_bind(tap + "_L3_CACHE", ports, "_l3_cache", "_cache", new_taps, has_l3) - emit_bind(tap + "_L2_CACHE", ports, "_l2_cache", "_cache", new_taps, has_l2) - emit_bind(tap + "_L1D_CACHE", ports, "_l1d_cache", "_cache", new_taps, True) - emit_bind(tap + "_L1I_CACHE", ports, "_l1i_cache", "_cache", new_taps, True) - emit_bind(tap + "_L1S_CACHE", ports, "_l1s_cache", "_cache", new_taps, True) - - emit_select(tap + "", ports, "_cache", "", new_taps, True) - - return out_ports - - callbacks = { - "top": do_top, - "core": do_core, - "bank": do_bank - } - - return callbacks[tclass](tap, ports, new_taps) - - def trigger_size(name, ports): - for port in ports: - if port[0] == name: - return (port[1], port[2]) - return None - - def trigger_prefices(asize): - def Q(arr, ss, asize, idx, N): - for i in range(asize[idx]): - tmp = ss + '[' + str(i) + 
']' - if (idx + 1) < N: - Q(arr, tmp, asize, idx + 1, N) - else: - arr.append(tmp) - - l = len(asize) - if l == 0: - return [""] - arr = [] - Q(arr, "", asize, 0, l) - return arr + def dic_insert(gdic, ldic, key, value, enabled): + if enabled: + ldic[key] = value + if key in gdic: + return False + if enabled: + gdic[key] = None + return True def trigger_name(name, size): if type(size) == int: - size1 = (size-1) - if size1 != 0: + if size != 1: return "(| " + name + ")" else: return name else: return "(| " + name + ")" - with open(file, 'w') as f: + def trigger_subscripts(asize): + def Q(arr, ss, asize, idx, N): + a = asize[idx] + if (a != 0): + for i in range(a): + tmp = ss + '[' + str(i) + ']' + if (idx + 1) < N: + Q(arr, tmp, asize, idx + 1, N) + else: + arr.append(tmp) + else: + if (idx + 1) < N: + Q(arr, ss, asize, idx + 1, N) + else: + arr.append(ss) + + if asize is None: + return [""] + ln = len(asize) + if (0 == ln): + return [""] + arr = [] + Q(arr, "", asize, 0, ln) + return arr + + + def visit_path(alltaps, ports, path, node, paths, modules, taps): + ntype = node["type"] + + enabled = True + if "enabled" in node: + enabled = eval_node(node["enabled"], None) + + curtaps = {} + + if (len(paths) != 0): + spath = paths.pop(0) + snodes = modules[ntype]["submodules"] + if not spath in snodes: + raise Exception("invalid path: " + spath + " in " + path) + snode = snodes[spath] + + subtaps = visit_path(alltaps, ports, spath, snode, paths, modules, taps) + + scount = 0 + if "count" in snode: + scount = eval_node(snode["count"], None) + + params = None + if "params" in snode: + params = snode["params"] + + new_staps = [] + + nn = "SCOPE_IO_" + ntype + pp = create_signal(nn, ports) + for key in subtaps: + subtap = subtaps[key] + s = subtap[0] + a = subtap[1] + t = subtap[2] + e = subtap[3] + + s = eval_node(s, params) + + e = eval_node(e, params) + if type(e) == str or type(enabled) == str: + me = str(e) + " and " + str(enabled) + else: + me = e and enabled + + aa = 
[scount] + sa = signal_size(scount, 0) + if a: + for i in a: + x = eval_node(i, params) + aa.append(x) + sa += signal_size(x, 0) + + if dic_insert(alltaps, curtaps, spath + '/' + key, (s, aa, t, me), e): + skey = key.replace('/', '_') + if e: + pp.append("\toutput wire" + sa + signal_size(s, 1) + " scope_" + spath + '_' + skey + ',') + new_staps.append(skey) + + ports[nn] = pp + + if (0 == scount): + nn = "SCOPE_BIND_" + ntype + '_' + spath + "()" + pp = create_signal(nn, ports) + for st in new_staps: + if e: + pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "),") + else: + pp.append("\t`UNUSED_PIN (scope_" + st + "),") + ports[nn] = pp + else: + nn = "SCOPE_BIND_" + ntype + '_' + spath + "(__i__)" + pp = create_signal(nn, ports) + for st in new_staps: + if e: + pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "[__i__]),") + else: + pp.append("\t`UNUSED_PIN (scope_" + st + "),") + ports[nn] = pp + else: + nn = "SCOPE_IO_" + ntype + pp = create_signal(nn, ports) + for tk in taps: + trigger = 0 + name = tk + size = eval_node(taps[tk], None) + if name[0] == '!': + name = name[1:] + trigger = 1 + elif name[0] == '?': + name = name[1:] + trigger = 2 + if dic_insert(alltaps, curtaps, name, (size, None, trigger, enabled), True): + pp.append("\toutput wire" + signal_size(size, 1) + " scope_" + name + ',') + + ports[nn] = pp + + return curtaps + + toptaps = {} + + with open(file, 'w') as f: + + top = modules['*'] + snodes = top["submodules"] + + ports = {} + alltaps = {} + + for key in taps: + skey_list = key.split(',') + _taps = taps[key] + for skey in skey_list: + print('processing node: ' + skey + ' ...') + paths = skey.strip().split('/') + spath = paths.pop(0) + if not spath in snodes: + raise Exception("invalid path: " + spath) + snode = snodes[spath] + curtaps = visit_path(alltaps, ports, spath, snode, paths, modules, _taps) + for tk in curtaps: + toptaps[tk] = curtaps[tk] + print(header, file=f) - all_ports = [] - new_taps = [] + for key in 
ports: + print("`define " + key + ' \\', file=f) + for port in ports[key]: + print(port + ' \\', file=f) + print("", file=f) - for key in taps: - [tclass, tap] = key.split('::') - ports = generate_ports(tclass, tap, taps[key], new_taps) - for port in ports: - all_ports.append(port) - - print("`define SCOPE_SIGNALS_DECL \\", file=f) - i = 0 - for port in all_ports: + print("`define SCOPE_DECL_SIGNALS \\", file=f) + i = 0 + for key in toptaps: + tap = toptaps[key] + name = key.replace('/', '_') + size = tap[0] + asize = tap[1] + enabled = tap[3] + sa = "" + if asize: + for a in asize: + sa += signal_size(a, 0) if i > 0: print(" \\", file=f) - print("\twire" + signal_size(port[1], port[2]) + " " + port[0] + ";", file=f, end='') + if not enabled: + print("`IGNORE_WARNINGS_BEGIN \\", file=f) + print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + '; \\', file=f) + print("`IGNORE_WARNINGS_END", file=f, end='') + else: + print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + ';', file=f, end='') i += 1 print("", file=f) print("", file=f) - print("`define SCOPE_SIGNALS_DATA_LIST \\", file=f) + print("`define SCOPE_DATA_LIST \\", file=f) i = 0 - for port in all_ports: - if port[3]: + for key in toptaps: + tap = toptaps[key] + if tap[2] != 0: continue + name = key.replace('/', '_') if i > 0: print(", \\", file=f) - print("\t" + port[0], file=f, end='') + print("\t scope_" + name, file=f, end='') i += 1 print("", file=f) print("", file=f) - print("`define SCOPE_SIGNALS_UPD_LIST \\", file=f) + print("`define SCOPE_UPDATE_LIST \\", file=f) i = 0 - for port in all_ports: - if not port[3]: + for key in toptaps: + tap = toptaps[key] + if tap[2] == 0: continue + name = key.replace('/', '_') if i > 0: print(", \\", file=f) - print("\t" + port[0], file=f, end='') + print("\t scope_" + name, file=f, end='') i += 1 print("", file=f) print("", file=f) - print("`define SCOPE_TRIGGERS \\", file=f) + print("`define SCOPE_TRIGGER \\", file=f) i = 0 - for trigger in 
triggers: - arr = trigger_size(trigger[0], all_ports) - if arr is None: + excluded_list = [] + for key in toptaps: + if key in excluded_list: continue - [size, asize] = arr - for prefix in trigger_prefices(asize): + tap = toptaps[key] + if tap[2] != 2: + continue + size = tap[0] + asize = tap[1] + sus = trigger_subscripts(asize) + for su in sus: if i > 0: - print(" | \\", file=f) - print("\t(", file=f, end='') - for j in range(len(trigger)): - if j > 0: - print(" && ", file=f, end='') - print(trigger_name(trigger[j] + prefix, size), file=f, end='') + print(" | \\", file=f) + print("\t(", file=f, end='') + name = trigger_name("scope_" + key.replace('/', '_') + su, size) + if key.endswith("_valid"): + ready_signal = key[:-6] + "_ready" + if ready_signal in toptaps: + rname = trigger_name("scope_" + ready_signal.replace('/', '_') + su, size) + print(name + " && " + rname, file=f, end='') + excluded_list.append(ready_signal) + else: + print(name, file=f, end='') + else: + print(name, file=f, end='') print(")", file=f, end='') i += 1 print("", file=f) @@ -538,69 +682,110 @@ def gen_vl_header(file, taps, triggers): print(footer, file=f) - return all_ports + return toptaps -def gen_cc_header(file, ports): +def gen_cc_header(file, taps): header = ''' -#pragma once\n -struct scope_signal_t { +#pragma once + +struct scope_module_t { + const char* name; + int index; + int parent; +}; + +struct scope_tap_t { int width; const char* name; -};\n -inline constexpr int __clog2(int n) { return (n > 1) ? 
1 + __clog2((n + 1) >> 1) : 0; }\n -static constexpr scope_signal_t scope_signals[] = {''' - - footer = "};" - - def eval_macro(text): - expanded = expand_text(text) - if expanded: - text = expanded - text = text.replace('$clog2', '__clog2') - return text - - def asize_name(asize): - def Q(arr, ss, asize, idx, N): - for i in range(asize[idx]): - tmp = ss + "_" + str(i) + int module; +}; +''' + def flatten_path(paths, sizes): + def Q(arr, ss, idx, N, paths, sizes): + size = sizes[idx] + if size != 0: + for i in range(sizes[idx]): + tmp = ss + ('/' if (ss != '') else '') + tmp += paths[idx] + '_' + str(i) + if (idx + 1) < N: + Q(arr, tmp, idx + 1, N, paths, sizes) + else: + arr.append(tmp) + else: + tmp = ss + ('/' if (ss != '') else '') + tmp += paths[idx] if (idx + 1) < N: - Q(arr, tmp, asize, idx + 1, N) + Q(arr, tmp, idx + 1, N, paths, sizes) else: - arr.append(tmp) + arr.append(tmp) - l = len(asize) - if l == 0: - return [""] arr = [] - Q(arr, "", asize, 0, l) - return arr + Q(arr, "", 0, len(asize), paths, asize) + return arr + + # flatten the taps + fdic = {} + for key in taps: + tap = taps[key] + size = str(tap[0]) + paths = key.split('/') + if (len(paths) > 1): + name = paths.pop(-1) + asize = tap[1] + for ss in flatten_path(paths, asize): + fdic[ss + '/' + name ] = [size, -1] + else: + fdic[key] = [size, -1] + + # generate module dic + mdic = {} + for key in fdic: + paths = key.split('/') + if len(paths) == 1: + continue + paths.pop(-1) + parent = -1 + for path in paths: + if not path in mdic: + index = len(mdic) + mdic[path] = (index, parent) + parent = index + else: + parent = mdic[path][0] + fdic[key][1] = parent with open(file, 'w') as f: print(header, file=f) + + print("static constexpr scope_module_t scope_modules[] = {", file=f) i = 0 - for port in ports: - if port[3]: - continue - name = port[0] - size = eval_macro(str(port[1])) - for ss in asize_name(port[2]): - if i > 0: - print(",", file=f) - print("\t{" + size + ", \"" + name + ss + "\"}", 
file=f, end='') - i += 1 - for port in ports: - if not port[3]: - continue - name = port[0] - size = eval_macro(str(port[1])) - for ss in asize_name(port[2]): - if i > 0: - print(",", file=f) - print("\t{" + size + ", \"" + name + ss + "\"}", file=f, end='') - i += 1 + for key in mdic: + m = mdic[key] + if i > 0: + print(',', file=f) + print("\t{\"" + key + "\", " + str(m[0]) + ", " + str(m[1]) + "}", file=f, end='') + i += 1 print("", file=f) - print(footer, file=f) + print("};", file=f) + + print("", file=f) + print("static constexpr scope_tap_t scope_taps[] = {", file=f) + i = 0 + for key in fdic: + size = fdic[key][0] + parent = fdic[key][1] + paths = key.split('/') + if len(paths) > 1: + name = paths.pop(-1) + else: + name = key + if i > 0: + print(',', file=f) + print("\t{" + size + ", \"" + name + "\", " + str(parent) + "}", file=f, end='') + i += 1 + print("", file=f) + print("};", file=f) def main(): parser = argparse.ArgumentParser(description='Scope headers generator.') @@ -612,7 +797,6 @@ def main(): args = parser.parse_args() print("args=", args) - global parameters global exclude_files global include_dirs global macros @@ -630,13 +814,9 @@ def main(): if "includes" in config: parse_includes(config["includes"]) - - parameters = config["parameters"] - for key in parameters: - parameters[key] = int(eval(expand_text(str(parameters[key])))) - ports = gen_vl_header(args.vl, config["taps"], config["triggers"]) - gen_cc_header(args.cc, ports) + taps = gen_vl_header(args.vl, config["modules"], config["taps"]) + gen_cc_header(args.cc, taps) -if __name__ == "__main__": +if __name__ == '__main__': main() \ No newline at end of file diff --git a/hw/syn/quartus/top/Makefile b/hw/syn/quartus/top/Makefile index 6258682f..544cea65 100644 --- a/hw/syn/quartus/top/Makefile +++ b/hw/syn/quartus/top/Makefile @@ -51,7 +51,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device 
$(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" + quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/yosys/synth.ys b/hw/syn/yosys/synth.ys index 958f0353..f3ac0b0e 100644 --- a/hw/syn/yosys/synth.ys +++ b/hw/syn/yosys/synth.ys @@ -1,17 +1,22 @@ +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_bypass_buffer.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_cam_buffer.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_countones.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_divide.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_encoder_onehot.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_elastic_buffer.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fair_arbiter.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fixed_arbiter.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_queue.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_register.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_stack.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_indexable_queue.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces 
-I../../rtl ../../rtl/libs/VX_index_queue.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_matrix_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_mult.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_multiplier.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_onehot_encooder.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_priority_encoder.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_rr_arbiter.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_scope.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_serial_div.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_shift_register.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_skid_buffer.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank_core_req_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache.v @@ -20,114 +25,72 @@ read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I.. 
read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_fill_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_req_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_miss_resrv.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_prefetcher.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_forwarder.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_rsp_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_access.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_structure.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_backend_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_rsp_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_store.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_alu_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_ctl_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_rsp_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl 
../../rtl/interfaces/VX_cache_dram_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_rsp_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_rsp_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cmt_to_csr_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_rsp_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exec_unit_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_read_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_inst_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_inst_meta_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_jal_rsp_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_to_issue_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_decode_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exu_to_cmt_if.v +read_verilog -sv -I../../rtl/libs 
-I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_to_cmt_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_fpu_to_csr_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_rsp_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_ifetch_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_ifetch_rsp_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_join_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_lsu_req_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_mul_req_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_warp_ctl_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wb_if.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_writeback_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wstall_if.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_alu_unit.v -read_verilog -sv -I../../rtl/libs 
-I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_back_end.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_cluster.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_commit.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_core.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_data.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_io_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_pipe.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_d_e_reg.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_csr_unit.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_dcache_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_decode.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_exec_unit.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_f_d_reg.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_execute.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_fetch.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_front_end.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_fpu_unit.v +read_verilog -sv -I../../rtl/libs 
-I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_bypass.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_fp_ctrl.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_ram.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_stage.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpr_wrapper.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpu_inst.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_i_d_reg.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_gpu_unit.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_ibuffer.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_icache_stage.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_inst_multiplex.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_instr_demux.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_io_arb.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_ipdom_stack.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_issue.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_lsu_unit.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mem_arb.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mem_unit.v 
+read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_mul_unit.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_pipeline.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_scheduler.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_user_config.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_warp.v +read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_scoreboard.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_warp_sched.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/VX_writeback.v read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/Vortex.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_bank_core_req_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_core_req_bank_sel.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_core_rsp_merge.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_fill_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_cache_dram_req_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl 
../../rtl/cache/VX_cache_miss_resrv.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_prefetcher.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_forwarder.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_snp_rsp_arb.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_access.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/cache/VX_tag_data_structure.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_backend_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_branch_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_core_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_dram_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_cache_snp_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_io_rsp_if.v -read_verilog -sv -I../../rtl/libs 
-I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_csr_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_exec_unit_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpr_read_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_gpu_inst_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_inst_meta_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_jal_rsp_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_join_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_lsu_req_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_warp_ctl_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wb_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/interfaces/VX_wstall_if.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_countones.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_divide.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_encoder_onehot.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fair_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_fixed_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache 
-I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_queue.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_register.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_generic_stack.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_indexable_queue.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_matrix_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_mult.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_priority_encoder.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_rr_arbiter.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/libs/VX_scope.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/tex_unit/VX_tex_mgr.v -read_verilog -sv -I../../rtl/libs -I../../rtl/cache -I../../rtl/interfaces -I../../rtl ../../rtl/tex_unit/VX_tex_unit.v hierarchy -check -top Vortex add -global_input reset 1 proc -global_arst reset From 4bfc4ee78ffbf8201042741c84ba5de3200c330e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Oct 2020 08:44:55 -0700 Subject: [PATCH 08/19] scope fixes --- driver/opae/vlsim/Makefile | 18 +++++++++++------- hw/opae/sources_1c.txt | 2 +- hw/opae/vortex_afu.sv | 2 +- hw/rtl/VX_cluster.v | 2 +- hw/rtl/VX_core.v | 4 ++-- hw/rtl/VX_execute.v | 4 ++-- hw/rtl/VX_fetch.v | 4 ++-- hw/rtl/VX_mem_unit.v | 6 +++--- hw/rtl/VX_pipeline.v | 6 +++--- hw/rtl/VX_scope.vh | 34 +++++++++++++++++++++------------- hw/rtl/Vortex.v | 2 +- hw/scripts/scope.json | 2 +- hw/scripts/scope.py | 12 +++++++----- 13 files changed, 56 insertions(+), 42 
deletions(-) diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index f6d26d1c..e3b52ec7 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -20,8 +20,8 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 #DEBUG=1 SCOPE=1 @@ -39,6 +39,8 @@ TOP = vortex_afu_shim RTL_DIR=../../../hw/rtl +SCRIPT_DIR=../../../hw/scripts + SRCS = fpga.cpp opae_sim.cpp SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp @@ -67,7 +69,7 @@ endif ifdef SCOPE VL_FLAGS += -DSCOPE CFLAGS += -DSCOPE - SCOPE_CFG = scope + SCOPE_VH = $(RTL_DIR)/scope-defs.vh endif VL_FLAGS += -DNOPAE @@ -80,12 +82,14 @@ PROJECT = libopae-c-vlsim.so all: $(PROJECT) # generate scope data -scope: ../../../hw/scripts/scope.json - ../../../hw/scripts/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc ../scope-defs.h -vl ../../../hw/rtl/scope-defs.vh ../../../hw/scripts/scope.json +scope: $(RTL_DIR)/scope-defs.vh + +$(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json + $(SCRIPT_DIR)/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc ../scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json -$(PROJECT): $(SRCS) $(SCOPE_CFG) +$(PROJECT): $(SRCS) $(SCOPE_VH) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk clean: - rm -rf $(PROJECT) obj_dir ../scope-defs.h ../../../hw/rtl/scope-defs.vh + rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh diff --git a/hw/opae/sources_1c.txt b/hw/opae/sources_1c.txt index 897468c2..5c63e9cd 100644 --- a/hw/opae/sources_1c.txt +++ b/hw/opae/sources_1c.txt @@ -3,7 +3,7 @@ +define+SYNTHESIS +define+QUARTUS 
+define+FPU_FAST -#+define+SCOPE ++define+SCOPE #+define+DBG_PRINT_CORE_ICACHE #+define+DBG_PRINT_CORE_DCACHE diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 3faca262..fff69538 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -954,7 +954,7 @@ end assign cmd_run_done = !vx_busy; Vortex #() vortex ( - `SCOPE_BIND_vortex_afu_vortex() + `SCOPE_BIND_vortex_afu_vortex .clk (clk), .reset (reset | vx_reset), diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 5f69a8e2..6c899096 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -372,7 +372,7 @@ module VX_cluster #( .SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH), .SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH) ) l2cache ( - `SCOPE_BIND_VX_cluster_l2cache() + `SCOPE_BIND_VX_cluster_l2cache .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index f47eabd8..aa1032a8 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -175,7 +175,7 @@ module VX_core #( VX_pipeline #( .CORE_ID(CORE_ID) ) pipeline ( - `SCOPE_BIND_VX_core_pipeline() + `SCOPE_BIND_VX_core_pipeline .clk(clk), .reset(reset), @@ -251,7 +251,7 @@ module VX_core #( VX_mem_unit #( .CORE_ID(CORE_ID) ) mem_unit ( - `SCOPE_BIND_VX_core_mem_unit() + `SCOPE_BIND_VX_core_mem_unit .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index e56e583c..9586bcd6 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -54,7 +54,7 @@ module VX_execute #( VX_lsu_unit #( .CORE_ID(CORE_ID) ) lsu_unit ( - `SCOPE_BIND_VX_execute_lsu_unit() + `SCOPE_BIND_VX_execute_lsu_unit .clk (clk), .reset (reset), .dcache_req_if (dcache_req_if), @@ -121,7 +121,7 @@ module VX_execute #( VX_gpu_unit #( .CORE_ID(CORE_ID) ) gpu_unit ( - `SCOPE_BIND_VX_execute_gpu_unit() + `SCOPE_BIND_VX_execute_gpu_unit .clk (clk), .reset (reset), .gpu_req_if (gpu_req_if), diff --git a/hw/rtl/VX_fetch.v b/hw/rtl/VX_fetch.v index 1d304ffc..fd3b1fdc 100644 --- a/hw/rtl/VX_fetch.v +++ b/hw/rtl/VX_fetch.v @@ -29,7 +29,7 @@ module 
VX_fetch #( VX_warp_sched #( .CORE_ID(CORE_ID) ) warp_sched ( - `SCOPE_BIND_VX_fetch_warp_sched() + `SCOPE_BIND_VX_fetch_warp_sched .clk (clk), .reset (reset), @@ -45,7 +45,7 @@ module VX_fetch #( VX_icache_stage #( .CORE_ID(CORE_ID) ) icache_stage ( - `SCOPE_BIND_VX_fetch_icache_stage() + `SCOPE_BIND_VX_fetch_icache_stage .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index ae8fddde..d7a4ffd7 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -77,7 +77,7 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) ) smem ( - `SCOPE_BIND_VX_mem_unit_smem() + `SCOPE_BIND_VX_mem_unit_smem .clk (clk), .reset (reset), @@ -159,7 +159,7 @@ module VX_mem_unit # ( .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), .SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH) ) dcache ( - `SCOPE_BIND_VX_mem_unit_dcache() + `SCOPE_BIND_VX_mem_unit_dcache .clk (clk), .reset (reset), @@ -240,7 +240,7 @@ module VX_mem_unit # ( .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) ) icache ( - `SCOPE_BIND_VX_mem_unit_icache() + `SCOPE_BIND_VX_mem_unit_icache .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 86cd7003..b6a5444c 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -123,7 +123,7 @@ module VX_pipeline #( VX_fetch #( .CORE_ID(CORE_ID) ) fetch ( - `SCOPE_BIND_VX_pipeline_fetch() + `SCOPE_BIND_VX_pipeline_fetch .clk (clk), .reset (reset), .icache_req_if (core_icache_req_if), @@ -150,7 +150,7 @@ module VX_pipeline #( VX_issue #( .CORE_ID(CORE_ID) ) issue ( - `SCOPE_BIND_VX_pipeline_issue() + `SCOPE_BIND_VX_pipeline_issue .clk (clk), .reset (reset), @@ -170,7 +170,7 @@ module VX_pipeline #( VX_execute #( .CORE_ID(CORE_ID) ) execute ( - `SCOPE_BIND_VX_pipeline_execute() + `SCOPE_BIND_VX_pipeline_execute .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 2c007e33..6b58e1a2 100644 --- a/hw/rtl/VX_scope.vh +++ 
b/hw/rtl/VX_scope.vh @@ -15,15 +15,19 @@ `define SCOPE_IO_VX_fetch -`define SCOPE_BIND_VX_fetch_icache_stage() +`define SCOPE_BIND_VX_fetch_icache_stage + +`define SCOPE_BIND_VX_fetch_warp_sched + +`define SCOPE_IO_VX_warp_sched `define SCOPE_IO_VX_pipeline -`define SCOPE_BIND_VX_pipeline_fetch() +`define SCOPE_BIND_VX_pipeline_fetch `define SCOPE_IO_VX_core -`define SCOPE_BIND_VX_core_pipeline() +`define SCOPE_BIND_VX_core_pipeline `define SCOPE_IO_VX_cluster @@ -33,19 +37,23 @@ `define SCOPE_BIND_Vortex_cluster(__i__) -`define SCOPE_BIND_vortex_afu_vortex() +`define SCOPE_BIND_vortex_afu_vortex `define SCOPE_IO_VX_lsu_unit +`define SCOPE_IO_VX_gpu_unit + `define SCOPE_IO_VX_execute -`define SCOPE_BIND_VX_execute_lsu_unit() +`define SCOPE_BIND_VX_execute_lsu_unit -`define SCOPE_BIND_VX_pipeline_execute() +`define SCOPE_BIND_VX_execute_gpu_unit + +`define SCOPE_BIND_VX_pipeline_execute `define SCOPE_IO_VX_issue -`define SCOPE_BIND_VX_pipeline_issue() +`define SCOPE_BIND_VX_pipeline_issue `define SCOPE_IO_VX_bank @@ -53,19 +61,19 @@ `define SCOPE_BIND_VX_cache_bank(__i__) -`define SCOPE_BIND_Vortex_l3cache() +`define SCOPE_BIND_Vortex_l3cache -`define SCOPE_BIND_VX_cluster_l2cache() +`define SCOPE_BIND_VX_cluster_l2cache `define SCOPE_IO_VX_mem_unit -`define SCOPE_BIND_VX_mem_unit_dcache() +`define SCOPE_BIND_VX_mem_unit_dcache -`define SCOPE_BIND_VX_core_mem_unit() +`define SCOPE_BIND_VX_core_mem_unit -`define SCOPE_BIND_VX_mem_unit_icache() +`define SCOPE_BIND_VX_mem_unit_icache -`define SCOPE_BIND_VX_mem_unit_smem() +`define SCOPE_BIND_VX_mem_unit_smem `define SCOPE_DECL_SIGNALS diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 5e422ed5..914a9232 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -395,7 +395,7 @@ module Vortex ( .SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH), .SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH) ) l3cache ( - `SCOPE_BIND_Vortex_l3cache() + `SCOPE_BIND_Vortex_l3cache .clk (clk), .reset (reset), diff --git a/hw/scripts/scope.json 
b/hw/scripts/scope.json index 8b0ae92b..734fbfb9 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -32,7 +32,7 @@ }, "VX_core": { "submodules": { - "pipeline": {"type":"VX_pipeline", "enabled":false}, + "pipeline": {"type":"VX_pipeline", "enabled":true}, "mem_unit": {"type":"VX_mem_unit", "enabled":true} } }, diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 9bedc02c..3cc189a4 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -527,7 +527,7 @@ def gen_vl_header(file, modules, taps): ports[nn] = pp if (0 == scount): - nn = "SCOPE_BIND_" + ntype + '_' + spath + "()" + nn = "SCOPE_BIND_" + ntype + '_' + spath pp = create_signal(nn, ports) for st in new_staps: if e: @@ -746,13 +746,15 @@ struct scope_tap_t { continue paths.pop(-1) parent = -1 + mk = "" for path in paths: - if not path in mdic: + mk += '/' + path + if not mk in mdic: index = len(mdic) - mdic[path] = (index, parent) + mdic[mk] = (path, index, parent) parent = index else: - parent = mdic[path][0] + parent = mdic[mk][1] fdic[key][1] = parent with open(file, 'w') as f: @@ -764,7 +766,7 @@ struct scope_tap_t { m = mdic[key] if i > 0: print(',', file=f) - print("\t{\"" + key + "\", " + str(m[0]) + ", " + str(m[1]) + "}", file=f, end='') + print("\t{\"" + m[0] + "\", " + str(m[1]) + ", " + str(m[2]) + "}", file=f, end='') i += 1 print("", file=f) print("};", file=f) From 58b8e82908028c32b7b77e13f267cf8ca84a0869 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Oct 2020 17:09:22 -0400 Subject: [PATCH 09/19] scope fixes ... 
--- driver/opae/vx_scope.cpp | 29 ++++++++----- hw/opae/vortex_afu.sv | 4 +- hw/rtl/VX_scope.vh | 4 +- hw/scripts/scope.json | 24 ++++------ hw/scripts/scope.py | 94 +++++++++++++++++++++------------------- 5 files changed, 79 insertions(+), 76 deletions(-) diff --git a/driver/opae/vx_scope.cpp b/driver/opae/vx_scope.cpp index 536fe25f..337a3f25 100644 --- a/driver/opae/vx_scope.cpp +++ b/driver/opae/vx_scope.cpp @@ -44,10 +44,10 @@ static constexpr int num_modules = sizeof(scope_modules) / sizeof(scope_module_t); -static constexpr int num_signals = sizeof(scope_taps) / sizeof(scope_tap_t); +static constexpr int num_taps = sizeof(scope_taps) / sizeof(scope_tap_t); constexpr int calcFrameWidth(int index = 0) { - return (index < num_signals) ? (scope_taps[index].width + calcFrameWidth(index + 1)) : 0; + return (index < num_taps) ? (scope_taps[index].width + calcFrameWidth(index + 1)) : 0; } static constexpr int fwidth = calcFrameWidth(); @@ -77,13 +77,12 @@ uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) { return timestamp; } -void dump_taps(std::ofstream& ofs, int module) { - int i = 1; - for (auto& tap : scope_taps) { +void dump_taps(std::ofstream& ofs, int module) { + for (int i = 0; i < num_taps; ++i) { + auto& tap = scope_taps[i]; if (tap.module != module) continue; - ofs << "$var reg " << tap.width << " " << i << " " << tap.name << " $end" << std::endl; - i += 1; + ofs << "$var reg " << tap.width << " " << (i + 1) << " " << tap.name << " $end" << std::endl; } } @@ -91,10 +90,16 @@ void dump_module(std::ofstream& ofs, int parent) { for (auto& module : scope_modules) { if (module.parent != parent) continue; - ofs << "$scope module " << module.name << " $end" << std::endl; + if (module.name[0] == '*') { + ofs << "$var reg 1 0 clk $end" << std::endl; + } else { + ofs << "$scope module " << module.name << " $end" << std::endl; + } dump_module(ofs, module.index); dump_taps(ofs, module.index); - ofs << "$upscope $end" << std::endl; + if 
(module.name[0] != '*') { + ofs << "$upscope $end" << std::endl; + } } } @@ -138,7 +143,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { ofs << "$version Generated by Vortex Scope $end" << std::endl; ofs << "$timescale 1 ns $end" << std::endl; ofs << "$scope module TOP $end" << std::endl; - ofs << "$var reg 1 0 clk $end" << std::endl; + dump_module(ofs, -1); dump_taps(ofs, -1); ofs << "$upscope $end" << std::endl; @@ -187,7 +192,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { // print clock header CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta)); timestamp = print_clock(ofs, offset + delta + 2, timestamp); - signal_id = num_signals; + signal_id = num_taps; std::vector signal_data(frame_width+1); @@ -229,7 +234,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { // print clock header CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta)); timestamp = print_clock(ofs, delta + 1, timestamp); - signal_id = num_signals; + signal_id = num_taps; if (0 == (frame_no % 100)) { std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl; } diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index fff69538..0c76eedb 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -954,7 +954,7 @@ end assign cmd_run_done = !vx_busy; Vortex #() vortex ( - `SCOPE_BIND_vortex_afu_vortex + `SCOPE_BIND_top_vortex .clk (clk), .reset (reset | vx_reset), @@ -1066,7 +1066,7 @@ wire scope_changed = `SCOPE_TRIGGER; VX_scope #( .DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})), .BUSW (64), - .SIZE (4096), + .SIZE (100), .UPDW ($bits({`SCOPE_UPDATE_LIST})) ) scope ( .clk (clk), diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 6b58e1a2..c4552d21 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -9,8 +9,6 @@ `else -`define SCOPE_IO_vortex_afu - `define SCOPE_IO_VX_icache_stage `define SCOPE_IO_VX_fetch @@ -37,7 +35,7 @@ `define SCOPE_BIND_Vortex_cluster(__i__) -`define 
SCOPE_BIND_vortex_afu_vortex +`define SCOPE_BIND_top_vortex `define SCOPE_IO_VX_lsu_unit diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index 734fbfb9..da7cfe73 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -7,13 +7,7 @@ "../rtl/cache/VX_cache_config.vh" ], "modules": { - "*": { - "enabled": "(`NUM_CLUSTERS > 0)", - "submodules": { - "afu": {"type":"vortex_afu"} - } - }, - "vortex_afu": { + "top": { "submodules": { "vortex": {"type":"Vortex"} } @@ -26,7 +20,7 @@ }, "VX_cluster": { "submodules": { - "core": {"type":"VX_core", "count":"`NUM_CORES"}, + "core": {"type":"VX_core", "count":"`NUM_CORES", "enabled":true}, "l2cache": {"type":"VX_cache", "enabled":"`L2_ENABLE", "params":{"NUM_BANKS":"`L2NUM_BANKS"}} } }, @@ -79,7 +73,7 @@ "VX_bank": {} }, "taps": { - "afu": { + "top": { "!reset": 1, "?dram_req_valid": 1, "dram_req_addr": 32, @@ -102,7 +96,7 @@ "?snp_rsp_ready": 1, "busy": 1 }, - "afu/vortex/cluster/core/pipeline/fetch/icache_stage": { + "top/vortex/cluster/core/pipeline/fetch/icache_stage": { "?icache_req_valid": 1, "icache_req_wid":"`NW_BITS", "icache_req_addr": 32, @@ -113,7 +107,7 @@ "icache_rsp_tag":"`ICORE_TAG_ID_BITS", "?icache_rsp_ready": 1 }, - "afu/vortex/cluster/core/pipeline/fetch/warp_sched": { + "top/vortex/cluster/core/pipeline/fetch/warp_sched": { "?wsched_scheduled_warp": 1, "wsched_active_warps": "`NUM_WARPS", "wsched_schedule_table": "`NUM_WARPS", @@ -121,7 +115,7 @@ "wsched_warp_to_schedule": "`NW_BITS", "wsched_warp_pc": "32" }, - "afu/vortex/cluster/core/pipeline/execute/gpu_unit": { + "top/vortex/cluster/core/pipeline/execute/gpu_unit": { "?gpu_req_valid": 1, "gpu_req_wid": "`NW_BITS", "gpu_req_tmask": "`NUM_THREADS", @@ -136,7 +130,7 @@ "gpu_rsp_split": "`GPU_SPLIT_SIZE", "gpu_rsp_barrier": "`GPU_BARRIER_SIZE" }, - "afu/vortex/cluster/core/pipeline/execute/lsu_unit": { + "top/vortex/cluster/core/pipeline/execute/lsu_unit": { "?dcache_req_valid":"`NUM_THREADS", "dcache_req_wid":"`NW_BITS", 
"dcache_req_pc": 32, @@ -151,7 +145,7 @@ "dcache_rsp_tag":"`DCORE_TAG_ID_BITS", "?dcache_rsp_ready": 1 }, - "afu/vortex/cluster/core/pipeline/issue": { + "top/vortex/cluster/core/pipeline/issue": { "?issue_valid": 1, "issue_wid":"`NW_BITS", "issue_tmask":"`NUM_THREADS", @@ -183,7 +177,7 @@ "!scoreboard_delay": 1, "!execute_delay": 1 }, - "afu/vortex/l3cache/bank, afu/vortex/cluster/l2cache/bank, afu/vortex/cluster/core/mem_unit/dcache/bank, afu/vortex/cluster/core/mem_unit/icache/bank, afu/vortex/cluster/core/mem_unit/smem/bank": { + "top/vortex/l3cache/bank, top/vortex/cluster/l2cache/bank, top/vortex/cluster/core/mem_unit/dcache/bank, top/vortex/cluster/core/mem_unit/icache/bank, top/vortex/cluster/core/mem_unit/smem/bank": { "?valid_st0": 1, "?valid_st1": 1, "?valid_st2": 1, diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 3cc189a4..8be8381f 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -465,23 +465,24 @@ def gen_vl_header(file, modules, taps): return arr - def visit_path(alltaps, ports, path, node, paths, modules, taps): - ntype = node["type"] - - enabled = True - if "enabled" in node: - enabled = eval_node(node["enabled"], None) - + def visit_path(alltaps, ports, ntype, paths, modules, taps): curtaps = {} if (len(paths) != 0): spath = paths.pop(0) snodes = modules[ntype]["submodules"] if not spath in snodes: - raise Exception("invalid path: " + spath + " in " + path) + raise Exception("invalid path: " + spath + " in " + ntype) + snode = snodes[spath] - subtaps = visit_path(alltaps, ports, spath, snode, paths, modules, taps) + stype = snode["type"] + + enabled = True + if "enabled" in snode: + enabled = eval_node(snode["enabled"], None) + + subtaps = visit_path(alltaps, ports, stype, paths, modules, taps) scount = 0 if "count" in snode: @@ -495,20 +496,12 @@ def gen_vl_header(file, modules, taps): nn = "SCOPE_IO_" + ntype pp = create_signal(nn, ports) + for key in subtaps: subtap = subtaps[key] s = subtap[0] a = subtap[1] - t = 
subtap[2] - e = subtap[3] - - s = eval_node(s, params) - - e = eval_node(e, params) - if type(e) == str or type(enabled) == str: - me = str(e) + " and " + str(enabled) - else: - me = e and enabled + t = subtap[2] aa = [scount] sa = signal_size(scount, 0) @@ -518,9 +511,9 @@ def gen_vl_header(file, modules, taps): aa.append(x) sa += signal_size(x, 0) - if dic_insert(alltaps, curtaps, spath + '/' + key, (s, aa, t, me), e): + if dic_insert(alltaps, curtaps, spath + '/' + key, (s, aa, t), enabled): skey = key.replace('/', '_') - if e: + if enabled: pp.append("\toutput wire" + sa + signal_size(s, 1) + " scope_" + spath + '_' + skey + ',') new_staps.append(skey) @@ -529,24 +522,29 @@ def gen_vl_header(file, modules, taps): if (0 == scount): nn = "SCOPE_BIND_" + ntype + '_' + spath pp = create_signal(nn, ports) + for st in new_staps: - if e: + if enabled: pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "),") else: pp.append("\t`UNUSED_PIN (scope_" + st + "),") + ports[nn] = pp else: nn = "SCOPE_BIND_" + ntype + '_' + spath + "(__i__)" pp = create_signal(nn, ports) + for st in new_staps: - if e: + if enabled: pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "[__i__]),") else: pp.append("\t`UNUSED_PIN (scope_" + st + "),") + ports[nn] = pp else: nn = "SCOPE_IO_" + ntype pp = create_signal(nn, ports) + for tk in taps: trigger = 0 name = tk @@ -557,7 +555,7 @@ def gen_vl_header(file, modules, taps): elif name[0] == '?': name = name[1:] trigger = 2 - if dic_insert(alltaps, curtaps, name, (size, None, trigger, enabled), True): + if dic_insert(alltaps, curtaps, name, (size, None, trigger), True): pp.append("\toutput wire" + signal_size(size, 1) + " scope_" + name + ',') ports[nn] = pp @@ -568,9 +566,6 @@ def gen_vl_header(file, modules, taps): with open(file, 'w') as f: - top = modules['*'] - snodes = top["submodules"] - ports = {} alltaps = {} @@ -580,11 +575,8 @@ def gen_vl_header(file, modules, taps): for skey in skey_list: print('processing node: ' + 
skey + ' ...') paths = skey.strip().split('/') - spath = paths.pop(0) - if not spath in snodes: - raise Exception("invalid path: " + spath) - snode = snodes[spath] - curtaps = visit_path(alltaps, ports, spath, snode, paths, modules, _taps) + ntype = paths.pop(0) + curtaps = visit_path(alltaps, ports, ntype, paths, modules, _taps) for tk in curtaps: toptaps[tk] = curtaps[tk] @@ -603,19 +595,13 @@ def gen_vl_header(file, modules, taps): name = key.replace('/', '_') size = tap[0] asize = tap[1] - enabled = tap[3] sa = "" if asize: for a in asize: sa += signal_size(a, 0) if i > 0: print(" \\", file=f) - if not enabled: - print("`IGNORE_WARNINGS_BEGIN \\", file=f) - print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + '; \\', file=f) - print("`IGNORE_WARNINGS_END", file=f, end='') - else: - print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + ';', file=f, end='') + print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + ';', file=f, end='') i += 1 print("", file=f) print("", file=f) @@ -624,7 +610,8 @@ def gen_vl_header(file, modules, taps): i = 0 for key in toptaps: tap = toptaps[key] - if tap[2] != 0: + trigger = tap[2] + if trigger != 0: continue name = key.replace('/', '_') if i > 0: @@ -638,7 +625,8 @@ def gen_vl_header(file, modules, taps): i = 0 for key in toptaps: tap = toptaps[key] - if tap[2] == 0: + trigger = tap[2] + if trigger == 0: continue name = key.replace('/', '_') if i > 0: @@ -729,23 +717,41 @@ struct scope_tap_t { for key in taps: tap = taps[key] size = str(tap[0]) + trigger = tap[2] + if (trigger != 0): + continue paths = key.split('/') if (len(paths) > 1): name = paths.pop(-1) asize = tap[1] for ss in flatten_path(paths, asize): - fdic[ss + '/' + name ] = [size, -1] + fdic[ss + '/' + name ] = [size, 0] else: - fdic[key] = [size, -1] + fdic[key] = [size, 0] + for key in taps: + tap = taps[key] + size = str(tap[0]) + trigger = tap[2] + if (trigger == 0): + continue + paths = key.split('/') + if (len(paths) > 1): + 
name = paths.pop(-1) + asize = tap[1] + for ss in flatten_path(paths, asize): + fdic[ss + '/' + name ] = [size, 0] + else: + fdic[key] = [size, 0] # generate module dic mdic = {} + mdic["*"] = ("*", 0, -1) for key in fdic: paths = key.split('/') if len(paths) == 1: continue paths.pop(-1) - parent = -1 + parent = 0 mk = "" for path in paths: mk += '/' + path From 301cc4574079f40dc370884e2fe10b51e9e75d2d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 14 Oct 2020 09:19:26 -0700 Subject: [PATCH 10/19] scope fixes --- driver/opae/vx_scope.cpp | 2 -- driver/opae/vx_scope.h | 2 +- hw/opae/vortex_afu.sv | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/driver/opae/vx_scope.cpp b/driver/opae/vx_scope.cpp index 337a3f25..55cce38f 100644 --- a/driver/opae/vx_scope.cpp +++ b/driver/opae/vx_scope.cpp @@ -58,8 +58,6 @@ static std::mutex g_timeout_mutex; static void timeout_callback(fpga_handle fpga) { std::this_thread::sleep_for(std::chrono::seconds{60}); - if (!g_timeout_mutex.try_lock()) - return; vx_scope_stop(fpga, HANG_TIMEOUT); fpgaClose(fpga); exit(0); diff --git a/driver/opae/vx_scope.h b/driver/opae/vx_scope.h index 2bb09c4a..edd26cb4 100644 --- a/driver/opae/vx_scope.h +++ b/driver/opae/vx_scope.h @@ -1,6 +1,6 @@ #pragma once -#define HANG_TIMEOUT 60 +//#define HANG_TIMEOUT 60 int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1); diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 0c76eedb..49783361 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -1066,7 +1066,7 @@ wire scope_changed = `SCOPE_TRIGGER; VX_scope #( .DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})), .BUSW (64), - .SIZE (100), + .SIZE (4096), .UPDW ($bits({`SCOPE_UPDATE_LIST})) ) scope ( .clk (clk), From 7529f72c5dde2492a6baba2e5edf17a38c457f87 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 20 Oct 2020 05:32:55 -0700 Subject: [PATCH 11/19] fixed OPAE crash, added custom bram module to controll rw collision, dogfood testcase 
argurment, optimzed buffered fifo, quartus build optimization flags --- driver/opae/Makefile | 10 +- driver/opae/vlsim/Makefile | 8 +- driver/opae/vortex.cpp | 19 +-- driver/tests/dogfood/dogfood.cpp | 8 +- hw/opae/README | 6 +- hw/opae/sources_1c.txt | 2 +- hw/opae/vortex_afu.qsf | 19 ++- hw/opae/vortex_afu.sv | 17 ++- hw/rtl/VX_config.vh | 2 - hw/rtl/VX_decode.v | 2 +- hw/rtl/VX_gpr_fp_ctrl.v | 70 ----------- hw/rtl/VX_gpr_ram.v | 21 ++-- hw/rtl/VX_gpr_stage.v | 98 +++++++++------ hw/rtl/VX_ibuffer.v | 41 +++--- hw/rtl/VX_icache_stage.v | 2 +- hw/rtl/VX_platform.vh | 10 +- hw/rtl/cache/VX_bank.v | 2 + hw/rtl/cache/VX_cache_miss_resrv.v | 1 + hw/rtl/cache/VX_tag_data_store.v | 31 +++-- hw/rtl/libs/VX_dp_ram.v | 117 ++++++++++++++++++ hw/rtl/libs/VX_generic_queue.v | 192 ++++++++++++----------------- hw/syn/quartus/project.tcl | 10 +- 22 files changed, 388 insertions(+), 300 deletions(-) delete mode 100644 hw/rtl/VX_gpr_fp_ctrl.v create mode 100644 hw/rtl/libs/VX_dp_ram.v diff --git a/driver/opae/Makefile b/driver/opae/Makefile index 09ab5d79..17572e34 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -7,7 +7,7 @@ CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw LDFLAGS += -L$(OPAE_HOME)/lib -SCOPE=1 +#SCOPE=1 # stack execution protection LDFLAGS +=-z noexecstack @@ -32,8 +32,6 @@ ASE_LIBS += -luuid -lopae-c-ase VLSIM_LIBS += -lopae-c-vlsim -LIB_DIR=../lib - ASE_DIR = ase VLSIM_DIR = vlsim @@ -67,10 +65,10 @@ fpga: $(SRCS) asesim: $(SRCS) $(ASE_DIR) $(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE) -vlsim: $(SRCS) opae-vlsim - $(CXX) $(CXXFLAGS) -L./vlsim -DUSE_VLSIM $(SRCS) $(LDFLAGS) $(VLSIM_LIBS) -o $(PROJECT_VLSIM) +vlsim: $(SRCS) vlsim-hw + $(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM) -opae-vlsim: +vlsim-hw: $(SET_SCOPE) $(MAKE) -C vlsim vortex.o: vortex.cpp diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index e3b52ec7..251850cc 100644 --- 
a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -15,8 +15,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE -DBG_FLAGS += $(DBG_PRINT_FLAGS) -DBG_FLAGS += -DDBG_CORE_REQ_INFO +#DBG_FLAGS += $(DBG_PRINT_FLAGS) +#DBG_FLAGS += -DDBG_CORE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 @@ -72,9 +72,13 @@ ifdef SCOPE SCOPE_VH = $(RTL_DIR)/scope-defs.vh endif +# use our OPAE shim VL_FLAGS += -DNOPAE CFLAGS += -DNOPAE +# use DPI FPU +VL_FLAGS += -DFPU_FAST + RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip PROJECT = libopae-c-vlsim.so diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 703aca7c..bc57aa39 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -31,7 +31,7 @@ fpga_result res = _expr; \ if (res == FPGA_OK) \ break; \ - printf("OPAE Error: '%s' returned %d, %s!\n", \ + printf("[VXDRV] Error: '%s' returned %d, %s!\n", \ #_expr, (int)res, fpgaErrStr(res)); \ return -1; \ } while (false) @@ -118,7 +118,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) { *value = STARTUP_ADDR; break; default: - fprintf(stderr, "invalid caps id: %d\n", caps_id); + fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); std::abort(); return -1; } @@ -156,7 +156,7 @@ extern int vx_dev_open(vx_device_h* hdevice) { fpgaDestroyProperties(&filter); if (num_matches < 1) { - fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID); + fprintf(stderr, "[VXDRV] Error: accelerator %s not found!\n", AFU_ACCEL_UUID); return -1; } @@ -197,9 +197,10 @@ extern int vx_dev_open(vx_device_h* hdevice) { fpgaClose(accel_handle); return ret; } - - fprintf(stdout, "DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n", + #ifndef NDEBUG + fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n", 
device->implementation_id, device->num_cores, device->num_warps, device->num_threads); + #endif } #ifdef SCOPE @@ -236,18 +237,18 @@ extern int vx_dev_close(vx_device_h hdevice) { int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles); assert(ret == 0); float IPC = (float)(double(instrs) / double(cycles)); - fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC); + fprintf(stdout, "[VXDRV] PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC); total_instrs += instrs; total_cycles = std::max(total_cycles, cycles); } float IPC = (float)(double(total_instrs) / double(total_cycles)); - fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC); + fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC); } else { uint64_t instrs, cycles; int ret = vx_get_perf(hdevice, 0, &instrs, &cycles); float IPC = (float)(double(instrs) / double(cycles)); assert(ret == 0); - fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC); + fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC); } #endif @@ -373,7 +374,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data)); if (0 == data || 0 == timeout) { if (data != 0) { - fprintf(stdout, "ready-wait timed out: status=%ld\n", data); + fprintf(stdout, "[VXDRV] ready-wait timed out: status=%ld\n", data); } break; } diff --git a/driver/tests/dogfood/dogfood.cpp b/driver/tests/dogfood/dogfood.cpp index 3cbd5fea..5d2e6016 100644 --- a/driver/tests/dogfood/dogfood.cpp +++ b/driver/tests/dogfood/dogfood.cpp @@ -90,16 +90,20 @@ vx_buffer_h dst_buf = nullptr; static void show_usage() { std::cout << "Vortex Driver Test." 
<< std::endl; - std::cout << "Usage: [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl; + std::cout << "Usage: [-t:testid] [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl; } static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:s:e:k:ch?")) != -1) { + while ((c = getopt(argc, argv, "n:t:s:e:k:ch?")) != -1) { switch (c) { case 'n': count = atoi(optarg); break; + case 't': + testid_s = atoi(optarg); + testid_e = atoi(optarg); + break; case 's': testid_s = atoi(optarg); break; diff --git a/hw/opae/README b/hw/opae/README index 5765123b..84e08e88 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -60,9 +60,9 @@ qsub-sim make ase # tests -./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -t1 -n1 +./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n16 ./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16 -./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n1 -s4 -e4 +./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n16 ./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd # modify "vsim_run.tcl" to dump VCD trace @@ -97,7 +97,7 @@ kill -9 # fixing device resource busy issue when deleting /build_ase_1c/ lsof +D build_ase_1c -# quick off cache synthesis +# quick off synthesis make -C pipeline clean && make -C pipeline > pipeline/build.log 2>&1 & make -C cache clean && make -C cache > cache/build.log 2>&1 & make -C core clean && make -C core > core/build.log 2>&1 & diff --git a/hw/opae/sources_1c.txt b/hw/opae/sources_1c.txt index 5c63e9cd..897468c2 100644 --- a/hw/opae/sources_1c.txt +++ b/hw/opae/sources_1c.txt @@ -3,7 +3,7 @@ +define+SYNTHESIS +define+QUARTUS +define+FPU_FAST -+define+SCOPE +#+define+SCOPE #+define+DBG_PRINT_CORE_ICACHE #+define+DBG_PRINT_CORE_DCACHE diff --git a/hw/opae/vortex_afu.qsf b/hw/opae/vortex_afu.qsf index 1356ecb4..f515b639 100644 --- a/hw/opae/vortex_afu.qsf +++ b/hw/opae/vortex_afu.qsf @@ -6,4 
+6,21 @@ set_global_assignment -name VERILOG_MACRO QUARTUS set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 -set_global_assignment -name VERILOG_MACRO FPU_FAST \ No newline at end of file +set_global_assignment -name VERILOG_MACRO FPU_FAST + +set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 +set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 +set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" +set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON +set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON +set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON +set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON +set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON +set_global_assignment -name POWER_USE_TA_VALUE 65 +set_global_assignment -name SEED 1 +set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON +set_global_assignment -name FITTER_EFFORT "STANDARD FIT" +set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" +set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED +set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM +set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" \ No newline at end of file diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 49783361..e23c4caf 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -175,8 +175,9 @@ logic [31:0] cmd_csr_wdata; // MMIO controller //////////////////////////////////////////////////////////// `IGNORE_WARNINGS_BEGIN -t_ccip_c0_ReqMmioHdr mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); +t_ccip_c0_ReqMmioHdr mmio_hdr; `IGNORE_WARNINGS_END +assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); `STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!")) @@ -204,9 +205,20 @@ wire [2:0] cmd_type = 
(cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hd reg scope_start; `endif +// disable assertions until reset +`ifndef VERILATOR +initial begin + $assertoff; +end +`endif + always_ff @(posedge clk) begin if (reset) begin + `ifndef VERILATOR + $asserton; // enable assertions + `endif + mmio_tx.hdr <= 0; mmio_tx.data <= 0; mmio_tx.mmioRdValid <= 0; @@ -324,7 +336,8 @@ begin end `endif default: begin - `ifdef DBG_PRINT_OPAE + mmio_tx.data <= 64'h0; + `ifdef DBG_PRINT_OPAE $display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address); `endif end diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 26470373..9f4ff5f7 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -59,8 +59,6 @@ `define EXT_F_ENABLE `endif -//`define FPU_FAST - // Device identification `define VENDOR_ID 0 `define ARCHITECTURE_ID 0 diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 17c3d7dd..77a73b9f 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -347,7 +347,7 @@ module VX_decode #( assign decode_if.rd = rd; assign decode_if.rs1 = rs1_qual; assign decode_if.rs2 = rs2; - assign decode_if.rs3 = rs3; + assign decode_if.rs3 = 0; `endif assign decode_if.use_rs3 = use_rs3; diff --git a/hw/rtl/VX_gpr_fp_ctrl.v b/hw/rtl/VX_gpr_fp_ctrl.v deleted file mode 100644 index c40df875..00000000 --- a/hw/rtl/VX_gpr_fp_ctrl.v +++ /dev/null @@ -1,70 +0,0 @@ -`include "VX_define.vh" - -// control module to support multi-cycle read for fp register - -module VX_gpr_fp_ctrl ( - input wire clk, - input wire reset, - - input wire [`NUM_THREADS-1:0][31:0] rs1_data, - input wire [`NUM_THREADS-1:0][31:0] rs2_data, - VX_gpr_req_if gpr_req_if, - - // outputs - output wire [`NW_BITS+`NR_BITS-1:0] raddr1, - VX_gpr_rsp_if gpr_rsp_if -); - - reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data, rsp_rs3_data; - reg rsp_valid; - reg [31:0] rsp_pc; - reg [`NW_BITS-1:0] rsp_wid; - reg read_rs1; - - wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && read_rs1; - wire read_fire = 
gpr_req_if.valid && gpr_rsp_if.ready; - - always @(posedge clk) begin - if (reset) begin - rsp_valid <= 0; - rsp_pc <= 0; - rsp_rs1_data <= 0; - rsp_rs2_data <= 0; - rsp_rs3_data <= 0; - rsp_wid <= 0; - read_rs1 <= 1; - end else begin - if (rs3_delay) begin - read_rs1 <= 0; - rsp_wid <= gpr_req_if.wid; - end else if (read_fire) begin - read_rs1 <= 1; - end - - rsp_valid <= gpr_req_if.valid; - rsp_wid <= gpr_req_if.wid; - rsp_pc <= gpr_req_if.PC; - - if (read_rs1) begin - rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data; - end - rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data; - rsp_rs3_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data; - - assert(read_rs1 || rsp_wid == gpr_req_if.wid); - end - end - - // outputs - wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_req_if.rs1 : gpr_req_if.rs3; - assign raddr1 = {gpr_req_if.wid, rs1}; - assign gpr_req_if.ready = ~rs3_delay; - - assign gpr_rsp_if.valid = rsp_valid; - assign gpr_rsp_if.wid = rsp_wid; - assign gpr_rsp_if.PC = rsp_pc; - assign gpr_rsp_if.rs1_data = rsp_rs1_data; - assign gpr_rsp_if.rs2_data = rsp_rs2_data; - assign gpr_rsp_if.rs3_data = rsp_rs3_data; - -endmodule \ No newline at end of file diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index 352a17e0..f60f1964 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -12,21 +12,24 @@ module VX_gpr_ram ( ); `ifndef ASIC - reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0]; + reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0]; + reg [`NUM_THREADS-1:0][31:0] q1, q2; always @(posedge clk) begin for (integer i = 0; i < `NUM_THREADS; i++) begin if (we[i]) begin - ram[waddr][i][0] <= wdata[i][07:00]; - ram[waddr][i][1] <= wdata[i][15:08]; - ram[waddr][i][2] <= wdata[i][23:16]; - ram[waddr][i][3] <= wdata[i][31:24]; + mem[waddr][i][0] <= wdata[i][07:00]; + mem[waddr][i][1] <= wdata[i][15:08]; + mem[waddr][i][2] <= wdata[i][23:16]; + mem[waddr][i][3] <= 
wdata[i][31:24]; end end + q1 <= mem[rs1]; + q2 <= mem[rs2]; end - - assign rs1_data = ram[rs1]; - assign rs2_data = ram[rs2]; + + assign rs1_data = q1; + assign rs2_data = q2; `else @@ -134,4 +137,4 @@ module VX_gpr_ram ( `endif -endmodule +endmodule \ No newline at end of file diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 23d9db16..9e72c023 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -15,8 +15,15 @@ module VX_gpr_stage #( ); `UNUSED_VAR (reset) + reg rsp_valid; + reg [`NW_BITS-1:0] rsp_wid; + reg [31:0] rsp_pc; + reg rs1_is_zero, rs2_is_zero; + wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data; - wire [`NW_BITS+`NR_BITS-1:0] raddr1; + wire [`NW_BITS+`NR_BITS-1:0] raddr1, raddr2; + + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; VX_gpr_ram gpr_ram ( .clk (clk), @@ -24,60 +31,77 @@ module VX_gpr_stage #( .waddr ({writeback_if.wid, writeback_if.rd}), .wdata (writeback_if.data), .rs1 (raddr1), - .rs2 ({gpr_req_if.wid, gpr_req_if.rs2}), + .rs2 (raddr2), .rs1_data (rs1_data), .rs2_data (rs2_data) - ); + ); -`ifdef EXT_F_ENABLE - VX_gpr_fp_ctrl VX_gpr_fp_ctrl ( - .clk (clk), - .reset (reset), - .rs1_data (rs1_data), - .rs2_data (rs2_data), - .raddr1 (raddr1), - .gpr_req_if (gpr_req_if), - .gpr_rsp_if (gpr_rsp_if) - ); -`else - reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data; - reg rsp_valid; - reg [`NW_BITS-1:0] rsp_wid; - reg [31:0] rsp_pc; - always @(posedge clk) begin if (reset) begin - rsp_valid <= 0; - rsp_wid <= 0; - rsp_pc <= 0; - rsp_rs1_data <= 0; - rsp_rs2_data <= 0; + rsp_valid <= 0; + rsp_wid <= 0; + rsp_pc <= 0; + rs1_is_zero <= 0; + rs2_is_zero <= 0; end else begin - rsp_valid <= gpr_req_if.valid; - rsp_wid <= gpr_req_if.wid; - rsp_pc <= gpr_req_if.PC; - rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data; - rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? 
(`NUM_THREADS*32)'(0) : rs2_data; + rsp_valid <= gpr_req_if.valid; + rsp_wid <= gpr_req_if.wid; + rsp_pc <= gpr_req_if.PC; + rs1_is_zero <= (0 == gpr_req_if.rs1); + rs2_is_zero <= (0 == gpr_req_if.rs2); end + end + +`ifdef EXT_F_ENABLE + + reg [`NUM_THREADS-1:0][31:0] rs3_data; + reg read_rs3, save_rs3; + + wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && !read_rs3; + wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready; + + always @(posedge clk) begin + if (reset) begin + rs3_data <= 0; + read_rs3 <= 0; + end else begin + if (rs3_delay) begin + read_rs3 <= 1; + save_rs3 <= 1; + end else if (read_fire) begin + read_rs3 <= 0; + end + if (save_rs3) begin + rs3_data <= rs1_data; + save_rs3 <= 0; + end + assert(!read_rs3 || rsp_wid == gpr_req_if.wid); + end end + assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)}; + assign gpr_req_if.ready = ~rs3_delay; + assign gpr_rsp_if.rs3_data = rs3_data; + +`else + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; - assign gpr_req_if.ready = 1; - - assign gpr_rsp_if.valid = rsp_valid; - assign gpr_rsp_if.wid = rsp_wid; - assign gpr_rsp_if.PC = rsp_pc; - assign gpr_rsp_if.rs1_data = rsp_rs1_data; - assign gpr_rsp_if.rs2_data = rsp_rs2_data; assign gpr_rsp_if.rs3_data = 0; `UNUSED_VAR (gpr_req_if.valid); `UNUSED_VAR (gpr_req_if.rs3); `UNUSED_VAR (gpr_req_if.use_rs3); `UNUSED_VAR (gpr_rsp_if.ready); + `endif + + assign gpr_rsp_if.rs1_data = rs1_is_zero ? (`NUM_THREADS*32)'(0) : rs1_data; + assign gpr_rsp_if.rs2_data = rs2_is_zero ? 
(`NUM_THREADS*32)'(0) : rs2_data; + assign gpr_rsp_if.valid = rsp_valid; + assign gpr_rsp_if.wid = rsp_wid; + assign gpr_rsp_if.PC = rsp_pc; assign writeback_if.ready = 1'b1; -endmodule +endmodule \ No newline at end of file diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 8c712eff..b3e6fd9d 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -20,13 +20,13 @@ module VX_ibuffer #( localparam ADDRW = $clog2(SIZE); localparam NWARPSW = $clog2(`NUM_WARPS+1); + reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0]; + wire [`NUM_WARPS-1:0] q_full; wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size; wire [DATAW-1:0] q_data_in; - wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev; - + wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev; reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out; - reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0]; wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready; wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready; @@ -36,7 +36,7 @@ module VX_ibuffer #( wire writing = enq_fire && (i == ibuf_enq_if.wid); wire reading = deq_fire && (i == ibuf_deq_if.wid); - wire is_slot0 = ((0 == size_r[i]) || ((1 == size_r[i]) && reading)); + wire is_slot0 = (0 == size_r[i]) || ((1 == size_r[i]) && reading); wire push = writing && !is_slot0; wire pop = reading && (size_r[i] != 1); @@ -48,36 +48,37 @@ module VX_ibuffer #( .clk (clk), .reset (reset), .push (push), - .data_in (q_data_in), .pop (pop), + .data_in (q_data_in), .data_out (q_data_prev[i]), `UNUSED_PIN (empty), `UNUSED_PIN (full), `UNUSED_PIN (size) ); - always @(posedge clk) begin - if (writing && is_slot0) begin - q_data_out[i] <= q_data_in; - end - if (pop) begin - q_data_out[i] <= q_data_prev[i]; - end - end - always @(posedge clk) begin if (reset) begin size_r[i] <= 0; - end else begin - if (writing && !reading) begin - size_r[i] <= size_r[i] + SIZEW'(1); + end else begin + if (writing) begin + if (is_slot0) begin + q_data_out[i] <= q_data_in; + end + if (!reading) begin + size_r[i] <= size_r[i] + SIZEW'(1); + end end - if 
(reading && !writing) begin - size_r[i] <= size_r[i] - SIZEW'(1); + if (reading) begin + if (size_r[i] != 1) begin + q_data_out[i] <= q_data_prev[i]; + end + if (!writing) begin + size_r[i] <= size_r[i] - SIZEW'(1); + end end end end - + assign q_full[i] = (size_r[i] == SIZE); assign q_size[i] = size_r[i]; end diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index e0bf94af..1b957271 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -29,7 +29,7 @@ module VX_icache_stage #( wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0]; always @(posedge clk) begin - if (icache_req_fire) begin + if (icache_req_fire) begin rsp_PC_buf[req_tag] <= ifetch_req_if.PC; rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask; end diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index a377c461..d30120dd 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -41,9 +41,9 @@ `define STRINGIFY(x) `"x`" -`define STATIC_ASSERT(cond, msg) \ - generate \ - if (!(cond)) $error msg; \ +`define STATIC_ASSERT(cond, msg) \ + generate \ + if (!(cond)) $error msg; \ endgenerate `define ENABLE_TRACING /* verilator tracing_on */ @@ -51,8 +51,8 @@ /////////////////////////////////////////////////////////////////////////////// -`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *) -`define RELAXED_RW_BRAM (* syn_ramstyle = "no_rw_check" *) +`define USE_FAST_BRAM (* ramstyle="mlab" *) +`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 625c0e53..35f99563 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -447,6 +447,8 @@ module VX_bank #( `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1; + end else begin + assign {debug_pc_st1, 
debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0; end `endif diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 74745ceb..9f201223 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -58,6 +58,7 @@ module VX_cache_miss_resrv #( ); reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0]; reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; + reg [MRVQ_SIZE-1:0] valid_table; reg [MRVQ_SIZE-1:0] ready_table; reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr; diff --git a/hw/rtl/cache/VX_tag_data_store.v b/hw/rtl/cache/VX_tag_data_store.v index e0f356cc..a03c890b 100644 --- a/hw/rtl/cache/VX_tag_data_store.v +++ b/hw/rtl/cache/VX_tag_data_store.v @@ -30,7 +30,6 @@ module VX_tag_data_store #( input wire fill_sent ); - reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0][7:0] data [`BANK_LINE_COUNT-1:0]; reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0]; reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0]; reg [`BANK_LINE_COUNT-1:0] dirty; @@ -40,8 +39,7 @@ module VX_tag_data_store #( assign read_dirty = dirty [read_addr]; assign read_dirtyb = dirtyb [read_addr]; assign read_tag = tag [read_addr]; - assign read_data = data [read_addr]; - + wire do_write = (| write_enable); always @(posedge clk) begin @@ -69,15 +67,26 @@ module VX_tag_data_store #( if (invalidate) begin valid[write_addr] <= 0; end - - for (integer j = 0; j < `BANK_LINE_WORDS; j++) begin - for (integer i = 0; i < WORD_SIZE; i++) begin - if (write_enable[j][i]) begin - data[write_addr][j][i] <= write_data[j * `WORD_WIDTH + i * 8 +: 8]; - end - end - end end end + wire [(`BANK_LINE_WORDS * WORD_SIZE)-1:0] ram_wren; + assign ram_wren = write_enable & {(`BANK_LINE_WORDS * WORD_SIZE){!stall_bank_pipe}}; + + VX_dp_ram #( + .DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8), + .SIZE(`BANK_LINE_COUNT), + .BYTEENW(`BANK_LINE_WORDS * WORD_SIZE), + .BUFFERED(0), + .RWCHECK(1) + ) dp_ram ( + 
.clk(clk), + .waddr(write_addr), + .raddr(read_addr), + .wren(ram_wren), + .rden(1'b1), + .din(write_data), + .dout(read_data) + ); + endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v new file mode 100644 index 00000000..b7d70789 --- /dev/null +++ b/hw/rtl/libs/VX_dp_ram.v @@ -0,0 +1,117 @@ +`include "VX_platform.vh" + +module VX_dp_ram #( + parameter DATAW = 1, + parameter SIZE = 1, + parameter BYTEENW = 1, + parameter BUFFERED = 1, + parameter RWCHECK = 1, + parameter ADDRW = $clog2(SIZE), + parameter SIZEW = $clog2(SIZE+1) +) ( + input wire clk, + input wire [ADDRW-1:0] waddr, + input wire [ADDRW-1:0] raddr, + input wire [BYTEENW-1:0] wren, + input wire rden, + input wire [DATAW-1:0] din, + output wire [DATAW-1:0] dout +); + + if (BUFFERED) begin + + reg [DATAW-1:0] mem [SIZE-1:0]; + reg [DATAW-1:0] dout_r; + + if (BYTEENW > 1) begin + always @(posedge clk) begin + for (integer i = 0; i < BYTEENW; i++) begin + if (wren[i]) + mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; + end + if (rden) + dout_r <= mem[raddr]; + end + end else begin + always @(posedge clk) begin + if (wren) + mem[waddr] <= din; + if (rden) + dout_r <= mem[raddr]; + end + end + + assign dout = dout_r; + + end else begin + + `UNUSED_VAR(rden) + + if (RWCHECK) begin + + reg [DATAW-1:0] mem [SIZE-1:0]; + + if (BYTEENW > 1) begin + always @(posedge clk) begin + for (integer i = 0; i < BYTEENW; i++) begin + if (wren[i]) + mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; + end + end + end else begin + always @(posedge clk) begin + if (wren) + mem[waddr] <= din; + end + end + + `ifdef SYNTHESIS + reg [DATAW-1:0] din_r; + wire writing; + + if (BYTEENW > 1) begin + assign writing = (| wren); + always @(posedge clk) begin + for (integer i = 0; i < BYTEENW; i++) begin + din_r[i * 8 +: 8] <= wren[i] ? 
din[i * 8 +: 8] : mem[waddr][i * 8 +: 8]; + end + end + end else begin + assign writing = wren; + always @(posedge clk) begin + din_r <= din; + end + end + + reg bypass_r; + always @(posedge clk) begin + bypass_r <= writing && (raddr == waddr); + end + + assign dout = bypass_r ? din_r : mem[raddr]; + `else + assign dout = mem[raddr]; + `endif + + end else begin + + reg [DATAW-1:0] mem [SIZE-1:0]; + + if (BYTEENW > 1) begin + always @(posedge clk) begin + for (integer i = 0; i < BYTEENW; i++) begin + if (wren[i]) + mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; + end + end + end else begin + always @(posedge clk) begin + if (wren) + mem[waddr] <= din; + end + end + assign dout = mem[raddr]; + end + end + +endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index a14f4ec9..3c5c9a78 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -18,16 +18,11 @@ module VX_generic_queue #( output wire [SIZEW-1:0] size ); `STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!")) + + if (SIZE == 1) begin - always @(*) begin - assert(!pop || !empty); - assert(!push || !full); - end - - if (SIZE == 1) begin // (SIZE == 1) - - reg [SIZEW-1:0] size_r; reg [DATAW-1:0] head_r; + reg size_r; always @(posedge clk) begin if (reset) begin @@ -35,8 +30,10 @@ module VX_generic_queue #( size_r <= 0; end else begin if (push && !pop) begin + assert(!full); size_r <= 1; end else if (pop && !push) begin + assert(!empty); size_r <= 0; end if (push) begin @@ -50,63 +47,14 @@ module VX_generic_queue #( assign full = (size_r != 0); assign size = size_r; - end else begin // (SIZE > 1) - - `ifdef QUARTUS - - scfifo scfifo_component ( - .clock (clk), - .data (data_in), - .rdreq (pop), - .wrreq (push), - .empty (empty), - .full (full), - .q (data_out), - .sclr (reset), - .usedw (), - .aclr (), - .almost_empty (), - .almost_full (), - .eccstatus () - ); - - defparam - scfifo_component.lpm_type = "scfifo", - 
scfifo_component.intended_device_family = "Arria 10", - scfifo_component.lpm_numwords = SIZE, - scfifo_component.lpm_width = DATAW, - scfifo_component.lpm_widthu = $clog2(SIZE), - scfifo_component.lpm_showahead = "ON", - scfifo_component.add_ram_output_register = (BUFFERED ? "ON" : "ON"), - scfifo_component.use_eab = "ON"; - - reg [SIZEW-1:0] size_r; - - always @(posedge clk) begin - if (reset) begin - size_r <= 0; - end else begin - if (push && !pop) begin - size_r <= size_r + SIZEW'(1); - end - if (pop && !push) begin - size_r <= size_r - SIZEW'(1); - end - end - end - - assign size = size_r; - - `else - - `USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0]; + end else begin if (0 == BUFFERED) begin - reg [SIZEW-1:0] size_r; reg [ADDRW:0] rd_ptr_r; reg [ADDRW:0] wr_ptr_r; - + reg [ADDRW-1:0] used_r; + wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0]; wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0]; @@ -114,111 +62,127 @@ module VX_generic_queue #( if (reset) begin rd_ptr_r <= 0; wr_ptr_r <= 0; - size_r <= 0; + used_r <= 0; end else begin - if (push) begin + if (push) begin + assert(!full); wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1); if (!pop) begin - size_r <= size_r + SIZEW'(1); + used_r <= used_r + ADDRW'(1); end end if (pop) begin + assert(!empty); rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1); if (!push) begin - size_r <= size_r - SIZEW'(1); + used_r <= used_r - ADDRW'(1); end end end end - always @(posedge clk) begin - if (push) begin - data[wr_ptr_a] <= data_in; - end - end - - assign data_out = data[rd_ptr_a]; - assign empty = (wr_ptr_r == rd_ptr_r); - assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]); - assign size = size_r; + VX_dp_ram #( + .DATAW(DATAW), + .SIZE(SIZE), + .BUFFERED(0), + .RWCHECK(1) + ) dp_ram ( + .clk(clk), + .waddr(wr_ptr_a), + .raddr(rd_ptr_a), + .wren(push), + .rden(pop), + .din(data_in), + .dout(data_out) + ); + + assign empty = (wr_ptr_r == rd_ptr_r); + assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != 
rd_ptr_r[ADDRW]); + assign size = {full, used_r}; end else begin - reg [SIZEW-1:0] size_r; - reg [DATAW-1:0] head_r; - reg [DATAW-1:0] curr_r; + wire [DATAW-1:0] dout; + + reg [DATAW-1:0] din_r; reg [ADDRW-1:0] wr_ptr_r; reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] rd_ptr_next_r; + reg [ADDRW-1:0] rd_ptr_n_r; + reg [ADDRW-1:0] used_r; reg empty_r; reg full_r; reg bypass_r; always @(posedge clk) begin - if (reset) begin - size_r <= 0; - curr_r <= 0; - wr_ptr_r <= 0; - rd_ptr_r <= 0; - rd_ptr_next_r <= 1; - empty_r <= 1; - full_r <= 0; + if (reset) begin + wr_ptr_r <= 0; + rd_ptr_r <= 0; + rd_ptr_n_r <= 1; + empty_r <= 1; + full_r <= 0; + used_r <= 0; end else begin if (push) begin wr_ptr_r <= wr_ptr_r + ADDRW'(1); if (!pop) begin empty_r <= 0; - if (size_r == SIZEW'(SIZE-1)) begin + if (used_r == ADDRW'(SIZE-1)) begin full_r <= 1; end - size_r <= size_r + SIZEW'(1); + used_r <= used_r + ADDRW'(1); end end if (pop) begin - rd_ptr_r <= rd_ptr_next_r; + rd_ptr_r <= rd_ptr_n_r; if (SIZE > 2) begin - rd_ptr_next_r <= rd_ptr_r + ADDRW'(2); + rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); end else begin // (SIZE == 2); - rd_ptr_next_r <= ~rd_ptr_next_r; + rd_ptr_n_r <= ~rd_ptr_n_r; end - if (!push) begin - if (size_r == SIZEW'(1)) begin - assert(rd_ptr_next_r == wr_ptr_r); + if (!push) begin + full_r <= 0; + if (used_r == ADDRW'(1)) begin + assert(rd_ptr_n_r == wr_ptr_r); empty_r <= 1; - end; - full_r <= 0; - size_r <= size_r - SIZEW'(1); + end; + used_r <= used_r - ADDRW'(1); end end - - bypass_r <= push && (empty_r || ((size_r == SIZEW'(1)) && pop)); - curr_r <= data_in; end end always @(posedge clk) begin - if (reset) begin - head_r <= 0; - end else begin - if (push) begin - data[wr_ptr_r] <= data_in; - end - head_r <= data[pop ? rd_ptr_next_r : rd_ptr_r]; - end - end + if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin + bypass_r <= 1; + din_r <= data_in; + end else if (pop) + bypass_r <= 0; + end - assign data_out = bypass_r ? 
curr_r : head_r; + VX_dp_ram #( + .DATAW(DATAW), + .SIZE(SIZE), + .BUFFERED(1), + .RWCHECK(0) + ) dp_ram ( + .clk(clk), + .waddr(wr_ptr_r), + .raddr(rd_ptr_n_r), + .wren(push), + .rden(pop), + .din(data_in), + .dout(dout) + ); + + assign data_out = bypass_r ? din_r : dout; assign empty = empty_r; assign full = full_r; - assign size = size_r; + assign size = {full_r, used_r}; end - - `endif - end endmodule diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index 93102ec5..26d41900 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -43,17 +43,19 @@ set_global_assignment -name VERILOG_MACRO FPU_FAST set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" -set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" -set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON -set_global_assignment -name FITTER_EFFORT "STANDARD FIT" set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON -set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON set_global_assignment -name POWER_USE_TA_VALUE 65 set_global_assignment -name SEED 1 +set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON +set_global_assignment -name FITTER_EFFORT "STANDARD FIT" +set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" +set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED +set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM +set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" set idx 0 foreach arg $q_args_orig { From 8290ad8828c368cf3b214fdc22f76096481644f8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 20 
Oct 2020 05:49:45 -0700 Subject: [PATCH 12/19] minor update --- driver/opae/vlsim/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 251850cc..07ded2d0 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -20,11 +20,11 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 +CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 #DEBUG=1 -SCOPE=1 +#SCOPE=1 CFLAGS += -fPIC From e6466b887c1c70695ba5f87cae477e0cbb36e8fb Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 20 Oct 2020 08:45:21 -0700 Subject: [PATCH 13/19] minor update --- hw/opae/vortex_afu.qsf | 2 +- hw/rtl/cache/VX_tag_data_store.v | 2 +- hw/rtl/libs/VX_generic_queue.v | 4 ++-- hw/syn/quartus/project.tcl | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/opae/vortex_afu.qsf b/hw/opae/vortex_afu.qsf index f515b639..c24f3549 100644 --- a/hw/opae/vortex_afu.qsf +++ b/hw/opae/vortex_afu.qsf @@ -1,7 +1,7 @@ # Analysis & Synthesis Assignments set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 -set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS OFF +set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON set_global_assignment -name VERILOG_MACRO QUARTUS set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG diff --git a/hw/rtl/cache/VX_tag_data_store.v b/hw/rtl/cache/VX_tag_data_store.v index a03c890b..db99943c 100644 --- a/hw/rtl/cache/VX_tag_data_store.v +++ b/hw/rtl/cache/VX_tag_data_store.v @@ -78,7 +78,7 @@ module VX_tag_data_store #( .SIZE(`BANK_LINE_COUNT), .BYTEENW(`BANK_LINE_WORDS * WORD_SIZE), .BUFFERED(0), - .RWCHECK(1) + 
.RWCHECK(0) ) dp_ram ( .clk(clk), .waddr(write_addr), diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 3c5c9a78..68db0d4d 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -3,7 +3,7 @@ module VX_generic_queue #( parameter DATAW = 1, parameter SIZE = 2, - parameter BUFFERED = 0, + parameter BUFFERED = 1, parameter ADDRW = $clog2(SIZE), parameter SIZEW = $clog2(SIZE+1) ) ( @@ -85,7 +85,7 @@ module VX_generic_queue #( .DATAW(DATAW), .SIZE(SIZE), .BUFFERED(0), - .RWCHECK(1) + .RWCHECK(0) ) dp_ram ( .clk(clk), .waddr(wr_ptr_a), diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index 26d41900..0e85bf48 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -33,7 +33,7 @@ set_global_assignment -name TOP_LEVEL_ENTITY $opts(top) set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 -set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS OFF +set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON set_global_assignment -name VERILOG_MACRO QUARTUS set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG From 81dc8c7279f7ddaf7367394676acfe143ba4069a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 20 Oct 2020 16:47:01 -0400 Subject: [PATCH 14/19] minor update --- driver/opae/vlsim/Makefile | 2 +- driver/tests/dogfood/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 07ded2d0..ac38faca 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -77,7 +77,7 @@ VL_FLAGS += -DNOPAE CFLAGS += -DNOPAE # use DPI FPU -VL_FLAGS += -DFPU_FAST +#VL_FLAGS += -DFPU_FAST RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip diff --git a/driver/tests/dogfood/Makefile 
b/driver/tests/dogfood/Makefile index 46cb364a..69a5ec80 100644 --- a/driver/tests/dogfood/Makefile +++ b/driver/tests/dogfood/Makefile @@ -1,7 +1,7 @@ RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain VORTEX_RT_PATH ?= $(wildcard ../../../runtime) -OPTS ?= -n64 +OPTS ?= -n32 VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ From 43ae82e7888f209f4a2df39a846f135d592f0a80 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Oct 2020 16:40:50 -0700 Subject: [PATCH 15/19] vlsim fix, verilator fst trace, use ram optimization --- benchmarks/opencl/sgemm/sgemm | Bin 45952 -> 46048 bytes driver/opae/vlsim/Makefile | 10 +- driver/opae/vlsim/opae_sim.cpp | 176 +++++++++-------- driver/opae/vlsim/opae_sim.h | 19 +- driver/opae/vx_scope.h | 2 +- driver/tests/dogfood/Makefile | 2 +- hw/opae/README | 2 +- hw/opae/vortex_afu.sv | 298 ++++++++++++++++------------- hw/rtl/VX_define.vh | 5 - hw/rtl/VX_gpr_ram.v | 135 ++----------- hw/rtl/VX_icache_stage.v | 4 +- hw/rtl/VX_ipdom_stack.v | 4 +- hw/rtl/VX_platform.vh | 2 +- hw/rtl/cache/VX_bank.v | 6 +- hw/rtl/cache/VX_cache_miss_resrv.v | 29 ++- hw/rtl/cache/VX_tag_data_access.v | 8 +- hw/rtl/cache/VX_tag_data_store.v | 2 +- hw/rtl/libs/VX_dp_ram.v | 48 ++++- hw/rtl/libs/VX_generic_queue.v | 2 +- hw/rtl/libs/VX_scope.v | 5 +- hw/simulate/Makefile | 7 +- hw/simulate/simulator.cpp | 73 ++++--- hw/simulate/simulator.h | 7 +- 23 files changed, 424 insertions(+), 422 deletions(-) diff --git a/benchmarks/opencl/sgemm/sgemm b/benchmarks/opencl/sgemm/sgemm index d8d18df94950d1f0aaacb588a1fa20d43ee40d1c..644db9d7ddee9a375612d65afc3e165e1110bb99 100755 GIT binary patch literal 46048 zcmeHwd3==Bx&QmlkYo}d35jUjZ~#Hpgs^V`LNYLsu*fF3Fl3pKz+`D=!s0E6ji_mi z1#Q(#RbBF`;Go1wcOPy01i%2>lW< z;Q=ayYZR4|%P;?_9{KibzWtidgcG#>CN$-v82(mjd~#S094`}+me;bEW3`MQ9V5wJ z;a&x{OlbHn10VG~(<@gKF4y^`vdem{zX?q}6h#`Ur%o=4)Q*odG_`FVzjgZ5@lz)k zMw<&KO1UYYv#GINFB 
zM7%FKffRDs`}Re@ErtGU27dgKuUP>2lYvNIokEU<@nLC8xT&mCjR~R9hQ{WmP&8K6 z8ViN2P{qo{q1te3ctb-p7H(a+xGd7#6kb_X9SJL3pO~7+@^BE_%R!t<( z^s%;ZTX=c6s&;-`U0t{}4qH;bysBwKoV2VpTontKH8(a^HPv25F3>J$XljVoBciZc zZm14mD};YBpmd&P%}ufJ)&xL;C{0!#-rP__(F?<|Ws#~_U2|(=MS0Z8A}&VjetBy{ zEbJ%`E+xUu<6w0j%UYW^v{p67bz2;6Tw1*`Ttn^Vw>3m+;}k2xF;%M4)(wsl8hv>~ ztbTq&Q&sCWR@dg{RumA`#Y8k#TQhbn>u#zQx?;(S*wmW(*5;<>Nukvfizi06q1GBh zHIe3;O_Q3Mw*)qmr%{n3wxOmb6lJQI@EQdVZ-qIdt+2jT*NmoN)y2Y*h}F^-i(0XU z#<11W+RzlMvs%Mj8b}ijhc{Vu$RirI8ml76%Ys(n)>f--3vFiAp+;-1P^e~W6>F?2 z(r^u0Pl!3{hSpfsfNWaB0EcU7xYM!9*#CX^}3C7ToleGLLUMg*%HYk1!X5)7aC)ne7tp_v#@tKI5 z?z!!}226iE)(krRG}if^%MHu6h^TZRZvjoO4@qZFRvp4>s3M z7u>WB;%;)mId?VJZWr7=9^C1IyXS}dTyV}g&Gmo_&bhg{_PgL_o=w~V7u?KQ2|wt9 zb3SXXCtdLK3=rWF7o783a~*ZTIsZ4;i!M0V2IlH^!G{X#_tV6|eiRyXr6s{3JXWC}^h@>B78bYj|gOYxbX}WIDeo23aX}WCBK1qL-X}W69 zZb^TMX}W07PDy``X%?raMbfu2O_%IhFX>M*O;_w$Ch1QwoyGJ#NngV>U9x9_q+6M$ zEB5#$y^(3UU{9{3tC`Mb+LH7YOw$#6j=cxKh~-Sv1$$nU^rcMG^?Hs-dOp*1xt@cP zp2;*_t!KZaCo)YJ>)9viF-+67dUi{C7}H!9_Ux4Oc}&yAdRio%$2485XT7Aen5Ikh zER(c{X}VI+JW0P>h%{ZOXM&{v$~0Z4$1mwun5N700X?Ec~$NtIk zKg+bA=@%vabEfG+Jx3({DAU83J}Bu2nWiiC?3eU+n5GN$?346YnWpRX>_)oy^@@&X z)&)EM5NvIb9R#cjcBO4Y7@Tn; z*zt1gTr}x-$03O(J@D#NFV+?2Ty#CM!2I@0Z}@b0To#O@%} zeA>_n^^xvQ0Cvrwn4xp5I@*g7em`Tq*DY#?A?Uei;1q#aM20K@g>cwCu z^jdsEWe2K_4%JbiQgZKwoCD18XOBt2U4`yg{9JiQ;7EDL;-?P>PLifGaB^1QxtuQc zQHKL36zIgPz>%D;`;n;2dE?+)IfDYn4+oAZ+_71Kr?;=`3>?e(R^YjgN5O9&3Emm( zI%^m4qNRQ0t(;sRN(qvuNpdWSfgZ9wCE{DWcSrlX3BaE{}y+5Y<~$kNFs2Ov2Nl{bgnvyOh!N2apbeb zM@Ao+5qK%*^9L0#>h^PEQMYLCIf&oFIBfmztgX(pPq|R-8nr?yE-m6ht=A}@LRGm? 
zt2C7F%Au{Qm9|JP=_??iwc$H68N4*J)uzl z>Ee1yqrRq4kGoI@G-|g(ecOe)Povs^($x^W?zIVUjgB9`2A>FaeE&rRkFc3^zxd9n zQ&71FL9lBVy*SwMDEzISLb^(B#URqX0Jht8Fy}J|uRj>mo>|i00(uumrw66Ebv*G1 zeXn~SGVKT)Yu|p%ij6~q6Jb|E3WR+ef%rrB=T4x@D4-G$aH{*)$bEOPQ^b`D1v#shDIxN@f4W0j6k={2kesIN4id zmd+#Xr=wuf!-1n*BOKo4GfH%(eVvG|E<(`39IfbhQZ)Gqh6QO)5v9P9PGM3CkXRJ> z9dd?k{f+{2sKCRhvPVF2s{2})0}31ocBIuJpToQQ0V<{MOq--KCZx}d-y&~E;K@#5 zQW=w2GOkk@-;b7{D)pytBfByu<8+nrlgRk3OPDdI9y;R(0YHffNywO8G(G4VmGQw& zVNw~BSTY{3GA?v7u4KkPN4M|@NKSQk!2l@nL1g?1$){u%IJ#c|`0%ciXmnbMDT4fd zIe3CypOL1|iDOq6X)deicoPG46=^!233d%WHBvVU>i0Qj(f)WYjB}t z{!#MDEMuGp{szrPIy&}1W$1VSI{pNL(VzX0!LEaCIo%eh4zMIW|Cs@Ebf)bEs#Kap zVmplh?l{K=WY7`#Uiu z5cC31kssy#71iwsJkTkms;nfDOunHq`9h{C>%+$(dZv@fKanTO`T)xMs}XDn^N=+s zyC~Yv0D!Om9KnMGIpgz_kkJviw|)D)Qk;7wr>M$lvXj$t=5z$^p^9@Gaza0n7FAL| z?nq@f!g92V)P3#S_leYfBK2w|^|#1hHRO|TLFzS*)DkWALM64(k!o7&1tN8C`}Vye zb+1TWsHEQHNc}RUUhGKyGnlCB;fra3&(J1lXb<+I5xBCTx{raSBXDQ?_B%!Jog$d+ z9u8mR2wqIVPr+l95pGd}1DJ^%?mmPB?EVR6(y2h-RG=*`&|L}?0Z7kA+P9yaoAar5 z=+WH-@o(jfIS5^6fdB(`u;$s|p+9)k1YqD2^nh>WtmCx7>Mo5FpWPSnP!}--zB|0P zz%*NEIH|x~A_}`4-gVq*pi{Ph1)N(MbbW>hnhlDI#Rf@90=qkfMKx5?h~>t?4Pd#y zrJIV*|0^uF2fnNe0B z zn21h~NBxM3PaWPhN=Lh2I0lA}T?I%(<6}dqc-nt4=;|UpYKy}9Hke1(-?u4540e6{ z_UlFW>qYmSO7|I#?w5nI`+2zLBM^G3`%V;v?o`hVVfhDu?g;E`-@a3X>=YsQC?Ut; zWXjX?C}g)IWCewE52OwqyK<>)*Jq?WT`JY_0!9A?$^yS$@G1cGHm^X}D5wO0-T~-e zgYcfe<_y}kmAFn7bnH5cX8G5gTnW(>hCsIiv}yPDH<)qBZ~(f8vMxIZKI=!RI;VVB zg0$-;c)w=wHpS4CyJE<<{m1-oaqhy1nByf>EHZi-Ix2H`!P0#lT<8&yoa!!zA$Efx zmzvyxgkG1Jo}^Y8svfk#&cP#?q0eGw_ci{PM^Doy~Oc&x6^4o!p()8QBNu}VmTpZ?CyZ8wjZ-GPhADRy~%u_ zLsxahlc&iJ=z~CaJb`kLgW?C+GP_@SolBb&c6;Es-4l@VDjiXvY#5D;(O3Z%GQxKO z`M_p%e+PW84uGg30ClgVd51wo5IRdjtU%k2!J_!(B0w@1llI+u!FWPIomk@chLB_h z+-W*i)0kzo9R%%@T$y678tmeF^>Fw1scXk$zyM1O*3$R^a~CnA9}9M1oWQ`)h33@7 zS)w#?Y1^DMwm&}oO1&Cuf6ebK+hsy~xVu55Q*|YCtxBJOv}2HlvWT=50KZRp^~Qk8 zK}&l_`u0P?2Y)^J2mkqr-=1Fwf(hRsIF{@qJJt;9~zw&#%9I!tTn6j^3sk|rK>AC-Y#8L(eZpKc5FsWKUX$n 
z#xq<|Ir`<|Uk4BU)634RyX#eCw(IrS5W`>D@k(XK+vSjP>a70`wm)VEXS~|>3T)}S z^6Jv7O0Omq@;Tk8;&J|wE&ALp)KELlzhqTqiHt<9}7{g;f0UgB>J$J%fVS?eEB zI~2XXxD%UU@$bY-XxB>gqcsiQf+ zlvBu2)T&QTr7$Fha8fqfIq_3Rh@-T)aenBsIIuXhJWyI5TDqii?Kpo^^|n|zn#BHo z{Fg7USQ$8-09}k0bzNDV2BqpD`%ua@Kd zsA?UwL+|5q$cvxAkMkS5N#kC*d=)AJ!cDWT4^>IpU8iE-_mykxnM!VTFS&uO;hMHs z*dMD8>&70}KjF57q!O6cf!a}OZiqBjS4Bcwnp-#NW{y+vL;#BDE|R&WAwlwJD-fw_ ziLy7uIr~iih{y(iV>HVuZ{E@*Hm?mwWAK<*Lvxchd@sAlZ?Q-aJQJ*av9_uRqFL60(u&GJ z`B49O^ty@xQ;p75Z;M!E5f4)yAk>^VdQ1bRPAe)TlUsm`}_eJzTI|4F5x_k@j=%y zB-B^m^y=)9^z|>HOvuB^&RvjQaA{8eExw)B-1BCQnRM|miXcrneotV>u5w|E2D{;qz3>`^zT1+ypIkRHsQ-kz~IdtM}a zdTI8A((F<5v;Fh43qU_V+b8Xh_aq;L95$Peu8%D6kp(`oz(*GN$O0c(;3ErsWPy(? z@R0>RvcN|c_{ahuS>XSl1! z%4PD${YK3iB&0ZZlk`r_kK-~)8$EFBE9na~9$SKvcJ$W5{$B;=IrilI7VwUCT&s0h zr^C%Uyg`Sz>+s7u{DBUi(BTU@{F4sf)?vDAe&Km%9gfuDR2?qR;c6Y$>2R|SZ_wfG zI{dN@f1txBbohb}|D?mWb(qeLVO(eHaHI~W>TrP$SL?7&hnsbHgAQ-kp^}E@_e=c} zU07B&(?1GNw~zKuE}ULC#XoUE@stTuCrfPdwV~cc0WHj=bFAFPG@&du#2!ehY{RNo5Q-Em~qr>UfFAWb0f+4K{3Z!ux)Kdh9?02(73z1j9E{V>< zwt2tZ61^X}_xm)X>Ano#eaz5KzY#*y821cgYy$1>8-nQ3^tVeqmX-My44?k@bpTtL zGtn2NpS+tSncst@(*MbLZszW6#NT7QAoDsDHvJUi{;Xd@|8&b{VpP@|w3BpO5)-oC zWWpnf=~2LoV67^XX%yghlja#y+so5Y&#Iok2?WE`19|LA!-~0T>CmmYtP!t;#_F&LE@&Y1r#vS{y4~XXDy*&itNuLac9;lCMHN?U)EJj6iecP ztQsaJN@9Q3MkXfNJl8yswGma5K1FmpnDsjH!kWGtFR;NJkH_&`5dl%d* z=Vut>(o1c^bN+*+pReMD7$VclB%a~>I)tPh0ZFb6+owIi>~p^ZW2b#Zq8Gx3Xz{Lg80@U4lLLOYrGOCAj^l z1fOM-9_;&EHtavxch@`#KEGaqeb-5F&utRiceeyz{P?)2@+V`Ys7(4v=8hITFksCPB$~3Fba4!MukhnExW4%^BRk z>{SWM-?1_NgAqPhc0 zc1tkl^XNzi5198bOfYyr>0t@x|6YQ!lM=Jmqe(?)=9D4ry@?0MdI(77@_ z>}K@DcDc6+t%TaG2W*0(da&jy%xz`aqL~_TP{&`^T zAy&#ZR%!lIj342UlSqyoQ!A5r>v!L)iV0przx^1)WY41Yw*%^u#q z*0T?N9!2p^-y2BsQ^AV)ulEdv>PGBNTLjNn`aQl zGPThh0_X7VdL!)FEAMA&* zjS3yc?-wNb2Xin!_z--sAD}Pe_i7xgeeesYd1Ae&!%X}%ulB(P2x9I-fw_cODVy=Z zLSV={7nn;8Hqi(Fj&7L(s)6Ry%UXbVXq){PQ~?EW)!AvVi9UD~{R;)$1N1i?5;39i z!8vF}BzXepXOc*?4_>d;=mDC;tEe$N9hPx?a4(EVDFaaL=iuieF+R8us#0C!gPkB# 
zY2}08f@G$R58jA)EohqYi~Ar=IJ^%ZJey0*H1hkRXtTC`*D>%-xCKhucRveXAnGR+ z)ys8Nmt43TE={6yLuCuBLNqKymWz`Y-wh|>Y{RyT-$jd(D1;YaD77EG8UOd8ue%tw zc7&wLW!Q6|w2yg?z+O8LT}LLzm;M9Y4t=Tcc^w&k*)SBB|E!CT!_n-g>@Ptxau#lY zy`&5J3(CkY$T3pk-!()^riy3zLg@1_F?t6%>WNVVV)cxRg3ehE^I*n>;qCh%`cM3R zgI~cmlwQraNF_5aMto>E>Pq9-@7n@0$X^RhQpl%KGcSD{790$sY50}kS8#0#`PR=r zH%l@|dk^wQw$XCg8TNzVnb_3UXk};Hzk%%?Jto%T^)lIm6wojiuSSQIpl*=$5X(0Wo@r+u zx(%W|n`qrf*?eSnEPe9GO|YDu8S(u96n>#-^r=RU2{a>?ZyUO8)Y~99P>OonfYCAY z(k-aBUqH+o`27vPg5RQGeYJ~jjOucwG1rm5qvNpr$RNSCrWgt3H_UO{8nU=;4UIMO z5wuv!{`X|rh9p6@BgttKhAb}I&{)0zToiU04YQ`O%K=zF^U`g|`(^wxKsXS;f&uIp zr4ADPWgZvfywa%dkaN(Esr&oe#4w)DCWZ#{@fIkab?Prr<$PIF+V+Sn+rAa)!M-Qe zYVD|6ru|VZ(_T{RvzOKS?9Xb^*3D(wl+nVPvD zb(~YPj)j}r8g8_5Y8mf`9TrgJgsIDv+?S--^#d;yFvMo?db=?oNY@0^OT_UxwCErrROWqDtFPXh^BF=;NfI` z(4)(Ux*i?JpvNv}bSnD&L61{kPZe79pr29|>`QUEB^B<5GAecGqOYn-E{vHM-;0j(5%=!VWo3pzT0RDz^ zzXAF8>xTVj-NF2CBLf%t3ODNr8E&VVT=Fa6jBlZz%lWH^do!FL#?>1>8X)!Wmr&E1 z%{tA=_2whNO#+!QsAB4i6h6Z@8BlM(wBMk2Nn?D*bhe^(B8*!T$s8ql$UNtS zhx--7Sx!|bDV~&U&V5w$Vh?vRRDtFWWW(4j8%`wy7~g7hXJdE;*sWQ=$H6UP&QrQ$S9ggT&yxX|m;& zH(j>8@@B}ESKdt7^2(bndrx_DWXmgWo@{yLmC2S@Ub$>}&m-~dtIJiqf+uG%z|2;c61K;#q$}RG6c~{ zY*3!{XfOHG*dRPNz^wVD($_o$r^=r%ea)s!M9Ww*&o3w}z_#VtMcPV{`ygo-i`m>RXX*Y=6%ShWG5(biXqeO2d z?IwwyK(ox>B5Z$W^csQkpdj-^~Cd+`&4ldj*(2o}4icY20!fK13_<)hfqQMw z?ZBjQ(URfYi5?=u$IVf%C-)J+z5Q}uKz+}zBgc=qD>!R0u(^-1SL6<$?6Rq?wpl!v z&OOYr^lf7J57JnYS=p%V+=E07g3X8XA5cus=ueb7-WX3D{{hFUm@$wzpB!*h5XXPO z-9yW z?k@EcW|e27OrFaW@Xk^{VOIIMbgktIxUbYtm=(aYPL^lIZD?L<)9zve7RUVT)A94z${1|i&z&hT-CkRhvwG_JzSeB9$H z6F`Q<)3|d|=DQHx0h|s_vtS8kywlN~tOehJ!S*0#FZ?86n^3BScfqhb5n1p$>fo!W zK|8nw7R&;O>#2p$BXO975j2GVX5`}#FdXa)XJU#yoso?I4~Je0zXBQ45vjNjP3KXd zy1fhcK<#UZjG>?VG$O(0py00%;cR&!OEnEzF8nvNsY*mHy%$0}_3M!T5cGDu_{;8=fDsOrAaUm6uWD1FUrK zo})V!Z_&kE3N)`5Ah~h`%?dD63>$k&kn_NO!MqeJo~{j21ep_rwXi-4$pOp=QYi99o^cy=K=8i zM)Oo_o}rl4DhqTdo_j$w7k1~>tO9G5Lv*tu>ehm9X4l}|b0>IsPRnbx3ao1!p52P) 
z(;&*`k`tF9y4xYzqlk8cs5Xh{afj$uMRYTW9@Io@l|ZY-;+@r+2q)U1ayG!q$ose1sGxKUwfmQ1_JYQBk zCwEwjk0udyI7Ig=q7xw61EqOwRe|+QzmfC}MRXiQnE>(%sKENA-w=IM5gh~3?*Qbr zLj~3ezajdbB6?|uwe;KYQ(lj%Kuq!AIwU-MYB9*u$1eei9^{vCANo?Hr(Ue+;repf zdkMW9zssIMx5m>SxU99D8=1-XV%U54g6=fijVt7=;ZYv9h>+VLB>4spd+#49`9puGo9OPS(^rr)zIVXN-+j=HS`0 zX6>7_=$ksZ%v#Qk*W?>y?7aoUA@mG3R+aRpY?_Guj1vE}PVc|gpN&ejkJ#L(wXS#% z2K21?CAw)|@*pEZd+(JM7=b)xx1)=?LnCJk36XxZjujZu%Q%92_FT0BL%5EwSw`)9dr&5g??a39hBCIdaEkL5n$Rq z7%q7oR{@G@k3dn&fa17>%34v1IDr@_R?K(e!4-=USA{NLvBZhbTd_#Ty?Y+McIi*F z{1>$RC5@J6y5fo~0zboN-m}rnaB0ImoH~akTRp$({b2$q++wx}5>_4l21?MyuTmAdb0Uk|WN1G1Z6q@XI)bKy42SO`H?1O{$^IrAblJ9Dkx!0+Lm{^a z$n1R3&W5ArOm>*&o=ZTk1|UNP7=&KT%9~+(eJ0=h89HAq8fRLB@?3JT&xxKLSBx?u z5PMFcJ@;Hy?56CB{MqN$ib4fxP-rsn=N4g)kYzk?fCk^8T@)w)26c=h` z(wG?H*aT!3#ZADBFUVrner*WY1*dg|xP?!OlBV>)9U6{m8bq$%R2Ul{Axgw6ITIXUe`N}mp%~@|3(Y?JzzWuP^&O->uE8{%|$jWxqj02Z5lF6n1-+?eIS!KUD|otzxf5{D{H(aebh zZnRPRls7kSD~iI{JqzW-_6cgv7x$$34w>YLAt6`0T)EpFJm|&(07!}H9|SCzY&B&s z%;&w(3sn>gGYw0}0!~NKIxlq;L@tBuyo`ATx6cr9v%6O(sF7A(BBrk*I zSrX5kn57DX9<$J1m|v@f*5==##9l_R^e@sbCrLuCIDj)nA;C%#_;ssXWmwT0YUQL^ zUtVYz=Qm~d$_o3c#iIM#I5mY{QK(DiYE$G z(IUKMBwXE66l-pdM2j{wwH38g)oiNT01Q4EGHtxv{an=0R1;~d4Hw}9Bt`0e=nuk; z&wo%@5xz(knmSp3o{U)~2)BybqODF&)eW&|0>t_tIrh@Gciz1+D8eUH`pkqcrYdS} zYvP+)9R12F@dh0E{)z=-@~b9=n2+GXO{2Bpy25&x-HL5%2}cpdcnfetGot-0et*S8 zktJUW6~QNW6j=@EfTv6{hhnW&Gz5|`Jzf=6*RTPEs8ka2mTO|8Oa(O49`AeoG_aosv}XF{9cueZEKff>J%<0DSbEz&--b5d%bR^`-+I`$ z)-&>~hu2QPiH}gEKV-r^hr@csTyCAuV0GjeI2gYHaKV%S5zwZ63)zsQC6#l)}=d6wsUE;&mREXhMz z(uY0N<}30PGIO-2^t(MP@;#S{DhqFad4q5BkFOx6#DlK@vAvo#ruXM~#-V`nG8Cd} zI6Vjx+BeedUV;6DCXu1OjD~@m;4g=Kz!9YitA`Ur!c_C9oGyV22QDX@<{N zELjoi{Vk~gpr=>M$s`b%ggZ8oQLHgkk2lHTvr$#GA^j4#I=uSN!sn^@CbI+;&QN$` zWnlTD2);zNG&C_(jMvEVvs3tLnd?JXrECcxjTe@v1wv$2tBveSwJ?hc`SL##n1xc$~kkDiRIP zv65bvCiNMu+KkfU%U@MdHIzjnybHp-#SFt!Ypgkf&x)OKT{}&(thgewIe=PU8Pa0V zgj~ZK-tH(W$k)q4)%cDbK60kNSymslLj0PVRTT?;tgWGDQ%F9Sh6l*}Wn;${7yD)Q z;ve4_YikOZYzQ~u9cnd5R<+jDm#B}g`p0kZk6${$KOV*_!2k}X@pUcX*2actv|%&+ 
z;e(gRX~PNcQ?%4O^{m>4x;mK+>@YRRkFhm3wp6u->YCV^;Xh(;DXZik_XF(&xP32x zLN_x(oG%=~CI=yo{72eVkz-I!>aj&7YnmVhOqOK-gJmuRirH(a2Upm5)1HtK{R8w>uZ{u)F|S-uF_H;K6F@0)Vy~#J1BLbvqC+? z>GiuQ;mrKfs!#-hcxTCrxy1@LMthfQjt(76y;UpupEORpA?q?oQ%gT=Oj)R9W-v7HDS7_uHbD)PiGC)${lEBbvzc z>oaExPl#)l1cfI~=xeayRO9pxjnO)=Vq(ZO`+?a5fe8DJ%4)=>qF+Nh!$c{WN(jL{ zIkbGyk`=}Hu4!oHgit__>(V-6%kM*~ySeKlg$tC>d38BebY zJbkH6(^B(1dHH74nu3l6xz>hjA~f%)rnX3=C59IXQze;VA(deC(`N$9nuZsj{CzO~ zvdQsbwGkhPm9E~IB)xC;s>9(ttfe`Mjv5Qug#7=-tz1o-mxo)5tq}fCw8HAut18c4 zJ({#1H4_Y}{x51Z@C&7w&2CH3^L-1d=fmiG(Ad=6M@Hw~O;0%!r%H3w$ki4(LkAR| z1phPLZ7RogHS;+MjP9Q149X^RNj)Qk(NsVC!Dlq_)*#ab{Z-r3=kGYml$vI2%%BfV zelIT%Dgn6^E8O7908^x8WN_-)5hURJ7zzJ4eq-sNSVv6RDflMZRQ#t8o?`C=x zGebPnOwq4McCq>fd|MgAqMF0CHMQY2#}lnB*s;R20+Z@cE2h?*8=&7phE=T_YCeEl zIaTl2EQG~DsJRYbC)SJG6n!&B4bwL#tYef!Js0e&I{<;P(rKr(pi)y>3%kdkO9eCb8mT&NlIRbq?7TmnO`2 z^n7FqW+)Xai$g2Pf-#wtoQ_Syyk$k{2kAge?wB^j$7TAlTGn!DZI*+s)lcsQy^@1v zy?7_~a^RhHcpLd?+)6iT1KLhl53?`{yvI?ePZHC<6zxC6Xxt49* zBty1%fNP@$-!N}(G^>R(aU~enh*TZ2EK@6@OSz&s^N|mQb;dk6i=LA9o>2611o&2C zb1m{R3s$qP$3}u{DC2rfmd==Eey}>IU5z?OaO1wVXdO>PHCfVR=zLNawasmq ze28Z_z1a9OaYy%F+Ub0@D$>}D&Ajl(@M3-|v;@y!(&1{W66dVF3te37YRUm!f$?FG z?N)7$$~2)m8s-MqnRO`aO3=3Z*vl4gP!kWaj{I{L2s=E^UM$wBPB+T7YX$_h zhSkj-?u(fYr)i;RTTM+k8kG^|L)b?(RanW?(HTv=QAWKJZ{|2>u*%R`efYJ89AAXW zWH%O#dJ7wMV|A=~OHI;&Ew+V-2eJO4Mk>0~TkniLkHQ(1=ad zI+w+`6y`CHCD!FJL_+_4`ZW-;m(5^0w>ii;L;V%&W=G|*W3j6%oByWKk1er6n`2^J zv+!^ow5RBW>->BXWGo%ha_rO?X(@M({WIN53o2? zIxDw=KdA`kXz<2J!-o18+ko2Ob!{Rf4C?4Kdyiiq=!8-opK!;Md7QqAP*A5s#$2|s zv8t)|GTA$(BgxJiJqc@C_C!%N-s3RDOkNYlP+fJdk3!be)pJo}ouU%QcxN_T zeb^bOJH%292UfeCkZ{H+eUfenr>l5D3d0(xY z$BSE|%zJ7R;pxu%Y!l%bmU-`MBHSl$?s3By_ga~jc@M1{=e4pd^S;_dcz?^hw>J?! 
zz?y(Z-I6aa-Z#h>*dE*whaMx;I z^jCwP_Az6d@xTqZGu00{$^FA#&{NNSrGC-R=y{*Ol=P+l#uV^PsCU|bUlP;y6!d#iz`vLR{(X_(dtCk==vj}+X3Bo)Pe ze@p@YTMBrF=p(EN|ISVUAD04Nk^+7i;H+O$E>pi@!0D&S{xuH$NiOK0)b(hTYE2=3 zmzKX%(;N9+B0rw5k;9ir_hq-Q34EZn>|#Y3)(j7&pnpCE{1w1i-aC`@c?)p*mzfi9 z)bclqAL)cE_&31)uC$5X0XXIFE>&1l-hCoxpmk8|kM2UQ{VC-01s~MM%;C+rz_+&c zWuN}Ys4w{WDc~~!r=AB({i27_GoaxQB*7~HSCm5T{Nr~Z`hvFt-j_Z%0M2rmwTLN~ zRnr=a#_$c{!W!Jn(i+}?haSRM95wP3qbbaT;o9bqKF6=c?PAeTRohlByjvnTaIY<# zI%BeI7xX5=(zmH0RMpyAwGB6t##*=GbOyIhVQaOq5!>G(R|vNrjUO zi)Y|Tk@;;6ky`!j@D$%##tq_I8k%YovrGKEHg+?s!M!}p`z*qX!;MR;H{vVA04;!V zqV-lNEO#D$hy}KL+snDvm|GXv-@pwVylVkfP_wm)t*a`6yMhxHbKqz(rXVcboQRl$Y13k!9sl6n_&k3lF@jiWO~*3uS>CQHFHcr~G# zXq$AwP*N3LcW0M3#OmjxPrx{1FbUc_H*vY0?f>DOBaTf?_lx^X7Kh?vF6xvc?n<6t z#LtaFA*awBPMxAH3b!_e0#2Xd( z9xdF#z&mRcHO5pH&P*o~idH9^&`DM69vJ_{hYNi^&SMouw>9FZ2qB(hk+9yxcnH_p zVih(u$HIlB^DD;3sy65po+>Eh-8L|z81VDr8v}NNpYAs1XndS z)F4+JhVoybWT1q>v#dgPV3>+Q*>H8+2Jlri;hAESY-p-$Hu0+J>eldPlZxPRWD|#( zOjNTwlhXe|ggb&*;$Y88U1mSpg!&yz8mnlX1R{Ji5L0xA-|SnP&<~vZ!m>NzDn`o9 zAJdAMP%j@Go@D-VV6c>Q@;BdYGvOK*4i{;7VLC3Ck#i|hc)Z`?H~Zx#tk;BCE{UBC zKc0V=YdQX73&Y_z--$D!`7WHI?rpy-fZ#XSjQ!32y9w86Mx(!>GvNl{`As*I<{S2T znQ)$@@V*g*q%rzyu(cTx7SZsV{d_0XgeHH(Z_2+@^Uv3GX5Zh0d=m^W@?)3K$=~pQ z5-?T~_2XsE2TXXL0z3ary&1qRMBM!59KnPwreK=E95a}3cM|_Ty@)g6u%tzu;WPQ) zp2Tm?B}|C7-6UMe`tQ~JMon{mVM3oVjFw{TZ-m~P#Ba_yOi20eD>?t~0O|IBa~@(s z?k~Epa{@EPcvh!AGe%Fgc-Pc~T_qFbt1r}Y`)MeG?X_VSFSU}1p;>g1?f}mWRsb%0kS{tPeKst?K z0$(lyV8wMtidLar!mS92Z>B{6lEV>_PKC-s3T+bCSKx37s0uaB$pGRbTer>OBtj=g zRJe-_!sQ^N_;T`{%p<;Cl5dyfQ=voJuR@hS5<~whB|bT<0*)>f5-nYIuSOdweqbUe zdxiT1SW}_mTLwO|v%i-^5-ykeQu$?@^ot5rITQs0_0vm>f{lg2Kx=GM;ij3>3#XU5 zA|cll&Ns;?+J#G2u_h8DUBUP~qzOj&?=Si4lrL@E`~26BMIEOHb-eofceeb1aO472 zZabcJ>RMWP7QrUtzX<;o?q9wB)L(AB=D^OkvS+^CIpVihtr_u;U%r_QRLZLd0*Ca0 zb;7v5;D=MtZ%P5*4B36j8J9x-jVa{#Q@|fcA^)8e^sAwFR3G_$A%&cgDdhN5&}XK| z*Xa~;N>k9EN+I8sf<7k&d~gcA|4IShm_p7!Q_xRO0smbJ`>6u=RUgVy=&efux21sJ zkwX5i6!xr4L4Q8zo%m0_9tEH;``szzP;2jt-k-vL>L;A|Prk|ka3%whu1+E68o(Xe 
zIhvz9k?R(Q7ihbZ;LkzU5G_m7auStPfPS!6pgASI>d)JyoMicjSWb?%?}9`*8v4hj zZT{BE8qp_uz3W;+t=>q~7mj+pnzwpot+&x1_OA;>qW#_H)(~{JUKR7l{LB5m#)?=|lRq4XEvZ`WYh4#7tql8pQGaEqrN!6U zxQw_!yCBdSh%_T2uu`tK31H2OTNEg%voh2g^>0c5B#4q^RsIcu1`@r{AFT`eqD`T2 zOLbMmP!Sg+ZNDfSi24oV!6hV^c?_)7Q5O!a3;SB)w$=JumeybCZy@U`Vu4^|oMMGP zDsnYHyv{H}qAvl)Baw^#_AmTPzyUqJb8aDV2P~dT&GX zdT*0208}{88g0_T{*3`*iunEOwI&FU__Y>a5K1(d;}3_mrj6tutqG;q2r&(td{jEV zVBl(!N+!rk)#3|4Z`cp8ztI=<0b3u52qZwzvaqJQqH@g|?^IW*t7Mi)dZ)Nbjo1{| zG^)iG{L%OuAF&A{E{ajy!lSrK5zZVJw2I$?PLTdYC~%bCuTz`VG$;N8XTA!tR_!*a zh^YT6@x+lA2n?bb)i;1Ap4jj}2Fd@9q*wDe)n2#BPH2amuPJ!Bgxizaw@RP9&`Iie z&WGN?xLz^AM;TB}>oUQ|n&78Q@Np*i854ZG39kAZn%fqd;0NY8IhBq(++kctOmOJ} z#vM1oMHvc~6DGKtGZN;c32vTOzhZ)$=apS1IN7AGQzp3bHQ~;f;3%;8CH+7$GUHIq zZh{Xq!5tb@DmKC8(uYwqO>pzLR&IjlDj^6xCOFM; z)m3ML=PMw>H758_6MU@+evS#=Y=Re<;B6-OFcW-}2|nBe-)4fJYl81E!PVM}(7R0V zFPP}>G{Mg^!S|Zr=bPXUncz+ne7^}k(gZ(Xf{!-AkC@=*q4&56KE_0U!UUgSf_vIu zw|hEmuNFHs&-NoxOV0^U`;Y82I}lgTv@aDowVpB8<3J-EhPO=)VH z-6uF5rZhFl?jxMOlG4-^yZ3Xtp3>9=yZ3VX5=v9k>)yrb<&>r-*S(F?izrRaq`QsN z6_lnX*1eX~vnfqYtGkZVQz%VMs=J)i6DdtyS$8p~M^T!ZR=1PW!zoQos@uWoA(YOh zw8rU7N>dZ+KJ_sGV=R=Wrqlfjr{8xWO--iz1gGDoG&Pm(BbVz1*NGebZ_JIPbf`Ipu3IJM=4EJzk4mG4^o<{e0LqEAEz``_3m;` zKTK(=;@!oZzMs-mwY#01zKhaSrMn%RzLnDFQCj2lmnltExck&cl>aYL+DYkGIDIvx zsmgYr;B=VMG(>bC;q;Z1rmEY$pVRe7H|;{2uaY;aJAQtJr{m9__P?I0TUoQymI+^Z zb`JUqT>c_-)mDh6GQ#468D+cJcnS!X;QuSSQV3J;+qs_>x8dtPaB z<(z*#%fI|Nsk5lSu1i&Neda^m7o2{1Ex& z-A$mN7+Pbor(;W(rz7@?r(@MgPy3cGEt-ws6)l>M;G`DIJOJXJ?jJ#jr_EoDgH)ZKp*D)_{|MAweiSXs`Ny>oa_S(p*Jtq0 za>kP!KuPAn4_W3Jm9C+5ryE{B*0qcE9YqqxIy{}Li0TzjC+w;{CDeiHYJ=(|Qwh2I zAm;!neBn9HxGB>ewJ%k5xKC7d)V_GkeTrycOPTwnoSoDy9do}UK<|{fPvq?UCK63K zZyq_FGuVCRnEOM4`>@RY;+88q-5=&W=60N@=4VJ-LyZarzyB0;(4lBA# z5ZQG(=b2=^j$3V4$CkfB11E^b;qD^bhj1v&nsBt^#MeqrOgJ&i{nwm(jtE|q?UyE@ zY*F8H5Z^#?`1-#-pwh8zG@;f@R09{BZKVn2k*ETJsy3nKO4OG3S>SXNYNABd3)E;6 z>Kut$C{QOS#w~o2;;`^Cf%?7)^(~3IPoUm5q5dXOzZR&&Ce-&N>M4Qxwh6UYqV@{Z z*G#CL615#DSqz@*UU#BtbR7B+ZNk&>{Zj}UJ)LP|XnN}DaGxnTa+qpO*U$ge(}NuR 
z4uNOq_BAL-PsdR-zxgC{=iGLT8C^5r%I!yTZa;GUk*I8_2eKe$=XM&L4w4*C$MdwQ zrghDS4ISHQ5Nba}cP(1f$w`)432~6xh=AKk*R5yJl~9*_7_>cIzl0gPJe@3eK2xC6 zykde=J4nKf*7r0Sz5N1kv>&27E1?K0aS0_o82*HkLQ--!TI^wv^mMhdlAUvxQr@0L z;@I|0fd3i)$Pz`XJDwHVpX9vl%zt^jQ)m}X`w#-*#BH5Sjp~X#5+AEi7wWee>L2}Y zC>>#_f0LLy-P=4JwwoZ*RUl_0p226sU1jX-+_K0%$m*R(|ze*l06sJPDKGoQGiJb~2;Wwv~eIM`Af$e?+4?0Bd@> z%P~g;ssX6^TqP3g=~S~xtD&G{`*^UmAEGjJBz0Xi?;~zm4^7MNckd(@2S(-GatqR39Nwt@C!mdm4V>dG^m1F|`=< z4qD~uoGWNTqILcE+Xnr<6!bqb(L?O^zR7I&F&QQ;Y@FWhgJN^dp7!A=%Af5c3S&$>KU5#t?A00N8W-vUFZ!CLrYJWPRyO|1MuR5EPn1!z|pmc z!GoS-#nbi){o6TMwSzjHD>eG7?s%cP;{-Q!9s$Xm9wL}m1t0{(C!l2)BC0UIzxLoM71BHJ8P#1YxfH6+YIfG zo`TL1hW0lhVHaF>6ZxS6reFj-;Ur%lbDxyqE8G~5xx08yu>}G=9oyGJgQs&^Xe3Ih z)3%0!owcuccCNzezAuNV7Iy`)v%{*`WV*A-6k(I|gvmd%DTjUVCQJ^XDIW$&PuCna zxzqMnCv>6+Y9O&=JFS4)57C{aQG}(rgtTi7Y2P7f=Ni)f3GQ884*`fwHG>Ovj`Oxt zq&j|0I%Zqpk&f+cj28i*W6;t8-6WtK06G!Sw>@ZEZ)1jy28RER?Om8vy`AIW5bl1A zx)gTjd8B#g+$R9&I!eCj9JH7+);Vo52eyySr_?9a!y8EqyR`<^v>&27JAxwYh!`|L z(FRy2x|)6egd>Kaw>S)vo~~CRfoj5iWXViea%_76;HU~vFM1mb+1KGd?&-|Gm#vz% ziv#!Z`A<4QH~#?LIZ95u0oCc)_7PIZb%!V>x}YN<>tsyAlIgfy51~RWgy7EGi6h=A zb!vxc6C-E*ap6xaW7wZvgVBE5PwAKtUjf#EeXxQQec2_)&!QCQBLdy=Jlyaob;(p| zyRLZy(%Bb!JKL=DHV>R~oN{>&+}M7IT@^jIlj&H%?M9zo1_A#V2$^36sOt~p_PgO4 zL5qd#nMTjlfxH%o+PcmI+E}n+4IDcKMfi6hK+=IF#V)y=rL{2rwZ0%kvH~s9gS9@o zyV!p7dZVj^s^tM%MpJ3-6d8GtoZaypF#84O1TZHki#HKw!gHRElb}C|HjaNm zMo3W<&c zUo{_lRb%EA@K#hIg)8QFd{o`>Ud;sB#j$&y{m9yN?`!b#_BW!=l9$!~QBBADH68C& zb-X#hC-3*3_UClZtUttFLs8f-y?p*<^Dm!&#eDB`yW;y*r=LR$CwX#LmYxgI71kEu zF`B>884WpuAz!0&JwFn4X)+$6t981= z;ZS(CbKIE7IA_=&jo~qCqjOB-Nc7Xw|?$y)ePhn7^uK zACFWL2M>!eB`B;_2W7Xq)Z36R-n6HjEFP2o_BEDC2N6bs`?u=1Or z?DgbzymCRW+>RAR>5bq~C-N&(^?_C-aK}?ydbk;lAy3j7P?h-ncf7Q#mu%tQ_AR)OWZZyCG%_DW_6%9KVTj&jGXK&p6n#ELrvowg5DN?%Y@0! 
z@f$*cM(4x{V|@e&)n7S*Jd=VSRVF2WeLwi8oQ1!kAN*4d{*8ggXtOdO){C^^C-ZrN zc-o4sMAQ!QL+|4NkQYB;Fxjd6#+7^J@>M7Z2v^m*+3Vx9 zxlGx=pD0)PGnL%>UUJ=={0*_F-x+Q8%gP?NKjGL#QVvw@Kv4ofkp9KrF=<5lr(Hcgbk^W-cTdj=gdZS0g>bFQva|fo|cj@U4g7foL*gA^Sdd+?C?{*9 zeVcaS@Un?hFBnA?|UxmdLKfj%y?YO~GIcUHd7z8-sufcx> z@boZGQ}~)Veh@N>-gjdA@W zP$pKC{yW6dn_PC6-pvguan$0wp2E&GJz^^`wkM;t; z0{CkZ_=S-p06PNwW5Ck`NW;ceas96V|1j`<`PfN3DBCNM$LZMJG5D<|1BC=pwikeA z6J+q%T9NIz#Zr-7aI@8&?cAAGkv;w_mdSkOhyc&;`BJ~f8Ya`~_aW-6yV@J!bvkI&??h+~ zPnW{K`*BZ*(s#;5GM?1(wFXb|>B6IHzD`QKdQkABOydXzUj;HWpZzD0+$PfY&T}$_ zT8pnu+CP%&@C!F_?UjNR{J2!C>^>>!@Vt~SrT;vcR&+R2#QB3e8H9&SoZckm;W-ee zm0ftq%IP5zkF73Ft9(fD|F%H=&N^A(^c(?MAj8!%Y?9#y8QvhnJ7oBv41XZQ=Vkb7 z8U9U%@5yig&fC#7REA?^I9-MdWVl*}O)}gd!y9CHhYTN-;SXf^ybOOW!@tS!JsA$* zjUwom;aC|?m*D~#u9jhw429MGzpLLrEv&4Z?Hs?VKGqtIIcK=0xrz&?#yC~7wPdEN zxO9SmYEMJ9s1%|qeOdlG=3(i$(osS=Z#E*jT&9(MjY;X4O!MX)$sR5En?;~qDbq^c zjWW#}aiFo%4k<3xpwE+OrB7hv_k&}_5A;#*^%dAPO{)dx>#BQ*(`l9~k+7aDrMo@7 z0S(Zq+e^{ztd?}!`3Ta|>E{WSQryxmxRl8B0n>rEOhIRnHl_>^%SrS@X=67L(qh{R zEf)I8M%uUvLhgW+wDGqinwDn6&jK~et0?`n3Dtz$gbb%mdYm}zw)X+IQQQ(mN0&C4 zjIz7{&!^30=q2dB(&jO=9W6fXCWc;$oTcsL=y{aWT^#)hnRFXP2iVi?x#$33_gvU* zqqzMg^lT?>@7`8nX~m!e0=8YA&6@_C5kg&qJ++gnVCO^i8c!-$}>l$A(6%jPv)Otrp?BQ zy3CJZjcouY)?{9eF3@Jz{|wmL%pJs&sdpjKocS2pHBcvm+cN2lug$@UXy!1~PTLUu zkHBoo{66sv)hm$LmN^fuvz?<;`F$z#ZBkmG{}a;MGxtz#M(Ct_N9Gquz!?2CknPGW zA-@#rS;)_wnKLQj;>6y}3n@{=iH9;5P@MYX zbo+M!OShAZfmEiHgF#d|2>LJ*49$Z+hURM7WoXMex1(y=F4PI0^C3B+T*O@vVVlqK zbo+2h%ms<#C{(83Na`I0DC_hc9NmaIp5DRH+t3W`!^x(Bm0-9Q|ARHh68PJGZj>Z^ zg$T}LqiYFzK0$}z_pe&E_72)_?on8g^-*5dM~?uJpK+fH(D!|lj_en!XR z9NhFG2Rr}7!Oib+a7z~aoS(6K8V9%4ad1Zy2Vc96gS+-{aQA<3aL+$E*qf72eBT(! 
z!2^XHJa{1o-}G|u&{htZ=iM#tww=lE*yf=wk(7vGCf&{p0kvoi?*XG?g!B?@P88j z1?K>#4fC#S(}-%g*SmouR8n69-u`_w7zkbfcc&2iGAJ?^-2&na5KY2=3H}SdkV3Go z+1>{2x!(mo)0vq+uy`8fcwk962U8YvFm*Kt)BcDKGJoLo*EpE*E(bI5Q+ohs-^i)5 zTRE6>4+nD};^4wsCMlo6LB*vEtZd?->M9Q0*Kn{fi%C2~IjA1V!J^3=)O?RgYM!NT7l7*UNTtCwY=VSfQXBG>du)>i>J@5kt$bbX5T{3a@qxmGG)U31PwOVVdp z+o-wU3c8W_nYpHyTj|$mlpa-q-dV4*9z{hW>wX5<=&@*M+68wbsV}zv5cP*BM-v*? z7-$S3J=TLNXxEGM^^iR->~GWb68&)y*xmBl zUBMZcs0q}C`Yo7s>(v0SRH)HbGGjs}u`bgop$CAADty81Kx-2}f(CuLUW0xjgJ^aF zf2X1mGA4Zwrs}KppF#&2v>)gn5J?VcnDjN0aH-yee&P*8{{xtp;@F=M)~mmZ9*nTB z1M_wqI}%KKgFXjUnzU!46*=%P@-~UFSL*+XED-N_V5TbUXf}V+*QnfD^(iof1S|%6 znIeH{gpKK|oI*eVn5e=^0aM9>&H5iuAxOYYK+~~1wm@coA4%D!??+EZB#!}2r^3x7 z6@cqE>hDN1egX6!43gnMYLi}shxMEFTcxAl1sbnxl2Vvt@+)wI{)oO6{-OMiY{BaT z^vCpj02XQACQbjIo-4CcIF>{%)IUHsR0~mS@b7Y={!%p();USFYa!?e<~oR$^Hbzh zvyr^9*I@2q*PAUt)N&`+vGo>0*tS3ts$iq0-)*6|W-a2D5VFvsWJ9v#CX$(qW%pli`3pj5c^9Ao|a6JE>5k>lrWG;Bu6c{swSo z6h5&HelP?xC8~~tQHCS%&*Fs))dv3#Z<0Z!K+h(UR??u_pc6j23fSeqtchb~8@v)F zPuMnKHpH>A4IYGVN&C&f+(uZ=n`(nQfF<5<1M@?LO>BdU;bao=&tOnX=kZcCTXoIIP zl^~MMKwqCoqS~Mxrjog;4c356q(vJfA5vPiK@Z|T2hH#CA8&)?!qI)S!FF0o+KAsC zL7mn0yDvxHi%)}Gzjp_E20~2&sCUy9eUj@v^hrcC%v-rYyBie?k>%XTOa28<(`-Z6 zOa6e`!chp1p|0ykU%`D4wC^V2N=Pa#qaMM0Mn7e_0HfPK5dA4J8ExsCsIJtOT;GwA z(LWi5?9xrUz=MXS|5U#omJz3GJp9F7&Vga>xh|6s&Y8%bX3wRrFw@-xN;xrl z5@O|yiv*pM3r}Y*s>j$c3Pe@-UxfdH*J)Jb85dFUjEf?6R2*^HXzaKDoE((tejf1@ z@*OZebI~{9!7qVmAO0W4f5F=+tY&pkgk07;Z~2w(S0h z_8*gFE0Rx;t;nVtb}NWx??nFf%xCGKi=~#ldz7jVmlgNPV<)42>6t#vscCX?C)aH)s~=$^*A|A(7P#a*DR|*;aEVkXwcU_iRMeD z3Hl`zmGny~Zih*>cTr51gCKO&{RM0mCyXDQ{w^pjFT$!J=~$d#(*xv|>FCjO3+f2< zJD_rhwNUiSXa%{$zf94q(ZA-N`%Of#S#|}eoq6aq9b>Ffu&3Lv#(f55>}Etp8#iEX z17PN?$*3TXItx)eLKJiZ^1BFTkGdbN$Fa&n0TIwjUl5cCk&Xl08@ zaWhfUE`S8yOqA=%RkSA{fU~Ty(@|ojeH_{%h}$)K0tm&e88WmvROpUnRw^;tMc^rg zm{-iY7M0eq(L$SBw6h>Z(=Li*g_VkoHX25gX1-~&u0tQ=_^Oo*q|FCrU|rg*Vhp&B z`>mw?X+rUhawYKT_M?DW(`>a)6b!{@-9**rE*3_67s(uR--Q8=GAnIpjHY~wOmX%& zXY(E)LxUFD>JS;a@HwgxKjzJ;xko8}OsB1n(bcd@D=S0a?TC@fFD4)xM=_KZxuEP4 
z!tbZ-(hf-+FNBx<5<|4(iBh0x=Oh_D>pk?HtPwh+7GZ>eo0VLG@4_S z5omsaTv127UuRxw%YurWq|M5uOkQNA221VR)M(4A z?C%ij_?ZuA>KJ;V2U1=8BDH(!Fyd&fY!u1edJQmlBx3UO#_@Jm-Xz}6%5(7!RNhqH z@yaWuYBa}JAJH6c&fk9$_wAJT{JiN;B9WgrgEzqPX7UDD-Ynh#%bU#`V0m+RCn|3q zZ-C{M^9ER6C2xS`Rq+N`o|`wo@)q(2Se}P>zVa6F&R5=I-ucR_;hnF%THg7}Tf#eE zd397w`FYD|=gU%xjOQ2B!z{~{8Hf(MmZE=0HmBk3`I%b&`Fn4^hQ93 zPToY(kzgBINIA1~qBw^ZQAM)+jA*BqQS=6)ol!wiC(+KVrs$W6c6KR6m!SR)tzx+c z(3*xWUa=CE8^y_lHEgf+ajlv@1D!E77jv=#Nkbhx(Z9If^zi z^ru4*4ZQ%xHrlSTT!wyZXxmF<$UfBcbN)_SVddSh)DlM77fhh-`E+{+?%7uXVCjbC z1rxu<(0d8m1%PEa7z!pm#?bE*bQwwb0vcn%!kfP z7*^nVi=qEb&~5-MU%|wqpeP-E23jlb*|!3aZl3^-`Oy|_=CJgK0L>T%ClAZu`jv4M ze8Vy+o|f@T+=pdR+@A3mYTdAb6d#z8fq`_`Ac{LOK7u2MWm7ya<9Y}imP7G^jIU7K zLGck8zd=12Hkjhh3?Is1Sl;7^kI!hsATlhUx|CvU!-HWUwe&-X@J6++IcVQ!_`Tl& z#Nh|=@Vz9&@dx;9_@F)}fkq90mf}EYOZ+HRsZdTn=19BTJUefKf z@0xC>O;W4H5d++s=C~gvUGXAucxZR9Ob5;ICDi%RP9X2Li7mIXcA$qZr?GN9Vdw^F zl!vlxP8%4N_72?B?X-boqYafx23Nff3d`b~QMod##pO4yxgQ^5NPSa*VfYZUNzsH2u;0=QNIhrwx<qIZIi%sJ9W=M)oYfO0<|el-=JAm3E(y*nZ^;z>cuQ%harSErPJ-S zPDtl9gpJnXm3G?csboMp$8EHKQfdD(9GGsW+4F)inD;(_%A_rbz+oRFrZ4mXb`mAE za2bl>4MY}v10FsH5#94Q)a8wc(W+`;72H`0;)TCJq?#hd5U`6P_d?MF6qyd>kck?JkTa1kP1)`hR4?Y>UPs1D(<$K!{K=|Ka{nHSDQuJ$3Za1QF!lZY&;r=sR5 z)yMf%%$Q3ei*)IGc8$c2DA_Ftth-;|viKe*>{Q*4-a&sd=B0Uz`Bh*4<~e)>xt6atpO$>+Wq^YwT3)50g&I z?(!OXP-l5Wr0QyBQ8oKERaxuq6I*Ls$fe~mkvzGzhBmV-`>E|(ckjQp=4Gm=KNNCz zY^`~OEIvSmZry$0+S(@Y(zQke+9HUqxsBvUg`vB)YPHWuBEKXG8ALHbbQ_3@;7q!< zh(P;>L9|H_b#1K~N1570jmEnB6nLJHJewuYZw#Jm5_ztnst&(gtJM}FMwjBrLMI_C z-jc|(MeytZ&x1)k7aBZU6M41@o?YOXgwBbsJ4K*H44&%}d9I_TZQXq*c>YWBtd~4@ z8ay3>=T2n08YMtiNCX;fWs3~llqmQn>K?4S_d+l|bD_&C0_`n>XP4mF1HmKFWznUG zhB}qt-GXQ@h_)pWl^I003Zi`=`lBSG84g`*ol4SI1kpnvx*Bzeu5XDzYjNBmoAEL67JX-K;IA zP0i#(Ao`yCQP}-Uz4f?)pF13-2UaXZgOKEtCHkJrNJy{e8iMy;!RxU+i5^A2kN59a zK7n4*5&>5SajK^ztH|EVVejhW=-Kfsg*R(2BIySKeG*~vDHnYYL1~xMkg|s4!q_#} zaJUu&3XRCiX_Gbi(2Txk%xJKt-bKCm4)n6vbR`Xz-JXjN!9x8lnJm|q(9w}wu0kw z)V(T#U*3cWv0gz>I!6Fii2s6-xTV|QLN>)ilPidKHh_y2c61eDUMA5Py9Vi216z++ 
zrM($<8%fdDl7Nj0jpoz@o85#$NiJ!E(J~R#d`>FJ88nfk>m3mwt5=UfRx5#`(aExV z&5HSm6VS4|ZbgL=_pGQzTx7ax#S$Z4zGAVATX#Qw?b5GH`M;L(m$Yb>nSyKWQrd5! z>!1jhj$AFii$>;Av}hz^Y#MlymBdyY%LaG1z;Yj$<+ zoMDY@P=O5wl>$Gk2>Xzf$Kk~s8G)YiT&c%~1`qx0Gs?vmq%k&1oM-q1War0SK#E6X zl4_@P1hR-HcPC_Op_oIJDLo`7E>|~M>ROhHp=qYxNhPZ2SqHIILE^j znV6Y11umap7By#Br=BHTFwqPr&ls3V$r!;fnX^FNFtSi+rkpdZL3-B(r^4A-MkL~e zoMHo*C0s#yD1rOILhhdu&q3UoQw9D!_!g~!C2+k^&YYI3iwfGBP2#3Y8K_NECuii# ziZ~PPLz`t7fTEc*jOb_Q172pp!6x8y3{3{&BYvM{NS(}AkWJo&YXGvT;VU`LEdQ6wTFtZH>f`p61u-*%LgMJvWIr0XAK55k?lx~3yt6pMt50<8_fSfjrPpCBokp)m*_ z_b6J|&`^YLYk8-a$`5U=YmF7nP}G7>vG$^?58(SF3DWx{DT;1x^CuF10v8)uB*cf> zqDAz!rJ^v-jN|nq%C@4)8obtpzo4Qam-KlO7bXh0aI&+}-{fkBKQva2ClX=c)NZmeHv!v+*aL7E(5*-lRYhM?$zq-MG@#&}S7hA^WJ$-Sp{a!&a z;3*aEM#L&u2DB5SWzAp;FpEYULlS~ptQouJ+4Jwc*S^@A}xV%91Wqnt=2bf`rCiA zR12tD3!5vwTAN$HuImT8EQ@n7>t*3;iOT`O{7_8)Mw^ zTdRXBqSH89T@x&EOuY6Bh6W)%e;d52=vabXa`YF7n#=U%T z5FaC3>Yd^(!JF3TYgG7fnd!q=XO{%;wNyjKim8$-?qBi|x!FAx=aSqSb(Q!kRwO#T zRJd!g@$IdDv%gBFv-=|$G5uaYMy=+g;{7-4n~FoVq`|A|nYRJi^afhmf^*=}*|Wv? 
zID59j&%tNl);Zx1ynl`GTqWn=ZE#c{=Q-!~1t!d7yaZ7(Vj#i$@|t|Xh<~1z^gb~z z&xmgWa*q#!`66OC3kO4sLLx)K_|?7*rutLN(^$ z1&C|{f7Hxdk5Ad*+h_7qX3Y`JOJ7&hd{OUJu>ii7#-COD3|1KRF5xwSns`;5));7N z;#t5}<#iMgUNzU|3wxVdsRp9Xvya$5)*r9Cvf|>Dog0RR0(8Tt-ejv2dAVc>YH-O6 z6r&&CsS84QE8bLy79ZqV->@M_m7<}UzSsuwDTwQ9>d{0RHqt(2^8kWq1cc_KV{LIQWkdK-ur?c=G#MT%-Vz+53 zYMgu1Cib4@rKw#j`j^jAH$x6g$0S#1_hwHJpMo*_j~j`N73=&!+~h-q3|TZo4Wo?+u#ev{BBJ#6tZ%f zGoz@eW?vm@6@9<)9!8oxKbzrfc=AZ8Q{8iBrcwZbo+&i)dU5hDSwz-Mb}`i&Sqsdp7fUX|N4MF~Dg>Rnmv zb<5G1YbEzDy>k>FX?ReQl}3CVlsrUzIF&T_8u3E!t3Hb^PSt0cvh*s;vpYc5GBHu( z7feR9Y3N&EK%>7QNS+?w8Vd&7qIg*@S;uoEq!Rppb|rAKs)O;_+o$8NERBzgE%?YQ z_xr{y=M&Y7K8Ge!ZJ`KyZLDS!^#7YHyqNVa_qUa3Ufic>e(`!yp|h{1uG+a*bb%2q zKwlokbaZoqou8OdIiW>2gnB0?FFaKDu7#pq(OhSq7{+43ezryCNob0yBTdzYO*MXA z0;9Rp=||ppGAO5XsJWucHT7F(7h$gJ#P5b4Y!kC)xD5vIEK7dSx1XMBT}KnIrl4 zJA1bId(3iG^r;q8(pqCl_1T!IZl|2U(`;Y4SjzuEEu$jlBRAmFvnRj|I>6UN;~Ei@qwFKw^+E+;#n;oO>3pzOLid# z3wMp%sc4^lReM=i;!Vq26$@kd#eIs#T{ca1LpD#iN2X$@@ro9+^y*$*SmKObKR`9M8Df($%)mBHA((Ohm#lJDPMTCwBHoSTENqF!TK#j^`LU)AG$823I0-iAxbWaH>8KcAjDVb*TL0t>8$n|u(@FxMtOPe zjkT`FO9NWHa`y!jUs2`yTj-6eQRs<0XrKS%jY=$@`f9^iaGJ?Sq6|;~`$WiYN)LF*zlo=BxVxg1JarCwAYqWjT3-Q>8nmZSMV=M|tZr-u}+! 
zLNt71VO26!Nf*oDPb_;BxjW@r8FFVCGvewskucTQsqjt2*x$t}+UUZm?y4b3tgvNu zhdVLN;@Mjy*3jUOM0ia53~mus70wa!5F_7bpWz~pa(7=%u==s^k*YrX@`oRZcq@5_ z7nOP=RqB@dXlP>t)$z@g?A_&>cTY5L9lFyHHjrrpv4ln>9zmkoRk08THyRF&#YDZY zVSTJk42a%HGe$oiq2kMoP%P?ATqwwyYk#{+C5NZZvwniF4z7(bPGJ=LS@o>sqkk+hlStt%Hf@lUL; zV$0gpvirlaK`Uww;Xp!t&=-jKi?A75v;h^tzsVcKNLy6zZ*6F%{fbBt+5wJV6p3#D zsYWqr5_YHg5rhGmt9ds>xggX!$#f_I1w{)pIV0=;^uzFRTZ9&zl*z;%*4SM6H2F6_ zi4mKC{TZi3Kk_M7t-@x}Fwr7Kgl36o&%waD<|tJMu^&zCGu}94GfNon(VzAn+CJ3- zQSs!5wra^&XPIa#r>m8$-1T0@o8HDUo|+ey-PB1%#8d#2Gc}Qp7m^T$yLpf`k3+q4 zH5Jn|+Jx~(@hAXK3*;+C;b=Z+|pHMBIv3WU2S)nz2?bQ&aETO@!lcCjE3F z>9XRj#Lhn9gS0mMj4k=H;ys7k`h@3b>OID09xGmIvcHc4vf{mp2PT>+Mck?l(bi7t z3!bZ~_ad9IR(wTDy$?ANp0D-wK3=Of)Ode!B0Y|!mpjcMj$854KWCqCoTTmJvKlXS zO+;EX9K7%2vWhQj^}<*+`P-=^j8!`?2_lkKP5$`Fgv9|r9M5~J#{}Xi{S4BAa@?O( zjyM#|@KZ_fkrGb7$TwdzkkH|u@zOt}kFEvy*ZU|J`jrLg-IgS0Gt>7j-#Y>C%l^Gm zj_Q#9g*08y;Gg6t{~YBd2~Yk-PPc^j`WX-4ed&DnLf8yeJ+vkn6Uu`QpA7FUA-%FE*L#J_Pn8|9<(m zEGN6ykN@c941+D_X!ayKw-Ow7&Pw*S_fAkn+`lulzg~QnZoI?J6QobjtK75Pi z7xePy_fyC@#_(*d_yVEPFBShL1^rto;PlE0%J14FdvXA$b{88Z1n-pcZ{YSN6Q zN0OaB32#e+`vG@Ko6P~exwbF(&4Bl1=ly_FzSIImu>(|wQ136*;I{bRW zj}=o3J!fgfuTHR25c0~i2#t8$5%KzBoAAU7M`UqQpwTsB`mC9K6JcfC8u0qUVc%x# zgGIxe@%#uU!LS$G(t`bRuPKGChd?xeRQ(*qdr3_R-k+9uZn|!9LtR~S9i5x5nJ&&l z)HT&DZo<=pKmbQ4!866YQ{U9Yz!~z%8y)yvupD2C_PUo;;Zcm&iw8trw?_g!Rm<@N z;bMGBy1Ej$1k-49(*%&a4GU|QR?M&QE?ux-g&U7L=U3nen${Hc2ZQD13u~$?D!o%& zQ(Slq*Z)l?>;W6kyxPK8i8u8UWo!|q+P#n*Yu0$Dx=LLov+!G%idZ1nD8KQIr2O`B z!Z(p|N_}IXwJ}j$;%Bh2@mUY<<$gLK;;;3$EUmv1&%yv&0Ov%SHLsr!P=1D0w|QI4 zIPgq68`%E9$rCzmfg)(wbC!hxv2fdDRzO-{LD2`?*1oiLK+(4U*<*+ zx*>pHF5nP7ZPF#yOgwf8%ZKFa#F>zaQxoyU!TZ^2M$p@KEVO?_4rvvz|_f< zqHV0Kt(5&*L$DG(XVkBJ(uD1})+gVq zs4nP>Hig11MAqU9;!#qfB=L+P5k@=FQ6J?9`?fI0m(=s96B1r;J)Xk}vbI<>k}L&3 zG;HuTL}J`M!yJ)y+0|VXh&ETCzrYxzFbUQhC)3Og@&Dt&7sIEjYsN8|T5o(jMw!xR zypkSm#2-I-@tj0vnub~7e;;SWD2K0T=bjO#zHiI7p0ng7z$mDn_D4IggDmBm<|4L 
zB!IJB<^;a+;0og)FEPkiTQI7*Xu6@f5OuA?Q&bn8c;U|F?Fxr@n05J^og;81aB@U;XQU=y02=0d~LT-3K=mIj!=KDG{&zSeb^{>fyZwJD_HzWVyGe}hT| z131hhso*6QmF(sz>t~A74x)~KHE&n@*DA!zLlQ2cHD9BVz?9zLUnvEt&}k+{9Lw~C zs{|=Ce^m0TP%bMBo@D+iVDJg|kh1?y8Bw8nQx2A=d|{fNa4kU!t760clagPB+oVD) zyVy^PAHTupYc=lJ@-X<-KD-Lmckl$YkfGufUV(r5#+inYVZb^NGxMwG4l1lu8Iv4%;SygeybUQczj{8Q zLX}_1X4tRb_ayPFeH|5!!rLXxm(r_X-$2ByU+s6P5O35;xRUe#Ey=I4sh*3d&~7+Z zh*5Scq2Ep7SIe&n25ZKfbtrace(trace_, 99); - trace_->open("trace.vcd"); + trace_->open("trace.fst"); #endif this->reset(); @@ -85,6 +85,19 @@ void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) { *ioaddr = host_buffers_[wsid].ioaddr; } +void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { + std::lock_guard guard(mutex_); + + vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; + this->step(); + vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0; + assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid); + *value = vortex_afu_->af2cp_sTxPort_c2_data; +} + void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) { std::lock_guard guard(mutex_); @@ -94,20 +107,7 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8); this->step(); - assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid); -} - -void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { - std::lock_guard guard(mutex_); - - vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1; - vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; - vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; - vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; - this->step(); - assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid); - 
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid); - *value = vortex_afu_->af2cp_sTxPort_c2_data; + vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0; } void opae_sim::flush() { @@ -117,24 +117,41 @@ void opae_sim::flush() { /////////////////////////////////////////////////////////////////////////////// void opae_sim::reset() { - vortex_afu_->reset = 1; - this->step(); - vortex_afu_->reset = 0; + + host_buffers_.clear(); + dram_reads_.clear(); + cci_reads_.clear(); + cci_writes_.clear(); + vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; + vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0; + vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0; + vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0; + vortex_afu_->avs_readdatavalid = 0; + vortex_afu_->avs_waitrequest = 0; + vortex_afu_->reset = 1; + + vortex_afu_->clk = 0; + this->eval(); + vortex_afu_->clk = 1; + this->eval(); + + vortex_afu_->reset = 0; + // Turn on assertion after reset Verilated::assertOn(true); } void opae_sim::step() { - vortex_afu_->clk = 0; - this->eval(); - - vortex_afu_->clk = 1; - this->eval(); this->sRxPort_bus(); this->sTxPort_bus(); this->avs_bus(); + + vortex_afu_->clk = 0; + this->eval(); + vortex_afu_->clk = 1; + this->eval(); #ifndef NDEBUG fflush(stdout); @@ -149,100 +166,105 @@ void opae_sim::eval() { ++timestamp; } -void opae_sim::sRxPort_bus() { +void opae_sim::sRxPort_bus() { + // check mmio request + bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid + || vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid; + // schedule CCI read responses - int cci_rd_index = -1; - for (int i = 0; i < cci_reads_.size(); i++) { - if (cci_reads_[i].cycles_left > 0) { - cci_reads_[i].cycles_left -= 1; - } - if ((cci_rd_index == -1) - && (cci_reads_[i].cycles_left == 0)) { - cci_rd_index = i; + std::list::iterator cci_rd_it(cci_reads_.end()); + for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) { + if (it->cycles_left > 0) + it->cycles_left -= 1; + if ((cci_rd_it == ie) && (it->cycles_left == 
0)) { + cci_rd_it = it; } } // schedule CCI write responses - int cci_wr_index = -1; - for (int i = 0; i < cci_writes_.size(); i++) { - if (cci_writes_[i].cycles_left > 0) { - cci_writes_[i].cycles_left -= 1; + std::list::iterator cci_wr_it(cci_writes_.end()); + for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) { + if (it->cycles_left > 0) + it->cycles_left -= 1; + if ((cci_wr_it == ie) && (it->cycles_left == 0)) { + cci_wr_it = it; } - if ((cci_wr_index == -1) - && (cci_writes_[i].cycles_left == 0)) { - cci_wr_index = i; - } - } - - // send CCI read response - vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; - if (cci_rd_index != -1) { - vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1; - memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), CACHE_BLOCK_SIZE); - vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata; - cci_reads_.erase(cci_reads_.begin() + cci_rd_index); } // send CCI write response vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0; - if (cci_wr_index != -1) { + if (cci_wr_it != cci_writes_.end()) { vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1; - vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_writes_[cci_wr_index].mdata; - cci_writes_.erase(cci_writes_.begin() + cci_wr_index); + vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata; + cci_writes_.erase(cci_wr_it); } - // mmio - vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0; - vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0; + // send CCI read response (ensure mmio disabled) + vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; + if (!mmio_req_enabled + && (cci_rd_it != cci_reads_.end())) { + vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1; + memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE); + vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata; + printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata); + for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) { + 
printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]); + } + printf("\n"); + fflush(stdout); + cci_reads_.erase(cci_rd_it); + } } void opae_sim::sTxPort_bus() { - // check read queue size - vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= CCI_RQ_SIZE); - - // check write queue size - vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= CCI_WQ_SIZE); - // process read requests - if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) { + if (vortex_afu_->af2cp_sTxPort_c0_valid) { + assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull); cci_rd_req_t cci_req; cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); + cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address; cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata; auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE); memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE); - cci_reads_.push_back(cci_req); + printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata); + fflush(stdout); + cci_reads_.emplace_back(cci_req); } // process write requests - if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) { + if (vortex_afu_->af2cp_sTxPort_c1_valid) { + assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull); cci_wr_req_t cci_req; cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata; auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE); memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE); - cci_writes_.push_back(cci_req); + cci_writes_.emplace_back(cci_req); } + + // check queues overflow + vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1)); + vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1)); } void opae_sim::avs_bus() { // schedule DRAM read 
responses - int dram_rd_index = -1; - for (int i = 0; i < dram_reads_.size(); i++) { - if (dram_reads_[i].cycles_left > 0) { - dram_reads_[i].cycles_left -= 1; + std::list::iterator dram_rd_it(dram_reads_.end()); + for (auto it = dram_reads_.begin(), ie = dram_reads_.end(); it != ie; ++it) { + if (it->cycles_left > 0) { + it->cycles_left -= 1; } - if ((dram_rd_index == -1) - && (dram_reads_[i].cycles_left == 0)) { - dram_rd_index = i; + if ((it != ie) && (it->cycles_left == 0)) { + dram_rd_it = it; } } // send DRAM response vortex_afu_->avs_readdatavalid = 0; - if (dram_rd_index != -1) { + if (dram_rd_it != dram_reads_.end()) { vortex_afu_->avs_readdatavalid = 1; - memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), CACHE_BLOCK_SIZE); - dram_reads_.erase(dram_reads_.begin() + dram_rd_index); + memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE); + dram_reads_.erase(dram_rd_it); } // handle DRAM stalls @@ -275,7 +297,7 @@ void opae_sim::avs_bus() { dram_req.cycles_left = DRAM_LATENCY; unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE); ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data()); - dram_reads_.push_back(dram_req); + dram_reads_.emplace_back(dram_req); } } diff --git a/driver/opae/vlsim/opae_sim.h b/driver/opae/vlsim/opae_sim.h index 9a4906eb..58b57757 100644 --- a/driver/opae/vlsim/opae_sim.h +++ b/driver/opae/vlsim/opae_sim.h @@ -5,7 +5,7 @@ #include "verilated.h" #ifdef VCD_OUTPUT -#include +#include #endif #include @@ -13,7 +13,7 @@ #include #include -#include +#include #include #define CACHE_BLOCK_SIZE 64 @@ -41,18 +41,19 @@ private: typedef struct { int cycles_left; std::array block; - unsigned tag; + uint32_t tag; } dram_rd_req_t; typedef struct { int cycles_left; std::array block; - unsigned mdata; + uint64_t addr; + uint32_t mdata; } cci_rd_req_t; typedef struct { int cycles_left; - unsigned mdata; + uint32_t mdata; } cci_wr_req_t; typedef struct { @@ -76,17 +77,17 @@ private: 
std::unordered_map host_buffers_; - std::vector dram_reads_; + std::list dram_reads_; - std::vector cci_reads_; + std::list cci_reads_; - std::vector cci_writes_; + std::list cci_writes_; std::mutex mutex_; RAM ram_; Vvortex_afu_shim *vortex_afu_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedFstC *trace_; #endif }; \ No newline at end of file diff --git a/driver/opae/vx_scope.h b/driver/opae/vx_scope.h index edd26cb4..2bb09c4a 100644 --- a/driver/opae/vx_scope.h +++ b/driver/opae/vx_scope.h @@ -1,6 +1,6 @@ #pragma once -//#define HANG_TIMEOUT 60 +#define HANG_TIMEOUT 60 int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1); diff --git a/driver/tests/dogfood/Makefile b/driver/tests/dogfood/Makefile index 69a5ec80..46cb364a 100644 --- a/driver/tests/dogfood/Makefile +++ b/driver/tests/dogfood/Makefile @@ -1,7 +1,7 @@ RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain VORTEX_RT_PATH ?= $(wildcard ../../../runtime) -OPTS ?= -n32 +OPTS ?= -n64 VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ diff --git a/hw/opae/README b/hw/opae/README index 84e08e88..e05b1df2 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -76,7 +76,7 @@ tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt # compress VCD trace tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd -tar -zcvf trace.vcd.tar.gz trace.vcd +tar -zcvf trace.fst.tar.gz trace.fst run.log tar -zcvf run.log.tar.gz run.log tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index e23c4caf..c019e54c 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -74,103 +74,103 @@ localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR; localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA; localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ; -logic 
[127:0] afu_id = `AFU_ACCEL_UUID; +localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE); +localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW; -typedef enum logic[3:0] { - STATE_IDLE, - STATE_READ, - STATE_WRITE, - STATE_START, - STATE_RUN, - STATE_CLFLUSH, - STATE_CSR_READ, - STATE_CSR_WRITE -} state_t; - -typedef logic [$clog2(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag; -typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data; - -state_t state; +localparam STATE_IDLE = 0; +localparam STATE_READ = 1; +localparam STATE_WRITE = 2; +localparam STATE_START = 3; +localparam STATE_RUN = 4; +localparam STATE_CLFLUSH = 5; +localparam STATE_CSR_READ = 6; +localparam STATE_CSR_WRITE = 7; +localparam STATE_MAX_VALUE = 8; +localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE); `ifdef SCOPE `SCOPE_DECL_SIGNALS `endif +wire [127:0] afu_id = `AFU_ACCEL_UUID; + +reg [STATE_WIDTH-1:0] state; + // Vortex ports /////////////////////////////////////////////////////////////// -logic vx_dram_req_valid; -logic vx_dram_req_rw; -logic [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen; -logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; -logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data; -logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; -logic vx_dram_req_ready; +wire vx_dram_req_valid; +wire vx_dram_req_rw; +wire [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen; +wire [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; +wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data; +wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; +wire vx_dram_req_ready; -logic vx_dram_rsp_valid; -logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; -logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; -logic vx_dram_rsp_ready; +wire vx_dram_rsp_valid; +wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; +wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; +wire vx_dram_rsp_ready; -logic vx_snp_req_valid; -logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; -logic vx_snp_req_invalidate = 0; -logic [`VX_SNP_TAG_WIDTH-1:0] 
vx_snp_req_tag; -logic vx_snp_req_ready; +reg vx_snp_req_valid; +reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; +wire vx_snp_req_invalidate = 0; +reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; +wire vx_snp_req_ready; -logic vx_snp_rsp_valid; +reg vx_snp_rsp_valid; `DEBUG_BEGIN -logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; +reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; `DEBUG_END -logic vx_snp_rsp_ready; +reg vx_snp_rsp_ready; -logic vx_csr_io_req_valid; -logic [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; -logic [11:0] vx_csr_io_req_addr; -logic vx_csr_io_req_rw; -logic [31:0] vx_csr_io_req_data; -logic vx_csr_io_req_ready; +wire vx_csr_io_req_valid; +wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; +wire [11:0] vx_csr_io_req_addr; +wire vx_csr_io_req_rw; +wire [31:0] vx_csr_io_req_data; +wire vx_csr_io_req_ready; -logic vx_csr_io_rsp_valid; -logic [31:0] vx_csr_io_rsp_data; -logic vx_csr_io_rsp_ready; +wire vx_csr_io_rsp_valid; +wire [31:0] vx_csr_io_rsp_data; +wire vx_csr_io_rsp_ready; -logic vx_reset; -logic vx_busy; +reg vx_reset; +wire vx_busy; // AVS Queues ///////////////////////////////////////////////////////////////// -logic avs_rtq_push; -logic avs_rtq_pop; +wire avs_rtq_push; +wire avs_rtq_pop; `DEBUG_BEGIN -logic avs_rtq_empty; -logic avs_rtq_full; +wire avs_rtq_empty; +wire avs_rtq_full; `DEBUG_BEGIN -logic avs_rdq_push; -logic avs_rdq_pop; +wire avs_rdq_push; +wire avs_rdq_pop; t_local_mem_data avs_rdq_dout; -logic avs_rdq_empty; +wire avs_rdq_empty; `DEBUG_BEGIN -logic avs_rdq_full; +wire avs_rdq_full; `DEBUG_END // CMD variables ////////////////////////////////////////////////////////////// t_ccip_clAddr cmd_io_addr; -logic[DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; -logic[DRAM_ADDR_WIDTH-1:0] cmd_data_size; +reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; +reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size; `ifdef SCOPE -logic [63:0] cmd_scope_rdata; -logic [63:0] cmd_scope_wdata; -logic cmd_scope_read; -logic cmd_scope_write; +wire [63:0] cmd_scope_rdata; +wire [63:0] 
cmd_scope_wdata; +wire cmd_scope_read; +wire cmd_scope_write; `endif -logic [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; -logic [11:0] cmd_csr_addr; -logic [31:0] cmd_csr_rdata; -logic [31:0] cmd_csr_wdata; +reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; +reg [11:0] cmd_csr_addr; +reg [31:0] cmd_csr_rdata; +reg [31:0] cmd_csr_wdata; // MMIO controller //////////////////////////////////////////////////////////// @@ -193,6 +193,10 @@ assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mm `DEBUG_BEGIN wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid; wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid; +wire cp2af_sRxPort_c0_rspValid = cp2af_sRxPort.c0.rspValid; +wire cp2af_sRxPort_c1_rspValid = cp2af_sRxPort.c1.rspValid; +wire cp2af_sRxPort_c0TxAlmFull = cp2af_sRxPort.c0TxAlmFull; +wire cp2af_sRxPort_c1TxAlmFull = cp2af_sRxPort.c1TxAlmFull; wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address; wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length; wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid; @@ -212,8 +216,7 @@ initial begin end `endif -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin `ifndef VERILATOR $asserton; // enable assertions @@ -316,7 +319,7 @@ begin MMIO_STATUS: begin mmio_tx.data <= 64'(state); `ifdef DBG_PRINT_OPAE - if (state != state_t'(mmio_tx.data)) begin + if (state != STATE_WIDTH'(mmio_tx.data)) begin $display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state); end `endif @@ -349,14 +352,13 @@ end // COMMAND FSM //////////////////////////////////////////////////////////////// -logic cmd_read_done; -logic cmd_write_done; -logic cmd_clflush_done; -logic cmd_csr_done; -logic cmd_run_done; +wire cmd_read_done; +wire cmd_write_done; +wire cmd_clflush_done; +wire cmd_csr_done; +wire cmd_run_done; -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin state <= STATE_IDLE; vx_reset <= 0; @@ -479,27 
+481,28 @@ end // AVS Controller ///////////////////////////////////////////////////////////// -logic vortex_enabled; -logic cci_rdq_empty; -t_cci_rdq_data cci_rdq_dout; +wire vortex_enabled; +wire cci_rdq_empty; +wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout; -logic cci_dram_rd_req_fire; -logic cci_dram_wr_req_fire; -logic vx_dram_rd_req_fire; +wire cci_dram_rd_req_fire; +wire cci_dram_wr_req_fire; +wire vx_dram_rd_req_fire; `DEBUG_BEGIN -logic vx_dram_wr_req_fire; +wire vx_dram_wr_req_fire; `DEBUG_END -logic vx_dram_rd_rsp_fire; +wire vx_dram_rd_rsp_fire; t_local_mem_byte_mask vx_dram_req_byteen_; -logic [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads, avs_pending_reads_next; -logic [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset; -logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; +reg [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads; +wire [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads_next; +wire [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; -logic cci_dram_rd_req_enable, cci_dram_wr_req_enable; -logic vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable; +wire cci_dram_rd_req_enable, cci_dram_wr_req_enable; +wire vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable; -logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr; assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state); @@ -535,11 +538,10 @@ end else begin assign vx_dram_req_byteen_ = vx_dram_req_byteen; end -always_comb -begin +always @(*) begin case (state) CMD_MEM_READ: avs_address = cci_dram_rd_req_addr; - CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout))); + CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout))); default: avs_address = 
vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH]; endcase @@ -550,8 +552,8 @@ begin endcase case (state) - CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)]; - default: avs_writedata = (DRAM_LINE_WIDTH)'(vx_dram_req_data) << vx_dram_req_offset; + CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; + default: avs_writedata = DRAM_LINE_WIDTH'(vx_dram_req_data) << vx_dram_req_offset; endcase end @@ -560,8 +562,7 @@ assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable; assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size); -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin mem_bank_select <= 0; @@ -594,7 +595,7 @@ begin end if (cci_dram_wr_req_fire) begin - cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); + cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? 
DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE $display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1)); @@ -654,7 +655,7 @@ VX_generic_queue #( // AVS data read response queue /////////////////////////////////////////////// -logic cci_wr_req_fire; +wire cci_wr_req_fire; assign avs_rdq_push = avs_readdatavalid; assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire; @@ -676,31 +677,37 @@ VX_generic_queue #( // CCI-P Read Request /////////////////////////////////////////////////////////// -logic [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads, cci_pending_reads_next; -logic [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr, cci_rd_req_ctr_next; +reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads; +wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next; +reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr; +wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next; +wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag; +reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr; t_ccip_clAddr cci_rd_req_addr; -t_cci_rdq_tag cci_rd_rsp_ctr; -logic cci_rd_req_fire, cci_rd_rsp_fire; -logic cci_rd_req_enable, cci_rd_req_wait; +wire cci_rd_req_fire, cci_rd_rsp_fire; +reg cci_rd_req_enable, cci_rd_req_wait; -logic cci_rdq_push, cci_rdq_pop; -t_cci_rdq_data cci_rdq_din; +wire cci_rdq_push, cci_rdq_pop; +wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din; -always_comb begin +always @(*) begin af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0); af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr; - af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(t_cci_rdq_tag'(cci_rd_req_ctr)); + af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag); end assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull; assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; +assign cci_rd_req_tag = 
CCI_RD_RQ_TAGW'(cci_rd_req_ctr); +assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata); + assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0); assign cci_rdq_pop = cci_dram_wr_req_fire; assign cci_rdq_push = cci_rd_rsp_fire; -assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)}; +assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag}; assign cci_pending_reads_next = cci_pending_reads + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 : @@ -709,8 +716,7 @@ assign cci_pending_reads_next = cci_pending_reads assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait; // Send read requests to CCI -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin cci_rd_req_addr <= 0; cci_rd_req_ctr <= 0; @@ -738,21 +744,23 @@ begin if (cci_rd_req_fire) begin cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr_next; - if (t_cci_rdq_tag'(cci_rd_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin - cci_rd_req_wait <= 1; // end current request batch + if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin + cci_rd_req_wait <= 1; // end current request batch + $display("*** %t: CCI Rd Rsp: STOP", $time); end `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next); + $display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next); `endif end if (cci_rd_rsp_fire) begin - cci_rd_rsp_ctr <= cci_rd_rsp_ctr + t_cci_rdq_tag'(1); - if (cci_rd_rsp_ctr == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin - cci_rd_req_wait <= 0; // restart new request batch + cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1); + if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin + 
cci_rd_req_wait <= 0; // restart new request batch + $display("*** %t: CCI Rd Rsp: START", $time); end `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata), cci_rd_rsp_ctr); + $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr); `endif end @@ -763,12 +771,11 @@ begin end cci_pending_reads <= cci_pending_reads_next; - end end VX_generic_queue #( - .DATAW($bits(t_ccip_clData) + $bits(t_cci_rdq_tag)), + .DATAW(CCI_RD_RQ_DATAW), .SIZE(CCI_RD_QUEUE_SIZE) ) cci_rd_req_queue ( .clk (clk), @@ -782,14 +789,37 @@ VX_generic_queue #( `UNUSED_PIN (size) ); +`DEBUG_BEGIN +reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask; +always @(posedge clk) begin + if (reset) begin + dbg_cci_rd_rsp_mask <= 0; + end else begin + if (cci_rd_rsp_fire) begin + if (cci_rd_rsp_ctr == 0) begin + dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag); + end else begin + if (dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] != 0) begin + $display("*** %t: Assert: CCI Rd Rsp: idx=%0d, ctr=%0d, mask=%0h, meta=%0h, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, dbg_cci_rd_rsp_mask, cp2af_sRxPort.c0.hdr.mdata, cp2af_sRxPort.c0.data); + assert(0); + end + dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1; + end + end + end +end +`DEBUG_END + // CCI-P Write Request ////////////////////////////////////////////////////////// -logic [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes, cci_pending_writes_next; -logic [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; +reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes; +wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next; +reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; t_ccip_clAddr cci_wr_req_addr; -logic cci_wr_req_enable, cci_wr_rsp_fire; +reg cci_wr_req_enable; +wire cci_wr_rsp_fire; -always_comb begin +always @(*) begin af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0); af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr; af2cp_sTxPort.c1.hdr.sop = 1; // single line write 
mode @@ -808,7 +838,7 @@ assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty; // Send write requests to CCI -always_ff @(posedge clk) +always @(posedge clk) begin if (reset) begin cci_wr_req_addr <= 0; @@ -833,7 +863,7 @@ begin cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE - $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next); + $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, avs_rdq_dout); `endif end @@ -849,12 +879,12 @@ end // Vortex cache snooping ////////////////////////////////////////////////////// -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_req_ctr_next; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr, snp_rsp_ctr_next; +wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size; +wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr; +reg [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr; +wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr_next, snp_rsp_ctr_next; -logic vx_snp_req_fire, vx_snp_rsp_fire; +wire vx_snp_req_fire, vx_snp_rsp_fire; if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin assign snp_req_baseaddr = {cmd_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)}; @@ -872,8 +902,7 @@ assign snp_rsp_ctr_next = vx_snp_rsp_fire ? 
(snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'( assign cmd_clflush_done = (0 == snp_rsp_ctr); -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin vx_snp_req_valid <= 0; vx_snp_req_addr <= 0; @@ -911,7 +940,7 @@ begin vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next); snp_req_ctr <= snp_req_ctr_next; `ifdef DBG_PRINT_OPAE - $display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next), (snp_req_size - snp_req_ctr_next)); + $display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next)); `endif end @@ -928,7 +957,7 @@ end // CSRs/////////////////////////////////////////////////////////////////////// -logic csr_io_req_sent; +reg csr_io_req_sent; assign vx_csr_io_req_valid = !csr_io_req_sent && ((STATE_CSR_READ == state || STATE_CSR_WRITE == state)); @@ -941,8 +970,7 @@ assign vx_csr_io_rsp_ready = 1; assign cmd_csr_done = (STATE_CSR_WRITE == state) ? 
vx_csr_io_req_ready : vx_csr_io_rsp_valid; -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin csr_io_req_sent <= 0; cmd_csr_rdata <= 0; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index a7a2e0ef..9c8b19dd 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -6,11 +6,6 @@ /////////////////////////////////////////////////////////////////////////////// -// `define SYNTHESIS 1 -// `define ASIC 1 - -/////////////////////////////////////////////////////////////////////////////// - `define NW_BITS `LOG2UP(`NUM_WARPS) `define NT_BITS `LOG2UP(`NUM_THREADS) diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index f60f1964..05833b9d 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -10,131 +10,24 @@ module VX_gpr_ram ( output wire [`NUM_THREADS-1:0][31:0] rs1_data, output wire [`NUM_THREADS-1:0][31:0] rs2_data ); - `ifndef ASIC - - reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0]; - reg [`NUM_THREADS-1:0][31:0] q1, q2; - - always @(posedge clk) begin - for (integer i = 0; i < `NUM_THREADS; i++) begin - if (we[i]) begin - mem[waddr][i][0] <= wdata[i][07:00]; - mem[waddr][i][1] <= wdata[i][15:08]; - mem[waddr][i][2] <= wdata[i][23:16]; - mem[waddr][i][3] <= wdata[i][31:24]; - end - end - q1 <= mem[rs1]; - q2 <= mem[rs2]; - end - - assign rs1_data = q1; - assign rs2_data = q2; - - `else - - wire [`NUM_THREADS-1:0][31:0] write_bit_mask; + reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0]; + reg [`NUM_THREADS-1:0][31:0] q1, q2; + + always @(posedge clk) begin for (integer i = 0; i < `NUM_THREADS; i++) begin - assign write_bit_mask[i] = {32{~we[i]}}; - end - - wire cenb = 0; - wire cena_1 = 0; - wire cena_2 = 0; - - wire [`NUM_THREADS-1:0][31:0] tmp_a; - wire [`NUM_THREADS-1:0][31:0] tmp_b; - - `ifndef SYNTHESIS - for (integer i = 0; i < `NUM_THREADS; i++) begin - for (integer j = 0; j < 32; j++) begin - assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 
1'b0 : tmp_a[i][j]; - assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j]; + if (we[i]) begin + mem[waddr][i][0] <= wdata[i][07:00]; + mem[waddr][i][1] <= wdata[i][15:08]; + mem[waddr][i][2] <= wdata[i][23:16]; + mem[waddr][i][3] <= wdata[i][31:24]; end end - `else - assign rs1_data = tmp_a; - assign rs2_data = tmp_b; - `endif - for (integer i = 0; i < 'NT; i=i+4) begin - `IGNORE_WARNINGS_BEGIN - rf2_32x128_wm1 first_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(tmp_a[(i+3):(i)]), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena_1), - .AA(rs1[(i+3):(i)]), - .CLKB(clk), - .CENB(cenb), - .WENB(write_bit_mask[(i+3):(i)]), - .AB(waddr[(i+3):(i)]), - .DB(wdata[(i+3):(i)]), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); + q1 <= mem[rs1]; + q2 <= mem[rs2]; + end - rf2_`NUM_GPRSx128_wm1 second_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(tmp_b[(i+3):(i)]), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena_2), - .AA(rs2[(i+3):(i)]), - .CLKB(clk), - .CENB(cenb), - .WENB(write_bit_mask[(i+3):(i)]), - .AB(waddr[(i+3):(i)]), - .DB(wdata[(i+3):(i)]), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - `IGNORE_WARNINGS_END - end - - `endif + assign rs1_data = q1; + assign rs2_data = q2; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 1b957271..6bb52123 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -20,8 +20,8 @@ module VX_icache_stage #( ); `UNUSED_VAR 
(reset) - reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0]; - reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0]; + `NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0]; + `NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0]; wire icache_req_fire = icache_req_if.valid && icache_req_if.ready; diff --git a/hw/rtl/VX_ipdom_stack.v b/hw/rtl/VX_ipdom_stack.v index e00097ae..4e7d42f9 100644 --- a/hw/rtl/VX_ipdom_stack.v +++ b/hw/rtl/VX_ipdom_stack.v @@ -16,8 +16,8 @@ module VX_ipdom_stack #( ); localparam STACK_SIZE = 2 ** DEPTH; - reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; - reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; + `NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; + `NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; reg is_part [0:STACK_SIZE-1]; reg [DEPTH-1:0] rd_ptr, wr_ptr; diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index d30120dd..783743ee 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -52,7 +52,7 @@ /////////////////////////////////////////////////////////////////////////////// `define USE_FAST_BRAM (* ramstyle="mlab" *) -`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *) +`define NO_RW_RAM_CHECK (* ramstyle="mlab, no_rw_check" *) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index b5f6350d..d3e31162 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -306,9 +306,9 @@ module VX_bank #( assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped - //decides which request to deal with. Priority: 1) DRAM fill, 2) Miss reserve 3) Core req 4) Snp req - assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 : - mrvq_pop_unqual ? mrvq_addr_st0 : + //Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req + assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 : + dfpq_pop_unqual ? 
dfpq_addr_st0 : reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : snrq_pop_unqual ? snrq_addr_st0 : 0; diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 9f201223..f56d638e 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -56,7 +56,7 @@ module VX_cache_miss_resrv #( output wire miss_resrv_is_snp_st0, output wire miss_resrv_snp_invalidate_st0 ); - reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0]; + wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table; reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; reg [MRVQ_SIZE-1:0] valid_table; @@ -72,8 +72,8 @@ module VX_cache_miss_resrv #( assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE)); assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock - wire enqueue_possible = !miss_resrv_full; - wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; + wire enqueue_possible = !miss_resrv_full; + wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; reg [MRVQ_SIZE-1:0] make_ready; reg [MRVQ_SIZE-1:0] make_ready_push; @@ -86,11 +86,11 @@ module VX_cache_miss_resrv #( assign pending_hazard_st1 = |(valid_address_match); - wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr]; + wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr]; wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr; assign miss_resrv_valid_st0 = dequeue_possible; - assign miss_resrv_addr_st0 = addr_table[dequeue_index]; + assign miss_resrv_addr_st0 = addr_table[dequeue_index]; assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, @@ -98,7 +98,7 @@ module VX_cache_miss_resrv #( miss_resrv_byteen_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0, - miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index]; + miss_resrv_snp_invalidate_st0} = metadata_table; wire mrvq_push = miss_add && enqueue_possible && !is_mrvq; wire mrvq_pop = 
miss_resrv_pop && dequeue_possible; @@ -125,7 +125,6 @@ module VX_cache_miss_resrv #( valid_table[enqueue_index] <= 1; ready_table[enqueue_index] <= mrvq_init_ready_state; addr_table[enqueue_index] <= miss_add_addr; - metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}; tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); end else if (increment_head) begin valid_table[head_ptr] <= 0; @@ -155,6 +154,22 @@ module VX_cache_miss_resrv #( end end + VX_dp_ram #( + .DATAW(`MRVQ_METADATA_WIDTH), + .SIZE(MRVQ_SIZE), + .BYTEENW(1), + .BUFFERED(0), + .RWCHECK(1) + ) metadata_ram ( + .clk(clk), + .waddr(enqueue_index), + .raddr(dequeue_index), + .wren(mrvq_push), + .rden(1'b1), + .din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}), + .dout(metadata_table) + ); + `ifdef DBG_PRINT_CACHE_MSRQ always @(posedge clk) begin if (mrvq_push || mrvq_pop || increment_head || recover_state) begin diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index a1b80838..6d6d8572 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -183,15 +183,15 @@ module VX_tag_data_access #( if (valid_req_st1) begin if ((| use_write_enable)) begin if (writefill_st1) begin - $display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data); + $display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data); end else begin - $display("%t: cache%0d:%0d store-write: wid=%0d, 
PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1); + $display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1); end end else if (miss_st1) begin - $display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1); + $display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1); end else begin - $display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); + $display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); end end end diff --git a/hw/rtl/cache/VX_tag_data_store.v b/hw/rtl/cache/VX_tag_data_store.v index c0594471..d3a022b2 100644 --- a/hw/rtl/cache/VX_tag_data_store.v +++ b/hw/rtl/cache/VX_tag_data_store.v @@ -78,7 +78,7 @@ module VX_tag_data_store #( .SIZE(`BANK_LINE_COUNT), .BYTEENW(`BANK_LINE_WORDS * WORD_SIZE), .BUFFERED(0), - .RWCHECK(0) + .RWCHECK(1) ) dp_ram ( .clk(clk), .waddr(write_addr), diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index 
b7d70789..01a0a167 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -6,6 +6,7 @@ module VX_dp_ram #( parameter BYTEENW = 1, parameter BUFFERED = 1, parameter RWCHECK = 1, + parameter RWBYPASS = 0, parameter ADDRW = $clog2(SIZE), parameter SIZEW = $clog2(SIZE+1) ) ( @@ -29,19 +30,46 @@ module VX_dp_ram #( if (wren[i]) mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; end - if (rden) - dout_r <= mem[raddr]; end end else begin always @(posedge clk) begin if (wren) mem[waddr] <= din; - if (rden) - dout_r <= mem[raddr]; end - end - + end + + always @(posedge clk) begin + if (rden) + dout_r <= mem[raddr]; + end + + if (RWBYPASS) begin + reg [DATAW-1:0] din_r; + wire writing; + + if (BYTEENW > 1) begin + assign writing = (| wren); + always @(posedge clk) begin + for (integer i = 0; i < BYTEENW; i++) begin + din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8]; + end + end + end else begin + assign writing = wren; + always @(posedge clk) begin + din_r <= din; + end + end + + reg bypass_r; + always @(posedge clk) begin + bypass_r <= writing && (raddr == waddr); + end + + assign dout = bypass_r ? din_r : dout_r; + end else begin assign dout = dout_r; + end end else begin @@ -65,7 +93,7 @@ module VX_dp_ram #( end end - `ifdef SYNTHESIS + if (RWBYPASS) begin reg [DATAW-1:0] din_r; wire writing; @@ -89,13 +117,13 @@ module VX_dp_ram #( end assign dout = bypass_r ? 
din_r : mem[raddr]; - `else + end else begin assign dout = mem[raddr]; - `endif + end end else begin - reg [DATAW-1:0] mem [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0]; if (BYTEENW > 1) begin always @(posedge clk) begin diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 68db0d4d..bb5010b7 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -85,7 +85,7 @@ module VX_generic_queue #( .DATAW(DATAW), .SIZE(SIZE), .BUFFERED(0), - .RWCHECK(0) + .RWCHECK(1) ) dp_ram ( .clk(clk), .waddr(wr_ptr_a), diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 9490d6b3..8b089259 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -36,8 +36,9 @@ module VX_scope #( localparam GET_COUNT = 3'd3; localparam GET_OFFSET = 3'd6; - reg [DATAW-1:0] data_store [SIZE-1:0]; - reg [DELTAW-1:0] delta_store [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0]; + reg [UPDW-1:0] prev_trigger_id; reg [DELTAW-1:0] delta; reg [BUSW-1:0] bus_out_r; diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index 20e7e85b..88ac722c 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -44,7 +44,7 @@ gen-s: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' gen-sd: - verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace $(DBG) + verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace-fst --trace-threads 1 $(DBG) gen-st: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS) @@ -53,7 +53,7 @@ gen-m: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' gen-md: - verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace $(DBG) + verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace-fst --trace-threads 1 $(DBG) 
gen-mt: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) @@ -77,11 +77,12 @@ build-mt: gen-mt (cd obj_dir && make -j -f VVortex.mk) run: run-s + run-s: build-s (cd obj_dir && ./VVortex) run-sd: build-sd - (cd obj_dir && valgrind ./VVortex) + (cd obj_dir && ./VVortex) run-st: build-st (cd obj_dir && ./VVortex) diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 60fde196..2698cc74 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -28,15 +28,11 @@ Simulator::Simulator() { ram_ = nullptr; vortex_ = new VVortex(); - dram_rsp_active_ = false; - snp_req_active_ = false; - csr_req_active_ = false; - #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC(); + trace_ = new VerilatedFstC(); vortex_->trace(trace_, 99); - trace_->open("trace.vcd"); + trace_->open("trace.fst"); #endif // reset the device @@ -66,27 +62,49 @@ void Simulator::reset() { std::cout << timestamp << ": [sim] reset()" << std::endl; #endif - vortex_->reset = 1; - this->step(); - vortex_->reset = 0; - + print_bufs_.clear(); dram_rsp_vec_.clear(); + dram_rsp_active_ = false; + snp_req_active_ = false; + csr_req_active_ = false; + + snp_req_size_ = 0; + pending_snp_reqs_ = 0; + csr_rsp_value_ = nullptr; + + vortex_->dram_rsp_valid = 0; + vortex_->dram_req_ready = 0; + vortex_->io_req_ready = 0; + vortex_->io_rsp_valid = 0; + vortex_->snp_req_valid = 0; + vortex_->snp_rsp_ready = 0; + vortex_->csr_io_req_valid = 0; + vortex_->csr_io_rsp_ready = 0; + + vortex_->reset = 1; + + vortex_->clk = 0; + this->eval(); + vortex_->clk = 1; + this->eval(); + + vortex_->reset = 0; + // Turn on assertion after reset Verilated::assertOn(true); } void Simulator::step() { - vortex_->clk = 0; - this->eval(); - - vortex_->clk = 1; - this->eval(); - this->eval_dram_bus(); this->eval_io_bus(); this->eval_csr_bus(); this->eval_snp_bus(); + + vortex_->clk = 0; + this->eval(); + vortex_->clk = 1; + this->eval(); } 
void Simulator::eval() { @@ -104,14 +122,13 @@ void Simulator::eval_dram_bus() { } // schedule DRAM responses - int dequeue_index = -1; - for (int i = 0; i < dram_rsp_vec_.size(); i++) { - if (dram_rsp_vec_[i].cycles_left > 0) { - dram_rsp_vec_[i].cycles_left -= 1; + std::list::iterator dram_rsp_it(dram_rsp_vec_.end()); + for (auto it = dram_rsp_vec_.begin(), ie = dram_rsp_vec_.end(); it != ie; ++it) { + if (it->cycles_left > 0) { + it->cycles_left -= 1; } - if ((dequeue_index == -1) - && (dram_rsp_vec_[i].cycles_left == 0)) { - dequeue_index = i; + if ((dram_rsp_it == ie) && (it->cycles_left == 0)) { + dram_rsp_it = it; } } @@ -122,11 +139,11 @@ void Simulator::eval_dram_bus() { dram_rsp_active_ = false; } if (!dram_rsp_active_) { - if (dequeue_index != -1) { + if (dram_rsp_it != dram_rsp_vec_.end()) { vortex_->dram_rsp_valid = 1; - memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].block.data(), GLOBAL_BLOCK_SIZE); - vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag; - dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index); + memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_it->block.data(), GLOBAL_BLOCK_SIZE); + vortex_->dram_rsp_tag = dram_rsp_it->tag; + dram_rsp_vec_.erase(dram_rsp_it); dram_rsp_active_ = true; } else { vortex_->dram_rsp_valid = 0; @@ -161,7 +178,7 @@ void Simulator::eval_dram_bus() { dram_req.cycles_left = DRAM_LATENCY; dram_req.tag = vortex_->dram_req_tag; ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data()); - dram_rsp_vec_.push_back(dram_req); + dram_rsp_vec_.emplace_back(dram_req); } } } diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index cfea9bec..0dcf8a3b 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -5,13 +5,14 @@ #include "verilated.h" #ifdef VCD_OUTPUT -#include +#include #endif #include #include "ram.h" #include +#include #include #include #include @@ -62,7 +63,7 @@ private: void eval_csr_bus(); void eval_snp_bus(); - 
std::vector dram_rsp_vec_; + std::list dram_rsp_vec_; bool dram_rsp_active_; bool snp_req_active_; @@ -75,6 +76,6 @@ private: RAM *ram_; VVortex *vortex_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedFstC *trace_; #endif }; \ No newline at end of file From 48897d9778dfd9e83e85e740754379aa5543413e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Oct 2020 18:29:25 -0700 Subject: [PATCH 16/19] minor update --- driver/opae/vlsim/opae_sim.cpp | 9 ++++----- hw/opae/vortex_afu.sv | 37 ++++++++++++++++------------------ 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index 8190820f..dfddb482 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -206,11 +206,10 @@ void opae_sim::sRxPort_bus() { vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1; memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE); vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata; - printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata); - for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) { + /*printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata); + for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]); - } - printf("\n"); + printf("\n");*/ fflush(stdout); cci_reads_.erase(cci_rd_it); } @@ -226,7 +225,7 @@ void opae_sim::sTxPort_bus() { cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata; auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE); memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE); - printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata); + //printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata); fflush(stdout); cci_reads_.emplace_back(cci_req); } diff --git 
a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index c019e54c..41110da1 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -746,7 +746,6 @@ always @(posedge clk) begin cci_rd_req_ctr <= cci_rd_req_ctr_next; if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin cci_rd_req_wait <= 1; // end current request batch - $display("*** %t: CCI Rd Rsp: STOP", $time); end `ifdef DBG_PRINT_OPAE $display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next); @@ -757,7 +756,6 @@ always @(posedge clk) begin cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1); if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin cci_rd_req_wait <= 0; // restart new request batch - $display("*** %t: CCI Rd Rsp: START", $time); end `ifdef DBG_PRINT_OPAE $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr); @@ -789,26 +787,25 @@ VX_generic_queue #( `UNUSED_PIN (size) ); -`DEBUG_BEGIN -reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask; -always @(posedge clk) begin - if (reset) begin - dbg_cci_rd_rsp_mask <= 0; - end else begin - if (cci_rd_rsp_fire) begin - if (cci_rd_rsp_ctr == 0) begin - dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag); - end else begin - if (dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] != 0) begin - $display("*** %t: Assert: CCI Rd Rsp: idx=%0d, ctr=%0d, mask=%0h, meta=%0h, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, dbg_cci_rd_rsp_mask, cp2af_sRxPort.c0.hdr.mdata, cp2af_sRxPort.c0.data); - assert(0); - end - dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1; - end +`ifdef VERILATOR +`DEBUG_BLOCK( + reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask; + always @(posedge clk) begin + if (reset) begin + dbg_cci_rd_rsp_mask <= 0; + end else begin + if (cci_rd_rsp_fire) begin + if (cci_rd_rsp_ctr == 0) begin + dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag); + end else begin + 
assert(!dbg_cci_rd_rsp_mask[cci_rd_rsp_tag]); + dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1; + end + end end end -end -`DEBUG_END +) +`endif // CCI-P Write Request ////////////////////////////////////////////////////////// From 09b1c0eea7df02042b2b78ec3b3caad7e32f2d48 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 26 Oct 2020 02:02:05 -0700 Subject: [PATCH 17/19] minor update --- Makefile | 2 ++ benchmarks/opencl/sgemm/main.cc | 2 +- benchmarks/opencl/sgemm/sgemm | Bin 46048 -> 0 bytes driver/opae/vlsim/Makefile | 4 ++-- 4 files changed, 5 insertions(+), 3 deletions(-) delete mode 100755 benchmarks/opencl/sgemm/sgemm diff --git a/Makefile b/Makefile index 84235936..7e37c19c 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,12 @@ all: $(MAKE) -C driver $(MAKE) -C runtime $(MAKE) -C simX + $(MAKE) -C ben benchmarks/opencl clean: $(MAKE) -C hw clean $(MAKE) -C driver clean $(MAKE) -C simX clean $(MAKE) -C runtime clean + $(MAKE) -C ben benchmarks/opencl clean diff --git a/benchmarks/opencl/sgemm/main.cc b/benchmarks/opencl/sgemm/main.cc index 4678a7a7..81775741 100644 --- a/benchmarks/opencl/sgemm/main.cc +++ b/benchmarks/opencl/sgemm/main.cc @@ -106,7 +106,7 @@ int main (int argc, char **argv) { size_t kernel_size; cl_int binary_status; - srand(time(NULL)); + srand(50); // read kernel binary from file if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) diff --git a/benchmarks/opencl/sgemm/sgemm b/benchmarks/opencl/sgemm/sgemm deleted file mode 100755 index 644db9d7ddee9a375612d65afc3e165e1110bb99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 46048 zcmeHwd3==Bx&QmlkYo}d35jUjZ~#Hpgs^V`LNYLsu*fF3Fl3pKz+`D=!s0E6ji_mi z1#Q(#RbBF`;Go1wcOPy01i%2>lW< z;Q=ayYZR4|%P;?_9{KibzWtidgcG#>CN$-v82(mjd~#S094`}+me;bEW3`MQ9V5wJ z;a&x{OlbHn10VG~(<@gKF4y^`vdem{zX?q}6h#`Ur%o=4)Q*odG_`FVzjgZ5@lz)k zMw<&KO1UYYv#GINFB zM7%FKffRDs`}Re@ErtGU27dgKuUP>2lYvNIokEU<@nLC8xT&mCjR~R9hQ{WmP&8K6 
z8ViN2P{qo{q1te3ctb-p7H(a+xGd7#6kb_X9SJL3pO~7+@^BE_%R!t<( z^s%;ZTX=c6s&;-`U0t{}4qH;bysBwKoV2VpTontKH8(a^HPv25F3>J$XljVoBciZc zZm14mD};YBpmd&P%}ufJ)&xL;C{0!#-rP__(F?<|Ws#~_U2|(=MS0Z8A}&VjetBy{ zEbJ%`E+xUu<6w0j%UYW^v{p67bz2;6Tw1*`Ttn^Vw>3m+;}k2xF;%M4)(wsl8hv>~ ztbTq&Q&sCWR@dg{RumA`#Y8k#TQhbn>u#zQx?;(S*wmW(*5;<>Nukvfizi06q1GBh zHIe3;O_Q3Mw*)qmr%{n3wxOmb6lJQI@EQdVZ-qIdt+2jT*NmoN)y2Y*h}F^-i(0XU z#<11W+RzlMvs%Mj8b}ijhc{Vu$RirI8ml76%Ys(n)>f--3vFiAp+;-1P^e~W6>F?2 z(r^u0Pl!3{hSpfsfNWaB0EcU7xYM!9*#CX^}3C7ToleGLLUMg*%HYk1!X5)7aC)ne7tp_v#@tKI5 z?z!!}226iE)(krRG}if^%MHu6h^TZRZvjoO4@qZFRvp4>s3M z7u>WB;%;)mId?VJZWr7=9^C1IyXS}dTyV}g&Gmo_&bhg{_PgL_o=w~V7u?KQ2|wt9 zb3SXXCtdLK3=rWF7o783a~*ZTIsZ4;i!M0V2IlH^!G{X#_tV6|eiRyXr6s{3JXWC}^h@>B78bYj|gOYxbX}WIDeo23aX}WCBK1qL-X}W69 zZb^TMX}W07PDy``X%?raMbfu2O_%IhFX>M*O;_w$Ch1QwoyGJ#NngV>U9x9_q+6M$ zEB5#$y^(3UU{9{3tC`Mb+LH7YOw$#6j=cxKh~-Sv1$$nU^rcMG^?Hs-dOp*1xt@cP zp2;*_t!KZaCo)YJ>)9viF-+67dUi{C7}H!9_Ux4Oc}&yAdRio%$2485XT7Aen5Ikh zER(c{X}VI+JW0P>h%{ZOXM&{v$~0Z4$1mwun5N700X?Ec~$NtIk zKg+bA=@%vabEfG+Jx3({DAU83J}Bu2nWiiC?3eU+n5GN$?346YnWpRX>_)oy^@@&X z)&)EM5NvIb9R#cjcBO4Y7@Tn; z*zt1gTr}x-$03O(J@D#NFV+?2Ty#CM!2I@0Z}@b0To#O@%} zeA>_n^^xvQ0Cvrwn4xp5I@*g7em`Tq*DY#?A?Uei;1q#aM20K@g>cwCu z^jdsEWe2K_4%JbiQgZKwoCD18XOBt2U4`yg{9JiQ;7EDL;-?P>PLifGaB^1QxtuQc zQHKL36zIgPz>%D;`;n;2dE?+)IfDYn4+oAZ+_71Kr?;=`3>?e(R^YjgN5O9&3Emm( zI%^m4qNRQ0t(;sRN(qvuNpdWSfgZ9wCE{DWcSrlX3BaE{}y+5Y<~$kNFs2Ov2Nl{bgnvyOh!N2apbeb zM@Ao+5qK%*^9L0#>h^PEQMYLCIf&oFIBfmztgX(pPq|R-8nr?yE-m6ht=A}@LRGm? 
zt2C7F%Au{Qm9|JP=_??iwc$H68N4*J)uzl z>Ee1yqrRq4kGoI@G-|g(ecOe)Povs^($x^W?zIVUjgB9`2A>FaeE&rRkFc3^zxd9n zQ&71FL9lBVy*SwMDEzISLb^(B#URqX0Jht8Fy}J|uRj>mo>|i00(uumrw66Ebv*G1 zeXn~SGVKT)Yu|p%ij6~q6Jb|E3WR+ef%rrB=T4x@D4-G$aH{*)$bEOPQ^b`D1v#shDIxN@f4W0j6k={2kesIN4id zmd+#Xr=wuf!-1n*BOKo4GfH%(eVvG|E<(`39IfbhQZ)Gqh6QO)5v9P9PGM3CkXRJ> z9dd?k{f+{2sKCRhvPVF2s{2})0}31ocBIuJpToQQ0V<{MOq--KCZx}d-y&~E;K@#5 zQW=w2GOkk@-;b7{D)pytBfByu<8+nrlgRk3OPDdI9y;R(0YHffNywO8G(G4VmGQw& zVNw~BSTY{3GA?v7u4KkPN4M|@NKSQk!2l@nL1g?1$){u%IJ#c|`0%ciXmnbMDT4fd zIe3CypOL1|iDOq6X)deicoPG46=^!233d%WHBvVU>i0Qj(f)WYjB}t z{!#MDEMuGp{szrPIy&}1W$1VSI{pNL(VzX0!LEaCIo%eh4zMIW|Cs@Ebf)bEs#Kap zVmplh?l{K=WY7`#Uiu z5cC31kssy#71iwsJkTkms;nfDOunHq`9h{C>%+$(dZv@fKanTO`T)xMs}XDn^N=+s zyC~Yv0D!Om9KnMGIpgz_kkJviw|)D)Qk;7wr>M$lvXj$t=5z$^p^9@Gaza0n7FAL| z?nq@f!g92V)P3#S_leYfBK2w|^|#1hHRO|TLFzS*)DkWALM64(k!o7&1tN8C`}Vye zb+1TWsHEQHNc}RUUhGKyGnlCB;fra3&(J1lXb<+I5xBCTx{raSBXDQ?_B%!Jog$d+ z9u8mR2wqIVPr+l95pGd}1DJ^%?mmPB?EVR6(y2h-RG=*`&|L}?0Z7kA+P9yaoAar5 z=+WH-@o(jfIS5^6fdB(`u;$s|p+9)k1YqD2^nh>WtmCx7>Mo5FpWPSnP!}--zB|0P zz%*NEIH|x~A_}`4-gVq*pi{Ph1)N(MbbW>hnhlDI#Rf@90=qkfMKx5?h~>t?4Pd#y zrJIV*|0^uF2fnNe0B z zn21h~NBxM3PaWPhN=Lh2I0lA}T?I%(<6}dqc-nt4=;|UpYKy}9Hke1(-?u4540e6{ z_UlFW>qYmSO7|I#?w5nI`+2zLBM^G3`%V;v?o`hVVfhDu?g;E`-@a3X>=YsQC?Ut; zWXjX?C}g)IWCewE52OwqyK<>)*Jq?WT`JY_0!9A?$^yS$@G1cGHm^X}D5wO0-T~-e zgYcfe<_y}kmAFn7bnH5cX8G5gTnW(>hCsIiv}yPDH<)qBZ~(f8vMxIZKI=!RI;VVB zg0$-;c)w=wHpS4CyJE<<{m1-oaqhy1nByf>EHZi-Ix2H`!P0#lT<8&yoa!!zA$Efx zmzvyxgkG1Jo}^Y8svfk#&cP#?q0eGw_ci{PM^Doy~Oc&x6^4o!p()8QBNu}VmTpZ?CyZ8wjZ-GPhADRy~%u_ zLsxahlc&iJ=z~CaJb`kLgW?C+GP_@SolBb&c6;Es-4l@VDjiXvY#5D;(O3Z%GQxKO z`M_p%e+PW84uGg30ClgVd51wo5IRdjtU%k2!J_!(B0w@1llI+u!FWPIomk@chLB_h z+-W*i)0kzo9R%%@T$y678tmeF^>Fw1scXk$zyM1O*3$R^a~CnA9}9M1oWQ`)h33@7 zS)w#?Y1^DMwm&}oO1&Cuf6ebK+hsy~xVu55Q*|YCtxBJOv}2HlvWT=50KZRp^~Qk8 zK}&l_`u0P?2Y)^J2mkqr-=1Fwf(hRsIF{@qJJt;9~zw&#%9I!tTn6j^3sk|rK>AC-Y#8L(eZpKc5FsWKUX$n 
z#xq<|Ir`<|Uk4BU)634RyX#eCw(IrS5W`>D@k(XK+vSjP>a70`wm)VEXS~|>3T)}S z^6Jv7O0Omq@;Tk8;&J|wE&ALp)KELlzhqTqiHt<9}7{g;f0UgB>J$J%fVS?eEB zI~2XXxD%UU@$bY-XxB>gqcsiQf+ zlvBu2)T&QTr7$Fha8fqfIq_3Rh@-T)aenBsIIuXhJWyI5TDqii?Kpo^^|n|zn#BHo z{Fg7USQ$8-09}k0bzNDV2BqpD`%ua@Kd zsA?UwL+|5q$cvxAkMkS5N#kC*d=)AJ!cDWT4^>IpU8iE-_mykxnM!VTFS&uO;hMHs z*dMD8>&70}KjF57q!O6cf!a}OZiqBjS4Bcwnp-#NW{y+vL;#BDE|R&WAwlwJD-fw_ ziLy7uIr~iih{y(iV>HVuZ{E@*Hm?mwWAK<*Lvxchd@sAlZ?Q-aJQJ*av9_uRqFL60(u&GJ z`B49O^ty@xQ;p75Z;M!E5f4)yAk>^VdQ1bRPAe)TlUsm`}_eJzTI|4F5x_k@j=%y zB-B^m^y=)9^z|>HOvuB^&RvjQaA{8eExw)B-1BCQnRM|miXcrneotV>u5w|E2D{;qz3>`^zT1+ypIkRHsQ-kz~IdtM}a zdTI8A((F<5v;Fh43qU_V+b8Xh_aq;L95$Peu8%D6kp(`oz(*GN$O0c(;3ErsWPy(? z@R0>RvcN|c_{ahuS>XSl1! z%4PD${YK3iB&0ZZlk`r_kK-~)8$EFBE9na~9$SKvcJ$W5{$B;=IrilI7VwUCT&s0h zr^C%Uyg`Sz>+s7u{DBUi(BTU@{F4sf)?vDAe&Km%9gfuDR2?qR;c6Y$>2R|SZ_wfG zI{dN@f1txBbohb}|D?mWb(qeLVO(eHaHI~W>TrP$SL?7&hnsbHgAQ-kp^}E@_e=c} zU07B&(?1GNw~zKuE}ULC#XoUE@stTuCrfPdwV~cc0WHj=bFAFPG@&du#2!ehY{RNo5Q-Em~qr>UfFAWb0f+4K{3Z!ux)Kdh9?02(73z1j9E{V>< zwt2tZ61^X}_xm)X>Ano#eaz5KzY#*y821cgYy$1>8-nQ3^tVeqmX-My44?k@bpTtL zGtn2NpS+tSncst@(*MbLZszW6#NT7QAoDsDHvJUi{;Xd@|8&b{VpP@|w3BpO5)-oC zWWpnf=~2LoV67^XX%yghlja#y+so5Y&#Iok2?WE`19|LA!-~0T>CmmYtP!t;#_F&LE@&Y1r#vS{y4~XXDy*&itNuLac9;lCMHN?U)EJj6iecP ztQsaJN@9Q3MkXfNJl8yswGma5K1FmpnDsjH!kWGtFR;NJkH_&`5dl%d* z=Vut>(o1c^bN+*+pReMD7$VclB%a~>I)tPh0ZFb6+owIi>~p^ZW2b#Zq8Gx3Xz{Lg80@U4lLLOYrGOCAj^l z1fOM-9_;&EHtavxch@`#KEGaqeb-5F&utRiceeyz{P?)2@+V`Ys7(4v=8hITFksCPB$~3Fba4!MukhnExW4%^BRk z>{SWM-?1_NgAqPhc0 zc1tkl^XNzi5198bOfYyr>0t@x|6YQ!lM=Jmqe(?)=9D4ry@?0MdI(77@_ z>}K@DcDc6+t%TaG2W*0(da&jy%xz`aqL~_TP{&`^T zAy&#ZR%!lIj342UlSqyoQ!A5r>v!L)iV0przx^1)WY41Yw*%^u#q z*0T?N9!2p^-y2BsQ^AV)ulEdv>PGBNTLjNn`aQl zGPThh0_X7VdL!)FEAMA&* zjS3yc?-wNb2Xin!_z--sAD}Pe_i7xgeeesYd1Ae&!%X}%ulB(P2x9I-fw_cODVy=Z zLSV={7nn;8Hqi(Fj&7L(s)6Ry%UXbVXq){PQ~?EW)!AvVi9UD~{R;)$1N1i?5;39i z!8vF}BzXepXOc*?4_>d;=mDC;tEe$N9hPx?a4(EVDFaaL=iuieF+R8us#0C!gPkB# 
zY2}08f@G$R58jA)EohqYi~Ar=IJ^%ZJey0*H1hkRXtTC`*D>%-xCKhucRveXAnGR+ z)ys8Nmt43TE={6yLuCuBLNqKymWz`Y-wh|>Y{RyT-$jd(D1;YaD77EG8UOd8ue%tw zc7&wLW!Q6|w2yg?z+O8LT}LLzm;M9Y4t=Tcc^w&k*)SBB|E!CT!_n-g>@Ptxau#lY zy`&5J3(CkY$T3pk-!()^riy3zLg@1_F?t6%>WNVVV)cxRg3ehE^I*n>;qCh%`cM3R zgI~cmlwQraNF_5aMto>E>Pq9-@7n@0$X^RhQpl%KGcSD{790$sY50}kS8#0#`PR=r zH%l@|dk^wQw$XCg8TNzVnb_3UXk};Hzk%%?Jto%T^)lIm6wojiuSSQIpl*=$5X(0Wo@r+u zx(%W|n`qrf*?eSnEPe9GO|YDu8S(u96n>#-^r=RU2{a>?ZyUO8)Y~99P>OonfYCAY z(k-aBUqH+o`27vPg5RQGeYJ~jjOucwG1rm5qvNpr$RNSCrWgt3H_UO{8nU=;4UIMO z5wuv!{`X|rh9p6@BgttKhAb}I&{)0zToiU04YQ`O%K=zF^U`g|`(^wxKsXS;f&uIp zr4ADPWgZvfywa%dkaN(Esr&oe#4w)DCWZ#{@fIkab?Prr<$PIF+V+Sn+rAa)!M-Qe zYVD|6ru|VZ(_T{RvzOKS?9Xb^*3D(wl+nVPvD zb(~YPj)j}r8g8_5Y8mf`9TrgJgsIDv+?S--^#d;yFvMo?db=?oNY@0^OT_UxwCErrROWqDtFPXh^BF=;NfI` z(4)(Ux*i?JpvNv}bSnD&L61{kPZe79pr29|>`QUEB^B<5GAecGqOYn-E{vHM-;0j(5%=!VWo3pzT0RDz^ zzXAF8>xTVj-NF2CBLf%t3ODNr8E&VVT=Fa6jBlZz%lWH^do!FL#?>1>8X)!Wmr&E1 z%{tA=_2whNO#+!QsAB4i6h6Z@8BlM(wBMk2Nn?D*bhe^(B8*!T$s8ql$UNtS zhx--7Sx!|bDV~&U&V5w$Vh?vRRDtFWWW(4j8%`wy7~g7hXJdE;*sWQ=$H6UP&QrQ$S9ggT&yxX|m;& zH(j>8@@B}ESKdt7^2(bndrx_DWXmgWo@{yLmC2S@Ub$>}&m-~dtIJiqf+uG%z|2;c61K;#q$}RG6c~{ zY*3!{XfOHG*dRPNz^wVD($_o$r^=r%ea)s!M9Ww*&o3w}z_#VtMcPV{`ygo-i`m>RXX*Y=6%ShWG5(biXqeO2d z?IwwyK(ox>B5Z$W^csQkpdj-^~Cd+`&4ldj*(2o}4icY20!fK13_<)hfqQMw z?ZBjQ(URfYi5?=u$IVf%C-)J+z5Q}uKz+}zBgc=qD>!R0u(^-1SL6<$?6Rq?wpl!v z&OOYr^lf7J57JnYS=p%V+=E07g3X8XA5cus=ueb7-WX3D{{hFUm@$wzpB!*h5XXPO z-9yW z?k@EcW|e27OrFaW@Xk^{VOIIMbgktIxUbYtm=(aYPL^lIZD?L<)9zve7RUVT)A94z${1|i&z&hT-CkRhvwG_JzSeB9$H z6F`Q<)3|d|=DQHx0h|s_vtS8kywlN~tOehJ!S*0#FZ?86n^3BScfqhb5n1p$>fo!W zK|8nw7R&;O>#2p$BXO975j2GVX5`}#FdXa)XJU#yoso?I4~Je0zXBQ45vjNjP3KXd zy1fhcK<#UZjG>?VG$O(0py00%;cR&!OEnEzF8nvNsY*mHy%$0}_3M!T5cGDu_{;8=fDsOrAaUm6uWD1FUrK zo})V!Z_&kE3N)`5Ah~h`%?dD63>$k&kn_NO!MqeJo~{j21ep_rwXi-4$pOp=QYi99o^cy=K=8i zM)Oo_o}rl4DhqTdo_j$w7k1~>tO9G5Lv*tu>ehm9X4l}|b0>IsPRnbx3ao1!p52P) 
z(;&*`k`tF9y4xYzqlk8cs5Xh{afj$uMRYTW9@Io@l|ZY-;+@r+2q)U1ayG!q$ose1sGxKUwfmQ1_JYQBk zCwEwjk0udyI7Ig=q7xw61EqOwRe|+QzmfC}MRXiQnE>(%sKENA-w=IM5gh~3?*Qbr zLj~3ezajdbB6?|uwe;KYQ(lj%Kuq!AIwU-MYB9*u$1eei9^{vCANo?Hr(Ue+;repf zdkMW9zssIMx5m>SxU99D8=1-XV%U54g6=fijVt7=;ZYv9h>+VLB>4spd+#49`9puGo9OPS(^rr)zIVXN-+j=HS`0 zX6>7_=$ksZ%v#Qk*W?>y?7aoUA@mG3R+aRpY?_Guj1vE}PVc|gpN&ejkJ#L(wXS#% z2K21?CAw)|@*pEZd+(JM7=b)xx1)=?LnCJk36XxZjujZu%Q%92_FT0BL%5EwSw`)9dr&5g??a39hBCIdaEkL5n$Rq z7%q7oR{@G@k3dn&fa17>%34v1IDr@_R?K(e!4-=USA{NLvBZhbTd_#Ty?Y+McIi*F z{1>$RC5@J6y5fo~0zboN-m}rnaB0ImoH~akTRp$({b2$q++wx}5>_4l21?MyuTmAdb0Uk|WN1G1Z6q@XI)bKy42SO`H?1O{$^IrAblJ9Dkx!0+Lm{^a z$n1R3&W5ArOm>*&o=ZTk1|UNP7=&KT%9~+(eJ0=h89HAq8fRLB@?3JT&xxKLSBx?u z5PMFcJ@;Hy?56CB{MqN$ib4fxP-rsn=N4g)kYzk?fCk^8T@)w)26c=h` z(wG?H*aT!3#ZADBFUVrner*WY1*dg|xP?!OlBV>)9U6{m8bq$%R2Ul{Axgw6ITIXUe`N}mp%~@|3(Y?JzzWuP^&O->uE8{%|$jWxqj02Z5lF6n1-+?eIS!KUD|otzxf5{D{H(aebh zZnRPRls7kSD~iI{JqzW-_6cgv7x$$34w>YLAt6`0T)EpFJm|&(07!}H9|SCzY&B&s z%;&w(3sn>gGYw0}0!~NKIxlq;L@tBuyo`ATx6cr9v%6O(sF7A(BBrk*I zSrX5kn57DX9<$J1m|v@f*5==##9l_R^e@sbCrLuCIDj)nA;C%#_;ssXWmwT0YUQL^ zUtVYz=Qm~d$_o3c#iIM#I5mY{QK(DiYE$G z(IUKMBwXE66l-pdM2j{wwH38g)oiNT01Q4EGHtxv{an=0R1;~d4Hw}9Bt`0e=nuk; z&wo%@5xz(knmSp3o{U)~2)BybqODF&)eW&|0>t_tIrh@Gciz1+D8eUH`pkqcrYdS} zYvP+)9R12F@dh0E{)z=-@~b9=n2+GXO{2Bpy25&x-HL5%2}cpdcnfetGot-0et*S8 zktJUW6~QNW6j=@EfTv6{hhnW&Gz5|`Jzf=6*RTPEs8ka2mTO|8Oa(O49`AeoG_aosv}XF{9cueZEKff>J%<0DSbEz&--b5d%bR^`-+I`$ z)-&>~hu2QPiH}gEKV-r^hr@csTyCAuV0GjeI2gYHaKV%S5zwZ63)zsQC6#l)}=d6wsUE;&mREXhMz z(uY0N<}30PGIO-2^t(MP@;#S{DhqFad4q5BkFOx6#DlK@vAvo#ruXM~#-V`nG8Cd} zI6Vjx+BeedUV;6DCXu1OjD~@m;4g=Kz!9YitA`Ur!c_C9oGyV22QDX@<{N zELjoi{Vk~gpr=>M$s`b%ggZ8oQLHgkk2lHTvr$#GA^j4#I=uSN!sn^@CbI+;&QN$` zWnlTD2);zNG&C_(jMvEVvs3tLnd?JXrECcxjTe@v1wv$2tBveSwJ?hc`SL##n1xc$~kkDiRIP zv65bvCiNMu+KkfU%U@MdHIzjnybHp-#SFt!Ypgkf&x)OKT{}&(thgewIe=PU8Pa0V zgj~ZK-tH(W$k)q4)%cDbK60kNSymslLj0PVRTT?;tgWGDQ%F9Sh6l*}Wn;${7yD)Q z;ve4_YikOZYzQ~u9cnd5R<+jDm#B}g`p0kZk6${$KOV*_!2k}X@pUcX*2actv|%&+ 
z;e(gRX~PNcQ?%4O^{m>4x;mK+>@YRRkFhm3wp6u->YCV^;Xh(;DXZik_XF(&xP32x zLN_x(oG%=~CI=yo{72eVkz-I!>aj&7YnmVhOqOK-gJmuRirH(a2Upm5)1HtK{R8w>uZ{u)F|S-uF_H;K6F@0)Vy~#J1BLbvqC+? z>GiuQ;mrKfs!#-hcxTCrxy1@LMthfQjt(76y;UpupEORpA?q?oQ%gT=Oj)R9W-v7HDS7_uHbD)PiGC)${lEBbvzc z>oaExPl#)l1cfI~=xeayRO9pxjnO)=Vq(ZO`+?a5fe8DJ%4)=>qF+Nh!$c{WN(jL{ zIkbGyk`=}Hu4!oHgit__>(V-6%kM*~ySeKlg$tC>d38BebY zJbkH6(^B(1dHH74nu3l6xz>hjA~f%)rnX3=C59IXQze;VA(deC(`N$9nuZsj{CzO~ zvdQsbwGkhPm9E~IB)xC;s>9(ttfe`Mjv5Qug#7=-tz1o-mxo)5tq}fCw8HAut18c4 zJ({#1H4_Y}{x51Z@C&7w&2CH3^L-1d=fmiG(Ad=6M@Hw~O;0%!r%H3w$ki4(LkAR| z1phPLZ7RogHS;+MjP9Q149X^RNj)Qk(NsVC!Dlq_)*#ab{Z-r3=kGYml$vI2%%BfV zelIT%Dgn6^E8O7908^x8WN_-)5hURJ7zzJ4eq-sNSVv6RDflMZRQ#t8o?`C=x zGebPnOwq4McCq>fd|MgAqMF0CHMQY2#}lnB*s;R20+Z@cE2h?*8=&7phE=T_YCeEl zIaTl2EQG~DsJRYbC)SJG6n!&B4bwL#tYef!Js0e&I{<;P(rKr(pi)y>3%kdkO9eCb8mT&NlIRbq?7TmnO`2 z^n7FqW+)Xai$g2Pf-#wtoQ_Syyk$k{2kAge?wB^j$7TAlTGn!DZI*+s)lcsQy^@1v zy?7_~a^RhHcpLd?+)6iT1KLhl53?`{yvI?ePZHC<6zxC6Xxt49* zBty1%fNP@$-!N}(G^>R(aU~enh*TZ2EK@6@OSz&s^N|mQb;dk6i=LA9o>2611o&2C zb1m{R3s$qP$3}u{DC2rfmd==Eey}>IU5z?OaO1wVXdO>PHCfVR=zLNawasmq ze28Z_z1a9OaYy%F+Ub0@D$>}D&Ajl(@M3-|v;@y!(&1{W66dVF3te37YRUm!f$?FG z?N)7$$~2)m8s-MqnRO`aO3=3Z*vl4gP!kWaj{I{L2s=E^UM$wBPB+T7YX$_h zhSkj-?u(fYr)i;RTTM+k8kG^|L)b?(RanW?(HTv=QAWKJZ{|2>u*%R`efYJ89AAXW zWH%O#dJ7wMV|A=~OHI;&Ew+V-2eJO4Mk>0~TkniLkHQ(1=ad zI+w+`6y`CHCD!FJL_+_4`ZW-;m(5^0w>ii;L;V%&W=G|*W3j6%oByWKk1er6n`2^J zv+!^ow5RBW>->BXWGo%ha_rO?X(@M({WIN53o2? zIxDw=KdA`kXz<2J!-o18+ko2Ob!{Rf4C?4Kdyiiq=!8-opK!;Md7QqAP*A5s#$2|s zv8t)|GTA$(BgxJiJqc@C_C!%N-s3RDOkNYlP+fJdk3!be)pJo}ouU%QcxN_T zeb^bOJH%292UfeCkZ{H+eUfenr>l5D3d0(xY z$BSE|%zJ7R;pxu%Y!l%bmU-`MBHSl$?s3By_ga~jc@M1{=e4pd^S;_dcz?^hw>J?! 
zz?y(Z-I6aa-Z#h>*dE*whaMx;I z^jCwP_Az6d@xTqZGu00{$^FA#&{NNSrGC-R=y{*Ol=P+l#uV^PsCU|bUlP;y6!d#iz`vLR{(X_(dtCk==vj}+X3Bo)Pe ze@p@YTMBrF=p(EN|ISVUAD04Nk^+7i;H+O$E>pi@!0D&S{xuH$NiOK0)b(hTYE2=3 zmzKX%(;N9+B0rw5k;9ir_hq-Q34EZn>|#Y3)(j7&pnpCE{1w1i-aC`@c?)p*mzfi9 z)bclqAL)cE_&31)uC$5X0XXIFE>&1l-hCoxpmk8|kM2UQ{VC-01s~MM%;C+rz_+&c zWuN}Ys4w{WDc~~!r=AB({i27_GoaxQB*7~HSCm5T{Nr~Z`hvFt-j_Z%0M2rmwTLN~ zRnr=a#_$c{!W!Jn(i+}?haSRM95wP3qbbaT;o9bqKF6=c?PAeTRohlByjvnTaIY<# zI%BeI7xX5=(zmH0RMpyAwGB6t##*=GbOyIhVQaOq5!>G(R|vNrjUO zi)Y|Tk@;;6ky`!j@D$%##tq_I8k%YovrGKEHg+?s!M!}p`z*qX!;MR;H{vVA04;!V zqV-lNEO#D$hy}KL+snDvm|GXv-@pwVylVkfP_wm)t*a`6yMhxHbKqz(rXVcboQRl$Y13k!9sl6n_&k3lF@jiWO~*3uS>CQHFHcr~G# zXq$AwP*N3LcW0M3#OmjxPrx{1FbUc_H*vY0?f>DOBaTf?_lx^X7Kh?vF6xvc?n<6t z#LtaFA*awBPMxAH3b!_e0#2Xd( z9xdF#z&mRcHO5pH&P*o~idH9^&`DM69vJ_{hYNi^&SMouw>9FZ2qB(hk+9yxcnH_p zVih(u$HIlB^DD;3sy65po+>Eh-8L|z81VDr8v}NNpYAs1XndS z)F4+JhVoybWT1q>v#dgPV3>+Q*>H8+2Jlri;hAESY-p-$Hu0+J>eldPlZxPRWD|#( zOjNTwlhXe|ggb&*;$Y88U1mSpg!&yz8mnlX1R{Ji5L0xA-|SnP&<~vZ!m>NzDn`o9 zAJdAMP%j@Go@D-VV6c>Q@;BdYGvOK*4i{;7VLC3Ck#i|hc)Z`?H~Zx#tk;BCE{UBC zKc0V=YdQX73&Y_z--$D!`7WHI?rpy-fZ#XSjQ!32y9w86Mx(!>GvNl{`As*I<{S2T znQ)$@@V*g*q%rzyu(cTx7SZsV{d_0XgeHH(Z_2+@^Uv3GX5Zh0d=m^W@?)3K$=~pQ z5-?T~_2XsE2TXXL0z3ary&1qRMBM!59KnPwreK=E95a}3cM|_Ty@)g6u%tzu;WPQ) zp2Tm?B}|C7-6UMe`tQ~JMon{mVM3oVjFw{TZ-m~P#Ba_yOi20eD>?t~0O|IBa~@(s z?k~Epa{@EPcvh!AGe Date: Mon, 26 Oct 2020 05:27:10 -0400 Subject: [PATCH 18/19] minor rupdate --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 7e37c19c..410dc008 100644 --- a/Makefile +++ b/Makefile @@ -4,12 +4,12 @@ all: $(MAKE) -C driver $(MAKE) -C runtime $(MAKE) -C simX - $(MAKE) -C ben benchmarks/opencl + $(MAKE) -C benchmarks/opencl clean: $(MAKE) -C hw clean $(MAKE) -C driver clean $(MAKE) -C simX clean $(MAKE) -C runtime clean - $(MAKE) -C ben benchmarks/opencl clean + $(MAKE) -C benchmarks/opencl clean From 4bd5ee26732ca9c13854dfa1731e5fb00cd6c28f Mon Sep 17 
00:00:00 2001 From: Blaise Tine Date: Mon, 26 Oct 2020 12:59:58 -0400 Subject: [PATCH 19/19] fixed rtlsim regression --- driver/rtlsim/Makefile | 2 +- hw/simulate/simulator.cpp | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 4db05ba8..1a66b335 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -55,7 +55,7 @@ VL_FLAGS += verilator.vlt # Debugigng ifdef DEBUG - VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS) + VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS) CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) else VL_FLAGS += -DNDEBUG diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 2698cc74..4f6403e7 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -96,15 +96,15 @@ void Simulator::reset() { } void Simulator::step() { - this->eval_dram_bus(); - this->eval_io_bus(); - this->eval_csr_bus(); - this->eval_snp_bus(); - vortex_->clk = 0; this->eval(); vortex_->clk = 1; this->eval(); + + this->eval_dram_bus(); + this->eval_io_bus(); + this->eval_csr_bus(); + this->eval_snp_bus(); } void Simulator::eval() { @@ -216,7 +216,7 @@ void Simulator::eval_snp_bus() { #endif } if (vortex_->snp_req_valid && vortex_->snp_req_ready) { - if (snp_req_size_) { + if (snp_req_size_ != 0) { vortex_->snp_req_addr += 1; vortex_->snp_req_tag += 1; --snp_req_size_; @@ -289,7 +289,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { vortex_->snp_req_valid = 1; vortex_->snp_rsp_ready = 1; - snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; + snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; --snp_req_size_; pending_snp_reqs_ = 1;