Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit d47cccc157
1300 changed files with 247321 additions and 311189 deletions

1
hw/syn/altera/opae/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
build*/*

152
hw/syn/altera/opae/Makefile Normal file
View File

@@ -0,0 +1,152 @@
DEVICE_FAMILY ?= arria10
XLEN ?= 32
PREFIX ?= build$(XLEN)
TARGET ?= fpga
NUM_CORES ?= 1
SCRIPT_DIR = ../../../scripts
RTL_DIR = ../../../rtl
DPI_DIR = ../../../dpi
AFU_DIR = $(RTL_DIR)/afu/opae
THIRD_PARTY_DIR = ../../../../third_party
IP_CACHE_DIR = ../ip_cache/$(DEVICE_FAMILY)
BUILD_DIR = $(PREFIX)_$(DEVICE_FAMILY)_$(TARGET)_$(NUM_CORES)c
ifeq ($(shell which qsub-synth),)
RUN_SYNTH=$(OPAE_PLATFORM_ROOT)/bin/run.sh > build.log 2>&1 &
else
RUN_SYNTH=qsub-synth
endif
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
# Control logic analyzer monitors
DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU
DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
DBG_SCOPE_FLAGS += -DDBG_SCOPE_RASTER
DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED
ifeq ($(DEVICE_FAMILY), stratix10)
CONFIGS += -DALTERA_S10
endif
ifeq ($(DEVICE_FAMILY), arria10)
CONFIGS += -DALTERA_A10
endif
# cluster configuration
CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1
CONFIGS_2c := -DNUM_CLUSTERS=1 -DNUM_CORES=2
CONFIGS_4c := -DNUM_CLUSTERS=1 -DNUM_CORES=4
CONFIGS_8c := -DNUM_CLUSTERS=1 -DNUM_CORES=8
CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS += $(CONFIGS_$(NUM_CORES)c)
# include paths
FPU_INCLUDE = -I$(RTL_DIR)/fpu
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src
endif
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(IP_CACHE_DIR)
RTL_INCLUDE += $(FPU_INCLUDE)
# compilation flags
CFLAGS += -DSYNTHESIS -DQUARTUS
CFLAGS += -DXLEN_$(XLEN)
CFLAGS += $(CONFIGS)
CFLAGS += $(RTL_INCLUDE)
ifneq ($(TARGET), fpga)
CFLAGS += -DSIMULATION
endif
# Debugigng
ifdef DEBUG
ifeq ($(TARGET), fpga)
CFLAGS += -DNDEBUG -DSCOPE $(DBG_SCOPE_FLAGS)
SCOPE_JSON += $(BUILD_DIR)/scope.json
else
CFLAGS += $(DBG_TRACE_FLAGS)
endif
else
CFLAGS += -DNDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
CFLAGS += -DSCOPE
endif
# Enable perf counters
ifdef PERF
CFLAGS += -DPERF_ENABLE
endif
# ast dump flags
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DNOPAE
all: swconfig ip-gen setup build
ip-gen: $(IP_CACHE_DIR)/ip-gen.log
$(IP_CACHE_DIR)/ip-gen.log:
../ip_gen.sh $(IP_CACHE_DIR)
swconfig: vortex_afu.h
vortex_afu.h: vortex_afu.json
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
$(BUILD_DIR)/setup.cfg:
mkdir -p $(BUILD_DIR); cp setup.cfg $(BUILD_DIR)/setup.cfg
$(BUILD_DIR)/vortex_afu.qsf:
mkdir -p $(BUILD_DIR); cp vortex_afu.qsf $(BUILD_DIR)/vortex_afu.qsf
$(BUILD_DIR)/vortex_afu.json:
mkdir -p $(BUILD_DIR); cp vortex_afu.json $(BUILD_DIR)/vortex_afu.json
gen-sources: $(BUILD_DIR)/sources.txt
$(BUILD_DIR)/sources.txt:
mkdir -p $(BUILD_DIR); $(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -C$(BUILD_DIR)/src -O$(BUILD_DIR)/sources.txt
setup: $(BUILD_DIR)/synth
$(BUILD_DIR)/synth: $(BUILD_DIR)/sources.txt $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/vortex_afu.qsf $(BUILD_DIR)/vortex_afu.json
ifeq ($(TARGET), asesim)
afu_sim_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth
else
afu_synth_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth
endif
build: ip-gen setup $(SCOPE_JSON)
ifeq ($(TARGET), asesim)
make -C $(BUILD_DIR)/synth > $(BUILD_DIR)/synth/build.log 2>&1 &
else
cd $(BUILD_DIR)/synth && $(RUN_SYNTH)
endif
gen-ast: $(BUILD_DIR)/vortex.xml
$(BUILD_DIR)/vortex.xml: setup
verilator --xml-only -O0 $(XML_CFLAGS) vortex_afu.sv --xml-output $(BUILD_DIR)/vortex.xml
scope-json: $(BUILD_DIR)/scope.json
$(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml
$(SCRIPT_DIR)/scope.py $(BUILD_DIR)/vortex.xml -o $(BUILD_DIR)/scope.json
clean:
rm -rf vortex_afu.h $(BUILD_DIR)

19
hw/syn/altera/opae/fpga_prog.sh Executable file
View File

@@ -0,0 +1,19 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# FPGA programming
# first argument is the bitstream
fpgaconf --bus 0xaf $1

52
hw/syn/altera/opae/run_ase.sh Executable file
View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
BUILD_DIR=$1
PROGRAM=$(basename "$2")
PROGRAM_DIR=`dirname $2`
VORTEX_RT_PATH=$SCRIPT_DIR/../../../../runtime
# Export ASE_WORKDIR variable
export ASE_WORKDIR=$SCRIPT_DIR/$BUILD_DIR/work
shift 2
# cleanup incomplete runs
rm -f $ASE_WORKDIR/.app_lock.pid
rm -f $ASE_WORKDIR/.ase_ready.pid
rm -f $SCRIPT_DIR/$BUILD_DIR/nohup.out
# Start Simulator in background
pushd $SCRIPT_DIR/$BUILD_DIR
echo " [DBG] starting ASE simnulator (stdout saved to '$SCRIPT_DIR/$BUILD_DIR/nohup.out')"
nohup make sim &
popd
# Wait for simulator readiness
# When .ase_ready is created in the $ASE_WORKDIR, ASE is ready for simulation
while [ ! -f $ASE_WORKDIR/.ase_ready.pid ]
do
sleep 1
done
# run application
pushd $PROGRAM_DIR
echo " [DBG] running ./$PROGRAM $*"
ASE_LOG=0 LD_LIBRARY_PATH=$POCL_RT_PATH/lib:$VORTEX_RT_PATH/opae:$LD_LIBRARY_PATH ./$PROGRAM $*
popd

View File

@@ -0,0 +1,4 @@
vortex_afu.json
QI:vortex_afu.qsf
C:sources.txt

View File

@@ -0,0 +1,54 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto-200",
"clock-frequency-low": "auto-100",
"cmd-mem-read": 1,
"cmd-mem-write": 2,
"cmd-run": 3,
"cmd-dcr-write": 4,
"cmd-max-value": 4,
"mmio-cmd-type": 10,
"mmio-cmd-arg0": 12,
"mmio-cmd-arg1": 14,
"mmio-cmd-arg2": 16,
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-dev-caps": 24,
"mmio-isa-caps": 26,
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

View File

@@ -0,0 +1,36 @@
# Analysis & Synthesis Assignments
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
#set_global_assignment -name MUX_RESTRUCTURE ON
#set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
#set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
#set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
#set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
#set_global_assignment -name SEED 1