performance refactoring - rebalanced stream buffers accross the device to enforce output buffering rule at compoments boudaries, finally resolved block ram R/W collusion discrepencies,

This commit is contained in:
Blaise Tine
2020-12-19 02:45:06 -08:00
parent 29cd2f5dff
commit 4bbd7bf408
76 changed files with 1313 additions and 1098 deletions

View File

@@ -12,3 +12,15 @@
/core/*
!/core/Makefile
/core8/*
!/core8/Makefile
/top1/*
!/top1/Makefile
/top2/*
!/top2/Makefile
/top8/*
!/top8/Makefile

View File

@@ -0,0 +1,72 @@
PROJECT = Core
TOP_LEVEL_ENTITY = VX_core
SRC_FILE = VX_core.v
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NUM_THREADS=8"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View File

@@ -1,6 +1,6 @@
set_time_format -unit ns -decimal_places 3
create_clock -name {clk} -period "200 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
create_clock -name {clk} -period "220 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
derive_pll_clocks -create_base_clocks
derive_clock_uncertainty

View File

@@ -31,6 +31,7 @@ set_global_assignment -name FAMILY $opts(family)
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
@@ -39,7 +40,14 @@ set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name VERILOG_MACRO FPU_FAST
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
@@ -50,12 +58,6 @@ set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name POWER_USE_TA_VALUE 65
set_global_assignment -name SEED 1
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
set idx 0
foreach arg $q_args_orig {

View File

@@ -0,0 +1,40 @@
package require cmdline
set options {
{ "project.arg" "" "Project name" }
{ "outdir.arg" "timing-html" "Output directory" }
}
array set opts [::cmdline::getoptions quartus(args) $options]
# Verify required parameters
set requiredParameters {project}
foreach p $requiredParameters {
if {$opts($p) == ""} {
puts stderr "Missing required parameter: -$p"
exit 1
}
}
project_open $opts(project)
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
create_timing_netlist
read_sdc
update_timing_netlist
foreach_in_collection op [get_available_operating_conditions] {
set_operating_conditions $op
report_timing -setup -npaths 150 -detail full_path -multi_corner -pairs_only -nworst 8 \
-file "$opts(outdir)/timing_paths_$op.html" \
-panel_name "Critical paths for $op"
create_slack_histogram -num_bins 50 -clock clk -multi_corner -file "$opts(outdir)/slack_histogram_$op.html"
}

View File

@@ -1,24 +0,0 @@
project_open VX_pipeline
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
create_timing_netlist
read_sdc
update_timing_netlist
foreach_in_collection op [get_available_operating_conditions] {
set_operating_conditions $op
report_timing -setup -npaths 150 -detail full_path -multi_corner -pairs_only -nworst 8 \
-file "bin/timing_paths_$op.html" \
-panel_name "Critical paths for $op"
create_slack_histogram -num_bins 50 -clock clk -multi_corner -file "bin/slack_histogram_$op.html"
}

View File

@@ -0,0 +1,72 @@
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=1"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View File

@@ -0,0 +1,72 @@
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=2"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View File

@@ -0,0 +1,74 @@
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
FAMILY = "Arria 10"
#DEVICE = 1SX280HN2F43E2VG
DEVICE = 10AX115N3F40E2SG
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox