project directories reorganization
This commit is contained in:
36
hw/opae/Makefile
Normal file
36
hw/opae/Makefile
Normal file
@@ -0,0 +1,36 @@
|
||||
|
||||
# Build driver for the Vortex OPAE AFU.
#
#   make ase        configure (once) and build the ASE simulation tree
#   make fpga       configure (once) and submit the synthesis job via qsub-synth
#   make run-ase    start the simulator from an already-built ASE tree
#   make wave       open the recorded simulation waveform in vsim
#   make clean      remove both generated build trees
#
# The build directories can be overridden on the command line, e.g.
#   make ASE_BUILD_DIR=my_ase ase

ASE_BUILD_DIR = build_ase
FPGA_BUILD_DIR = build_fpga

# None of these names correspond to files; declaring them phony keeps a
# stray file called e.g. "wave" or "all" from silently disabling the target.
.PHONY: all ase fpga setup-ase setup-fpga run-ase wave run-fpga \
        clean clean-ase clean-fpga

all: ase fpga

# Build the simulation tree. $(MAKE) (rather than a literal "make") lets
# -j, -n and other flags propagate to the sub-make.
ase: setup-ase
	$(MAKE) -C $(ASE_BUILD_DIR)

# Submit the synthesis job from inside the configured synthesis tree.
fpga: setup-fpga
	cd $(FPGA_BUILD_DIR) && qsub-synth

setup-ase: $(ASE_BUILD_DIR)/Makefile

setup-fpga: $(FPGA_BUILD_DIR)/build/dcp.qpf

# The build trees are generated by the OPAE setup tools. Each rule is keyed
# on a file the tool is expected to create, so configuration only runs when
# that file is missing.
$(ASE_BUILD_DIR)/Makefile:
	afu_sim_setup -s sources.txt $(ASE_BUILD_DIR)

$(FPGA_BUILD_DIR)/build/dcp.qpf:
	afu_synth_setup -s sources.txt $(FPGA_BUILD_DIR)

run-ase:
	$(MAKE) -C $(ASE_BUILD_DIR) sim

wave:
	vsim -view $(ASE_BUILD_DIR)/work/vsim.wlf -do wave.do

# TODO: running on the FPGA is not automated yet.
run-fpga:
	@echo "run-fpga: not implemented yet"

clean: clean-ase clean-fpga

clean-ase:
	rm -rf $(ASE_BUILD_DIR)

clean-fpga:
	rm -rf $(FPGA_BUILD_DIR)
|
||||
39
hw/opae/README
Normal file
39
hw/opae/README
Normal file
@@ -0,0 +1,39 @@
|
||||
Use the following steps to build Vortex and run it on an FPGA on the Intel cloud server using OPAE.
|
||||
This script is also present at ~/dev/runVortex
|
||||
|
||||
## To configure quartus and opae. Run this after logging in.
|
||||
source /export/fpga/bin/setup-fpga-env fpga-pac-a10
|
||||
#########################
|
||||
## Vortex Run commands ##
|
||||
#########################
|
||||
## Synthesis
|
||||
cd ~/dev/Vortex/driver/hw/
|
||||
# Configure a Quartus build area
|
||||
afu_synth_setup -s sources.txt build_fpga
|
||||
cd build_fpga
|
||||
# Run Quartus in the vLab batch queue
|
||||
qsub-synth
|
||||
# Check if the job is submitted to the queue and running. Status should be R
|
||||
qstat | grep tinebp
|
||||
# Constantly monitoring the job submitted to the queue. Stop this using Ctrl+C
|
||||
watch 'qstat | grep tinebp'
|
||||
## Executing on FPGA
|
||||
# From the build_fpga directory acquire a fpga node
|
||||
qsub-fpga
|
||||
# Go to the directory where qsub-synth was run above
|
||||
cd $PBS_O_WORKDIR
|
||||
# Load the image onto an FPGA
|
||||
fpgaconf vortex_afu.gbs
|
||||
# If this reports multiple ports, pass --bus to fpgaconf. The bus number can be found with `fpgainfo port`.
|
||||
#fpgaconf --bus 0xaf vortex_afu.gbs
|
||||
## Running the Test case
|
||||
cd ../../sw/opae
|
||||
make clean
|
||||
make
|
||||
# For shared library
|
||||
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
|
||||
# Run the program
|
||||
cd ../../tests/basic
|
||||
make clean
|
||||
make
|
||||
./basic
|
||||
48
hw/opae/ccip_interface_reg.sv
Normal file
48
hw/opae/ccip_interface_reg.sv
Normal file
@@ -0,0 +1,48 @@
|
||||
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
|
||||
|
||||
// Register all interface signals
|
||||
|
||||
import ccip_if_pkg::*;
|
||||
// One-stage register slice for the CCI-P interface.
//
// Every *_T0 input is sampled on the rising edge of pClk and presented
// unchanged on the matching *_T1 output one cycle later. This applies to
// both directions: the Rx-side signals and the AFU's Tx port are delayed by
// the same single stage. There is no reset; the registers simply follow
// their inputs.
module ccip_interface_reg(
    // Clock that samples all T0 inputs
    input  logic        pClk,
    // Unregistered (T0) side
    input  logic        pck_cp2af_softReset_T0,   // CCI-P ACTIVE HIGH Soft Reset
    input  logic [1:0]  pck_cp2af_pwrState_T0,    // CCI-P AFU Power State
    input  logic        pck_cp2af_error_T0,       // CCI-P Protocol Error Detected
    input  t_if_ccip_Rx pck_cp2af_sRx_T0,         // CCI-P Rx Port
    input  t_if_ccip_Tx pck_af2cp_sTx_T0,         // CCI-P Tx Port

    // Registered (T1) side: same signals, one pClk cycle later
    output logic        pck_cp2af_softReset_T1,
    output logic [1:0]  pck_cp2af_pwrState_T1,
    output logic        pck_cp2af_error_T1,

    output t_if_ccip_Rx pck_cp2af_sRx_T1,
    output t_if_ccip_Tx pck_af2cp_sTx_T1
);

    // Pipeline registers. The "preserve" attribute asks the synthesis tool
    // to keep them even if it would otherwise optimize them away.
    (* preserve *) logic        pck_cp2af_softReset_T0_q;
    (* preserve *) logic [1:0]  pck_cp2af_pwrState_T0_q;
    (* preserve *) logic        pck_cp2af_error_T0_q;
    (* preserve *) t_if_ccip_Rx pck_cp2af_sRx_T0_q;
    (* preserve *) t_if_ccip_Tx pck_af2cp_sTx_T0_q;

    // Capture stage
    always_ff @(posedge pClk) begin
        pck_af2cp_sTx_T0_q       <= pck_af2cp_sTx_T0;
        pck_cp2af_sRx_T0_q       <= pck_cp2af_sRx_T0;
        pck_cp2af_error_T0_q     <= pck_cp2af_error_T0;
        pck_cp2af_pwrState_T0_q  <= pck_cp2af_pwrState_T0;
        pck_cp2af_softReset_T0_q <= pck_cp2af_softReset_T0;
    end

    // Outputs are driven directly from the registers
    assign pck_cp2af_softReset_T1 = pck_cp2af_softReset_T0_q;
    assign pck_cp2af_pwrState_T1  = pck_cp2af_pwrState_T0_q;
    assign pck_cp2af_error_T1     = pck_cp2af_error_T0_q;
    assign pck_cp2af_sRx_T1       = pck_cp2af_sRx_T0_q;
    assign pck_af2cp_sTx_T1       = pck_af2cp_sTx_T0_q;

endmodule
|
||||
172
hw/opae/ccip_std_afu.sv
Normal file
172
hw/opae/ccip_std_afu.sv
Normal file
@@ -0,0 +1,172 @@
|
||||
// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex
|
||||
|
||||
// Top Level Vortex Driver
|
||||
|
||||
// To be done:
|
||||
// Check how to run this with OPAE. Looks like setup issue
|
||||
|
||||
|
||||
`include "platform_if.vh"
|
||||
|
||||
import local_mem_cfg_pkg::*;
|
||||
|
||||
// Top-level AFU wrapper.
//
// Selects the platform-provided clock/reset, passes the CCI-P signals through
// a one-cycle register stage (ccip_interface_reg), instantiates vortex_afu,
// and fans vortex_afu's single Avalon port out to NUM_LOCAL_MEM_BANKS local
// memory banks, with exactly one bank addressed by mem_bank_select.
module ccip_std_afu
#(
    parameter NUM_LOCAL_MEM_BANKS = 2
)
(
    // CCI-P Clocks and Resets
    input  logic        pClk,                 // Primary CCI-P interface clock.
    input  logic        pClkDiv2,             // Aligned, pClk divided by 2.
    input  logic        pClkDiv4,             // Aligned, pClk divided by 4.
    input  logic        uClk_usr,             // User clock domain. Refer to clock programming guide.
    input  logic        uClk_usrDiv2,         // Aligned, user clock divided by 2.
    input  logic        pck_cp2af_softReset,  // CCI-P ACTIVE HIGH Soft Reset

    input  logic [1:0]  pck_cp2af_pwrState,   // CCI-P AFU Power State
    input  logic        pck_cp2af_error,      // CCI-P Protocol Error Detected

    // CCI-P structures
    input  t_if_ccip_Rx pck_cp2af_sRx,        // CCI-P Rx Port
    output t_if_ccip_Tx pck_af2cp_sTx,        // CCI-P Tx Port

    // Local memory interface
    avalon_mem_if.to_fiu local_mem[NUM_LOCAL_MEM_BANKS]
);

    // ====================================================================
    // Clock and reset selection
    // ====================================================================

    // The platform may replace pClk with the clock named in the AFU's JSON
    // file; the PLATFORM_PARAM_* macros resolve to whichever clock/reset
    // pair was chosen.
    logic clk;
    logic reset;

    assign clk   = `PLATFORM_PARAM_CCI_P_CLOCK;
    assign reset = `PLATFORM_PARAM_CCI_P_RESET;


    // ====================================================================
    // Interface register stage
    // ====================================================================

    // Power state and error are registered but have no consumer in this
    // module; noprune keeps them from being removed.
    (* noprune *) logic [1:0] cp2af_pwrState_T1;
    (* noprune *) logic       cp2af_error_T1;

    logic        reset_T1;       // registered reset, fed to vortex_afu
    t_if_ccip_Rx cp2af_sRx_T1;   // registered Rx port, fed to vortex_afu
    t_if_ccip_Tx af2cp_sTx_T0;   // vortex_afu's Tx port before registering

    ccip_interface_reg inst_green_ccip_interface_reg
    (
        .pClk                    (clk),

        // Unregistered side
        .pck_cp2af_softReset_T0  (reset),
        .pck_cp2af_pwrState_T0   (pck_cp2af_pwrState),
        .pck_cp2af_error_T0      (pck_cp2af_error),
        .pck_cp2af_sRx_T0        (pck_cp2af_sRx),
        .pck_af2cp_sTx_T0        (af2cp_sTx_T0),

        // Registered side
        .pck_cp2af_softReset_T1  (reset_T1),
        .pck_cp2af_pwrState_T1   (cp2af_pwrState_T1),
        .pck_cp2af_error_T1      (cp2af_error_T1),
        .pck_cp2af_sRx_T1        (cp2af_sRx_T1),
        .pck_af2cp_sTx_T1        (pck_af2cp_sTx)
    );


    // ====================================================================
    // User AFU
    // ====================================================================

    //
    // vortex_afu depends on CCI-P and local memory being in the same clock
    // domain. This is accomplished by choosing a common clock in the AFU's
    // JSON description. The platform instantiates clock-crossing shims
    // automatically, as needed.
    //
    // Memory banks are used very simply here: only one bank is active at a
    // time, selected by mem_bank_select, which is driven by vortex_afu.
    //

    // Single Avalon port as seen by vortex_afu
    t_local_mem_data        avs_writedata;
    t_local_mem_data        avs_readdata;
    t_local_mem_addr        avs_address;
    t_local_mem_byte_mask   avs_byteenable;
    t_local_mem_burst_cnt   avs_burstcount;
    logic                   avs_write;
    logic                   avs_read;
    logic                   avs_waitrequest;
    logic                   avs_readdatavalid;

    // Index of the bank currently addressed
    logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select;

    vortex_afu
    #(
        .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
    )
    vortex_afu_inst
    (
        .clk                (clk),
        .SoftReset          (reset_T1),

        .cp2af_sRxPort      (cp2af_sRx_T1),
        .af2cp_sTxPort      (af2cp_sTx_T0),

        .avs_address        (avs_address),
        .avs_burstcount     (avs_burstcount),
        .avs_byteenable     (avs_byteenable),
        .avs_write          (avs_write),
        .avs_writedata      (avs_writedata),
        .avs_read           (avs_read),
        .avs_readdata       (avs_readdata),
        .avs_readdatavalid  (avs_readdatavalid),
        .avs_waitrequest    (avs_waitrequest),
        .mem_bank_select    (mem_bank_select)
    );


    // ====================================================================
    // Bank fan-out / response mux
    // ====================================================================

    // Per-bank copies of the memory-to-AFU signals, held in arrays so the
    // active bank can be picked with a plain index.
    logic             avs_waitrequest_v[NUM_LOCAL_MEM_BANKS];
    t_local_mem_data  avs_readdata_v[NUM_LOCAL_MEM_BANKS];
    logic             avs_readdatavalid_v[NUM_LOCAL_MEM_BANKS];

    genvar b;
    generate
        for (b = 0; b < NUM_LOCAL_MEM_BANKS; b = b + 1)
        begin : lmb
            // High when this bank is the one addressed by mem_bank_select
            logic bank_selected;
            assign bank_selected = ($bits(mem_bank_select)'(b) == mem_bank_select);

            // Local memory to AFU signals
            assign avs_waitrequest_v[b]   = local_mem[b].waitrequest;
            assign avs_readdata_v[b]      = local_mem[b].readdata;
            assign avs_readdatavalid_v[b] = local_mem[b].readdatavalid;

            // Address, data, byte enables and burst count are broadcast to
            // every bank; only the request strobes are bank-specific.
            assign local_mem[b].address    = avs_address;
            assign local_mem[b].writedata  = avs_writedata;
            assign local_mem[b].byteenable = avs_byteenable;
            assign local_mem[b].burstcount = avs_burstcount;

            // Requests reach a bank only while it is selected
            assign local_mem[b].write = avs_write && bank_selected;
            assign local_mem[b].read  = avs_read  && bank_selected;
        end
    endgenerate

    // Return the selected bank's responses to vortex_afu
    assign avs_waitrequest   = avs_waitrequest_v[mem_bank_select];
    assign avs_readdata      = avs_readdata_v[mem_bank_select];
    assign avs_readdatavalid = avs_readdatavalid_v[mem_bank_select];

endmodule
|
||||
97
hw/opae/opae_setup.sh
Normal file
97
hw/opae/opae_setup.sh
Normal file
@@ -0,0 +1,97 @@
|
||||
|
||||
|
||||
#!/bin/bash
##
## Step-by-step setup of the OPAE SDK 1.4.0 and its ASE simulation flow,
## followed by a build of the intel-fpga-bbb hello_world sample.
##
## Run this from a bash shell. The locations below default to the paths used
## when these notes were written; override them from the environment to use
## a different checkout, e.g.
##     OPAE_WORK_DIR=$HOME/opae ./opae_setup.sh
##

## Required tools
# gcc (>4.9)
# libjson
# python
# Quartus
# RTL Simulator (VCS or ModelSim or QuestaSim)

# Directory holding the SDK checkout and the helper env scripts
OPAE_WORK_DIR=${OPAE_WORK_DIR:-/nethome/achawda6/specialProblem}
# Unpacked OPAE SDK source tree
OPAE_SDK_DIR=${OPAE_SDK_DIR:-$OPAE_WORK_DIR/opae-sdk-1.4.0-1}
# Install prefix for OPAE and the BBB libraries
OPAE_INSTALL_DIR=${OPAE_INSTALL_DIR:-$OPAE_SDK_DIR/mybuild/opaeInstall}
# ASE helper scripts shipped with the SDK
ASE_SCRIPTS_DIR=$OPAE_SDK_DIR/libopae/plugins/ase/scripts

## Download OPAE SDK from https://github.com/OPAE/opae-sdk/archive/1.4.0-1.tar.gz
cd "$OPAE_SDK_DIR"

## Update the following file for the local tool installation before use:
# $OPAE_SDK_DIR/libopae/plugins/ase/scripts/ase_setup_template.sh


###################################################################################################
################################### TO BE DONE EVERY TIME #########################################
###################################################################################################
## The shell must be bash before running any of the steps below. (The
## original notes started an interactive `bash` here; inside a script that
## would block until the subshell exits, so it is kept only as a reminder.)

## Setup Environment
## Running the default script results in multiple versions of libcurl during cmake.
#source $OPAE_WORK_DIR/rg_intel_fpga_end_19.3.sh
source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh

## Setup the variables for using the Quartus modelsim
source "$OPAE_WORK_DIR/modelsim_env.sh"

## Run this to setup the environment variables
source "$ASE_SCRIPTS_DIR/ase_setup_template.sh"

## gcc version should be greater than 4.9 to support c++14
source "$ASE_SCRIPTS_DIR/env_check.sh"

export PATH=$OPAE_INSTALL_DIR/bin:${PATH}
export FPGA_BBB_CCI_SRC=$OPAE_SDK_DIR/intel-fpga-bbb
####################################################################################################


## Setup OPAE
# -p so that re-running after a previous attempt does not fail here
mkdir -p mybuild
cd mybuild

## Update OPAE_INSTALL_DIR to the directory where you want to install OPAE
cmake .. -DBUILD_ASE=1 -DCMAKE_INSTALL_PREFIX="$OPAE_INSTALL_DIR"
make
make install


## Setup ASE
## Add the installed OPAE path in PATH
export PATH=$OPAE_INSTALL_DIR/bin:${PATH}

## Use this version of HDL files
"$ASE_SCRIPTS_DIR/afu_sim_setup" --sources="$OPAE_SDK_DIR/libopae/plugins/ase/rtl/sources_ase_server.txt" run1Build
cd run1Build/
python scripts/ipc_clean.py


## Running Sample
## Download opae-bbb from https://github.com/OPAE/intel-fpga-bbb
cd "$OPAE_SDK_DIR"
git clone https://github.com/OPAE/intel-fpga-bbb
cd "$OPAE_SDK_DIR/intel-fpga-bbb"
mkdir -p mybuild
cd mybuild
cmake .. -DCMAKE_INSTALL_PREFIX="$OPAE_INSTALL_DIR"
make
make install

export FPGA_BBB_CCI_SRC=$OPAE_SDK_DIR/intel-fpga-bbb


## Running hello world
cd "$OPAE_SDK_DIR/intel-fpga-bbb/samples/tutorial/01_hello_world"
afu_sim_setup --source hw/rtl/sources.txt build_sim
cd build_sim
## Update libstdc++6 if it errors out
make
make sim
|
||||
8
hw/opae/set_env.sh
Normal file
8
hw/opae/set_env.sh
Normal file
@@ -0,0 +1,8 @@
|
||||
#!/bin/bash
# Environment for the OPAE 1.4.0 / Quartus 19.3 tool flow.
# Source this file (". set_env.sh") rather than executing it, so that the
# exported variables end up in the calling shell.

source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh

# OPAE tools and the ModelSim binaries go first on the command search path
export PATH=/tools/opae/1.4.0/bin:/tools/reconfig/intel/19.3/modelsim_ase/bin:$PATH

# Prepend the OPAE libraries to the existing library search path. The
# ${VAR:+...} form avoids a trailing ':' when LD_LIBRARY_PATH was unset
# (an empty entry makes the loader search the current directory).
export LD_LIBRARY_PATH=/tools/opae/1.4.0/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}

# NOTE(review): QUARTUS_ROOTDIR is presumably set by the script sourced above - confirm
export QUARTUS_HOME=$QUARTUS_ROOTDIR
export MTI_HOME=/tools/reconfig/intel/19.3/modelsim_ase
export FPGA_FAMILY=arria10
|
||||
120
hw/opae/sources.txt
Normal file
120
hw/opae/sources.txt
Normal file
@@ -0,0 +1,120 @@
|
||||
vortex_afu.json
|
||||
|
||||
+define+GLOBAL_BLOCK_SIZE_BYTES=64
|
||||
|
||||
+incdir+.
|
||||
+incdir+../../rtl
|
||||
+incdir+../../rtl/shared_memory
|
||||
+incdir+../../rtl/cache
|
||||
+incdir+../../rtl/VX_cache
|
||||
+incdir+../../rtl/interfaces
|
||||
+incdir+../../rtl/pipe_regs
|
||||
+incdir+../../rtl/compat
|
||||
|
||||
../../rtl/VX_define_synth.v
|
||||
../../rtl/VX_define.v
|
||||
../../rtl/VX_cache/VX_cache_config.v
|
||||
../../rtl/Vortex_SOC.v
|
||||
../../rtl/Vortex_Cluster.v
|
||||
../../rtl/Vortex.v
|
||||
../../rtl/VX_front_end.v
|
||||
../../rtl/VX_back_end.v
|
||||
../../rtl/VX_fetch.v
|
||||
../../rtl/VX_scheduler.v
|
||||
../../rtl/VX_execute_unit.v
|
||||
../../rtl/VX_warp.v
|
||||
../../rtl/VX_icache_stage.v
|
||||
../../rtl/VX_gpr_wrapper.v
|
||||
../../rtl/byte_enabled_simple_dual_port_ram.v
|
||||
../../rtl/VX_gpgpu_inst.v
|
||||
../../rtl/VX_writeback.v
|
||||
../../rtl/VX_countones.v
|
||||
../../rtl/VX_csr_handler.v
|
||||
../../rtl/VX_csr_pipe.v
|
||||
../../rtl/VX_generic_queue_ll.v
|
||||
../../rtl/VX_warp_scheduler.v
|
||||
../../rtl/VX_priority_encoder.v
|
||||
../../rtl/VX_generic_queue.v
|
||||
../../rtl/pipe_regs/VX_f_d_reg.v
|
||||
../../rtl/pipe_regs/VX_i_d_reg.v
|
||||
../../rtl/pipe_regs/VX_d_e_reg.v
|
||||
../../rtl/VX_gpr.v
|
||||
../../rtl/VX_gpr_stage.v
|
||||
../../rtl/VX_dmem_controller.v
|
||||
../../rtl/VX_alu.v
|
||||
../../rtl/VX_generic_stack.v
|
||||
../../rtl/VX_generic_priority_encoder.v
|
||||
../../rtl/VX_csr_data.v
|
||||
../../rtl/VX_lsu.v
|
||||
../../rtl/VX_decode.v
|
||||
../../rtl/VX_inst_multiplex.v
|
||||
../../rtl/VX_csr_wrapper.v
|
||||
../../rtl/VX_priority_encoder_w_mask.v
|
||||
../../rtl/VX_generic_register.v
|
||||
../../rtl/VX_lsu_addr_gen.v
|
||||
../../rtl/compat/VX_mult.v
|
||||
../../rtl/compat/VX_divide.v
|
||||
../../rtl/VX_cache/VX_snp_fwd_arb.v
|
||||
../../rtl/VX_cache/VX_cache_dram_req_arb.v
|
||||
../../rtl/VX_cache/VX_cache_dfq_queue.v
|
||||
../../rtl/VX_cache/VX_cache_wb_sel_merge.v
|
||||
../../rtl/VX_cache/VX_mrv_queue.v
|
||||
../../rtl/VX_cache/VX_dcache_llv_resp_bank_sel.v
|
||||
../../rtl/VX_cache/VX_tag_data_access.v
|
||||
../../rtl/VX_cache/VX_cache.v
|
||||
../../rtl/VX_cache/VX_cache_core_req_bank_sel.v
|
||||
../../rtl/VX_cache/VX_cache_req_queue.v
|
||||
../../rtl/VX_cache/VX_bank.v
|
||||
../../rtl/VX_cache/VX_cache_miss_resrv.v
|
||||
../../rtl/VX_cache/VX_fill_invalidator.v
|
||||
../../rtl/VX_cache/VX_tag_data_structure.v
|
||||
../../rtl/VX_cache/VX_prefetcher.v
|
||||
../../rtl/cache/VX_generic_pe.v
|
||||
../../rtl/cache/cache_set.v
|
||||
../../rtl/cache/VX_d_cache.v
|
||||
../../rtl/cache/VX_Cache_Bank.v
|
||||
../../rtl/cache/VX_cache_data_per_index.v
|
||||
../../rtl/cache/VX_d_cache_encapsulate.v
|
||||
../../rtl/cache/VX_cache_bank_valid.v
|
||||
../../rtl/cache/VX_cache_data.v
|
||||
../../rtl/shared_memory/VX_shared_memory_block.v
|
||||
../../rtl/shared_memory/VX_priority_encoder_sm.v
|
||||
../../rtl/shared_memory/VX_shared_memory.v
|
||||
../../rtl/shared_memory/VX_bank_valids.v
|
||||
../../rtl/interfaces/VX_exec_unit_req_inter.v
|
||||
../../rtl/interfaces/VX_branch_response_inter.v
|
||||
../../rtl/interfaces/VX_inst_meta_inter.v
|
||||
../../rtl/interfaces/VX_join_inter.v
|
||||
../../rtl/interfaces/VX_icache_response_inter.v
|
||||
../../rtl/interfaces/VX_gpr_wspawn_inter.v
|
||||
../../rtl/interfaces/VX_inst_exec_wb_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_dram_req_inter.v
|
||||
../../rtl/interfaces/VX_csr_req_inter.v
|
||||
../../rtl/interfaces/VX_icache_request_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_res_inter.v
|
||||
../../rtl/interfaces/VX_frE_to_bckE_req_inter.v
|
||||
../../rtl/interfaces/VX_dram_req_rsp_inter.v
|
||||
../../rtl/interfaces/VX_dcache_request_inter.v
|
||||
../../rtl/interfaces/VX_gpr_data_inter.v
|
||||
../../rtl/interfaces/VX_dcache_response_inter.v
|
||||
../../rtl/interfaces/VX_csr_wb_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_req_inter.v
|
||||
../../rtl/interfaces/VX_lsu_req_inter.v
|
||||
../../rtl/interfaces/VX_gpu_snp_req_rsp.v
|
||||
../../rtl/interfaces/VX_mw_wb_inter.v
|
||||
../../rtl/interfaces/VX_gpr_jal_inter.v
|
||||
../../rtl/interfaces/VX_gpu_inst_req_inter.v
|
||||
../../rtl/interfaces/VX_wstall_inter.v
|
||||
../../rtl/interfaces/VX_wb_inter.v
|
||||
../../rtl/interfaces/VX_gpr_clone_inter.v
|
||||
../../rtl/interfaces/VX_gpr_read_inter.v
|
||||
../../rtl/interfaces/VX_mem_req_inter.v
|
||||
../../rtl/interfaces/VX_jal_response_inter.v
|
||||
../../rtl/interfaces/VX_warp_ctl_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_snp_req_inter.v
|
||||
../../rtl/interfaces/VX_gpu_dcache_dram_res_inter.v
|
||||
../../rtl/interfaces/VX_inst_mem_wb_inter.v
|
||||
|
||||
ccip_interface_reg.sv
|
||||
ccip_std_afu.sv
|
||||
vortex_afu.sv
|
||||
49
hw/opae/vortex_afu.json
Normal file
49
hw/opae/vortex_afu.json
Normal file
@@ -0,0 +1,49 @@
|
||||
{
|
||||
"version": 1,
|
||||
"afu-image": {
|
||||
"power": 0,
|
||||
"clock-frequency-high": "auto",
|
||||
"clock-frequency-low": "auto",
|
||||
|
||||
"mmio-csr-cmd": 10,
|
||||
"mmio-csr-status": 12,
|
||||
"mmio-csr-io-addr": 14,
|
||||
"mmio-csr-mem-addr": 16,
|
||||
"mmio-csr-data-size": 18,
|
||||
|
||||
"cmd-type-read": 1,
|
||||
"cmd-type-write": 2,
|
||||
"cmd-type-run": 3,
|
||||
"cmd-type-clflush": 4,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
"class": "ccip_std_afu_avalon_mm",
|
||||
"module-ports" :
|
||||
[
|
||||
{
|
||||
"class": "cci-p",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"class": "local-memory",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"accelerator-clusters":
|
||||
[
|
||||
{
|
||||
"name": "vortex_afu",
|
||||
"total-contexts": 1,
|
||||
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
576
hw/opae/vortex_afu.sv
Normal file
576
hw/opae/vortex_afu.sv
Normal file
@@ -0,0 +1,576 @@
|
||||
// Interface between CSR and FSM
|
||||
// All the MMIOs read/write are done from CSR and passed to the FSM for state transitions
|
||||
|
||||
// To be done:
|
||||
// Change address size to buffer's address size and data size based on IO address size. Check from hello_world
|
||||
|
||||
`include "platform_if.vh"
|
||||
import local_mem_cfg_pkg::*;
|
||||
`include "afu_json_info.vh"
|
||||
|
||||
module vortex_afu #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
) (
|
||||
// global signals
|
||||
input clk,
|
||||
input SoftReset,
|
||||
|
||||
// IF signals between CCI and AFU
|
||||
input t_if_ccip_Rx cp2af_sRxPort,
|
||||
output t_if_ccip_Tx af2cp_sTxPort,
|
||||
|
||||
// Avalon signals for local memory access
|
||||
output t_local_mem_data avs_writedata,
|
||||
input t_local_mem_data avs_readdata,
|
||||
output t_local_mem_addr avs_address,
|
||||
input logic avs_waitrequest,
|
||||
output logic avs_write,
|
||||
output logic avs_read,
|
||||
output t_local_mem_byte_mask avs_byteenable,
|
||||
output t_local_mem_burst_cnt avs_burstcount,
|
||||
input avs_readdatavalid,
|
||||
|
||||
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
|
||||
);
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 16;
|
||||
|
||||
localparam VX_SNOOP_DELAY = 300;
|
||||
localparam VX_SNOOP_LEVELS = 2;
|
||||
|
||||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
||||
|
||||
localparam CMD_TYPE_READ = `AFU_IMAGE_CMD_TYPE_READ;
|
||||
localparam CMD_TYPE_WRITE = `AFU_IMAGE_CMD_TYPE_WRITE;
|
||||
localparam CMD_TYPE_RUN = `AFU_IMAGE_CMD_TYPE_RUN;
|
||||
localparam CMD_TYPE_CLFLUSH = `AFU_IMAGE_CMD_TYPE_CLFLUSH;
|
||||
|
||||
localparam MMIO_CSR_CMD = `AFU_IMAGE_MMIO_CSR_CMD;
|
||||
localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS;
|
||||
localparam MMIO_CSR_IO_ADDR = `AFU_IMAGE_MMIO_CSR_IO_ADDR;
|
||||
localparam MMIO_CSR_MEM_ADDR = `AFU_IMAGE_MMIO_CSR_MEM_ADDR;
|
||||
localparam MMIO_CSR_DATA_SIZE = `AFU_IMAGE_MMIO_CSR_DATA_SIZE;
|
||||
|
||||
logic [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
typedef enum logic[3:0] {
|
||||
STATE_IDLE,
|
||||
STATE_READ,
|
||||
STATE_WRITE,
|
||||
STATE_RUN,
|
||||
STATE_CLFLUSH
|
||||
} state_t;
|
||||
|
||||
state_t state;
|
||||
|
||||
// Vortex signals /////////////////////////////////////////////////////////////
|
||||
|
||||
logic vx_dram_req_read;
|
||||
logic vx_dram_req_write;
|
||||
logic [31:0] vx_dram_req_addr;
|
||||
logic [31:0] vx_dram_req_data[15:0];
|
||||
logic vx_dram_req_delay;
|
||||
|
||||
logic vx_dram_fill_accept;
|
||||
logic vx_dram_fill_rsp;
|
||||
logic [31:0] vx_dram_fill_rsp_addr;
|
||||
logic [31:0] vx_dram_fill_rsp_data[15:0];
|
||||
|
||||
logic vx_snp_req;
|
||||
logic [31:0] vx_snp_req_addr;
|
||||
logic vx_snp_req_delay;
|
||||
|
||||
logic vx_ebreak;
|
||||
|
||||
// AVS Queues /////////////////////////////////////////////////////////////////
|
||||
|
||||
logic avs_raq_push;
|
||||
t_local_mem_addr avs_raq_din;
|
||||
logic avs_raq_pop;
|
||||
t_local_mem_addr avs_raq_dout;
|
||||
logic avs_raq_empty;
|
||||
logic avs_raq_full;
|
||||
|
||||
logic avs_rdq_push;
|
||||
t_local_mem_data avs_rdq_din;
|
||||
logic avs_rdq_pop;
|
||||
t_local_mem_data avs_rdq_dout;
|
||||
logic avs_rdq_empty;
|
||||
logic avs_rdq_full;
|
||||
|
||||
// CSR variables //////////////////////////////////////////////////////////////
|
||||
|
||||
logic [2:0] csr_cmd;
|
||||
t_ccip_clAddr csr_io_addr;
|
||||
t_local_mem_addr csr_mem_addr;
|
||||
logic [31:0] csr_data_size;
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
t_ccip_c0_ReqMmioHdr mmioHdr;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
||||
end
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset)
|
||||
begin
|
||||
af2cp_sTxPort.c2.hdr <= 0;
|
||||
af2cp_sTxPort.c2.data <= 0;
|
||||
af2cp_sTxPort.c2.mmioRdValid <= 0;
|
||||
csr_cmd <= 0;
|
||||
csr_io_addr <= 0;
|
||||
csr_mem_addr <= 0;
|
||||
csr_data_size <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
csr_cmd <= 0;
|
||||
af2cp_sTxPort.c2.mmioRdValid <= 0;
|
||||
|
||||
// serve MMIO write request
|
||||
if (cp2af_sRxPort.c0.mmioWrValid)
|
||||
begin
|
||||
case (mmioHdr.address)
|
||||
MMIO_CSR_IO_ADDR: begin
|
||||
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6);
|
||||
$display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6));
|
||||
end
|
||||
MMIO_CSR_MEM_ADDR: begin
|
||||
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6);
|
||||
$display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6));
|
||||
end
|
||||
MMIO_CSR_DATA_SIZE: begin
|
||||
csr_data_size <= $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6);
|
||||
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6));
|
||||
end
|
||||
MMIO_CSR_CMD: begin
|
||||
csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data);
|
||||
$display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data));
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// serve MMIO read requests
|
||||
if (cp2af_sRxPort.c0.mmioRdValid)
|
||||
begin
|
||||
af2cp_sTxPort.c2.hdr.tid <= mmioHdr.tid; // copy TID
|
||||
case (mmioHdr.address)
|
||||
// AFU header
|
||||
16'h0000: af2cp_sTxPort.c2.data <= {
|
||||
4'b0001, // Feature type = AFU
|
||||
8'b0, // reserved
|
||||
4'b0, // afu minor revision = 0
|
||||
7'b0, // reserved
|
||||
1'b1, // end of DFH list = 1
|
||||
24'b0, // next DFH offset = 0
|
||||
4'b0, // afu major revision = 0
|
||||
12'b0 // feature ID = 0
|
||||
};
|
||||
AFU_ID_L: af2cp_sTxPort.c2.data <= afu_id[63:0]; // afu id low
|
||||
AFU_ID_H: af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi
|
||||
16'h0006: af2cp_sTxPort.c2.data <= 64'h0; // next AFU
|
||||
16'h0008: af2cp_sTxPort.c2.data <= 64'h0; // reserved
|
||||
MMIO_CSR_STATUS: begin
|
||||
if (state != af2cp_sTxPort.c2.data)
|
||||
$display("%t: STATUS: state=%0d", $time, state);
|
||||
af2cp_sTxPort.c2.data <= state;
|
||||
end
|
||||
default: af2cp_sTxPort.c2.data <= 64'h0;
|
||||
endcase
|
||||
af2cp_sTxPort.c2.mmioRdValid <= 1; // post response
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// COMMAND FSM ////////////////////////////////////////////////////////////////
|
||||
|
||||
logic [31:0] cci_write_ctr;
|
||||
logic [31:0] avs_read_ctr;
|
||||
logic [31:0] avs_write_ctr;
|
||||
logic [31:0] vx_snoop_ctr;
|
||||
logic [9:0] vx_snoop_delay;
|
||||
logic vx_reset;
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
vx_reset <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
vx_reset <= 0;
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
case (csr_cmd)
|
||||
CMD_TYPE_READ: begin
|
||||
$display("%t: STATE READ: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_READ;
|
||||
end
|
||||
CMD_TYPE_WRITE: begin
|
||||
$display("%t: STATE WRITE: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_WRITE;
|
||||
end
|
||||
CMD_TYPE_RUN: begin
|
||||
$display("%t: STATE START", $time);
|
||||
vx_reset <= 1;
|
||||
state <= STATE_RUN;
|
||||
end
|
||||
CMD_TYPE_CLFLUSH: begin
|
||||
$display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_CLFLUSH;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
STATE_READ: begin
|
||||
if (cci_write_ctr >= csr_data_size)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_WRITE: begin
|
||||
if (avs_write_ctr >= csr_data_size)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_RUN: begin
|
||||
if (vx_ebreak)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_CLFLUSH: begin
|
||||
if (vx_snoop_delay >= VX_SNOOP_DELAY)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// AVS Controller /////////////////////////////////////////////////////////////
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (SoftReset)
|
||||
begin
|
||||
mem_bank_select <= 0;
|
||||
avs_burstcount <= 1;
|
||||
avs_byteenable <= 64'hffffffffffffffff;
|
||||
avs_address <= 0;
|
||||
avs_writedata <= 0;
|
||||
avs_read <= 0;
|
||||
avs_write <= 0;
|
||||
avs_read_ctr <= 0;
|
||||
avs_write_ctr <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
avs_read <= 0;
|
||||
avs_write <= 0;
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
avs_read_ctr <= 0;
|
||||
avs_write_ctr <= 0;
|
||||
end
|
||||
|
||||
STATE_READ: begin
|
||||
if (!avs_raq_full
|
||||
&& !avs_rdq_full
|
||||
&& !avs_waitrequest
|
||||
&& avs_read_ctr < csr_data_size)
|
||||
begin
|
||||
avs_address <= csr_mem_addr + avs_read_ctr;
|
||||
avs_read <= 1;
|
||||
avs_read_ctr <= avs_read_ctr + 1;
|
||||
$display("%t: AVS Rd Req: addr=%h", $time, csr_mem_addr + avs_read_ctr);
|
||||
end
|
||||
end
|
||||
|
||||
STATE_WRITE: begin
|
||||
if (cp2af_sRxPort.c0.rspValid
|
||||
&& avs_write_ctr < csr_data_size)
|
||||
begin
|
||||
avs_writedata <= cp2af_sRxPort.c0.data;
|
||||
avs_address <= csr_mem_addr + avs_write_ctr;
|
||||
avs_write <= 1;
|
||||
avs_write_ctr <= avs_write_ctr + 1;
|
||||
$display("%t: AVS Wr Req: addr=%h (%0d/%0d)", $time, csr_mem_addr + avs_write_ctr, avs_write_ctr + 1, csr_data_size);
|
||||
end
|
||||
end
|
||||
|
||||
STATE_RUN, STATE_CLFLUSH: begin
|
||||
if (vx_dram_req_read
|
||||
&& !vx_dram_req_delay)
|
||||
begin
|
||||
avs_address <= (vx_dram_req_addr >> 6);
|
||||
avs_read <= 1;
|
||||
$display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr >> 6);
|
||||
end
|
||||
|
||||
if (vx_dram_req_write
|
||||
&& !vx_dram_req_delay)
|
||||
begin
|
||||
avs_writedata <= {>>{vx_dram_req_data}};
|
||||
avs_address <= (vx_dram_req_addr >> 6);
|
||||
avs_write <= 1;
|
||||
$display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr >> 6);
|
||||
end
|
||||
end
|
||||
endcase
|
||||
|
||||
if (avs_readdatavalid)
|
||||
begin
|
||||
$display("%t: AVS Rd Rsp", $time);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Vortex DRAM requests stalling
|
||||
|
||||
logic vortex_enabled;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||
vx_dram_req_delay = !vortex_enabled || avs_waitrequest || avs_raq_full || avs_rdq_full;
|
||||
end
|
||||
|
||||
// Vortex DRAM fill response
|
||||
|
||||
always_comb
|
||||
begin
|
||||
vx_dram_fill_rsp = vortex_enabled && !avs_rdq_empty && vx_dram_fill_accept;
|
||||
vx_dram_fill_rsp_addr = (avs_raq_dout << 6);
|
||||
{>>{vx_dram_fill_rsp_data}} = avs_rdq_dout;
|
||||
end
|
||||
|
||||
// AVS address read request queue /////////////////////////////////////////////

logic cci_write_req;

always_comb
begin
    // Record the address of every AVS read as it is issued.
    avs_raq_push = avs_read;
    avs_raq_din  = avs_address;

    // Retire the head entry when it is consumed, either by a Vortex fill
    // response or by a CCI write request.
    avs_raq_pop  = cci_write_req || vx_dram_fill_rsp;
end
|
||||
|
||||
// Queue holding the addresses of outstanding AVS reads.
VX_generic_queue_ll #(
    .SIZE  (AVS_RD_QUEUE_SIZE),
    .DATAW ($bits(t_local_mem_addr))
) vx_rd_addr_queue (
    .clk      (clk),
    .reset    (SoftReset),

    // enqueue side
    .push     (avs_raq_push),
    .in_data  (avs_raq_din),
    .full     (avs_raq_full),

    // dequeue side
    .pop      (avs_raq_pop),
    .out_data (avs_raq_dout),
    .empty    (avs_raq_empty)
);
|
||||
|
||||
// AVS data read response queue ///////////////////////////////////////////////

always_comb
begin
    // Capture every valid AVS read-data beat.
    avs_rdq_push = avs_readdatavalid;
    avs_rdq_din  = avs_readdata;

    // Data entries are retired together with their address entries.
    avs_rdq_pop  = avs_raq_pop;
end
|
||||
|
||||
// Queue holding the data returned by AVS reads.
VX_generic_queue_ll #(
    .SIZE  (AVS_RD_QUEUE_SIZE),
    .DATAW ($bits(t_local_mem_data))
) vx_rd_data_queue (
    .clk      (clk),
    .reset    (SoftReset),

    // enqueue side
    .push     (avs_rdq_push),
    .in_data  (avs_rdq_din),
    .full     (avs_rdq_full),

    // dequeue side
    .pop      (avs_rdq_pop),
    .out_data (avs_rdq_dout),
    .empty    (avs_rdq_empty)
);
|
||||
|
||||
// CCI Read Request ///////////////////////////////////////////////////////////

// Set while a CCI read has been issued and its response has not yet arrived.
logic cci_read_pending;

// Header for the next CCI read.
t_ccip_c0_ReqMemHdr rd_hdr;

always_comb
begin
    // Start from an all-zero header, then fill in the only field used here.
    rd_hdr         = t_ccip_c0_ReqMemHdr'(0);
    // Address of the next read: I/O base address plus the AVS write counter.
    rd_hdr.address = csr_io_addr + avs_write_ctr;
end
|
||||
|
||||
// Send read requests to CCI
always_ff @(posedge clk)
begin
    if (SoftReset) begin
        cci_read_pending       <= 0;
        af2cp_sTxPort.c0.valid <= 0;
        af2cp_sTxPort.c0.hdr   <= 0;
    end
    else begin
        // valid defaults to 0 each cycle and is only raised when a request is issued below.
        af2cp_sTxPort.c0.valid <= 0;

        // Issue at most one read at a time while in the WRITE state.
        if (!cci_read_pending                      // ensure no read pending
            && (state == STATE_WRITE)
            && (avs_write_ctr < csr_data_size)     // ensure not done
            && !cp2af_sRxPort.c0TxAlmFull          // ensure read queue not full
            && !avs_waitrequest)                   // ensure AVS write queue not full
        begin
            cci_read_pending       <= 1;
            af2cp_sTxPort.c0.valid <= 1;
            af2cp_sTxPort.c0.hdr   <= rd_hdr;
            $display("%t: CCI Rd Req: addr=%h", $time, rd_hdr.address);
        end

        // The response for the outstanding read clears the pending flag.
        if (cp2af_sRxPort.c0.rspValid && cci_read_pending) begin
            $display("%t: CCI Rd Rsp", $time);
            cci_read_pending <= 0;
        end
    end
end
|
||||
|
||||
// CCI Write Request //////////////////////////////////////////////////////////

// Set while a CCI write has been issued and its response has not yet arrived.
logic cci_write_pending;

// Header for the next CCI write.
t_ccip_c1_ReqMemHdr wr_hdr;

always_comb
begin
    // Build the header from an all-zero template.
    wr_hdr         = t_ccip_c1_ReqMemHdr'(0);
    wr_hdr.sop     = 1; // single line write mode
    wr_hdr.address = csr_io_addr + cci_write_ctr;

    // A write is requested in the READ state when read data is queued,
    // the c1 channel is not almost full, no write is outstanding, and
    // there are still lines left to transfer.
    cci_write_req = !cci_write_pending
                 && (state == STATE_READ)
                 && (cci_write_ctr < csr_data_size)
                 && !avs_rdq_empty
                 && !cp2af_sRxPort.c1TxAlmFull;
end
|
||||
|
||||
// Send write requests to CCI
always_ff @(posedge clk)
begin
    if (SoftReset) begin
        cci_write_pending      <= 0;
        cci_write_ctr          <= 0;
        af2cp_sTxPort.c1.valid <= 0;
        af2cp_sTxPort.c1.data  <= 0;
        af2cp_sTxPort.c1.hdr   <= 0;
    end
    else begin
        // valid defaults to 0 each cycle and is only raised when a request is issued below.
        af2cp_sTxPort.c1.valid <= 0;

        // The transfer counter is cleared whenever the AFU is idle.
        // This must stay ahead of the increment below (last assignment wins).
        if (state == STATE_IDLE) begin
            cci_write_ctr <= 0;
        end

        // Forward the head of the read-data queue to the host.
        if (cci_write_req) begin
            cci_write_pending      <= 1;
            af2cp_sTxPort.c1.valid <= 1;
            af2cp_sTxPort.c1.data  <= t_ccip_clData'(avs_rdq_dout);
            af2cp_sTxPort.c1.hdr   <= wr_hdr;
            $display("%t: CCI Wr Req: addr=%h", $time, wr_hdr.address);
        end

        // The write response completes the line: advance the counter and
        // allow the next request.
        if (cp2af_sRxPort.c1.rspValid && cci_write_pending) begin
            cci_write_pending <= 0;
            cci_write_ctr     <= cci_write_ctr + 1;
            $display("%t: CCI Wr Rsp (%0d/%0d)", $time, cci_write_ctr + 1, csr_data_size);
        end
    end
end
|
||||
|
||||
// Vortex cache snooping //////////////////////////////////////////////////////

always_ff @(posedge clk)
begin
    if (SoftReset) begin
        vx_snoop_delay <= 0;
        vx_snoop_ctr   <= 0;
        vx_snp_req     <= 0;
    end
    else begin
        // The snoop request defaults to 0 each cycle and is raised below
        // only when a new request is issued.
        vx_snp_req <= 0;

        // Both counters are cleared while the AFU is idle.
        if (state == STATE_IDLE) begin
            vx_snoop_delay <= 0;
            vx_snoop_ctr   <= 0;
        end

        // In the CLFLUSH state, issue one snoop per line until csr_data_size
        // lines have been requested, unless the snoop port is asking for a delay.
        if (!vx_snp_req_delay
            && (state == STATE_CLFLUSH)
            && (vx_snoop_ctr < csr_data_size))
        begin
            vx_snp_req      <= 1;
            vx_snp_req_addr <= (csr_mem_addr + vx_snoop_ctr) << 6;
            vx_snoop_ctr    <= vx_snoop_ctr + 1;
        end

        // Count cycles once every snoop has been issued.
        // NOTE(review): this check is not qualified by state, so it also fires
        // in STATE_IDLE whenever vx_snoop_ctr equals csr_data_size (e.g. both
        // zero) and then overrides the clear above - confirm this is intended.
        if (csr_data_size == vx_snoop_ctr) begin
            vx_snoop_delay <= vx_snoop_delay + 1;
        end
    end
end
|
||||
|
||||
// Vortex binding /////////////////////////////////////////////////////////////

Vortex_SOC #() vx_soc (
    .clk                    (clk),
    // Vortex is held in reset by either the AFU soft reset or vx_reset.
    .reset                  (vx_reset || SoftReset),

    // DRAM request channel
    .out_dram_req_read      (vx_dram_req_read),
    .out_dram_req_write     (vx_dram_req_write),
    .out_dram_req_addr      (vx_dram_req_addr),
    .out_dram_req_data      (vx_dram_req_data),
    .out_dram_req_delay     (vx_dram_req_delay),

    // DRAM fill response channel
    .out_dram_fill_rsp      (vx_dram_fill_rsp),
    .out_dram_fill_rsp_addr (vx_dram_fill_rsp_addr),
    .out_dram_fill_rsp_data (vx_dram_fill_rsp_data),
    .out_dram_fill_accept   (vx_dram_fill_accept),

    // Cache snooping request channel
    .llc_snp_req            (vx_snp_req),
    .llc_snp_req_addr       (vx_snp_req_addr),
    .llc_snp_req_delay      (vx_snp_req_delay),

    // Program exit signal
    .out_ebreak             (vx_ebreak)
);
|
||||
|
||||
endmodule
|
||||
69
hw/opae/wave.do
Normal file
69
hw/opae/wave.do
Normal file
@@ -0,0 +1,69 @@
|
||||
# Wave-window signal list for the Vortex AFU simulation.
# Each "add wave" attaches a short label to one hierarchical signal.
onerror {resume}
quietly WaveActivateNextPane {} 0

# Hierarchy prefixes shared by the signals below. The brackets in genblk1[0]
# are escaped so Tcl does not treat them as command substitution.
quietly set afu_path /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst
quietly set soc_path $afu_path/vx_soc
quietly set fetch_path "$soc_path/genblk1/Vortex_Cluster/genblk1\[0\]/vortex_core/vx_front_end/vx_fetch"
quietly set sched_path $fetch_path/warp_scheduler

# AFU control and transfer counters
add wave -noupdate -label clk $afu_path/clk
add wave -noupdate -label reset $afu_path/SoftReset
add wave -noupdate -label state $afu_path/state
add wave -noupdate -label cci_write_pending $afu_path/cci_write_pending
add wave -noupdate -label cci_write_ctr -radix decimal -radixshowbase 0 $afu_path/cci_write_ctr
add wave -noupdate -label csr_data_size -radix decimal -radixshowbase 0 $afu_path/csr_data_size
add wave -noupdate -label avs_read_ctr -radix decimal -radixshowbase 0 $afu_path/avs_read_ctr

# AVS local-memory interface
add wave -noupdate -label avs_waitrequest $afu_path/avs_waitrequest
add wave -noupdate -label avs_address -radix hexadecimal -radixshowbase 0 $afu_path/avs_address
add wave -noupdate -label avs_readdata -radix hexadecimal $afu_path/avs_readdata
add wave -noupdate -label avs_writedata -radix hexadecimal $afu_path/avs_writedata
add wave -noupdate -label avs_write $afu_path/avs_write
add wave -noupdate -label avs_read $afu_path/avs_read
add wave -noupdate -label avs_readdatavalid $afu_path/avs_readdatavalid

# CCI-P channels
add wave -noupdate -label sRx.c0.rspValid $afu_path/cp2af_sRxPort.c0.rspValid
add wave -noupdate -label sRx.c1.rspValid $afu_path/cp2af_sRxPort.c1.rspValid
add wave -noupdate -label sTx.c0.valid $afu_path/af2cp_sTxPort.c0.valid
add wave -noupdate -label sTx.c1.valid $afu_path/af2cp_sTxPort.c1.valid
add wave -noupdate -label cci_write_req $afu_path/cci_write_req

# AVS read address / data queues
add wave -noupdate -label avs_raq_push $afu_path/avs_raq_push
add wave -noupdate -label avs_rdq_push $afu_path/avs_rdq_push
add wave -noupdate -label avs_raq_pop $afu_path/avs_raq_pop
add wave -noupdate -label avs_rdq_pop $afu_path/avs_rdq_pop
add wave -noupdate -label avs_raq_full $afu_path/avs_raq_full
add wave -noupdate -label avs_rdq_full $afu_path/avs_rdq_full
add wave -noupdate -label avs_raq_empty $afu_path/avs_raq_empty
add wave -noupdate -label avs_rdq_empty $afu_path/avs_rdq_empty

# Vortex DRAM and snoop interface
add wave -noupdate -label vortex_enabled $afu_path/vortex_enabled
add wave -noupdate -label vx_reset $soc_path/reset
add wave -noupdate -label vx_dram_req_read $afu_path/vx_dram_req_read
add wave -noupdate -label vx_dram_req_write $afu_path/vx_dram_req_write
add wave -noupdate -label vx_dram_req_delay $afu_path/vx_dram_req_delay
add wave -noupdate -label vx_dram_req_addr -radix hexadecimal $afu_path/vx_dram_req_addr
add wave -noupdate -label vx_dram_req_data -radix hexadecimal $afu_path/vx_dram_req_data
add wave -noupdate -label out_dram_fill_rsp $soc_path/out_dram_fill_rsp
add wave -noupdate -label out_dram_fill_accept $soc_path/out_dram_fill_accept
add wave -noupdate -label vx_dram_fill_rsp_data -radix hexadecimal $afu_path/vx_dram_fill_rsp_data
add wave -noupdate -label vx_dram_fill_rsp_addr -radix hexadecimal $afu_path/vx_dram_fill_rsp_addr
add wave -noupdate -label llc_snp_req $soc_path/llc_snp_req
add wave -noupdate -label llc_snp_req_delay $soc_path/llc_snp_req_delay
add wave -noupdate -label out_ebreak $soc_path/out_ebreak

# Core 0 fetch stage and warp scheduler
add wave -noupdate -label warp_pc -radix hexadecimal -radixshowbase 0 $fetch_path/warp_pc
add wave -noupdate -label scheduled_warp $fetch_path/scheduled_warp
add wave -noupdate -label thread_mask $fetch_path/thread_mask
add wave -noupdate -label warp_num $fetch_path/warp_num
add wave -noupdate -label warp_active $sched_path/warp_active
add wave -noupdate -label warp_stalled $sched_path/warp_stalled
add wave -noupdate -label warp_lock $sched_path/warp_lock
add wave -noupdate -label use_active $sched_path/use_active
|
||||
# Restore the wave-window tree, cursor, layout options and zoom range.
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 2} {360293 ps} 0}
quietly wave cursor active 1

# Wave-window display options, applied in the listed order.
foreach {wave_opt wave_val} {
    -namecolwidth    195
    -valuecolwidth   100
    -justifyvalue    left
    -signalnamewidth 0
    -snapdistance    10
    -datasetprefix   0
    -rowmargin       4
    -childrowmargin  2
    -gridoffset      0
    -gridperiod      1
    -griddelta       40
    -timeline        0
    -timelineunits   ps
} {
    configure wave $wave_opt $wave_val
}

update
WaveRestoreZoom {346453 ps} {711141 ps}
|
||||
Reference in New Issue
Block a user