Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactory minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor udpate minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
This commit is contained in:
1
sim/opaesim/.gitignore
vendored
Normal file
1
sim/opaesim/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/obj_dir/*
|
||||
134
sim/opaesim/Makefile
Normal file
134
sim/opaesim/Makefile
Normal file
@@ -0,0 +1,134 @@
|
||||
XLEN ?= 32
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
AFU_DIR = $(RTL_DIR)/afu/opae
|
||||
SCRIPT_DIR = ../../hw/scripts
|
||||
THIRD_PARTY_DIR = ../../third_party
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I.. -I../../../hw -I../../common -I$(abspath $(DESTDIR))
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
LDFLAGS += -shared ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
|
||||
|
||||
# Control logic analyzer monitors
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED
|
||||
|
||||
# AFU parameters
|
||||
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4
|
||||
endif
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += fpga.cpp opae_sim.cpp
|
||||
|
||||
RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv
|
||||
RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fpu
|
||||
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
||||
RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
|
||||
FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
endif
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -DSIMULATION
|
||||
VL_FLAGS += -DXLEN_$(XLEN)
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += $(DBG_SCOPE_FLAGS)
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
|
||||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O3 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SCOPE_JSON = $(DESTDIR)/scope.json
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CXXFLAGS += -DNOPAE
|
||||
|
||||
PROJECT = libopae-c-sim.so
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/vortex.xml:
|
||||
verilator --xml-only -O0 $(VL_FLAGS) $(TOP) --xml-output $(DESTDIR)/vortex.xml
|
||||
|
||||
$(DESTDIR)/scope.json: $(DESTDIR)/vortex.xml
|
||||
$(SCRIPT_DIR)/scope.py $(DESTDIR)/vortex.xml -o $(DESTDIR)/scope.json
|
||||
|
||||
$(DESTDIR)/vortex_afu.h : $(AFU_DIR)/vortex_afu.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(AFU_DIR)/vortex_afu.vh -o $(DESTDIR)/vortex_afu.h
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) $(DESTDIR)/vortex_afu.h $(SCOPE_JSON)
|
||||
verilator --build --exe -O3 $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(DESTDIR)/$(PROJECT)
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/vortex.xml $(DESTDIR)/scope.json $(DESTDIR)/vortex_afu.h $(DESTDIR)/$(PROJECT)
|
||||
154
sim/opaesim/fpga.cpp
Normal file
154
sim/opaesim/fpga.cpp
Normal file
@@ -0,0 +1,154 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include "fpga.h"
|
||||
#include "opae_sim.h"
|
||||
#include <VX_config.h>
|
||||
#include <util.h>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern fpga_result fpgaGetProperties(fpga_token token, fpga_properties *prop) {
|
||||
__unused (token, prop);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesSetObjectType(fpga_properties prop, fpga_objtype objtype) {
|
||||
__unused (prop, objtype);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesSetGUID(fpga_properties prop, fpga_guid guid) {
|
||||
__unused (prop, guid);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaDestroyProperties(fpga_properties *prop) {
|
||||
__unused (prop);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaEnumerate(const fpga_properties *filters, uint32_t num_filters, fpga_token *tokens, uint32_t max_tokens, uint32_t *num_matches) {
|
||||
__unused (filters, num_filters, num_filters, tokens, max_tokens);
|
||||
if (num_matches) {
|
||||
*num_matches = 1;
|
||||
}
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaDestroyToken(fpga_token *token) {
|
||||
__unused (token);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesGetLocalMemorySize(const fpga_properties *filters, uint64_t* lms) {
|
||||
__unused (filters);
|
||||
if (lms) {
|
||||
#if (XLEN == 64)
|
||||
*lms = 0x200000000; // 8 GB
|
||||
#else
|
||||
*lms = 0x100000000; // 4 GB
|
||||
#endif
|
||||
}
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
|
||||
__unused (token);
|
||||
if (NULL == handle || flags != 0)
|
||||
return FPGA_INVALID_PARAM;
|
||||
auto sim = new opae_sim();
|
||||
*handle = reinterpret_cast<fpga_handle>(sim);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaClose(fpga_handle handle) {
|
||||
if (NULL == handle)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
delete sim;
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
if (NULL == handle || len == 0 || buf_addr == NULL || wsid == NULL)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
int ret = sim->prepare_buffer(len, buf_addr, wsid, flags);
|
||||
if (ret != 0)
|
||||
return FPGA_NO_MEMORY;
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) {
|
||||
if (NULL == handle)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
sim->release_buffer(wsid);
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr) {
|
||||
if (NULL == handle || ioaddr == NULL)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
sim->get_io_address(wsid, ioaddr);
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
if (NULL == handle || mmio_num != 0)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
sim->write_mmio64(mmio_num, offset, value);
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
if (NULL == handle || mmio_num != 0 || value == NULL)
|
||||
return FPGA_INVALID_PARAM;
|
||||
|
||||
auto sim = reinterpret_cast<opae_sim*>(handle);
|
||||
sim->read_mmio64(mmio_num, offset, value);
|
||||
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern const char *fpgaErrStr(fpga_result e) {
|
||||
return "";
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
54
sim/opaesim/fpga.h
Normal file
54
sim/opaesim/fpga.h
Normal file
@@ -0,0 +1,54 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __FPGA_H__
|
||||
#define __FPGA_H__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
FPGA_OK = 0, /**< Operation completed successfully */
|
||||
FPGA_INVALID_PARAM, /**< Invalid parameter supplied */
|
||||
FPGA_BUSY, /**< Resource is busy */
|
||||
FPGA_EXCEPTION, /**< An exception occurred */
|
||||
FPGA_NOT_FOUND, /**< A required resource was not found */
|
||||
FPGA_NO_MEMORY, /**< Not enough memory to complete operation */
|
||||
FPGA_NOT_SUPPORTED, /**< Requested operation is not supported */
|
||||
FPGA_NO_DRIVER, /**< Driver is not loaded */
|
||||
FPGA_NO_DAEMON, /**< FPGA Daemon (fpgad) is not running */
|
||||
FPGA_NO_ACCESS, /**< Insufficient privileges or permissions */
|
||||
FPGA_RECONF_ERROR /**< Error while reconfiguring FPGA */
|
||||
} fpga_result;
|
||||
|
||||
typedef enum {
|
||||
FPGA_DEVICE = 0,
|
||||
FPGA_ACCELERATOR
|
||||
} fpga_objtype;
|
||||
|
||||
typedef void *fpga_handle;
|
||||
|
||||
typedef void *fpga_token;
|
||||
|
||||
typedef void *fpga_properties;
|
||||
|
||||
typedef uint8_t fpga_guid[16];
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __FPGA_H__
|
||||
553
sim/opaesim/opae_sim.cpp
Normal file
553
sim/opaesim/opae_sim.cpp
Normal file
@@ -0,0 +1,553 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "opae_sim.h"
|
||||
|
||||
#include <verilated.h>
|
||||
#include "Vvortex_afu_shim.h"
|
||||
#include "Vvortex_afu_shim__Syms.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <mem.h>
|
||||
|
||||
#define RAMULATOR
|
||||
#include <ramulator/src/Gem5Wrapper.h>
|
||||
#include <ramulator/src/Request.h>
|
||||
#include <ramulator/src/Statistics.h>
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <vortex_afu.h>
|
||||
|
||||
#include <future>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#else
|
||||
#define MEMORY_BANKS 2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef MEM_CYCLE_RATIO
|
||||
#define MEM_CYCLE_RATIO -1
|
||||
#endif
|
||||
|
||||
#undef MEM_BLOCK_SIZE
|
||||
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
|
||||
#define CCI_LATENCY 8
|
||||
#define CCI_RAND_MOD 8
|
||||
#define CCI_RQ_SIZE 16
|
||||
#define CCI_WQ_SIZE 16
|
||||
|
||||
#ifndef TRACE_START_TIME
|
||||
#define TRACE_START_TIME 0ull
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_STOP_TIME
|
||||
#define TRACE_STOP_TIME -1ull
|
||||
#endif
|
||||
|
||||
#ifndef VERILATOR_RESET_VALUE
|
||||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
#define CPU_GPU_LATENCY 200
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
&& timestamp < trace_stop_time)
|
||||
return true;
|
||||
return trace_enabled;
|
||||
}
|
||||
|
||||
void sim_trace_enable(bool enable) {
|
||||
trace_enabled = enable;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class opae_sim::Impl {
|
||||
public:
|
||||
Impl()
|
||||
: stop_(false)
|
||||
, host_buffer_ids_(0) {
|
||||
// force random values for unitialized signals
|
||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||
Verilated::randSeed(50);
|
||||
|
||||
// turn off assertion before reset
|
||||
Verilated::assertOn(false);
|
||||
|
||||
// create RTL module instance
|
||||
device_ = new Vvortex_afu_shim();
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
trace_ = new VerilatedVcdC();
|
||||
device_->trace(trace_, 99);
|
||||
trace_->open("trace.vcd");
|
||||
#endif
|
||||
|
||||
ram_ = new RAM(RAM_PAGE_SIZE);
|
||||
|
||||
// initialize dram simulator
|
||||
ramulator::Config ram_config;
|
||||
ram_config.add("standard", "DDR4");
|
||||
ram_config.add("channels", std::to_string(MEMORY_BANKS));
|
||||
ram_config.add("ranks", "1");
|
||||
ram_config.add("speed", "DDR4_2400R");
|
||||
ram_config.add("org", "DDR4_4Gb_x8");
|
||||
ram_config.add("mapping", "defaultmapping");
|
||||
ram_config.set_core_num(1);
|
||||
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
// launch execution thread
|
||||
future_ = std::async(std::launch::async, [&]{
|
||||
while (!stop_) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
this->tick();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
stop_ = true;
|
||||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
for (auto& buffer : host_buffers_) {
|
||||
aligned_free(buffer.second.data);
|
||||
}
|
||||
#ifdef VCD_OUTPUT
|
||||
trace_->close();
|
||||
delete trace_;
|
||||
#endif
|
||||
delete device_;
|
||||
|
||||
delete ram_;
|
||||
|
||||
if (dram_) {
|
||||
dram_->finish();
|
||||
Stats::statlist.printall();
|
||||
delete dram_;
|
||||
}
|
||||
}
|
||||
|
||||
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
auto alloc = aligned_malloc(len, CACHE_BLOCK_SIZE);
|
||||
if (alloc == NULL)
|
||||
return -1;
|
||||
// set uninitialized data to "baadf00d"
|
||||
for (uint32_t i = 0; i < len; ++i) {
|
||||
((uint8_t*)alloc)[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff;
|
||||
}
|
||||
host_buffer_t buffer;
|
||||
buffer.data = (uint64_t*)alloc;
|
||||
buffer.size = len;
|
||||
buffer.ioaddr = uintptr_t(alloc);
|
||||
auto buffer_id = host_buffer_ids_++;
|
||||
host_buffers_.emplace(buffer_id, buffer);
|
||||
*buf_addr = alloc;
|
||||
*wsid = buffer_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void release_buffer(uint64_t wsid) {
|
||||
auto it = host_buffers_.find(wsid);
|
||||
if (it != host_buffers_.end()) {
|
||||
aligned_free(it->second.data);
|
||||
host_buffers_.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
void get_io_address(uint64_t wsid, uint64_t *ioaddr) {
|
||||
*ioaddr = host_buffers_[wsid].ioaddr;
|
||||
}
|
||||
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
// simulate CPU-GPU latency
|
||||
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
|
||||
this->tick();
|
||||
|
||||
// simulate mmio request
|
||||
device_->vcp2af_sRxPort_c0_mmioRdValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
this->tick();
|
||||
device_->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
assert(device_->af2cp_sTxPort_c2_mmioRdValid);
|
||||
*value = device_->af2cp_sTxPort_c2_data;
|
||||
}
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
// simulate CPU-GPU latency
|
||||
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
|
||||
this->tick();
|
||||
|
||||
// simulate mmio request
|
||||
device_->vcp2af_sRxPort_c0_mmioWrValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
memcpy(device_->vcp2af_sRxPort_c0_data, &value, 8);
|
||||
this->tick();
|
||||
device_->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void reset() {
|
||||
cci_reads_.clear();
|
||||
cci_writes_.clear();
|
||||
device_->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
device_->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
device_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
device_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
device_->vcp2af_sRxPort_c0_TxAlmFull = 0;
|
||||
device_->vcp2af_sRxPort_c1_TxAlmFull = 0;
|
||||
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
pending_mem_reqs_[b].clear();
|
||||
device_->avs_readdatavalid[b] = 0;
|
||||
device_->avs_waitrequest[b] = 0;
|
||||
}
|
||||
|
||||
device_->reset = 1;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
device_->clk = 0;
|
||||
this->eval();
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
device_->reset = 0;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
device_->clk = 0;
|
||||
this->eval();
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
void tick() {
|
||||
this->sRxPort_bus();
|
||||
this->sTxPort_bus();
|
||||
this->avs_bus();
|
||||
|
||||
if (!dram_queue_.empty()) {
|
||||
if (dram_->send(dram_queue_.front()))
|
||||
dram_queue_.pop();
|
||||
}
|
||||
|
||||
device_->clk = 0;
|
||||
this->eval();
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
if (MEM_CYCLE_RATIO > 0) {
|
||||
auto cycle = timestamp / 2;
|
||||
if ((cycle % MEM_CYCLE_RATIO) == 0)
|
||||
dram_->tick();
|
||||
} else {
|
||||
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
|
||||
dram_->tick();
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
|
||||
void eval() {
|
||||
device_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
if (sim_trace_enabled()) {
|
||||
trace_->dump(timestamp);
|
||||
}
|
||||
#endif
|
||||
++timestamp;
|
||||
}
|
||||
|
||||
void sRxPort_bus() {
|
||||
// check mmio request
|
||||
bool mmio_req_enabled = device_->vcp2af_sRxPort_c0_mmioRdValid
|
||||
|| device_->vcp2af_sRxPort_c0_mmioWrValid;
|
||||
|
||||
// schedule CCI read responses
|
||||
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
|
||||
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_rd_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// schedule CCI write responses
|
||||
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
|
||||
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_wr_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// send CCI write response
|
||||
device_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
if (cci_wr_it != cci_writes_.end()) {
|
||||
device_->vcp2af_sRxPort_c1_rspValid = 1;
|
||||
device_->vcp2af_sRxPort_c1_hdr_resp_type = 0;
|
||||
device_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
|
||||
cci_writes_.erase(cci_wr_it);
|
||||
}
|
||||
|
||||
// send CCI read response (ensure mmio disabled)
|
||||
device_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
if (!mmio_req_enabled
|
||||
&& (cci_rd_it != cci_reads_.end())) {
|
||||
device_->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
|
||||
memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||
device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
||||
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
|
||||
printf("\n");*/
|
||||
cci_reads_.erase(cci_rd_it);
|
||||
}
|
||||
}
|
||||
|
||||
void sTxPort_bus() {
|
||||
// process read requests
|
||||
if (device_->af2cp_sTxPort_c0_valid) {
|
||||
assert(!device_->vcp2af_sRxPort_c0_TxAlmFull);
|
||||
cci_rd_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.addr = device_->af2cp_sTxPort_c0_hdr_address;
|
||||
cci_req.mdata = device_->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
cci_reads_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// process write requests
|
||||
if (device_->af2cp_sTxPort_c1_valid) {
|
||||
assert(!device_->vcp2af_sRxPort_c1_TxAlmFull);
|
||||
cci_wr_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.mdata = device_->af2cp_sTxPort_c1_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(host_ptr, device_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
|
||||
cci_writes_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// check queues overflow
|
||||
device_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
|
||||
device_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
|
||||
}
|
||||
|
||||
void avs_bus() {
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
// process memory responses
|
||||
device_->avs_readdatavalid[b] = 0;
|
||||
if (!pending_mem_reqs_[b].empty()
|
||||
&& (*pending_mem_reqs_[b].begin())->ready) {
|
||||
auto mem_rd_it = pending_mem_reqs_[b].begin();
|
||||
auto mem_req = *mem_rd_it;
|
||||
device_->avs_readdatavalid[b] = 1;
|
||||
memcpy(device_->avs_readdata[b], mem_req->data.data(), MEM_BLOCK_SIZE);
|
||||
uint32_t addr = mem_req->addr;
|
||||
pending_mem_reqs_[b].erase(mem_rd_it);
|
||||
delete mem_req;
|
||||
}
|
||||
|
||||
// process memory requests
|
||||
assert(!device_->avs_read[b] || !device_->avs_write[b]);
|
||||
unsigned byte_addr = (device_->avs_address[b] * MEMORY_BANKS + b) * MEM_BLOCK_SIZE;
|
||||
if (device_->avs_write[b]) {
|
||||
uint64_t byteen = device_->avs_byteenable[b];
|
||||
uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data());
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, byte_addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::WRITE,
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
} else
|
||||
if (device_->avs_read[b]) {
|
||||
auto mem_req = new mem_rd_req_t();
|
||||
mem_req->addr = device_->avs_address[b];
|
||||
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_[b].emplace_back(mem_req);
|
||||
|
||||
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE);
|
||||
for (auto& req : pending_mem_reqs_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind([](ramulator::Request& dram_req, mem_rd_req_t* mem_req) {
|
||||
mem_req->ready = true;
|
||||
}, placeholders::_1, mem_req),
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
}
|
||||
|
||||
device_->avs_waitrequest[b] = false;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
bool ready;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> data;
|
||||
uint32_t addr;
|
||||
} mem_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
|
||||
uint64_t addr;
|
||||
uint32_t mdata;
|
||||
} cci_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
uint32_t mdata;
|
||||
} cci_wr_req_t;
|
||||
|
||||
typedef struct {
|
||||
uint64_t* data;
|
||||
size_t size;
|
||||
uint64_t ioaddr;
|
||||
} host_buffer_t;
|
||||
|
||||
std::future<void> future_;
|
||||
bool stop_;
|
||||
|
||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
int64_t host_buffer_ids_;
|
||||
|
||||
std::list<mem_rd_req_t*> pending_mem_reqs_[MEMORY_BANKS];
|
||||
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
|
||||
std::list<cci_wr_req_t> cci_writes_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
RAM* ram_;
|
||||
|
||||
ramulator::Gem5Wrapper* dram_;
|
||||
|
||||
std::queue<ramulator::Request> dram_queue_;
|
||||
|
||||
Vvortex_afu_shim *device_;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace_;
|
||||
#endif
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
opae_sim::opae_sim()
|
||||
: impl_(new Impl())
|
||||
{}
|
||||
|
||||
opae_sim::~opae_sim() {
|
||||
delete impl_;
|
||||
}
|
||||
|
||||
int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
return impl_->prepare_buffer(len, buf_addr, wsid, flags);
|
||||
}
|
||||
|
||||
void opae_sim::release_buffer(uint64_t wsid) {
|
||||
impl_->release_buffer(wsid);
|
||||
}
|
||||
|
||||
void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
|
||||
impl_->get_io_address(wsid, ioaddr);
|
||||
}
|
||||
|
||||
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
impl_->write_mmio64(mmio_num, offset, value);
|
||||
}
|
||||
|
||||
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
impl_->read_mmio64(mmio_num, offset, value);
|
||||
}
|
||||
43
sim/opaesim/opae_sim.h
Normal file
43
sim/opaesim/opae_sim.h
Normal file
@@ -0,0 +1,43 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
namespace vortex {
|
||||
|
||||
class RAM;
|
||||
|
||||
class opae_sim {
|
||||
public:
|
||||
|
||||
opae_sim();
|
||||
virtual ~opae_sim();
|
||||
|
||||
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
void release_buffer(uint64_t wsid);
|
||||
|
||||
void get_io_address(uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
private:
|
||||
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
};
|
||||
|
||||
}
|
||||
8
sim/opaesim/verilator.vlt
Normal file
8
sim/opaesim/verilator.vlt
Normal file
@@ -0,0 +1,8 @@
|
||||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "*/fpnew/src/*"
|
||||
lint_off -rule UNOPTFLAT -file "*/fpnew/src/*"
|
||||
lint_off -file "*/fpnew/src/*"
|
||||
|
||||
lint_off -file "*/afu/opae/ccip/ccip_if_pkg.sv"
|
||||
lint_off -file "*/afu/opae/local_mem_cfg_pkg.sv"
|
||||
178
sim/opaesim/vortex_afu_shim.sv
Normal file
178
sim/opaesim/vortex_afu_shim.sv
Normal file
@@ -0,0 +1,178 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`include "vortex_afu.vh"
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; (
|
||||
// global signals
|
||||
input clk,
|
||||
input reset,
|
||||
|
||||
// IF signals between CCI and AFU
|
||||
input logic vcp2af_sRxPort_c0_TxAlmFull,
|
||||
input logic vcp2af_sRxPort_c1_TxAlmFull,
|
||||
|
||||
input t_ccip_vc vcp2af_sRxPort_c0_hdr_vc_used,
|
||||
input logic vcp2af_sRxPort_c0_hdr_rsvd1,
|
||||
input logic vcp2af_sRxPort_c0_hdr_hit_miss,
|
||||
input logic [1:0] vcp2af_sRxPort_c0_hdr_rsvd0,
|
||||
input t_ccip_clNum vcp2af_sRxPort_c0_hdr_cl_num,
|
||||
input t_ccip_c0_rsp vcp2af_sRxPort_c0_hdr_resp_type,
|
||||
input t_ccip_mdata vcp2af_sRxPort_c0_hdr_mdata,
|
||||
input t_ccip_clData vcp2af_sRxPort_c0_data,
|
||||
input logic vcp2af_sRxPort_c0_rspValid,
|
||||
input logic vcp2af_sRxPort_c0_mmioRdValid,
|
||||
input logic vcp2af_sRxPort_c0_mmioWrValid,
|
||||
|
||||
input t_ccip_mmioAddr vcp2af_sRxPort_c0_ReqMmioHdr_address,
|
||||
input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length,
|
||||
input logic vcp2af_sRxPort_c0_ReqMmioHdr_rsvd,
|
||||
input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid,
|
||||
|
||||
input t_ccip_vc vcp2af_sRxPort_c1_hdr_vc_used,
|
||||
input logic vcp2af_sRxPort_c1_hdr_rsvd1,
|
||||
input logic vcp2af_sRxPort_c1_hdr_hit_miss,
|
||||
input logic vcp2af_sRxPort_c1_hdr_format,
|
||||
input logic vcp2af_sRxPort_c1_hdr_rsvd0,
|
||||
input t_ccip_clNum vcp2af_sRxPort_c1_hdr_cl_num,
|
||||
input t_ccip_c1_rsp vcp2af_sRxPort_c1_hdr_resp_type,
|
||||
input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata,
|
||||
input logic vcp2af_sRxPort_c1_rspValid,
|
||||
|
||||
output t_ccip_vc af2cp_sTxPort_c0_hdr_vc_sel,
|
||||
output logic [1:0] af2cp_sTxPort_c0_hdr_rsvd1,
|
||||
output t_ccip_clLen af2cp_sTxPort_c0_hdr_cl_len,
|
||||
output t_ccip_c0_req af2cp_sTxPort_c0_hdr_req_type,
|
||||
output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0,
|
||||
output t_ccip_clAddr af2cp_sTxPort_c0_hdr_address,
|
||||
output t_ccip_mdata af2cp_sTxPort_c0_hdr_mdata,
|
||||
output logic af2cp_sTxPort_c0_valid,
|
||||
|
||||
output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd2,
|
||||
output t_ccip_vc af2cp_sTxPort_c1_hdr_vc_sel,
|
||||
output logic af2cp_sTxPort_c1_hdr_sop,
|
||||
output logic af2cp_sTxPort_c1_hdr_rsvd1,
|
||||
output t_ccip_clLen af2cp_sTxPort_c1_hdr_cl_len,
|
||||
output t_ccip_c1_req af2cp_sTxPort_c1_hdr_req_type,
|
||||
output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0,
|
||||
output t_ccip_clAddr af2cp_sTxPort_c1_hdr_address,
|
||||
output t_ccip_mdata af2cp_sTxPort_c1_hdr_mdata,
|
||||
output t_ccip_clData af2cp_sTxPort_c1_data,
|
||||
output logic af2cp_sTxPort_c1_valid,
|
||||
|
||||
output t_ccip_tid af2cp_sTxPort_c2_hdr_tid,
|
||||
output logic af2cp_sTxPort_c2_mmioRdValid,
|
||||
output t_ccip_mmioData af2cp_sTxPort_c2_data,
|
||||
|
||||
// Avalon signals for local memory access
|
||||
output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS]
|
||||
);
|
||||
|
||||
t_if_ccip_Rx cp2af_sRxPort;
|
||||
t_if_ccip_Tx af2cp_sTxPort;
|
||||
|
||||
vortex_afu #(
|
||||
.NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS)
|
||||
) afu (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.cp2af_sRxPort(cp2af_sRxPort),
|
||||
.af2cp_sTxPort(af2cp_sTxPort),
|
||||
.avs_writedata(avs_writedata),
|
||||
.avs_readdata(avs_readdata),
|
||||
.avs_address(avs_address),
|
||||
.avs_waitrequest(avs_waitrequest),
|
||||
.avs_write(avs_write),
|
||||
.avs_read(avs_read),
|
||||
.avs_byteenable(avs_byteenable),
|
||||
.avs_burstcount(avs_burstcount),
|
||||
.avs_readdatavalid(avs_readdatavalid)
|
||||
);
|
||||
|
||||
t_if_ccip_c0_RxHdr c0_RxHdr;
|
||||
always @ (*) begin
|
||||
c0_RxHdr = 'x;
|
||||
if (vcp2af_sRxPort_c0_mmioWrValid || vcp2af_sRxPort_c0_mmioRdValid) begin
|
||||
c0_RxHdr.reqMmioHdr.address = vcp2af_sRxPort_c0_ReqMmioHdr_address;
|
||||
c0_RxHdr.reqMmioHdr.length = vcp2af_sRxPort_c0_ReqMmioHdr_length;
|
||||
c0_RxHdr.reqMmioHdr.rsvd = vcp2af_sRxPort_c0_ReqMmioHdr_rsvd;
|
||||
c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid;
|
||||
end else begin
|
||||
c0_RxHdr.rspMemHdr.vc_used = vcp2af_sRxPort_c0_hdr_vc_used;
|
||||
c0_RxHdr.rspMemHdr.rsvd1 = vcp2af_sRxPort_c0_hdr_rsvd1;
|
||||
c0_RxHdr.rspMemHdr.hit_miss = vcp2af_sRxPort_c0_hdr_hit_miss;
|
||||
c0_RxHdr.rspMemHdr.rsvd0 = vcp2af_sRxPort_c0_hdr_rsvd0;
|
||||
c0_RxHdr.rspMemHdr.cl_num = vcp2af_sRxPort_c0_hdr_cl_num;
|
||||
c0_RxHdr.rspMemHdr.resp_type = vcp2af_sRxPort_c0_hdr_resp_type;
|
||||
c0_RxHdr.rspMemHdr.mdata = vcp2af_sRxPort_c0_hdr_mdata;
|
||||
end
|
||||
end
|
||||
|
||||
assign cp2af_sRxPort.c0TxAlmFull = vcp2af_sRxPort_c0_TxAlmFull;
|
||||
assign cp2af_sRxPort.c1TxAlmFull = vcp2af_sRxPort_c1_TxAlmFull;
|
||||
|
||||
assign cp2af_sRxPort.c0.hdr = c0_RxHdr;
|
||||
assign cp2af_sRxPort.c0.data = vcp2af_sRxPort_c0_data;
|
||||
assign cp2af_sRxPort.c0.rspValid = vcp2af_sRxPort_c0_rspValid;
|
||||
assign cp2af_sRxPort.c0.mmioRdValid = vcp2af_sRxPort_c0_mmioRdValid;
|
||||
assign cp2af_sRxPort.c0.mmioWrValid = vcp2af_sRxPort_c0_mmioWrValid;
|
||||
|
||||
assign cp2af_sRxPort.c1.hdr.vc_used = vcp2af_sRxPort_c1_hdr_vc_used;
|
||||
assign cp2af_sRxPort.c1.hdr.rsvd1 = vcp2af_sRxPort_c1_hdr_rsvd1;
|
||||
assign cp2af_sRxPort.c1.hdr.hit_miss = vcp2af_sRxPort_c1_hdr_hit_miss;
|
||||
assign cp2af_sRxPort.c1.hdr.format = vcp2af_sRxPort_c1_hdr_format;
|
||||
assign cp2af_sRxPort.c1.hdr.rsvd0 = vcp2af_sRxPort_c1_hdr_rsvd0;
|
||||
assign cp2af_sRxPort.c1.hdr.cl_num = vcp2af_sRxPort_c1_hdr_cl_num;
|
||||
assign cp2af_sRxPort.c1.hdr.resp_type = vcp2af_sRxPort_c1_hdr_resp_type;
|
||||
assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata;
|
||||
assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid;
|
||||
|
||||
assign af2cp_sTxPort_c0_hdr_vc_sel = af2cp_sTxPort.c0.hdr.vc_sel;
|
||||
assign af2cp_sTxPort_c0_hdr_rsvd1 = af2cp_sTxPort.c0.hdr.rsvd1;
|
||||
assign af2cp_sTxPort_c0_hdr_cl_len = af2cp_sTxPort.c0.hdr.cl_len;
|
||||
assign af2cp_sTxPort_c0_hdr_req_type = af2cp_sTxPort.c0.hdr.req_type;
|
||||
assign af2cp_sTxPort_c0_hdr_rsvd0 = af2cp_sTxPort.c0.hdr.rsvd0;
|
||||
assign af2cp_sTxPort_c0_hdr_address = af2cp_sTxPort.c0.hdr.address;
|
||||
assign af2cp_sTxPort_c0_hdr_mdata = af2cp_sTxPort.c0.hdr.mdata;
|
||||
assign af2cp_sTxPort_c0_valid = af2cp_sTxPort.c0.valid;
|
||||
|
||||
assign af2cp_sTxPort_c1_hdr_rsvd2 = af2cp_sTxPort.c1.hdr.rsvd2;
|
||||
assign af2cp_sTxPort_c1_hdr_vc_sel = af2cp_sTxPort.c1.hdr.vc_sel;
|
||||
assign af2cp_sTxPort_c1_hdr_sop = af2cp_sTxPort.c1.hdr.sop;
|
||||
assign af2cp_sTxPort_c1_hdr_rsvd1 = af2cp_sTxPort.c1.hdr.rsvd1;
|
||||
assign af2cp_sTxPort_c1_hdr_cl_len = af2cp_sTxPort.c1.hdr.cl_len;
|
||||
assign af2cp_sTxPort_c1_hdr_req_type = af2cp_sTxPort.c1.hdr.req_type;
|
||||
assign af2cp_sTxPort_c1_hdr_rsvd0 = af2cp_sTxPort.c1.hdr.rsvd0;
|
||||
assign af2cp_sTxPort_c1_hdr_address = af2cp_sTxPort.c1.hdr.address;
|
||||
assign af2cp_sTxPort_c1_hdr_mdata = af2cp_sTxPort.c1.hdr.mdata;
|
||||
assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data;
|
||||
assign af2cp_sTxPort_c1_valid = af2cp_sTxPort.c1.valid;
|
||||
|
||||
assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid;
|
||||
assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid;
|
||||
assign af2cp_sTxPort_c2_data = af2cp_sTxPort.c2.data;
|
||||
|
||||
endmodule
|
||||
Reference in New Issue
Block a user