Merge branch 'master' into graphics
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized
|
||||
|
||||
CFLAGS += -DUSE_VLSIM -fPIC -Wno-maybe-uninitialized
|
||||
CFLAGS += -I../../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
@@ -13,7 +12,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
@@ -22,15 +21,9 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
CFLAGS += -DUSE_VLSIM $(CONFIGS)
|
||||
|
||||
CFLAGS += $(CONFIGS)
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
@@ -49,10 +42,11 @@ TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
|
||||
VL_FLAGS += -Wno-DECLFILENAME
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
@@ -83,16 +77,20 @@ endif
|
||||
VL_FLAGS += -DNOPAE
|
||||
CFLAGS += -DNOPAE
|
||||
|
||||
# use DPI FPU
|
||||
VL_FLAGS += -DFPU_DPI
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh
|
||||
../../../hw/scripts/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(PROJECT): $(SRCS) vortex_afu.h
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh
|
||||
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh vortex_afu.h
|
||||
|
||||
@@ -10,10 +10,23 @@
|
||||
|
||||
#define RESET_DELAY 4
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 24
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
#define ENABLE_MEM_STALLS
|
||||
|
||||
#ifndef MEM_LATENCY
|
||||
#define MEM_LATENCY 24
|
||||
#endif
|
||||
|
||||
#ifndef MEM_RQ_SIZE
|
||||
#define MEM_RQ_SIZE 16
|
||||
#endif
|
||||
|
||||
#ifndef MEM_STALLS_MODULO
|
||||
#define MEM_STALLS_MODULO 16
|
||||
#endif
|
||||
|
||||
#ifndef VERILATOR_RESET_VALUE
|
||||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
|
||||
uint64_t timestamp = 0;
|
||||
|
||||
@@ -23,7 +36,7 @@ double sc_time_stamp() {
|
||||
|
||||
opae_sim::opae_sim() {
|
||||
// force random values for uninitialized signals
|
||||
Verilated::randReset(2);
|
||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||
Verilated::randSeed(50);
|
||||
|
||||
// Turn off assertion before reset
|
||||
@@ -137,16 +150,19 @@ void opae_sim::flush() {
|
||||
|
||||
void opae_sim::reset() {
|
||||
|
||||
host_buffers_.clear();
|
||||
dram_reads_.clear();
|
||||
host_buffers_.clear();
|
||||
cci_reads_.clear();
|
||||
cci_writes_.clear();
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
|
||||
vortex_afu_->avs_readdatavalid = 0;
|
||||
vortex_afu_->avs_waitrequest = 0;
|
||||
|
||||
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
|
||||
mem_reads_[b].clear();
|
||||
vortex_afu_->avs_readdatavalid[b] = 0;
|
||||
vortex_afu_->avs_waitrequest[b] = 0;
|
||||
}
|
||||
|
||||
vortex_afu_->reset = 1;
|
||||
|
||||
@@ -268,84 +284,89 @@ void opae_sim::sTxPort_bus() {
|
||||
}
|
||||
|
||||
void opae_sim::avs_bus() {
|
||||
// update DRAM responses schedule
|
||||
for (auto& rsp : dram_reads_) {
|
||||
if (rsp.cycles_left > 0)
|
||||
rsp.cycles_left -= 1;
|
||||
}
|
||||
|
||||
// schedule DRAM responses in FIFO order
|
||||
std::list<dram_rd_req_t>::iterator dram_rd_it(dram_reads_.end());
|
||||
if (!dram_reads_.empty()
|
||||
&& (0 == dram_reads_.begin()->cycles_left)) {
|
||||
dram_rd_it = dram_reads_.begin();
|
||||
}
|
||||
|
||||
// send DRAM response
|
||||
vortex_afu_->avs_readdatavalid = 0;
|
||||
if (dram_rd_it != dram_reads_.end()) {
|
||||
vortex_afu_->avs_readdatavalid = 1;
|
||||
memcpy(vortex_afu_->avs_readdata, dram_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||
uint32_t addr = dram_rd_it->addr;
|
||||
dram_reads_.erase(dram_rd_it);
|
||||
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * CACHE_BLOCK_SIZE);
|
||||
for (auto& req : dram_reads_) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
|
||||
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
|
||||
// update memory responses schedule
|
||||
for (auto& rsp : mem_reads_[b]) {
|
||||
if (rsp.cycles_left > 0)
|
||||
rsp.cycles_left -= 1;
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
|
||||
// handle DRAM stalls
|
||||
bool dram_stalled = false;
|
||||
#ifdef ENABLE_DRAM_STALLS
|
||||
if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) {
|
||||
dram_stalled = true;
|
||||
} else
|
||||
if (dram_reads_.size() >= DRAM_RQ_SIZE) {
|
||||
dram_stalled = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// process DRAM requests
|
||||
if (!dram_stalled) {
|
||||
assert(!vortex_afu_->avs_read || !vortex_afu_->avs_write);
|
||||
if (vortex_afu_->avs_write) {
|
||||
assert(0 == vortex_afu_->mem_bank_select);
|
||||
uint64_t byteen = vortex_afu_->avs_byteenable;
|
||||
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
|
||||
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
ram_[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
// schedule memory responses in FIFO order
|
||||
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_[b].end());
|
||||
if (!mem_reads_[b].empty()
|
||||
&& (0 == mem_reads_[b].begin()->cycles_left)) {
|
||||
mem_rd_it = mem_reads_[b].begin();
|
||||
}
|
||||
if (vortex_afu_->avs_read) {
|
||||
assert(0 == vortex_afu_->mem_bank_select);
|
||||
dram_rd_req_t dram_req;
|
||||
dram_req.addr = vortex_afu_->avs_address;
|
||||
ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.data.data());
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
for (auto& rsp : dram_reads_) {
|
||||
if (dram_req.addr == rsp.addr) {
|
||||
dram_req.cycles_left = rsp.cycles_left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dram_reads_.emplace_back(dram_req);
|
||||
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * CACHE_BLOCK_SIZE);
|
||||
for (auto& req : dram_reads_) {
|
||||
|
||||
// send memory response
|
||||
vortex_afu_->avs_readdatavalid[b] = 0;
|
||||
if (mem_rd_it != mem_reads_[b].end()) {
|
||||
vortex_afu_->avs_readdatavalid[b] = 1;
|
||||
memcpy(vortex_afu_->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE);
|
||||
uint32_t addr = mem_rd_it->addr;
|
||||
mem_reads_[b].erase(mem_rd_it);
|
||||
/*printf("%0ld: [sim] MEM Rd Rsp: addr=%x, pending={", timestamp, addr * MEM_BLOCK_SIZE);
|
||||
for (auto& req : mem_reads_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
|
||||
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
|
||||
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vortex_afu_->avs_waitrequest = dram_stalled;
|
||||
// handle memory stalls
|
||||
bool mem_stalled = false;
|
||||
#ifdef ENABLE_MEM_STALLS
|
||||
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
|
||||
mem_stalled = true;
|
||||
} else
|
||||
if (mem_reads_[b].size() >= MEM_RQ_SIZE) {
|
||||
mem_stalled = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// process memory requests
|
||||
if (!mem_stalled) {
|
||||
assert(!vortex_afu_->avs_read[b] || !vortex_afu_->avs_write[b]);
|
||||
if (vortex_afu_->avs_write[b]) {
|
||||
uint64_t byteen = vortex_afu_->avs_byteenable[b];
|
||||
unsigned base_addr = vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE;
|
||||
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata[b]);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
ram_[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
/*printf("%0ld: [sim] MEM Wr Req: addr=%x, data=", timestamp, base_addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
}
|
||||
if (vortex_afu_->avs_read[b]) {
|
||||
mem_rd_req_t mem_req;
|
||||
mem_req.addr = vortex_afu_->avs_address[b];
|
||||
ram_.read(vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data());
|
||||
mem_req.cycles_left = MEM_LATENCY;
|
||||
for (auto& rsp : mem_reads_[b]) {
|
||||
if (mem_req.addr == rsp.addr) {
|
||||
mem_req.cycles_left = rsp.cycles_left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mem_reads_[b].emplace_back(mem_req);
|
||||
/*printf("%0ld: [sim] MEM Rd Req: addr=%x, pending={", timestamp, mem_req.addr * MEM_BLOCK_SIZE);
|
||||
for (auto& req : mem_reads_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
}
|
||||
|
||||
vortex_afu_->avs_waitrequest[b] = mem_stalled;
|
||||
}
|
||||
}
|
||||
@@ -1,14 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
#include "verilated.h"
|
||||
//#include "verilated_stub.h"
|
||||
#include "Vvortex_afu_shim.h"
|
||||
#include "Vvortex_afu_shim__Syms.h"
|
||||
#include "verilated.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
#include "vortex_afu.h"
|
||||
#include "ram.h"
|
||||
|
||||
#include <ostream>
|
||||
@@ -16,7 +18,10 @@
|
||||
#include <list>
|
||||
#include <unordered_map>
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
#undef MEM_BLOCK_SIZE
|
||||
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
|
||||
class opae_sim {
|
||||
public:
|
||||
@@ -40,9 +45,9 @@ private:
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> data;
|
||||
uint32_t addr;
|
||||
} dram_rd_req_t;
|
||||
} mem_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
@@ -77,7 +82,7 @@ private:
|
||||
|
||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
|
||||
std::list<dram_rd_req_t> dram_reads_;
|
||||
std::list<mem_rd_req_t> mem_reads_ [PLATFORM_PARAM_LOCAL_MEMORY_BANKS];
|
||||
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`include "vortex_afu.vh"
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
import ccip_if_pkg::*;
|
||||
import local_mem_cfg_pkg::*;
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
|
||||
module vortex_afu_shim #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
) (
|
||||
`include "VX_define.vh"
|
||||
|
||||
module vortex_afu_shim (
|
||||
// global signals
|
||||
input clk,
|
||||
input reset,
|
||||
@@ -69,24 +72,22 @@ module vortex_afu_shim #(
|
||||
output t_ccip_mmioData af2cp_sTxPort_c2_data,
|
||||
|
||||
// Avalon signals for local memory access
|
||||
output t_local_mem_data avs_writedata,
|
||||
input t_local_mem_data avs_readdata,
|
||||
output t_local_mem_addr avs_address,
|
||||
input logic avs_waitrequest,
|
||||
output logic avs_write,
|
||||
output logic avs_read,
|
||||
output t_local_mem_byte_mask avs_byteenable,
|
||||
output t_local_mem_burst_cnt avs_burstcount,
|
||||
input avs_readdatavalid,
|
||||
|
||||
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
|
||||
output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
|
||||
input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS]
|
||||
);
|
||||
|
||||
t_if_ccip_Rx cp2af_sRxPort;
|
||||
t_if_ccip_Tx af2cp_sTxPort;
|
||||
|
||||
vortex_afu #(
|
||||
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
|
||||
.NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS)
|
||||
) afu (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
@@ -100,8 +101,7 @@ vortex_afu #(
|
||||
.avs_read(avs_read),
|
||||
.avs_byteenable(avs_byteenable),
|
||||
.avs_burstcount(avs_burstcount),
|
||||
.avs_readdatavalid(avs_readdatavalid),
|
||||
.mem_bank_select(mem_bank_select)
|
||||
.avs_readdatavalid(avs_readdatavalid)
|
||||
);
|
||||
|
||||
t_if_ccip_c0_RxHdr c0_RxHdr;
|
||||
|
||||
Reference in New Issue
Block a user