fixed OPAE crash, added custom bram module to control rw collision, dogfood testcase argument, optimized buffered fifo, quartus build optimization flags
This commit is contained in:
@@ -7,7 +7,7 @@ CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
|
|||||||
|
|
||||||
LDFLAGS += -L$(OPAE_HOME)/lib
|
LDFLAGS += -L$(OPAE_HOME)/lib
|
||||||
|
|
||||||
SCOPE=1
|
#SCOPE=1
|
||||||
|
|
||||||
# stack execution protection
|
# stack execution protection
|
||||||
LDFLAGS +=-z noexecstack
|
LDFLAGS +=-z noexecstack
|
||||||
@@ -32,8 +32,6 @@ ASE_LIBS += -luuid -lopae-c-ase
|
|||||||
|
|
||||||
VLSIM_LIBS += -lopae-c-vlsim
|
VLSIM_LIBS += -lopae-c-vlsim
|
||||||
|
|
||||||
LIB_DIR=../lib
|
|
||||||
|
|
||||||
ASE_DIR = ase
|
ASE_DIR = ase
|
||||||
|
|
||||||
VLSIM_DIR = vlsim
|
VLSIM_DIR = vlsim
|
||||||
@@ -67,10 +65,10 @@ fpga: $(SRCS)
|
|||||||
asesim: $(SRCS) $(ASE_DIR)
|
asesim: $(SRCS) $(ASE_DIR)
|
||||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
||||||
|
|
||||||
vlsim: $(SRCS) opae-vlsim
|
vlsim: $(SRCS) vlsim-hw
|
||||||
$(CXX) $(CXXFLAGS) -L./vlsim -DUSE_VLSIM $(SRCS) $(LDFLAGS) $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
|
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
|
||||||
|
|
||||||
opae-vlsim:
|
vlsim-hw:
|
||||||
$(SET_SCOPE) $(MAKE) -C vlsim
|
$(SET_SCOPE) $(MAKE) -C vlsim
|
||||||
|
|
||||||
vortex.o: vortex.cpp
|
vortex.o: vortex.cpp
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
|||||||
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
|
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||||
|
|
||||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
#DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||||
|
|
||||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||||
@@ -72,9 +72,13 @@ ifdef SCOPE
|
|||||||
SCOPE_VH = $(RTL_DIR)/scope-defs.vh
|
SCOPE_VH = $(RTL_DIR)/scope-defs.vh
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# use our OPAE shim
|
||||||
VL_FLAGS += -DNOPAE
|
VL_FLAGS += -DNOPAE
|
||||||
CFLAGS += -DNOPAE
|
CFLAGS += -DNOPAE
|
||||||
|
|
||||||
|
# use DPI FPU
|
||||||
|
VL_FLAGS += -DFPU_FAST
|
||||||
|
|
||||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||||
|
|
||||||
PROJECT = libopae-c-vlsim.so
|
PROJECT = libopae-c-vlsim.so
|
||||||
|
|||||||
@@ -31,7 +31,7 @@
|
|||||||
fpga_result res = _expr; \
|
fpga_result res = _expr; \
|
||||||
if (res == FPGA_OK) \
|
if (res == FPGA_OK) \
|
||||||
break; \
|
break; \
|
||||||
printf("OPAE Error: '%s' returned %d, %s!\n", \
|
printf("[VXDRV] Error: '%s' returned %d, %s!\n", \
|
||||||
#_expr, (int)res, fpgaErrStr(res)); \
|
#_expr, (int)res, fpgaErrStr(res)); \
|
||||||
return -1; \
|
return -1; \
|
||||||
} while (false)
|
} while (false)
|
||||||
@@ -118,7 +118,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
|||||||
*value = STARTUP_ADDR;
|
*value = STARTUP_ADDR;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "invalid caps id: %d\n", caps_id);
|
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
||||||
std::abort();
|
std::abort();
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@@ -156,7 +156,7 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||||||
fpgaDestroyProperties(&filter);
|
fpgaDestroyProperties(&filter);
|
||||||
|
|
||||||
if (num_matches < 1) {
|
if (num_matches < 1) {
|
||||||
fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
|
fprintf(stderr, "[VXDRV] Error: accelerator %s not found!\n", AFU_ACCEL_UUID);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -197,9 +197,10 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||||||
fpgaClose(accel_handle);
|
fpgaClose(accel_handle);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
#ifndef NDEBUG
|
||||||
fprintf(stdout, "DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
|
fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
|
||||||
device->implementation_id, device->num_cores, device->num_warps, device->num_threads);
|
device->implementation_id, device->num_cores, device->num_warps, device->num_threads);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SCOPE
|
#ifdef SCOPE
|
||||||
@@ -236,18 +237,18 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||||||
int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles);
|
int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles);
|
||||||
assert(ret == 0);
|
assert(ret == 0);
|
||||||
float IPC = (float)(double(instrs) / double(cycles));
|
float IPC = (float)(double(instrs) / double(cycles));
|
||||||
fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
fprintf(stdout, "[VXDRV] PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
||||||
total_instrs += instrs;
|
total_instrs += instrs;
|
||||||
total_cycles = std::max<uint64_t>(total_cycles, cycles);
|
total_cycles = std::max<uint64_t>(total_cycles, cycles);
|
||||||
}
|
}
|
||||||
float IPC = (float)(double(total_instrs) / double(total_cycles));
|
float IPC = (float)(double(total_instrs) / double(total_cycles));
|
||||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
||||||
} else {
|
} else {
|
||||||
uint64_t instrs, cycles;
|
uint64_t instrs, cycles;
|
||||||
int ret = vx_get_perf(hdevice, 0, &instrs, &cycles);
|
int ret = vx_get_perf(hdevice, 0, &instrs, &cycles);
|
||||||
float IPC = (float)(double(instrs) / double(cycles));
|
float IPC = (float)(double(instrs) / double(cycles));
|
||||||
assert(ret == 0);
|
assert(ret == 0);
|
||||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -373,7 +374,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data));
|
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data));
|
||||||
if (0 == data || 0 == timeout) {
|
if (0 == data || 0 == timeout) {
|
||||||
if (data != 0) {
|
if (data != 0) {
|
||||||
fprintf(stdout, "ready-wait timed out: status=%ld\n", data);
|
fprintf(stdout, "[VXDRV] ready-wait timed out: status=%ld\n", data);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -90,16 +90,20 @@ vx_buffer_h dst_buf = nullptr;
|
|||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Driver Test." << std::endl;
|
std::cout << "Vortex Driver Test." << std::endl;
|
||||||
std::cout << "Usage: [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl;
|
std::cout << "Usage: [-t:testid] [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void parse_args(int argc, char **argv) {
|
static void parse_args(int argc, char **argv) {
|
||||||
int c;
|
int c;
|
||||||
while ((c = getopt(argc, argv, "n:s:e:k:ch?")) != -1) {
|
while ((c = getopt(argc, argv, "n:t:s:e:k:ch?")) != -1) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'n':
|
case 'n':
|
||||||
count = atoi(optarg);
|
count = atoi(optarg);
|
||||||
break;
|
break;
|
||||||
|
case 't':
|
||||||
|
testid_s = atoi(optarg);
|
||||||
|
testid_e = atoi(optarg);
|
||||||
|
break;
|
||||||
case 's':
|
case 's':
|
||||||
testid_s = atoi(optarg);
|
testid_s = atoi(optarg);
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -60,9 +60,9 @@ qsub-sim
|
|||||||
make ase
|
make ase
|
||||||
|
|
||||||
# tests
|
# tests
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -t1 -n1
|
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n16
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n1 -s4 -e4
|
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n16
|
||||||
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
||||||
|
|
||||||
# modify "vsim_run.tcl" to dump VCD trace
|
# modify "vsim_run.tcl" to dump VCD trace
|
||||||
@@ -97,7 +97,7 @@ kill -9 <pid>
|
|||||||
# fixing device resource busy issue when deleting /build_ase_1c/
|
# fixing device resource busy issue when deleting /build_ase_1c/
|
||||||
lsof +D build_ase_1c
|
lsof +D build_ase_1c
|
||||||
|
|
||||||
# quick off cache synthesis
|
# quick off synthesis
|
||||||
make -C pipeline clean && make -C pipeline > pipeline/build.log 2>&1 &
|
make -C pipeline clean && make -C pipeline > pipeline/build.log 2>&1 &
|
||||||
make -C cache clean && make -C cache > cache/build.log 2>&1 &
|
make -C cache clean && make -C cache > cache/build.log 2>&1 &
|
||||||
make -C core clean && make -C core > core/build.log 2>&1 &
|
make -C core clean && make -C core > core/build.log 2>&1 &
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
+define+SYNTHESIS
|
+define+SYNTHESIS
|
||||||
+define+QUARTUS
|
+define+QUARTUS
|
||||||
+define+FPU_FAST
|
+define+FPU_FAST
|
||||||
+define+SCOPE
|
#+define+SCOPE
|
||||||
|
|
||||||
#+define+DBG_PRINT_CORE_ICACHE
|
#+define+DBG_PRINT_CORE_ICACHE
|
||||||
#+define+DBG_PRINT_CORE_DCACHE
|
#+define+DBG_PRINT_CORE_DCACHE
|
||||||
|
|||||||
@@ -6,4 +6,21 @@ set_global_assignment -name VERILOG_MACRO QUARTUS
|
|||||||
set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
||||||
set_global_assignment -name VERILOG_MACRO NDEBUG
|
set_global_assignment -name VERILOG_MACRO NDEBUG
|
||||||
set_global_assignment -name MESSAGE_DISABLE 16818
|
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||||
set_global_assignment -name VERILOG_MACRO FPU_FAST
|
set_global_assignment -name VERILOG_MACRO FPU_FAST
|
||||||
|
|
||||||
|
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||||
|
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||||
|
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
||||||
|
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||||
|
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||||
|
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
||||||
|
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||||
|
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||||
|
set_global_assignment -name POWER_USE_TA_VALUE 65
|
||||||
|
set_global_assignment -name SEED 1
|
||||||
|
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||||
|
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||||
|
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||||
|
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||||
|
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||||
|
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||||
@@ -175,8 +175,9 @@ logic [31:0] cmd_csr_wdata;
|
|||||||
// MMIO controller ////////////////////////////////////////////////////////////
|
// MMIO controller ////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
t_ccip_c0_ReqMmioHdr mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
t_ccip_c0_ReqMmioHdr mmio_hdr;
|
||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
|
assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
||||||
|
|
||||||
`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!"))
|
`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!"))
|
||||||
|
|
||||||
@@ -204,9 +205,20 @@ wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hd
|
|||||||
reg scope_start;
|
reg scope_start;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
// disable assertions until reset
|
||||||
|
`ifndef VERILATOR
|
||||||
|
initial begin
|
||||||
|
$assertoff;
|
||||||
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
always_ff @(posedge clk)
|
always_ff @(posedge clk)
|
||||||
begin
|
begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
|
`ifndef VERILATOR
|
||||||
|
$asserton; // enable assertions
|
||||||
|
`endif
|
||||||
|
|
||||||
mmio_tx.hdr <= 0;
|
mmio_tx.hdr <= 0;
|
||||||
mmio_tx.data <= 0;
|
mmio_tx.data <= 0;
|
||||||
mmio_tx.mmioRdValid <= 0;
|
mmio_tx.mmioRdValid <= 0;
|
||||||
@@ -324,7 +336,8 @@ begin
|
|||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
default: begin
|
default: begin
|
||||||
`ifdef DBG_PRINT_OPAE
|
mmio_tx.data <= 64'h0;
|
||||||
|
`ifdef DBG_PRINT_OPAE
|
||||||
$display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address);
|
$display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address);
|
||||||
`endif
|
`endif
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -59,8 +59,6 @@
|
|||||||
`define EXT_F_ENABLE
|
`define EXT_F_ENABLE
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
//`define FPU_FAST
|
|
||||||
|
|
||||||
// Device identification
|
// Device identification
|
||||||
`define VENDOR_ID 0
|
`define VENDOR_ID 0
|
||||||
`define ARCHITECTURE_ID 0
|
`define ARCHITECTURE_ID 0
|
||||||
|
|||||||
@@ -347,7 +347,7 @@ module VX_decode #(
|
|||||||
assign decode_if.rd = rd;
|
assign decode_if.rd = rd;
|
||||||
assign decode_if.rs1 = rs1_qual;
|
assign decode_if.rs1 = rs1_qual;
|
||||||
assign decode_if.rs2 = rs2;
|
assign decode_if.rs2 = rs2;
|
||||||
assign decode_if.rs3 = rs3;
|
assign decode_if.rs3 = 0;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
assign decode_if.use_rs3 = use_rs3;
|
assign decode_if.use_rs3 = use_rs3;
|
||||||
|
|||||||
@@ -1,70 +0,0 @@
|
|||||||
`include "VX_define.vh"
|
|
||||||
|
|
||||||
// control module to support multi-cycle read for fp register
|
|
||||||
|
|
||||||
module VX_gpr_fp_ctrl (
|
|
||||||
input wire clk,
|
|
||||||
input wire reset,
|
|
||||||
|
|
||||||
input wire [`NUM_THREADS-1:0][31:0] rs1_data,
|
|
||||||
input wire [`NUM_THREADS-1:0][31:0] rs2_data,
|
|
||||||
VX_gpr_req_if gpr_req_if,
|
|
||||||
|
|
||||||
// outputs
|
|
||||||
output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
|
|
||||||
VX_gpr_rsp_if gpr_rsp_if
|
|
||||||
);
|
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data, rsp_rs3_data;
|
|
||||||
reg rsp_valid;
|
|
||||||
reg [31:0] rsp_pc;
|
|
||||||
reg [`NW_BITS-1:0] rsp_wid;
|
|
||||||
reg read_rs1;
|
|
||||||
|
|
||||||
wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && read_rs1;
|
|
||||||
wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
rsp_valid <= 0;
|
|
||||||
rsp_pc <= 0;
|
|
||||||
rsp_rs1_data <= 0;
|
|
||||||
rsp_rs2_data <= 0;
|
|
||||||
rsp_rs3_data <= 0;
|
|
||||||
rsp_wid <= 0;
|
|
||||||
read_rs1 <= 1;
|
|
||||||
end else begin
|
|
||||||
if (rs3_delay) begin
|
|
||||||
read_rs1 <= 0;
|
|
||||||
rsp_wid <= gpr_req_if.wid;
|
|
||||||
end else if (read_fire) begin
|
|
||||||
read_rs1 <= 1;
|
|
||||||
end
|
|
||||||
|
|
||||||
rsp_valid <= gpr_req_if.valid;
|
|
||||||
rsp_wid <= gpr_req_if.wid;
|
|
||||||
rsp_pc <= gpr_req_if.PC;
|
|
||||||
|
|
||||||
if (read_rs1) begin
|
|
||||||
rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data;
|
|
||||||
end
|
|
||||||
rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data;
|
|
||||||
rsp_rs3_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data;
|
|
||||||
|
|
||||||
assert(read_rs1 || rsp_wid == gpr_req_if.wid);
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
// outputs
|
|
||||||
wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_req_if.rs1 : gpr_req_if.rs3;
|
|
||||||
assign raddr1 = {gpr_req_if.wid, rs1};
|
|
||||||
assign gpr_req_if.ready = ~rs3_delay;
|
|
||||||
|
|
||||||
assign gpr_rsp_if.valid = rsp_valid;
|
|
||||||
assign gpr_rsp_if.wid = rsp_wid;
|
|
||||||
assign gpr_rsp_if.PC = rsp_pc;
|
|
||||||
assign gpr_rsp_if.rs1_data = rsp_rs1_data;
|
|
||||||
assign gpr_rsp_if.rs2_data = rsp_rs2_data;
|
|
||||||
assign gpr_rsp_if.rs3_data = rsp_rs3_data;
|
|
||||||
|
|
||||||
endmodule
|
|
||||||
@@ -12,21 +12,24 @@ module VX_gpr_ram (
|
|||||||
);
|
);
|
||||||
`ifndef ASIC
|
`ifndef ASIC
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0];
|
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||||
|
reg [`NUM_THREADS-1:0][31:0] q1, q2;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||||
if (we[i]) begin
|
if (we[i]) begin
|
||||||
ram[waddr][i][0] <= wdata[i][07:00];
|
mem[waddr][i][0] <= wdata[i][07:00];
|
||||||
ram[waddr][i][1] <= wdata[i][15:08];
|
mem[waddr][i][1] <= wdata[i][15:08];
|
||||||
ram[waddr][i][2] <= wdata[i][23:16];
|
mem[waddr][i][2] <= wdata[i][23:16];
|
||||||
ram[waddr][i][3] <= wdata[i][31:24];
|
mem[waddr][i][3] <= wdata[i][31:24];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
q1 <= mem[rs1];
|
||||||
|
q2 <= mem[rs2];
|
||||||
end
|
end
|
||||||
|
|
||||||
assign rs1_data = ram[rs1];
|
assign rs1_data = q1;
|
||||||
assign rs2_data = ram[rs2];
|
assign rs2_data = q2;
|
||||||
|
|
||||||
`else
|
`else
|
||||||
|
|
||||||
@@ -134,4 +137,4 @@ module VX_gpr_ram (
|
|||||||
|
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -15,8 +15,15 @@ module VX_gpr_stage #(
|
|||||||
);
|
);
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
|
|
||||||
|
reg rsp_valid;
|
||||||
|
reg [`NW_BITS-1:0] rsp_wid;
|
||||||
|
reg [31:0] rsp_pc;
|
||||||
|
reg rs1_is_zero, rs2_is_zero;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data;
|
||||||
wire [`NW_BITS+`NR_BITS-1:0] raddr1;
|
wire [`NW_BITS+`NR_BITS-1:0] raddr1, raddr2;
|
||||||
|
|
||||||
|
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||||
|
|
||||||
VX_gpr_ram gpr_ram (
|
VX_gpr_ram gpr_ram (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
@@ -24,60 +31,77 @@ module VX_gpr_stage #(
|
|||||||
.waddr ({writeback_if.wid, writeback_if.rd}),
|
.waddr ({writeback_if.wid, writeback_if.rd}),
|
||||||
.wdata (writeback_if.data),
|
.wdata (writeback_if.data),
|
||||||
.rs1 (raddr1),
|
.rs1 (raddr1),
|
||||||
.rs2 ({gpr_req_if.wid, gpr_req_if.rs2}),
|
.rs2 (raddr2),
|
||||||
.rs1_data (rs1_data),
|
.rs1_data (rs1_data),
|
||||||
.rs2_data (rs2_data)
|
.rs2_data (rs2_data)
|
||||||
);
|
);
|
||||||
|
|
||||||
`ifdef EXT_F_ENABLE
|
|
||||||
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.rs1_data (rs1_data),
|
|
||||||
.rs2_data (rs2_data),
|
|
||||||
.raddr1 (raddr1),
|
|
||||||
.gpr_req_if (gpr_req_if),
|
|
||||||
.gpr_rsp_if (gpr_rsp_if)
|
|
||||||
);
|
|
||||||
`else
|
|
||||||
reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data;
|
|
||||||
reg rsp_valid;
|
|
||||||
reg [`NW_BITS-1:0] rsp_wid;
|
|
||||||
reg [31:0] rsp_pc;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
rsp_valid <= 0;
|
rsp_valid <= 0;
|
||||||
rsp_wid <= 0;
|
rsp_wid <= 0;
|
||||||
rsp_pc <= 0;
|
rsp_pc <= 0;
|
||||||
rsp_rs1_data <= 0;
|
rs1_is_zero <= 0;
|
||||||
rsp_rs2_data <= 0;
|
rs2_is_zero <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
rsp_valid <= gpr_req_if.valid;
|
rsp_valid <= gpr_req_if.valid;
|
||||||
rsp_wid <= gpr_req_if.wid;
|
rsp_wid <= gpr_req_if.wid;
|
||||||
rsp_pc <= gpr_req_if.PC;
|
rsp_pc <= gpr_req_if.PC;
|
||||||
rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data;
|
rs1_is_zero <= (0 == gpr_req_if.rs1);
|
||||||
rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data;
|
rs2_is_zero <= (0 == gpr_req_if.rs2);
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
`ifdef EXT_F_ENABLE
|
||||||
|
|
||||||
|
reg [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||||
|
reg read_rs3, save_rs3;
|
||||||
|
|
||||||
|
wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && !read_rs3;
|
||||||
|
wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (reset) begin
|
||||||
|
rs3_data <= 0;
|
||||||
|
read_rs3 <= 0;
|
||||||
|
end else begin
|
||||||
|
if (rs3_delay) begin
|
||||||
|
read_rs3 <= 1;
|
||||||
|
save_rs3 <= 1;
|
||||||
|
end else if (read_fire) begin
|
||||||
|
read_rs3 <= 0;
|
||||||
|
end
|
||||||
|
if (save_rs3) begin
|
||||||
|
rs3_data <= rs1_data;
|
||||||
|
save_rs3 <= 0;
|
||||||
|
end
|
||||||
|
assert(!read_rs3 || rsp_wid == gpr_req_if.wid);
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)};
|
||||||
|
assign gpr_req_if.ready = ~rs3_delay;
|
||||||
|
assign gpr_rsp_if.rs3_data = rs3_data;
|
||||||
|
|
||||||
|
`else
|
||||||
|
|
||||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||||
|
|
||||||
assign gpr_req_if.ready = 1;
|
assign gpr_req_if.ready = 1;
|
||||||
|
|
||||||
assign gpr_rsp_if.valid = rsp_valid;
|
|
||||||
assign gpr_rsp_if.wid = rsp_wid;
|
|
||||||
assign gpr_rsp_if.PC = rsp_pc;
|
|
||||||
assign gpr_rsp_if.rs1_data = rsp_rs1_data;
|
|
||||||
assign gpr_rsp_if.rs2_data = rsp_rs2_data;
|
|
||||||
assign gpr_rsp_if.rs3_data = 0;
|
assign gpr_rsp_if.rs3_data = 0;
|
||||||
|
|
||||||
`UNUSED_VAR (gpr_req_if.valid);
|
`UNUSED_VAR (gpr_req_if.valid);
|
||||||
`UNUSED_VAR (gpr_req_if.rs3);
|
`UNUSED_VAR (gpr_req_if.rs3);
|
||||||
`UNUSED_VAR (gpr_req_if.use_rs3);
|
`UNUSED_VAR (gpr_req_if.use_rs3);
|
||||||
`UNUSED_VAR (gpr_rsp_if.ready);
|
`UNUSED_VAR (gpr_rsp_if.ready);
|
||||||
|
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
assign gpr_rsp_if.rs1_data = rs1_is_zero ? (`NUM_THREADS*32)'(0) : rs1_data;
|
||||||
|
assign gpr_rsp_if.rs2_data = rs2_is_zero ? (`NUM_THREADS*32)'(0) : rs2_data;
|
||||||
|
assign gpr_rsp_if.valid = rsp_valid;
|
||||||
|
assign gpr_rsp_if.wid = rsp_wid;
|
||||||
|
assign gpr_rsp_if.PC = rsp_pc;
|
||||||
|
|
||||||
assign writeback_if.ready = 1'b1;
|
assign writeback_if.ready = 1'b1;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -20,13 +20,13 @@ module VX_ibuffer #(
|
|||||||
localparam ADDRW = $clog2(SIZE);
|
localparam ADDRW = $clog2(SIZE);
|
||||||
localparam NWARPSW = $clog2(`NUM_WARPS+1);
|
localparam NWARPSW = $clog2(`NUM_WARPS+1);
|
||||||
|
|
||||||
|
reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
|
||||||
|
|
||||||
wire [`NUM_WARPS-1:0] q_full;
|
wire [`NUM_WARPS-1:0] q_full;
|
||||||
wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size;
|
wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size;
|
||||||
wire [DATAW-1:0] q_data_in;
|
wire [DATAW-1:0] q_data_in;
|
||||||
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
|
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
|
||||||
|
|
||||||
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
|
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
|
||||||
reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
|
|
||||||
|
|
||||||
wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready;
|
wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready;
|
||||||
wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready;
|
wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready;
|
||||||
@@ -36,7 +36,7 @@ module VX_ibuffer #(
|
|||||||
wire writing = enq_fire && (i == ibuf_enq_if.wid);
|
wire writing = enq_fire && (i == ibuf_enq_if.wid);
|
||||||
wire reading = deq_fire && (i == ibuf_deq_if.wid);
|
wire reading = deq_fire && (i == ibuf_deq_if.wid);
|
||||||
|
|
||||||
wire is_slot0 = ((0 == size_r[i]) || ((1 == size_r[i]) && reading));
|
wire is_slot0 = (0 == size_r[i]) || ((1 == size_r[i]) && reading);
|
||||||
|
|
||||||
wire push = writing && !is_slot0;
|
wire push = writing && !is_slot0;
|
||||||
wire pop = reading && (size_r[i] != 1);
|
wire pop = reading && (size_r[i] != 1);
|
||||||
@@ -48,36 +48,37 @@ module VX_ibuffer #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.push (push),
|
.push (push),
|
||||||
.data_in (q_data_in),
|
|
||||||
.pop (pop),
|
.pop (pop),
|
||||||
|
.data_in (q_data_in),
|
||||||
.data_out (q_data_prev[i]),
|
.data_out (q_data_prev[i]),
|
||||||
`UNUSED_PIN (empty),
|
`UNUSED_PIN (empty),
|
||||||
`UNUSED_PIN (full),
|
`UNUSED_PIN (full),
|
||||||
`UNUSED_PIN (size)
|
`UNUSED_PIN (size)
|
||||||
);
|
);
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (writing && is_slot0) begin
|
|
||||||
q_data_out[i] <= q_data_in;
|
|
||||||
end
|
|
||||||
if (pop) begin
|
|
||||||
q_data_out[i] <= q_data_prev[i];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
size_r[i] <= 0;
|
size_r[i] <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (writing && !reading) begin
|
if (writing) begin
|
||||||
size_r[i] <= size_r[i] + SIZEW'(1);
|
if (is_slot0) begin
|
||||||
|
q_data_out[i] <= q_data_in;
|
||||||
|
end
|
||||||
|
if (!reading) begin
|
||||||
|
size_r[i] <= size_r[i] + SIZEW'(1);
|
||||||
|
end
|
||||||
end
|
end
|
||||||
if (reading && !writing) begin
|
if (reading) begin
|
||||||
size_r[i] <= size_r[i] - SIZEW'(1);
|
if (size_r[i] != 1) begin
|
||||||
|
q_data_out[i] <= q_data_prev[i];
|
||||||
|
end
|
||||||
|
if (!writing) begin
|
||||||
|
size_r[i] <= size_r[i] - SIZEW'(1);
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign q_full[i] = (size_r[i] == SIZE);
|
assign q_full[i] = (size_r[i] == SIZE);
|
||||||
assign q_size[i] = size_r[i];
|
assign q_size[i] = size_r[i];
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ module VX_icache_stage #(
|
|||||||
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
|
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (icache_req_fire) begin
|
if (icache_req_fire) begin
|
||||||
rsp_PC_buf[req_tag] <= ifetch_req_if.PC;
|
rsp_PC_buf[req_tag] <= ifetch_req_if.PC;
|
||||||
rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask;
|
rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask;
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -41,9 +41,9 @@
|
|||||||
|
|
||||||
`define STRINGIFY(x) `"x`"
|
`define STRINGIFY(x) `"x`"
|
||||||
|
|
||||||
`define STATIC_ASSERT(cond, msg) \
|
`define STATIC_ASSERT(cond, msg) \
|
||||||
generate \
|
generate \
|
||||||
if (!(cond)) $error msg; \
|
if (!(cond)) $error msg; \
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
`define ENABLE_TRACING /* verilator tracing_on */
|
`define ENABLE_TRACING /* verilator tracing_on */
|
||||||
@@ -51,8 +51,8 @@
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)
|
`define USE_FAST_BRAM (* ramstyle="mlab" *)
|
||||||
`define RELAXED_RW_BRAM (* syn_ramstyle = "no_rw_check" *)
|
`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *)
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|||||||
2
hw/rtl/cache/VX_bank.v
vendored
2
hw/rtl/cache/VX_bank.v
vendored
@@ -447,6 +447,8 @@ module VX_bank #(
|
|||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
|
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
|
||||||
|
end else begin
|
||||||
|
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0;
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
|||||||
1
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
1
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
@@ -58,6 +58,7 @@ module VX_cache_miss_resrv #(
|
|||||||
);
|
);
|
||||||
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
|
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
|
||||||
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
|
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
|
||||||
|
|
||||||
reg [MRVQ_SIZE-1:0] valid_table;
|
reg [MRVQ_SIZE-1:0] valid_table;
|
||||||
reg [MRVQ_SIZE-1:0] ready_table;
|
reg [MRVQ_SIZE-1:0] ready_table;
|
||||||
reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr;
|
reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr;
|
||||||
|
|||||||
31
hw/rtl/cache/VX_tag_data_store.v
vendored
31
hw/rtl/cache/VX_tag_data_store.v
vendored
@@ -30,7 +30,6 @@ module VX_tag_data_store #(
|
|||||||
input wire fill_sent
|
input wire fill_sent
|
||||||
);
|
);
|
||||||
|
|
||||||
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0][7:0] data [`BANK_LINE_COUNT-1:0];
|
|
||||||
reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0];
|
reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0];
|
||||||
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0];
|
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0];
|
||||||
reg [`BANK_LINE_COUNT-1:0] dirty;
|
reg [`BANK_LINE_COUNT-1:0] dirty;
|
||||||
@@ -40,8 +39,7 @@ module VX_tag_data_store #(
|
|||||||
assign read_dirty = dirty [read_addr];
|
assign read_dirty = dirty [read_addr];
|
||||||
assign read_dirtyb = dirtyb [read_addr];
|
assign read_dirtyb = dirtyb [read_addr];
|
||||||
assign read_tag = tag [read_addr];
|
assign read_tag = tag [read_addr];
|
||||||
assign read_data = data [read_addr];
|
|
||||||
|
|
||||||
wire do_write = (| write_enable);
|
wire do_write = (| write_enable);
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
@@ -69,15 +67,26 @@ module VX_tag_data_store #(
|
|||||||
if (invalidate) begin
|
if (invalidate) begin
|
||||||
valid[write_addr] <= 0;
|
valid[write_addr] <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
for (integer j = 0; j < `BANK_LINE_WORDS; j++) begin
|
|
||||||
for (integer i = 0; i < WORD_SIZE; i++) begin
|
|
||||||
if (write_enable[j][i]) begin
|
|
||||||
data[write_addr][j][i] <= write_data[j * `WORD_WIDTH + i * 8 +: 8];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
wire [(`BANK_LINE_WORDS * WORD_SIZE)-1:0] ram_wren;
|
||||||
|
assign ram_wren = write_enable & {(`BANK_LINE_WORDS * WORD_SIZE){!stall_bank_pipe}};
|
||||||
|
|
||||||
|
VX_dp_ram #(
|
||||||
|
.DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8),
|
||||||
|
.SIZE(`BANK_LINE_COUNT),
|
||||||
|
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE),
|
||||||
|
.BUFFERED(0),
|
||||||
|
.RWCHECK(1)
|
||||||
|
) dp_ram (
|
||||||
|
.clk(clk),
|
||||||
|
.waddr(write_addr),
|
||||||
|
.raddr(read_addr),
|
||||||
|
.wren(ram_wren),
|
||||||
|
.rden(1'b1),
|
||||||
|
.din(write_data),
|
||||||
|
.dout(read_data)
|
||||||
|
);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
117
hw/rtl/libs/VX_dp_ram.v
Normal file
117
hw/rtl/libs/VX_dp_ram.v
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
`include "VX_platform.vh"
|
||||||
|
|
||||||
|
// Simple dual-port RAM: one write port (waddr/wren/din) and one read port
// (raddr/rden/dout), both clocked by clk.
//
// Parameters:
//   DATAW    - width of one stored word in bits.
//   SIZE     - number of words in the array.
//   BYTEENW  - width of wren. When greater than 1, bit k of wren gates the
//              write of bits [8*k +: 8] of din; otherwise wren is a single
//              enable for the whole word.
//   BUFFERED - non-zero: dout is a register that loads mem[raddr] on a clock
//              edge where rden is high. Zero: dout is driven combinationally
//              from mem[raddr] and rden is ignored.
//   RWCHECK  - only consulted when BUFFERED is zero. When non-zero and the
//              SYNTHESIS macro is defined, a registered copy of the last
//              written word is muxed onto dout (see the read-port section).
//   ADDRW    - address width; defaults to $clog2(SIZE).
//   SIZEW    - declared for callers but not referenced inside this module.
module VX_dp_ram #(
    parameter DATAW    = 1,
    parameter SIZE     = 1,
    parameter BYTEENW  = 1,
    parameter BUFFERED = 1,
    parameter RWCHECK  = 1,
    parameter ADDRW    = $clog2(SIZE),
    parameter SIZEW    = $clog2(SIZE+1)
) (
    input wire               clk,
    input wire [ADDRW-1:0]   waddr,
    input wire [ADDRW-1:0]   raddr,
    input wire [BYTEENW-1:0] wren,
    input wire               rden,
    input wire [DATAW-1:0]   din,
    output wire [DATAW-1:0]  dout
);

    // Storage array; every configuration below uses the same array and the
    // same write port, only the read path differs.
    reg [DATAW-1:0] mem [SIZE-1:0];

    // ---------------------------------------------------------------------
    // Write port
    // ---------------------------------------------------------------------
    if (BYTEENW > 1) begin

        // Byte-enable write: each set bit of wren updates one byte of the
        // addressed word, the remaining bytes keep their stored value.
        always @(posedge clk) begin
            for (integer b = 0; b < BYTEENW; b++) begin
                if (wren[b]) begin
                    mem[waddr][b * 8 +: 8] <= din[b * 8 +: 8];
                end
            end
        end

    end else begin

        // Whole-word write.
        always @(posedge clk) begin
            if (wren) begin
                mem[waddr] <= din;
            end
        end

    end

    // ---------------------------------------------------------------------
    // Read port
    // ---------------------------------------------------------------------
    if (BUFFERED) begin

        // Registered read: the output holds its value while rden is low.
        reg [DATAW-1:0] rdata_q;

        always @(posedge clk) begin
            if (rden) begin
                rdata_q <= mem[raddr];
            end
        end

        assign dout = rdata_q;

    end else begin

        // The combinational read path has no use for the read enable.
        `UNUSED_VAR(rden)

        if (RWCHECK) begin

        `ifdef SYNTHESIS

            // Word captured on every clock edge: for BYTEENW > 1 it merges,
            // per byte, din (where wren is set) with the current content of
            // mem[waddr]; for BYTEENW == 1 it is simply din.
            reg [DATAW-1:0] wdata_q;
            wire            wr_any;

            if (BYTEENW > 1) begin
                assign wr_any = (| wren);
                always @(posedge clk) begin
                    for (integer b = 0; b < BYTEENW; b++) begin
                        wdata_q[b * 8 +: 8] <= wren[b] ? din[b * 8 +: 8]
                                                       : mem[waddr][b * 8 +: 8];
                    end
                end
            end else begin
                assign wr_any = wren;
                always @(posedge clk) begin
                    wdata_q <= din;
                end
            end

            // Set for one cycle after a write whose address matched raddr.
            reg hit_q;

            always @(posedge clk) begin
                hit_q <= wr_any && (raddr == waddr);
            end

            // NOTE(review): the select uses only the registered hit flag; the
            // current raddr is not compared against the address that produced
            // the hit. Presumably this shapes read-during-write handling in
            // the synthesis tool - confirm intent and that simulation
            // (the `else branch) matches hardware.
            assign dout = hit_q ? wdata_q : mem[raddr];

        `else

            assign dout = mem[raddr];

        `endif

        end else begin

            // No collision handling requested: plain combinational read.
            assign dout = mem[raddr];

        end

    end

endmodule
|
||||||
@@ -18,16 +18,11 @@ module VX_generic_queue #(
|
|||||||
output wire [SIZEW-1:0] size
|
output wire [SIZEW-1:0] size
|
||||||
);
|
);
|
||||||
`STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!"))
|
`STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!"))
|
||||||
|
|
||||||
|
if (SIZE == 1) begin
|
||||||
|
|
||||||
always @(*) begin
|
|
||||||
assert(!pop || !empty);
|
|
||||||
assert(!push || !full);
|
|
||||||
end
|
|
||||||
|
|
||||||
if (SIZE == 1) begin // (SIZE == 1)
|
|
||||||
|
|
||||||
reg [SIZEW-1:0] size_r;
|
|
||||||
reg [DATAW-1:0] head_r;
|
reg [DATAW-1:0] head_r;
|
||||||
|
reg size_r;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
@@ -35,8 +30,10 @@ module VX_generic_queue #(
|
|||||||
size_r <= 0;
|
size_r <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (push && !pop) begin
|
if (push && !pop) begin
|
||||||
|
assert(!full);
|
||||||
size_r <= 1;
|
size_r <= 1;
|
||||||
end else if (pop && !push) begin
|
end else if (pop && !push) begin
|
||||||
|
assert(!empty);
|
||||||
size_r <= 0;
|
size_r <= 0;
|
||||||
end
|
end
|
||||||
if (push) begin
|
if (push) begin
|
||||||
@@ -50,63 +47,14 @@ module VX_generic_queue #(
|
|||||||
assign full = (size_r != 0);
|
assign full = (size_r != 0);
|
||||||
assign size = size_r;
|
assign size = size_r;
|
||||||
|
|
||||||
end else begin // (SIZE > 1)
|
end else begin
|
||||||
|
|
||||||
`ifdef QUARTUS
|
|
||||||
|
|
||||||
scfifo scfifo_component (
|
|
||||||
.clock (clk),
|
|
||||||
.data (data_in),
|
|
||||||
.rdreq (pop),
|
|
||||||
.wrreq (push),
|
|
||||||
.empty (empty),
|
|
||||||
.full (full),
|
|
||||||
.q (data_out),
|
|
||||||
.sclr (reset),
|
|
||||||
.usedw (),
|
|
||||||
.aclr (),
|
|
||||||
.almost_empty (),
|
|
||||||
.almost_full (),
|
|
||||||
.eccstatus ()
|
|
||||||
);
|
|
||||||
|
|
||||||
defparam
|
|
||||||
scfifo_component.lpm_type = "scfifo",
|
|
||||||
scfifo_component.intended_device_family = "Arria 10",
|
|
||||||
scfifo_component.lpm_numwords = SIZE,
|
|
||||||
scfifo_component.lpm_width = DATAW,
|
|
||||||
scfifo_component.lpm_widthu = $clog2(SIZE),
|
|
||||||
scfifo_component.lpm_showahead = "ON",
|
|
||||||
scfifo_component.add_ram_output_register = (BUFFERED ? "ON" : "ON"),
|
|
||||||
scfifo_component.use_eab = "ON";
|
|
||||||
|
|
||||||
reg [SIZEW-1:0] size_r;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
size_r <= 0;
|
|
||||||
end else begin
|
|
||||||
if (push && !pop) begin
|
|
||||||
size_r <= size_r + SIZEW'(1);
|
|
||||||
end
|
|
||||||
if (pop && !push) begin
|
|
||||||
size_r <= size_r - SIZEW'(1);
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign size = size_r;
|
|
||||||
|
|
||||||
`else
|
|
||||||
|
|
||||||
`USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0];
|
|
||||||
|
|
||||||
if (0 == BUFFERED) begin
|
if (0 == BUFFERED) begin
|
||||||
|
|
||||||
reg [SIZEW-1:0] size_r;
|
|
||||||
reg [ADDRW:0] rd_ptr_r;
|
reg [ADDRW:0] rd_ptr_r;
|
||||||
reg [ADDRW:0] wr_ptr_r;
|
reg [ADDRW:0] wr_ptr_r;
|
||||||
|
reg [ADDRW-1:0] used_r;
|
||||||
|
|
||||||
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0];
|
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0];
|
||||||
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0];
|
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0];
|
||||||
|
|
||||||
@@ -114,111 +62,127 @@ module VX_generic_queue #(
|
|||||||
if (reset) begin
|
if (reset) begin
|
||||||
rd_ptr_r <= 0;
|
rd_ptr_r <= 0;
|
||||||
wr_ptr_r <= 0;
|
wr_ptr_r <= 0;
|
||||||
size_r <= 0;
|
used_r <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (push) begin
|
if (push) begin
|
||||||
|
assert(!full);
|
||||||
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1);
|
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1);
|
||||||
if (!pop) begin
|
if (!pop) begin
|
||||||
size_r <= size_r + SIZEW'(1);
|
used_r <= used_r + ADDRW'(1);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (pop) begin
|
if (pop) begin
|
||||||
|
assert(!empty);
|
||||||
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1);
|
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1);
|
||||||
if (!push) begin
|
if (!push) begin
|
||||||
size_r <= size_r - SIZEW'(1);
|
used_r <= used_r - ADDRW'(1);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
always @(posedge clk) begin
|
VX_dp_ram #(
|
||||||
if (push) begin
|
.DATAW(DATAW),
|
||||||
data[wr_ptr_a] <= data_in;
|
.SIZE(SIZE),
|
||||||
end
|
.BUFFERED(0),
|
||||||
end
|
.RWCHECK(1)
|
||||||
|
) dp_ram (
|
||||||
assign data_out = data[rd_ptr_a];
|
.clk(clk),
|
||||||
assign empty = (wr_ptr_r == rd_ptr_r);
|
.waddr(wr_ptr_a),
|
||||||
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]);
|
.raddr(rd_ptr_a),
|
||||||
assign size = size_r;
|
.wren(push),
|
||||||
|
.rden(pop),
|
||||||
|
.din(data_in),
|
||||||
|
.dout(data_out)
|
||||||
|
);
|
||||||
|
|
||||||
|
assign empty = (wr_ptr_r == rd_ptr_r);
|
||||||
|
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]);
|
||||||
|
assign size = {full, used_r};
|
||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
reg [SIZEW-1:0] size_r;
|
wire [DATAW-1:0] dout;
|
||||||
reg [DATAW-1:0] head_r;
|
|
||||||
reg [DATAW-1:0] curr_r;
|
reg [DATAW-1:0] din_r;
|
||||||
reg [ADDRW-1:0] wr_ptr_r;
|
reg [ADDRW-1:0] wr_ptr_r;
|
||||||
reg [ADDRW-1:0] rd_ptr_r;
|
reg [ADDRW-1:0] rd_ptr_r;
|
||||||
reg [ADDRW-1:0] rd_ptr_next_r;
|
reg [ADDRW-1:0] rd_ptr_n_r;
|
||||||
|
reg [ADDRW-1:0] used_r;
|
||||||
reg empty_r;
|
reg empty_r;
|
||||||
reg full_r;
|
reg full_r;
|
||||||
reg bypass_r;
|
reg bypass_r;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
size_r <= 0;
|
wr_ptr_r <= 0;
|
||||||
curr_r <= 0;
|
rd_ptr_r <= 0;
|
||||||
wr_ptr_r <= 0;
|
rd_ptr_n_r <= 1;
|
||||||
rd_ptr_r <= 0;
|
empty_r <= 1;
|
||||||
rd_ptr_next_r <= 1;
|
full_r <= 0;
|
||||||
empty_r <= 1;
|
used_r <= 0;
|
||||||
full_r <= 0;
|
|
||||||
end else begin
|
end else begin
|
||||||
if (push) begin
|
if (push) begin
|
||||||
wr_ptr_r <= wr_ptr_r + ADDRW'(1);
|
wr_ptr_r <= wr_ptr_r + ADDRW'(1);
|
||||||
|
|
||||||
if (!pop) begin
|
if (!pop) begin
|
||||||
empty_r <= 0;
|
empty_r <= 0;
|
||||||
if (size_r == SIZEW'(SIZE-1)) begin
|
if (used_r == ADDRW'(SIZE-1)) begin
|
||||||
full_r <= 1;
|
full_r <= 1;
|
||||||
end
|
end
|
||||||
size_r <= size_r + SIZEW'(1);
|
used_r <= used_r + ADDRW'(1);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if (pop) begin
|
if (pop) begin
|
||||||
rd_ptr_r <= rd_ptr_next_r;
|
rd_ptr_r <= rd_ptr_n_r;
|
||||||
|
|
||||||
if (SIZE > 2) begin
|
if (SIZE > 2) begin
|
||||||
rd_ptr_next_r <= rd_ptr_r + ADDRW'(2);
|
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
|
||||||
end else begin // (SIZE == 2);
|
end else begin // (SIZE == 2);
|
||||||
rd_ptr_next_r <= ~rd_ptr_next_r;
|
rd_ptr_n_r <= ~rd_ptr_n_r;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (!push) begin
|
if (!push) begin
|
||||||
if (size_r == SIZEW'(1)) begin
|
full_r <= 0;
|
||||||
assert(rd_ptr_next_r == wr_ptr_r);
|
if (used_r == ADDRW'(1)) begin
|
||||||
|
assert(rd_ptr_n_r == wr_ptr_r);
|
||||||
empty_r <= 1;
|
empty_r <= 1;
|
||||||
end;
|
end;
|
||||||
full_r <= 0;
|
used_r <= used_r - ADDRW'(1);
|
||||||
size_r <= size_r - SIZEW'(1);
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
bypass_r <= push && (empty_r || ((size_r == SIZEW'(1)) && pop));
|
|
||||||
curr_r <= data_in;
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
|
||||||
head_r <= 0;
|
bypass_r <= 1;
|
||||||
end else begin
|
din_r <= data_in;
|
||||||
if (push) begin
|
end else if (pop)
|
||||||
data[wr_ptr_r] <= data_in;
|
bypass_r <= 0;
|
||||||
end
|
end
|
||||||
head_r <= data[pop ? rd_ptr_next_r : rd_ptr_r];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign data_out = bypass_r ? curr_r : head_r;
|
VX_dp_ram #(
|
||||||
|
.DATAW(DATAW),
|
||||||
|
.SIZE(SIZE),
|
||||||
|
.BUFFERED(1),
|
||||||
|
.RWCHECK(0)
|
||||||
|
) dp_ram (
|
||||||
|
.clk(clk),
|
||||||
|
.waddr(wr_ptr_r),
|
||||||
|
.raddr(rd_ptr_n_r),
|
||||||
|
.wren(push),
|
||||||
|
.rden(pop),
|
||||||
|
.din(data_in),
|
||||||
|
.dout(dout)
|
||||||
|
);
|
||||||
|
|
||||||
|
assign data_out = bypass_r ? din_r : dout;
|
||||||
assign empty = empty_r;
|
assign empty = empty_r;
|
||||||
assign full = full_r;
|
assign full = full_r;
|
||||||
assign size = size_r;
|
assign size = {full_r, used_r};
|
||||||
end
|
end
|
||||||
|
|
||||||
`endif
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -43,17 +43,19 @@ set_global_assignment -name VERILOG_MACRO FPU_FAST
|
|||||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||||
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
||||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
|
||||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
|
||||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
|
||||||
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||||
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||||
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
||||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
|
||||||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||||
set_global_assignment -name POWER_USE_TA_VALUE 65
|
set_global_assignment -name POWER_USE_TA_VALUE 65
|
||||||
set_global_assignment -name SEED 1
|
set_global_assignment -name SEED 1
|
||||||
|
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||||
|
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||||
|
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||||
|
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||||
|
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||||
|
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||||
|
|
||||||
set idx 0
|
set idx 0
|
||||||
foreach arg $q_args_orig {
|
foreach arg $q_args_orig {
|
||||||
|
|||||||
Reference in New Issue
Block a user