fixed l3cache hang using memory arbiter in afu

This commit is contained in:
Blaise Tine
2020-11-15 06:36:32 -08:00
parent 2e0f51af80
commit 5d58bf3d11
20 changed files with 514 additions and 388 deletions

View File

@@ -4,20 +4,21 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../../../hw CFLAGS += -I../../../../hw
# control RTL debug print states # control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO DBG_FLAGS += -DDBG_CACHE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
@@ -59,7 +60,7 @@ VL_FLAGS += verilator.vlt
# Debugging # Debugging
ifdef DEBUG ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs --trace-threads 1 $(DBG_FLAGS) VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else else
VL_FLAGS += -DNDEBUG VL_FLAGS += -DNDEBUG
@@ -78,7 +79,7 @@ VL_FLAGS += -DNOPAE
CFLAGS += -DNOPAE CFLAGS += -DNOPAE
# use DPI FPU # use DPI FPU
#VL_FLAGS += -DFPU_FAST VL_FLAGS += -DFPU_FAST
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
@@ -94,7 +95,7 @@ $(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json
$(PROJECT): $(SRCS) $(SCOPE_VH) $(PROJECT): $(SRCS) $(SCOPE_VH)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk make -j -C obj_dir -f V$(TOP).mk
clean: clean:
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh

View File

@@ -9,15 +9,16 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO DBG_FLAGS += -DDBG_CACHE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
@@ -70,7 +71,7 @@ all: $(PROJECT)
$(PROJECT): $(SRCS) $(PROJECT): $(SRCS)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk make -j -C obj_dir -f V$(TOP).mk
clean: clean:
rm -rf $(PROJECT) obj_dir rm -rf $(PROJECT) obj_dir

View File

@@ -14,17 +14,29 @@ union Float_t {
} parts; } parts;
}; };
/// Rounds `x` to `precision` decimal digits (4 by default).
///
/// The scale factor, the rounding, and the final division are all evaluated
/// in double precision; the quotient is narrowed to float only on return.
inline float fround(float x, int32_t precision = 4) {
  const double scale = std::pow(10, precision);
  const double rounded = std::round(x * scale);
  return static_cast<float>(rounded / scale);
}
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) { inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
auto tolerance = std::max(fabs(a), fabs(b)) * eps; auto tolerance = std::min(fabs(a), fabs(b)) * eps;
return fabs(a - b) <= tolerance; return fabs(a - b) <= tolerance;
} }
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 5) { inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
Float_t fa{a}, fb{b}; Float_t fa{a}, fb{b};
return std::abs(fa.i - fb.i) <= ulp; auto d = std::abs(fa.i - fb.i);
if (d > ulp) {
std::cout << "*** float compare: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
return false;
}
return true;
} }
inline bool almost_equal(float a, float b) { inline bool almost_equal(float a, float b) {
if (almost_equal_eps(a, b))
return true;
return almost_equal_ulp(a, b); return almost_equal_ulp(a, b);
} }
@@ -158,8 +170,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -186,8 +198,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -214,8 +226,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -242,8 +254,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -270,8 +282,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -298,8 +310,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -326,8 +338,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -354,8 +366,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -384,8 +396,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -412,8 +424,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n); a[i] = fround((n - i) * (1.0f/n));
b[i] = (n + i) * (1.0f/n); b[i] = fround((n + i) * (1.0f/n));
} }
} }
@@ -442,7 +454,7 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
int q = 1.0f + (i % 64); float q = 1.0f + (i % 64);
a[i] = q; a[i] = q;
b[i] = q; b[i] = q;
} }
@@ -471,8 +483,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = (n/2 - i) * (1.0f/n); a[i] = fround((n/2 - i) * (1.0f/n));
b[i] = (n/2 - i) * (1.0f/n); b[i] = fround((n/2 - i) * (1.0f/n));
} }
} }
@@ -500,8 +512,8 @@ public:
auto a = (float*)src1; auto a = (float*)src1;
auto b = (float*)src2; auto b = (float*)src2;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
a[i] = i * (1.0f/n); a[i] = fround(i * (1.0f/n));
b[i] = i * (1.0f/n); b[i] = fround(i * (1.0f/n));
} }
} }

128
hw/opae/VX_avs_wrapper.v Normal file
View File

@@ -0,0 +1,128 @@
`include "VX_platform.vh"

// VX_avs_wrapper
//
// Adapts a valid/ready DRAM request/response interface to an AVS
// (Avalon-MM style) master port.
//
//  * Requests are forwarded combinationally to the AVS bus.
//  * The tag of every accepted read is stored in `rd_req_queue`; read data
//    returned by the bus is stored in `rd_rsp_queue`. A response is presented
//    by pairing the heads of both queues, so this relies on the AVS slave
//    returning read data in request order.
//  * `avs_pending_reads` counts reads accepted but not yet consumed by the
//    requester. New requests are only issued while it is below
//    RD_QUEUE_SIZE, which bounds the occupancy of both queues.
//
// Parameters:
//  AVS_DATAW      - data width of the AVS bus in bits
//  AVS_ADDRW      - address width of the AVS bus in bits
//  AVS_BURSTW     - width of the burst count signal
//  AVS_BANKS      - number of memory banks (sizes avs_bankselect)
//  REQ_TAGW       - width of the request/response tag
//  RD_QUEUE_SIZE  - maximum number of outstanding reads
//  AVS_BYTEENW, RD_QUEUE_ADDRW, AVS_BANKS_BITS - derived; not meant to be
//                   overridden by the instantiating module
module VX_avs_wrapper #(
    parameter AVS_DATAW      = 1,
    parameter AVS_ADDRW      = 1,
    parameter AVS_BURSTW     = 1,
    parameter AVS_BANKS      = 1,
    parameter REQ_TAGW       = 1,
    parameter RD_QUEUE_SIZE  = 1,
    parameter AVS_BYTEENW    = (AVS_DATAW / 8),
    parameter RD_QUEUE_ADDRW = $clog2(RD_QUEUE_SIZE+1),
    parameter AVS_BANKS_BITS = $clog2(AVS_BANKS)
) (
    input wire                          clk,
    input wire                          reset,

    // AVS bus
    output wire [AVS_DATAW-1:0]         avs_writedata,
    input wire [AVS_DATAW-1:0]          avs_readdata,
    output wire [AVS_ADDRW-1:0]         avs_address,
    input wire                          avs_waitrequest,
    output wire                         avs_write,
    output wire                         avs_read,
    output wire [AVS_BYTEENW-1:0]       avs_byteenable,
    output wire [AVS_BURSTW-1:0]        avs_burstcount,
    input wire                          avs_readdatavalid,
    output wire [AVS_BANKS_BITS-1:0]    avs_bankselect,

    // DRAM request
    input wire                          dram_req_valid,
    input wire                          dram_req_rw,
    input wire [AVS_BYTEENW-1:0]        dram_req_byteen,
    input wire [AVS_ADDRW-1:0]          dram_req_addr,
    input wire [AVS_DATAW-1:0]          dram_req_data,
    input wire [REQ_TAGW-1:0]           dram_req_tag,
    output wire                         dram_req_ready,

    // DRAM response
    output wire                         dram_rsp_valid,
    output wire [AVS_DATAW-1:0]         dram_rsp_data,
    output wire [REQ_TAGW-1:0]          dram_rsp_tag,
    input wire                          dram_rsp_ready
);
    // Only written during reset: bank 0 and single-beat bursts.
    reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
    reg [AVS_BURSTW-1:0]     avs_burstcount_r;

    // A read tag is recorded when a read request is accepted; the tag and
    // its data are released together when the requester takes a response.
    wire avs_rtq_push = !dram_req_rw && dram_req_valid && dram_req_ready;
    wire avs_rtq_pop  = dram_rsp_valid && dram_rsp_ready;

    wire avs_rdq_push = avs_readdatavalid;
    wire avs_rdq_pop  = avs_rtq_pop;
    wire avs_rdq_empty;

    reg  [RD_QUEUE_ADDRW-1:0] avs_pending_reads;
    wire [RD_QUEUE_ADDRW-1:0] avs_pending_reads_n;

    // True while another read can be tracked without overrunning the queues.
    wire avs_req_enable = (avs_pending_reads < RD_QUEUE_SIZE);

    // +1 on an accepted read, -1 on a consumed response, unchanged when both
    // or neither happen in the same cycle (-1 truncates to all ones).
    assign avs_pending_reads_n = avs_pending_reads
                               + RD_QUEUE_ADDRW'((avs_rtq_push && !avs_rdq_pop) ? 1 :
                                                 (avs_rdq_pop && !avs_rtq_push) ? -1 : 0);

    always @(posedge clk) begin
        if (reset) begin
            avs_burstcount_r  <= 1;
            avs_bankselect_r  <= 0;
            avs_pending_reads <= 0;
        end else begin
            avs_pending_reads <= avs_pending_reads_n;
        end
    end

    // Tags of in-flight reads, in issue order.
    VX_generic_queue #(
        .DATAW (REQ_TAGW),
        .SIZE  (RD_QUEUE_SIZE)
    ) rd_req_queue (
        .clk      (clk),
        .reset    (reset),
        .push     (avs_rtq_push),
        .data_in  (dram_req_tag),
        .pop      (avs_rtq_pop),
        .data_out (dram_rsp_tag),
        `UNUSED_PIN (empty),
        `UNUSED_PIN (full),
        `UNUSED_PIN (size)
    );

    // Read data returned by the AVS bus, waiting for the requester.
    VX_generic_queue #(
        .DATAW (AVS_DATAW),
        .SIZE  (RD_QUEUE_SIZE)
    ) rd_rsp_queue (
        .clk      (clk),
        .reset    (reset),
        .push     (avs_rdq_push),
        .data_in  (avs_readdata),
        .pop      (avs_rdq_pop),
        .data_out (dram_rsp_data),
        .empty    (avs_rdq_empty),
        `UNUSED_PIN (full),
        `UNUSED_PIN (size)
    );

    // The bus strobes must be gated by the same condition as dram_req_ready.
    // Otherwise, with the read window full and avs_waitrequest low, the slave
    // would accept a command that the requester still sees as not accepted:
    // the command would be issued again and, for reads, data would come back
    // without a recorded tag.
    assign avs_read       = dram_req_valid && !dram_req_rw && avs_req_enable;
    assign avs_write      = dram_req_valid && dram_req_rw && avs_req_enable;
    assign avs_address    = dram_req_addr;
    assign avs_byteenable = dram_req_byteen;
    assign avs_writedata  = dram_req_data;
    assign dram_req_ready = !avs_waitrequest && avs_req_enable;
    assign avs_burstcount = avs_burstcount_r;
    assign avs_bankselect = avs_bankselect_r;

    assign dram_rsp_valid = !avs_rdq_empty;

`ifdef DBG_PRINT_AVS
    always @(posedge clk) begin
        if (dram_req_valid && dram_req_ready) begin
            if (dram_req_rw)
                $display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_writedata);
            else
                $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_pending_reads_n);
        end
        if (dram_rsp_valid && dram_rsp_ready) begin
            // Report the data handed to the requester (queue head), not the
            // current value on the AVS read data bus.
            $display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, dram_rsp_data, avs_pending_reads_n);
        end
    end
`endif

endmodule

View File

@@ -10,12 +10,14 @@
#+define+DBG_PRINT_CACHE_BANK #+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP #+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ #+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_CACHE_TAG
#+define+DBG_PRINT_CACHE_DATA #+define+DBG_PRINT_CACHE_DATA
#+define+DBG_PRINT_DRAM #+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_PIPELINE #+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE #+define+DBG_PRINT_OPAE
#+define+DBG_CORE_REQ_INFO #+define+DBG_PRINT_AVS
#+define+DBG_PRINT_SCOPE #+define+DBG_PRINT_SCOPE
#+define+DBG_CACHE_REQ_INFO
vortex_afu.json vortex_afu.json
QI:vortex_afu.qsf QI:vortex_afu.qsf

View File

@@ -39,11 +39,13 @@ module vortex_afu #(
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr); localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data); localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH); localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH);
localparam VX_DRAM_LINE_LW = $clog2(`VX_DRAM_LINE_WIDTH); localparam VX_DRAM_LINE_LW = $clog2(`VX_DRAM_LINE_WIDTH);
localparam VX_DRAM_LINE_IDX = (DRAM_LINE_LW - VX_DRAM_LINE_LW);
localparam AVS_RD_QUEUE_SIZE = 16; localparam AVS_RD_QUEUE_SIZE = 16;
localparam AVS_REQ_TAGW = `VX_DRAM_TAG_WIDTH + VX_DRAM_LINE_IDX;
localparam CCI_RD_WINDOW_SIZE = 8; localparam CCI_RD_WINDOW_SIZE = 8;
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE; localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
@@ -134,28 +136,12 @@ wire [31:0] vx_csr_io_rsp_data;
wire vx_csr_io_rsp_ready; wire vx_csr_io_rsp_ready;
reg vx_reset; reg vx_reset;
reg vx_enabled;
wire vx_busy; wire vx_busy;
// AVS Queues /////////////////////////////////////////////////////////////////
wire avs_rtq_push;
wire avs_rtq_pop;
`DEBUG_BEGIN
wire avs_rtq_empty;
wire avs_rtq_full;
`DEBUG_BEGIN
wire avs_rdq_push;
wire avs_rdq_pop;
t_local_mem_data avs_rdq_dout;
wire avs_rdq_empty;
`DEBUG_BEGIN
wire avs_rdq_full;
`DEBUG_END
// CMD variables ////////////////////////////////////////////////////////////// // CMD variables //////////////////////////////////////////////////////////////
t_ccip_clAddr cmd_io_addr; t_ccip_clAddr cmd_io_addr;
reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size; reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size;
@@ -167,9 +153,9 @@ wire cmd_scope_write;
`endif `endif
reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core;
reg [11:0] cmd_csr_addr; reg [11:0] cmd_csr_addr;
reg [31:0] cmd_csr_rdata; reg [31:0] cmd_csr_rdata;
reg [31:0] cmd_csr_wdata; reg [31:0] cmd_csr_wdata;
// MMIO controller //////////////////////////////////////////////////////////// // MMIO controller ////////////////////////////////////////////////////////////
@@ -189,6 +175,7 @@ assign cmd_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_SCOPE_READ == mmi
assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address); assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address);
`endif `endif
/*
`DEBUG_BEGIN `DEBUG_BEGIN
wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid; wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid;
wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid; wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid;
@@ -201,6 +188,7 @@ wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length;
wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid; wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid;
wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_sRxPort.c0.hdr.mdata; wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_sRxPort.c0.hdr.mdata;
`DEBUG_END `DEBUG_END
*/
wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 3'(cp2af_sRxPort.c0.data) : 3'h0; wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 3'(cp2af_sRxPort.c0.data) : 3'h0;
@@ -220,13 +208,8 @@ always @(posedge clk) begin
`ifndef VERILATOR `ifndef VERILATOR
$asserton; // enable assertions $asserton; // enable assertions
`endif `endif
mmio_tx.hdr <= 0; mmio_tx.hdr <= 0;
mmio_tx.data <= 0;
mmio_tx.mmioRdValid <= 0; mmio_tx.mmioRdValid <= 0;
cmd_io_addr <= 0;
cmd_mem_addr <= 0;
cmd_data_size <= 0;
`ifdef SCOPE `ifdef SCOPE
scope_start <= 0; scope_start <= 0;
`endif `endif
@@ -359,10 +342,10 @@ wire cmd_run_done;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
state <= STATE_IDLE; state <= STATE_IDLE;
vx_reset <= 0; vx_reset <= 0;
end vx_enabled <= 0;
else begin end else begin
vx_reset <= 0; vx_reset <= 0;
@@ -385,7 +368,8 @@ always @(posedge clk) begin
`ifdef DBG_PRINT_OPAE `ifdef DBG_PRINT_OPAE
$display("%t: STATE START", $time); $display("%t: STATE START", $time);
`endif `endif
vx_reset <= 1; vx_reset <= 1;
vx_enabled <= 1;
state <= STATE_START; state <= STATE_START;
end end
CMD_CLFLUSH: begin CMD_CLFLUSH: begin
@@ -480,215 +464,186 @@ end
// AVS Controller ///////////////////////////////////////////////////////////// // AVS Controller /////////////////////////////////////////////////////////////
wire vortex_enabled; wire dram_req_valid;
wire cci_rdq_empty; wire dram_req_rw;
t_local_mem_byte_mask dram_req_byteen;
t_local_mem_addr dram_req_addr;
t_local_mem_data dram_req_data;
wire [AVS_REQ_TAGW:0] dram_req_tag;
wire dram_req_ready;
wire dram_rsp_valid;
t_local_mem_data dram_rsp_data;
wire [AVS_REQ_TAGW:0] dram_rsp_tag;
wire dram_rsp_ready;
wire cci_dram_req_valid;
wire cci_dram_req_rw;
t_local_mem_byte_mask cci_dram_req_byteen;
t_local_mem_addr cci_dram_req_addr;
t_local_mem_data cci_dram_req_data;
wire [AVS_REQ_TAGW-1:0] cci_dram_req_tag;
wire cci_dram_req_ready;
wire cci_dram_rsp_valid;
t_local_mem_data cci_dram_rsp_data;
wire [AVS_REQ_TAGW-1:0] cci_dram_rsp_tag;
wire cci_dram_rsp_ready;
wire vx_dram_req_valid_qual;
t_local_mem_addr vx_dram_req_addr_qual;
t_local_mem_byte_mask vx_dram_req_byteen_qual;
t_local_mem_data vx_dram_req_data_qual;
wire [AVS_REQ_TAGW-1:0] vx_dram_req_tag_qual;
wire [(1 << VX_DRAM_LINE_IDX)-1:0][`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data_unqual;
wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual;
wire cci_dram_rd_req_valid, cci_dram_wr_req_valid;
wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout; wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx, vx_dram_rsp_idx;
wire cci_dram_rd_req_fire; //--
wire cci_dram_wr_req_fire;
wire vx_dram_rd_req_fire;
`DEBUG_BEGIN
wire vx_dram_wr_req_fire;
`DEBUG_END
wire vx_dram_rd_rsp_fire;
t_local_mem_byte_mask vx_dram_req_byteen_; assign cci_dram_req_valid = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid;
reg [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads;
wire [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads_next;
wire [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
wire cci_dram_rd_req_enable, cci_dram_wr_req_enable; assign cci_dram_req_addr = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_addr : cci_dram_rd_req_addr;
wire vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr; assign cci_dram_req_rw = (CMD_MEM_WRITE == state);
assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state); assign cci_dram_req_byteen = {64{1'b1}};
assign cci_dram_rd_req_enable = (state == STATE_READ) assign cci_dram_req_data = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW];
&& (avs_pending_reads < AVS_RD_QUEUE_SIZE)
&& (cci_dram_rd_req_ctr != 0);
assign cci_dram_wr_req_enable = (state == STATE_WRITE) assign cci_dram_req_tag = AVS_REQ_TAGW'(0);
&& !cci_rdq_empty
&& (cci_dram_wr_req_ctr < cmd_data_size);
assign vx_dram_req_enable = vortex_enabled && (avs_pending_reads < AVS_RD_QUEUE_SIZE); `UNUSED_VAR (cci_dram_rsp_tag)
assign vx_dram_rd_req_enable = vx_dram_req_enable && vx_dram_req_valid && !vx_dram_req_rw;
assign vx_dram_wr_req_enable = vx_dram_req_enable && vx_dram_req_valid && vx_dram_req_rw;
assign cci_dram_rd_req_fire = cci_dram_rd_req_enable && !avs_waitrequest; //--
assign cci_dram_wr_req_fire = cci_dram_wr_req_enable && !avs_waitrequest;
assign vx_dram_rd_req_fire = vx_dram_rd_req_enable && !avs_waitrequest; assign vx_dram_req_valid_qual = vx_dram_req_valid && vx_enabled;
assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && !avs_waitrequest;
assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready; assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
assign avs_pending_reads_next = avs_pending_reads
+ $bits(avs_pending_reads)'(((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 :
(~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0);
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign vx_dram_req_offset = ((DRAM_LINE_LW)'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0])) << VX_DRAM_LINE_LW; assign vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0];
assign vx_dram_req_byteen_ = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]) << (VX_DRAM_LINE_LW - 3)); assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3));
assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW);
assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx};
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx];
end else begin end else begin
assign vx_dram_req_offset = 0; assign vx_dram_req_idx = VX_DRAM_LINE_IDX'(0);
assign vx_dram_req_byteen_ = vx_dram_req_byteen; assign vx_dram_req_byteen_qual = vx_dram_req_byteen;
assign vx_dram_req_tag_qual = vx_dram_req_tag;
assign vx_dram_req_data_qual = vx_dram_req_data;
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual;
end end
always @(*) begin assign vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0];
case (state) assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX];
CMD_MEM_READ: avs_address = cci_dram_rd_req_addr;
CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
endcase
case (state) //--
CMD_MEM_READ: avs_byteenable = 64'hffffffffffffffff;
CMD_MEM_WRITE: avs_byteenable = 64'hffffffffffffffff;
default: avs_byteenable = vx_dram_req_byteen_;
endcase
case (state) VX_mem_arb #(
CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; .NUM_REQUESTS (2),
default: avs_writedata = DRAM_LINE_WIDTH'(vx_dram_req_data) << vx_dram_req_offset; .DATA_WIDTH ($bits(t_local_mem_data)),
endcase .ADDR_WIDTH ($bits(t_local_mem_addr)),
end .TAG_IN_WIDTH (AVS_REQ_TAGW),
.TAG_OUT_WIDTH (AVS_REQ_TAGW+1)
) vx_cci_avs_arb (
.clk (clk),
.reset (reset),
assign avs_read = cci_dram_rd_req_enable || vx_dram_rd_req_enable; // Source request
assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable; .req_valid_in ({cci_dram_req_valid, vx_dram_req_valid_qual}),
.req_rw_in ({cci_dram_req_rw, vx_dram_req_rw}),
.req_byteen_in ({cci_dram_req_byteen, vx_dram_req_byteen_qual}),
.req_addr_in ({cci_dram_req_addr, vx_dram_req_addr_qual}),
.req_data_in ({cci_dram_req_data, vx_dram_req_data_qual}),
.req_tag_in ({cci_dram_req_tag, vx_dram_req_tag_qual}),
.req_ready_in ({cci_dram_req_ready, vx_dram_req_ready}),
assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size); // Source response
.rsp_valid_out ({cci_dram_rsp_valid, vx_dram_rsp_valid}),
.rsp_data_out ({cci_dram_rsp_data, vx_dram_rsp_data_unqual}),
.rsp_tag_out ({cci_dram_rsp_tag, vx_dram_rsp_tag_unqual}),
.rsp_ready_out ({cci_dram_rsp_ready, vx_dram_rsp_ready}),
always @(posedge clk) begin // DRAM request
if (reset) .req_valid_out (dram_req_valid),
begin .req_rw_out (dram_req_rw),
mem_bank_select <= 0; .req_byteen_out (dram_req_byteen),
avs_burstcount <= 1; .req_addr_out (dram_req_addr),
cci_dram_rd_req_addr <= 0; .req_data_out (dram_req_data),
cci_dram_wr_req_addr <= 0; .req_tag_out (dram_req_tag),
cci_dram_rd_req_ctr <= 0; .req_ready_out (dram_req_ready),
cci_dram_wr_req_ctr <= 0;
avs_pending_reads <= 0; // DRAM response
end .rsp_valid_in (dram_rsp_valid),
else begin .rsp_tag_in (dram_rsp_tag),
.rsp_data_in (dram_rsp_data),
if (state == STATE_IDLE) begin .rsp_ready_in (dram_rsp_ready)
if (CMD_MEM_READ == cmd_type) begin
cci_dram_rd_req_addr <= cmd_mem_addr;
cci_dram_rd_req_ctr <= cmd_data_size;
end
else if (CMD_MEM_WRITE == cmd_type) begin
cci_dram_wr_req_addr <= cmd_mem_addr;
cci_dram_wr_req_ctr <= 0;
end
end
if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr <= cci_dram_rd_req_addr + DRAM_ADDR_WIDTH'(1);
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (cci_dram_rd_req_ctr - 1), avs_pending_reads_next);
`endif
end
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1));
`endif
end
`ifdef DBG_PRINT_OPAE
if (vx_dram_rd_req_fire) begin
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, vx_dram_req_tag, avs_pending_reads_next);
end
if (vx_dram_wr_req_fire) begin
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, vx_dram_req_tag, avs_writedata);
end
if (avs_readdatavalid) begin
$display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, avs_readdata, avs_pending_reads_next);
end
`endif
avs_pending_reads <= avs_pending_reads_next;
end
end
// Vortex DRAM requests
assign vx_dram_req_ready = vx_dram_req_enable && !avs_waitrequest;
// Vortex DRAM fill response
assign vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty;
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign vx_dram_rsp_data = (`VX_DRAM_LINE_WIDTH)'(avs_rdq_dout >> vx_dram_rsp_offset);
end else begin
assign vx_dram_rsp_data = avs_rdq_dout;
end
// AVS address read request queue /////////////////////////////////////////////
assign avs_rtq_push = vx_dram_rd_req_fire;
assign avs_rtq_pop = vx_dram_rd_rsp_fire;
VX_generic_queue #(
.DATAW (`VX_DRAM_TAG_WIDTH + DRAM_LINE_LW),
.SIZE (AVS_RD_QUEUE_SIZE)
) avs_rd_req_queue (
.clk (clk),
.reset (reset),
.push (avs_rtq_push),
.data_in ({vx_dram_req_tag, vx_dram_req_offset}),
.pop (avs_rtq_pop),
.data_out ({vx_dram_rsp_tag, vx_dram_rsp_offset}),
.empty (avs_rtq_empty),
.full (avs_rtq_full),
`UNUSED_PIN (size)
); );
// AVS data read response queue /////////////////////////////////////////////// //--
wire cci_wr_req_fire; VX_avs_wrapper #(
.AVS_DATAW ($bits(t_local_mem_data)),
.AVS_ADDRW ($bits(t_local_mem_addr)),
.AVS_BURSTW ($bits(t_local_mem_burst_cnt)),
.AVS_BANKS (NUM_LOCAL_MEM_BANKS),
.REQ_TAGW (AVS_REQ_TAGW+1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE)
) avs_wrapper (
.clk (clk),
.reset (reset),
assign avs_rdq_push = avs_readdatavalid; // AVS bus
assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire; .avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
.avs_address (avs_address),
.avs_waitrequest (avs_waitrequest),
.avs_write (avs_write),
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid (avs_readdatavalid),
.avs_bankselect (mem_bank_select),
VX_generic_queue #( // DRAM request
.DATAW (DRAM_LINE_WIDTH), .dram_req_valid (dram_req_valid),
.SIZE (AVS_RD_QUEUE_SIZE) .dram_req_rw (dram_req_rw),
) avs_rd_rsp_queue ( .dram_req_byteen (dram_req_byteen),
.clk (clk), .dram_req_addr (dram_req_addr),
.reset (reset), .dram_req_data (dram_req_data),
.push (avs_rdq_push), .dram_req_tag (dram_req_tag),
.data_in (avs_readdata), .dram_req_ready (dram_req_ready),
.pop (avs_rdq_pop),
.data_out (avs_rdq_dout), // DRAM response
.empty (avs_rdq_empty), .dram_rsp_valid (dram_rsp_valid),
.full (avs_rdq_full), .dram_rsp_data (dram_rsp_data),
`UNUSED_PIN (size) .dram_rsp_tag (dram_rsp_tag),
.dram_rsp_ready (dram_rsp_ready)
); );
// CCI-P Read Request /////////////////////////////////////////////////////////// // CCI-P Read Request ///////////////////////////////////////////////////////////
reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads; reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;
wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next; wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr; reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr;
wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next; wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_addr_unqual;
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag; wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag;
reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr; reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;
t_ccip_clAddr cci_rd_req_addr; t_ccip_clAddr cci_rd_req_addr;
wire cci_rd_req_fire, cci_rd_rsp_fire;
reg cci_rd_req_enable, cci_rd_req_wait; reg cci_rd_req_enable, cci_rd_req_wait;
wire cci_rdq_push, cci_rdq_pop; wire cci_rdq_push, cci_rdq_pop;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din; wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din;
wire cci_rdq_empty;
always @(*) begin always @(*) begin
af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0); af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
@@ -696,8 +651,10 @@ always @(*) begin
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag); af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag);
end end
assign cci_rd_req_fire = af2cp_sTxPort.c0.valid; wire cci_dram_wr_req_fire = cci_dram_wr_req_valid && cci_dram_req_ready;
assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
wire cci_rd_req_fire = af2cp_sTxPort.c0.valid;
wire cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr); assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr);
assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata); assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
@@ -712,28 +669,36 @@ assign cci_pending_reads_next = cci_pending_reads
+ $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 : + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 :
(!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); (!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0);
assign cci_dram_wr_req_valid = !cci_rdq_empty;
assign cci_dram_wr_req_addr = cci_dram_wr_req_addr_unqual + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait; assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait;
assign cmd_write_done = (cci_dram_wr_req_ctr == cmd_data_size);
// Send read requests to CCI // Send read requests to CCI
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
cci_rd_req_addr <= 0; cci_rd_req_addr <= 0;
cci_rd_req_ctr <= 0; cci_rd_req_ctr <= 0;
cci_rd_rsp_ctr <= 0; cci_rd_rsp_ctr <= 0;
cci_pending_reads <= 0; cci_pending_reads <= 0;
cci_rd_req_enable <= 0; cci_rd_req_enable <= 0;
cci_rd_req_wait <= 0; cci_rd_req_wait <= 0;
cci_dram_wr_req_ctr <= 0;
end end
else begin else begin
if ((STATE_IDLE == state) if ((STATE_IDLE == state)
&& (CMD_MEM_WRITE == cmd_type)) begin && (CMD_MEM_WRITE == cmd_type)) begin
cci_rd_req_addr <= cmd_io_addr; cci_rd_req_addr <= cmd_io_addr;
cci_rd_req_ctr <= 0; cci_rd_req_ctr <= 0;
cci_rd_rsp_ctr <= 0; cci_rd_rsp_ctr <= 0;
cci_pending_reads <= 0; cci_pending_reads <= 0;
cci_rd_req_enable <= (cmd_data_size != 0); cci_rd_req_enable <= (cmd_data_size != 0);
cci_rd_req_wait <= 0; cci_rd_req_wait <= 0;
cci_dram_wr_req_ctr <= 0;
cci_dram_wr_req_addr_unqual <= cmd_mem_addr;
end end
cci_rd_req_enable <= (STATE_WRITE == state) cci_rd_req_enable <= (STATE_WRITE == state)
@@ -768,6 +733,11 @@ always @(posedge clk) begin
`endif `endif
end end
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
end
cci_pending_reads <= cci_pending_reads_next; cci_pending_reads <= cci_pending_reads_next;
end end
end end
@@ -811,57 +781,61 @@ VX_generic_queue #(
reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes; reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes;
wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next; wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_unqual;
t_ccip_clAddr cci_wr_req_addr; t_ccip_clAddr cci_wr_req_addr;
reg cci_wr_req_enable;
wire cci_wr_rsp_fire;
always @(*) begin always @(*) begin
af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0); af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0);
af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr; af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode
af2cp_sTxPort.c1.data = t_ccip_clData'(avs_rdq_dout); af2cp_sTxPort.c1.data = t_ccip_clData'(cci_dram_rsp_data);
end end
assign cci_wr_req_fire = af2cp_sTxPort.c1.valid; wire cci_wr_req_fire = af2cp_sTxPort.c1.valid;
assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; wire cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid;
wire cci_dram_rd_req_fire = cci_dram_rd_req_valid && cci_dram_req_ready;
assign cci_pending_writes_next = cci_pending_writes assign cci_pending_writes_next = cci_pending_writes
+ $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 : + $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 :
(!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0); (!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0);
assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); assign cci_dram_rd_req_valid = (cci_dram_rd_req_ctr != 0);
assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty; assign cci_dram_rd_req_addr = cci_dram_rd_req_addr_unqual;
assign af2cp_sTxPort.c1.valid = cci_dram_rsp_valid;
assign cci_dram_rsp_ready = !cp2af_sRxPort.c1TxAlmFull;
assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes);
// Send write requests to CCI // Send write requests to CCI
always @(posedge clk) always @(posedge clk)
begin begin
if (reset) begin if (reset) begin
cci_wr_req_addr <= 0; cci_wr_req_addr <= 0;
cci_wr_req_ctr <= 0; cci_wr_req_ctr <= 0;
cci_wr_req_enable <= 0; cci_pending_writes <= 0;
cci_pending_writes <= 0; cci_dram_rd_req_ctr <= 0;
end end
else begin else begin
if ((STATE_IDLE == state) if ((STATE_IDLE == state)
&& (CMD_MEM_READ == cmd_type)) begin && (CMD_MEM_READ == cmd_type)) begin
cci_wr_req_addr <= cmd_io_addr; cci_wr_req_addr <= cmd_io_addr;
cci_wr_req_ctr <= cmd_data_size; cci_wr_req_ctr <= cmd_data_size;
cci_pending_writes <= 0; cci_pending_writes <= 0;
end cci_dram_rd_req_ctr <= cmd_data_size;
cci_dram_rd_req_addr_unqual <= cmd_mem_addr;
cci_wr_req_enable <= (STATE_READ == state) end
&& (cci_pending_writes_next < CCI_RW_QUEUE_SIZE)
&& !cp2af_sRxPort.c1TxAlmFull;
if (cci_wr_req_fire) begin if (cci_wr_req_fire) begin
assert(cci_wr_req_ctr != 0); assert(cci_wr_req_ctr != 0);
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE `ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, avs_rdq_dout); $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next);
`endif `endif
end end
@@ -871,6 +845,11 @@ begin
end end
`endif `endif
if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1);
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1);
end
cci_pending_writes <= cci_pending_writes_next; cci_pending_writes <= cci_pending_writes_next;
end end
end end

View File

@@ -553,42 +553,42 @@ module VX_cluster #(
VX_mem_arb #( VX_mem_arb #(
.NUM_REQUESTS (`L2NUM_REQUESTS), .NUM_REQUESTS (`L2NUM_REQUESTS),
.WORD_SIZE (`L2BANK_LINE_SIZE), .DATA_WIDTH (`L2DRAM_LINE_WIDTH),
.TAG_IN_WIDTH (`DDRAM_TAG_WIDTH), .TAG_IN_WIDTH (`DDRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH) .TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH)
) dram_arb ( ) dram_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
// Core request // Core request
.mem_req_valid_in (core_dram_req_valid), .req_valid_in (core_dram_req_valid),
.mem_req_rw_in (core_dram_req_rw), .req_rw_in (core_dram_req_rw),
.mem_req_byteen_in (core_dram_req_byteen), .req_byteen_in (core_dram_req_byteen),
.mem_req_addr_in (core_dram_req_addr), .req_addr_in (core_dram_req_addr),
.mem_req_data_in (core_dram_req_data), .req_data_in (core_dram_req_data),
.mem_req_tag_in (core_dram_req_tag), .req_tag_in (core_dram_req_tag),
.mem_req_ready_in (core_dram_req_ready), .req_ready_in (core_dram_req_ready),
// Core response // Core response
.mem_rsp_valid_in (core_dram_rsp_valid), .rsp_valid_out (core_dram_rsp_valid),
.mem_rsp_data_in (core_dram_rsp_data), .rsp_data_out (core_dram_rsp_data),
.mem_rsp_tag_in (core_dram_rsp_tag), .rsp_tag_out (core_dram_rsp_tag),
.mem_rsp_ready_in (core_dram_rsp_ready), .rsp_ready_out (core_dram_rsp_ready),
// DRAM request // DRAM request
.mem_req_valid_out (dram_req_valid), .req_valid_out (dram_req_valid),
.mem_req_rw_out (dram_req_rw), .req_rw_out (dram_req_rw),
.mem_req_byteen_out (dram_req_byteen), .req_byteen_out (dram_req_byteen),
.mem_req_addr_out (dram_req_addr), .req_addr_out (dram_req_addr),
.mem_req_data_out (dram_req_data), .req_data_out (dram_req_data),
.mem_req_tag_out (dram_req_tag), .req_tag_out (dram_req_tag),
.mem_req_ready_out (dram_req_ready), .req_ready_out (dram_req_ready),
// DRAM response // DRAM response
.mem_rsp_valid_out (dram_rsp_valid), .rsp_valid_in (dram_rsp_valid),
.mem_rsp_tag_out (dram_rsp_tag), .rsp_tag_in (dram_rsp_tag),
.mem_rsp_data_out (dram_rsp_data), .rsp_data_in (dram_rsp_data),
.mem_rsp_ready_out (dram_rsp_ready) .rsp_ready_in (dram_rsp_ready)
); );
end end

View File

@@ -234,10 +234,10 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`ifdef DBG_CORE_REQ_INFO // pc, rd, wid `ifdef DBG_CACHE_REQ_INFO // pc, rd, wid
`define DBG_CORE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS) `define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
`else `else
`define DBG_CORE_REQ_MDATAW 0 `define DBG_CACHE_REQ_MDATAW 0
`endif `endif
////////////////////////// Dcache Configurable Knobs ////////////////////////// ////////////////////////// Dcache Configurable Knobs //////////////////////////
@@ -249,7 +249,7 @@
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE) `define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
// Core request tag bits // Core request tag bits
`define DCORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `DCORE_TAG_ID_BITS) `define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
// DRAM request data bits // DRAM request data bits
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8) `define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
@@ -287,7 +287,7 @@
`define ICORE_TAG_ID_BITS `NW_BITS `define ICORE_TAG_ID_BITS `NW_BITS
// Core request tag bits // Core request tag bits
`define ICORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `ICORE_TAG_ID_BITS) `define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
// DRAM request data bits // DRAM request data bits
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8) `define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)

View File

@@ -39,10 +39,6 @@ module VX_gpr_stage #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
rsp_valid <= 0; rsp_valid <= 0;
rsp_wid <= 0;
rsp_pc <= 0;
rs1_is_zero <= 0;
rs2_is_zero <= 0;
end else begin end else begin
rsp_valid <= gpr_req_if.valid; rsp_valid <= gpr_req_if.valid;
rsp_wid <= gpr_req_if.wid; rsp_wid <= gpr_req_if.wid;

View File

@@ -45,7 +45,7 @@ module VX_icache_stage #(
// Can accept new request? // Can accept new request?
assign ifetch_req_if.ready = icache_req_if.ready; assign ifetch_req_if.ready = icache_req_if.ready;
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
assign icache_req_if.tag = {ifetch_req_if.PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag}; assign icache_req_if.tag = {ifetch_req_if.PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag};
`else `else
assign icache_req_if.tag = req_tag; assign icache_req_if.tag = req_tag;

View File

@@ -144,7 +144,7 @@ module VX_lsu_unit #(
assign dcache_req_if.addr = req_addr; assign dcache_req_if.addr = req_addr;
assign dcache_req_if.data = req_data; assign dcache_req_if.data = req_data;
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
assign dcache_req_if.tag = {req_pc, req_rd, req_wid, req_tag}; assign dcache_req_if.tag = {req_pc, req_rd, req_wid, req_tag};
`else `else
assign dcache_req_if.tag = req_tag; assign dcache_req_if.tag = req_tag;

View File

@@ -2,46 +2,46 @@
module VX_mem_arb #( module VX_mem_arb #(
parameter NUM_REQUESTS = 1, parameter NUM_REQUESTS = 1,
parameter WORD_SIZE = 1, parameter DATA_WIDTH = 1,
parameter TAG_IN_WIDTH = 1, parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1, parameter TAG_OUT_WIDTH = 1,
parameter WORD_WIDTH = WORD_SIZE * 8, parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
parameter REQS_BITS = `CLOG2(NUM_REQUESTS) parameter REQS_BITS = `CLOG2(NUM_REQUESTS)
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// input requests // input requests
input wire [NUM_REQUESTS-1:0] mem_req_valid_in, input wire [NUM_REQUESTS-1:0] req_valid_in,
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in, input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] req_tag_in,
input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in, input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] req_addr_in,
input wire [NUM_REQUESTS-1:0] mem_req_rw_in, input wire [NUM_REQUESTS-1:0] req_rw_in,
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] mem_req_byteen_in, input wire [NUM_REQUESTS-1:0][DATA_SIZE-1:0] req_byteen_in,
input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in, input wire [NUM_REQUESTS-1:0][DATA_WIDTH-1:0] req_data_in,
output wire [NUM_REQUESTS-1:0] mem_req_ready_in, output wire [NUM_REQUESTS-1:0] req_ready_in,
// input response // input response
output wire [NUM_REQUESTS-1:0] mem_rsp_valid_in, output wire [NUM_REQUESTS-1:0] rsp_valid_out,
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_rsp_tag_in, output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in, output wire [NUM_REQUESTS-1:0][DATA_WIDTH-1:0] rsp_data_out,
input wire [NUM_REQUESTS-1:0] mem_rsp_ready_in, input wire [NUM_REQUESTS-1:0] rsp_ready_out,
// output request // output request
output wire mem_req_valid_out, output wire req_valid_out,
output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out, output wire [TAG_OUT_WIDTH-1:0] req_tag_out,
output wire [ADDR_WIDTH-1:0] mem_req_addr_out, output wire [ADDR_WIDTH-1:0] req_addr_out,
output wire mem_req_rw_out, output wire req_rw_out,
output wire [WORD_SIZE-1:0] mem_req_byteen_out, output wire [DATA_SIZE-1:0] req_byteen_out,
output wire [WORD_WIDTH-1:0] mem_req_data_out, output wire [DATA_WIDTH-1:0] req_data_out,
input wire mem_req_ready_out, input wire req_ready_out,
// output response // output response
input wire mem_rsp_valid_out, input wire rsp_valid_in,
input wire [TAG_OUT_WIDTH-1:0] mem_rsp_tag_out, input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
input wire [WORD_WIDTH-1:0] mem_rsp_data_out, input wire [DATA_WIDTH-1:0] rsp_data_in,
output wire mem_rsp_ready_out output wire rsp_ready_in
); );
if (NUM_REQUESTS > 1) begin if (NUM_REQUESTS > 1) begin
@@ -53,59 +53,59 @@ module VX_mem_arb #(
) req_arb ( ) req_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.requests (mem_req_valid_in), .requests (req_valid_in),
`UNUSED_PIN (grant_valid), `UNUSED_PIN (grant_valid),
.grant_index (req_idx), .grant_index (req_idx),
.grant_onehot (req_1hot) .grant_onehot (req_1hot)
); );
wire stall = ~mem_req_ready_out && mem_req_valid_out; wire stall = ~req_ready_out && req_valid_out;
VX_generic_register #( VX_generic_register #(
.N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + WORD_SIZE + WORD_WIDTH), .N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH),
.PASSTHRU(NUM_REQUESTS <= 2) .PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (1'b0), .flush (1'b0),
.in ({mem_req_valid_in[req_idx], {mem_req_tag_in[req_idx], REQS_BITS'(req_idx)}, mem_req_addr_in[req_idx], mem_req_rw_in[req_idx], mem_req_byteen_in[req_idx], mem_req_data_in[req_idx]}), .in ({req_valid_in[req_idx], {req_tag_in[req_idx], REQS_BITS'(req_idx)}, req_addr_in[req_idx], req_rw_in[req_idx], req_byteen_in[req_idx], req_data_in[req_idx]}),
.out ({mem_req_valid_out, mem_req_tag_out, mem_req_addr_out, mem_req_rw_out, mem_req_byteen_out, mem_req_data_out}) .out ({req_valid_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out})
); );
for (genvar i = 0; i < NUM_REQUESTS; i++) begin for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign mem_req_ready_in[i] = req_1hot[i] && ~stall; assign req_ready_in[i] = req_1hot[i] && ~stall;
end end
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0]; wire [REQS_BITS-1:0] rsp_sel = rsp_tag_in[REQS_BITS-1:0];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (rsp_sel == REQS_BITS'(i)); assign rsp_valid_out[i] = rsp_valid_in && (rsp_sel == REQS_BITS'(i));
assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; assign rsp_tag_out[i] = rsp_tag_in[REQS_BITS +: TAG_IN_WIDTH];
assign mem_rsp_data_in[i] = mem_rsp_data_out; assign rsp_data_out[i] = rsp_data_in;
end end
assign mem_rsp_ready_out = mem_rsp_ready_in[rsp_sel]; assign rsp_ready_in = rsp_ready_out[rsp_sel];
end else begin end else begin
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
assign mem_req_valid_out = mem_req_valid_in; assign req_valid_out = req_valid_in;
assign mem_req_tag_out = mem_req_tag_in; assign req_tag_out = req_tag_in;
assign mem_req_addr_out = mem_req_addr_in; assign req_addr_out = req_addr_in;
assign mem_req_rw_out = mem_req_rw_in; assign req_rw_out = req_rw_in;
assign mem_req_byteen_out = mem_req_byteen_in; assign req_byteen_out = req_byteen_in;
assign mem_req_data_out = mem_req_data_in; assign req_data_out = req_data_in;
assign mem_req_ready_in = mem_req_ready_out; assign req_ready_in = req_ready_out;
assign mem_rsp_valid_in = mem_rsp_valid_out; assign rsp_valid_out = rsp_valid_in;
assign mem_rsp_tag_in = mem_rsp_tag_out; assign rsp_tag_out = rsp_tag_in;
assign mem_rsp_data_in = mem_rsp_data_out; assign rsp_data_out = rsp_data_in;
assign mem_rsp_ready_out = mem_rsp_ready_in; assign rsp_ready_in = rsp_ready_out;
end end

View File

@@ -64,23 +64,27 @@ module VX_scoreboard #(
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay); assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
`ifdef DBG_PRINT_PIPELINE `ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
end
end
`endif
reg [31:0] stall_ctr; reg [31:0] stall_ctr;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
stall_ctr <= 0; stall_ctr <= 0;
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", stall_ctr <= stall_ctr + 1;
assert(stall_ctr < 100000) else $error("%t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay); inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
stall_ctr <= stall_ctr + 1;
if (stall_ctr >= 2000) begin
$fflush();
assert(0);
end
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
stall_ctr <= 0; stall_ctr <= 0;
end end
end end
`endif
endmodule endmodule

View File

@@ -100,7 +100,7 @@ module VX_bank #(
output wire misses output wire misses
); );
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
/* verilator lint_off UNUSED */ /* verilator lint_off UNUSED */
wire[31:0] debug_pc_st0; wire[31:0] debug_pc_st0;
wire[`NR_BITS-1:0] debug_rd_st0; wire[`NR_BITS-1:0] debug_rd_st0;
@@ -352,7 +352,7 @@ module VX_bank #(
wire msrq_pending_hazard_st0 = msrq_pending_hazard_unqual_st0 wire msrq_pending_hazard_st0 = msrq_pending_hazard_unqual_st0
|| ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0)); || ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0));
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0; assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0;
end else begin end else begin
@@ -371,7 +371,7 @@ module VX_bank #(
.out ({is_msrq_st1, is_snp_st1, snp_invalidate_st1, msrq_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) .out ({is_msrq_st1, is_snp_st1, snp_invalidate_st1, msrq_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
); );
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1; assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
end else begin end else begin
@@ -420,7 +420,7 @@ module VX_bank #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st1), .debug_pc (debug_pc_st1),
.debug_rd (debug_rd_st1), .debug_rd (debug_rd_st1),
.debug_wid (debug_wid_st1), .debug_wid (debug_wid_st1),
@@ -474,7 +474,7 @@ module VX_bank #(
.out ({is_msrq_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_invalidate_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) .out ({is_msrq_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_invalidate_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
); );
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2; assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end else begin end else begin
@@ -498,7 +498,7 @@ module VX_bank #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st2), .debug_pc (debug_pc_st2),
.debug_rd (debug_rd_st2), .debug_rd (debug_rd_st2),
.debug_wid (debug_wid_st2), .debug_wid (debug_wid_st2),
@@ -562,7 +562,7 @@ module VX_bank #(
.out ({is_msrq_st3, send_core_rsp_st3, send_fill_req_st3, do_writeback_st3, send_snp_rsp_st3, force_miss_st3, is_snp_st3, snp_invalidate_st3, valid_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) .out ({is_msrq_st3, send_core_rsp_st3, send_fill_req_st3, do_writeback_st3, send_snp_rsp_st3, force_miss_st3, is_snp_st3, snp_invalidate_st3, valid_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
); );
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3; assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3;
end else begin end else begin
@@ -623,7 +623,7 @@ module VX_bank #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
.debug_pc_st0 (debug_pc_st0), .debug_pc_st0 (debug_pc_st0),
.debug_rd_st0 (debug_rd_st0), .debug_rd_st0 (debug_rd_st0),
.debug_wid_st0 (debug_wid_st0), .debug_wid_st0 (debug_wid_st0),

View File

@@ -3,7 +3,7 @@
`include "VX_platform.vh" `include "VX_platform.vh"
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
`include "VX_define.vh" `include "VX_define.vh"
`endif `endif

View File

@@ -24,7 +24,7 @@ module VX_cache_miss_resrv #(
input wire clk, input wire clk,
input wire reset, input wire reset,
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc_st0, input wire[31:0] debug_pc_st0,
input wire[`NR_BITS-1:0] debug_rd_st0, input wire[`NR_BITS-1:0] debug_rd_st0,

View File

@@ -25,7 +25,7 @@ module VX_data_access #(
input wire clk, input wire clk,
input wire reset, input wire reset,
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc, input wire[31:0] debug_pc,
input wire[`NR_BITS-1:0] debug_rd, input wire[`NR_BITS-1:0] debug_rd,

View File

@@ -25,7 +25,7 @@ module VX_tag_access #(
input wire clk, input wire clk,
input wire reset, input wire reset,
`ifdef DBG_CORE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc, input wire[31:0] debug_pc,
input wire[`NR_BITS-1:0] debug_rd, input wire[`NR_BITS-1:0] debug_rd,
@@ -122,7 +122,7 @@ module VX_tag_access #(
assign readtag_out = use_read_tag; assign readtag_out = use_read_tag;
assign writeen_out = (use_do_write || use_do_fill); assign writeen_out = (use_do_write || use_do_fill);
`ifdef DBG_PRINT_CACHE_DATA `ifdef DBG_PRINT_CACHE_TAG
always @(posedge clk) begin always @(posedge clk) begin
if (valid_in && !stall) begin if (valid_in && !stall) begin
if (use_do_fill && tags_match) begin if (use_do_fill && tags_match) begin

View File

@@ -10,15 +10,16 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO DBG_FLAGS += -DDBG_CACHE_REQ_INFO
FPU_INCLUDE = -I../rtl/fp_cores -I../rtl/fp_cores/svdpi -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src FPU_INCLUDE = -I../rtl/fp_cores -I../rtl/fp_cores/svdpi -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/simulate $(FPU_INCLUDE) INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/simulate $(FPU_INCLUDE)
@@ -45,7 +46,7 @@ gen-s:
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
gen-sd: gen-sd:
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs --trace-threads 1 $(DBG) verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs $(DBG)
gen-st: gen-st:
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS) verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
@@ -54,7 +55,7 @@ gen-m:
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
gen-md: gen-md:
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs --trace-threads 1 $(DBG) verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs $(DBG)
gen-mt: gen-mt:
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
@@ -63,7 +64,7 @@ build-s: gen-s
(cd obj_dir && make -j -f VVortex.mk) (cd obj_dir && make -j -f VVortex.mk)
build-sd: gen-sd build-sd: gen-sd
(cd obj_dir && OPT_FAST="-O0 -g" make -j -f VVortex.mk) (cd obj_dir && make -j -f VVortex.mk)
build-st: gen-st build-st: gen-st
(cd obj_dir && make -j -f VVortex.mk) (cd obj_dir && make -j -f VVortex.mk)
@@ -72,7 +73,7 @@ build-m: gen-m
(cd obj_dir && make -j -f VVortex.mk) (cd obj_dir && make -j -f VVortex.mk)
build-md: gen-md build-md: gen-md
(cd obj_dir && OPT_FAST="-O0 -g" make -j -f VVortex.mk) (cd obj_dir && make -j -f VVortex.mk)
build-mt: gen-mt build-mt: gen-mt
(cd obj_dir && make -j -f VVortex.mk) (cd obj_dir && make -j -f VVortex.mk)

View File

@@ -8,9 +8,11 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CACHE_BANK \ -DDBG_PRINT_CACHE_BANK \
-DDBG_PRINT_CACHE_SNP \ -DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \ -DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_CACHE_TAG \
-DDBG_PRINT_CACHE_DATA \ -DDBG_PRINT_CACHE_DATA \
-DDBG_PRINT_DRAM \ -DDBG_PRINT_DRAM \
-DDBG_PRINT_OPAE -DDBG_PRINT_OPAE \
-DDBG_PRINT_AVS
#DBG_PRINT=$(DBG_PRINT_FLAGS) #DBG_PRINT=$(DBG_PRINT_FLAGS)