fixed l3cache hang using memory arbiter in afu

This commit is contained in:
Blaise Tine
2020-11-15 06:36:32 -08:00
parent 2e0f51af80
commit 5d58bf3d11
20 changed files with 514 additions and 388 deletions

View File

@@ -4,20 +4,21 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../../../hw
# control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
@@ -59,7 +60,7 @@ VL_FLAGS += verilator.vlt
# Debugging
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs --trace-threads 1 $(DBG_FLAGS)
VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
@@ -78,7 +79,7 @@ VL_FLAGS += -DNOPAE
CFLAGS += -DNOPAE
# use DPI FPU
#VL_FLAGS += -DFPU_FAST
VL_FLAGS += -DFPU_FAST
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
@@ -94,7 +95,7 @@ $(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json
$(PROJECT): $(SRCS) $(SCOPE_VH)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk
make -j -C obj_dir -f V$(TOP).mk
clean:
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh

View File

@@ -9,15 +9,16 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
@@ -70,7 +71,7 @@ all: $(PROJECT)
$(PROJECT): $(SRCS)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk
make -j -C obj_dir -f V$(TOP).mk
clean:
rm -rf $(PROJECT) obj_dir

View File

@@ -14,17 +14,29 @@ union Float_t {
} parts;
};
// Rounds `x` to `precision` decimal digits (4 by default).
// The value is scaled by 10^precision, rounded to the nearest integer with
// std::round (halfway cases go away from zero), then scaled back down.
// The intermediate arithmetic is done in double; the result is narrowed to float.
inline float fround(float x, int32_t precision = 4) {
  const double scale = std::pow(10, precision);
  const double scaled = x * scale;
  return static_cast<float>(std::round(scaled) / scale);
}
// Relative-tolerance float comparison: returns true when |a - b| does not exceed
// `eps` multiplied by a magnitude taken from the two operands.
// `eps` defaults to std::numeric_limits<float>::epsilon().
// NOTE(review): this span comes from a rendered diff; the two `tolerance` lines below
// appear to be the removed and the added variant of one statement — the real file can
// only contain one of them. Confirm against the checked-in header.
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
// variant 1: tolerance scaled by the larger of |a| and |b|
auto tolerance = std::max(fabs(a), fabs(b)) * eps;
// variant 2: tolerance scaled by the smaller of |a| and |b| (never looser than variant 1)
auto tolerance = std::min(fabs(a), fabs(b)) * eps;
return fabs(a - b) <= tolerance;
}
// Integer-distance float comparison: builds a Float_t from each operand and accepts the
// pair when the absolute difference of their `i` members is at most `ulp`.
// Presumably `i` is the integer view of the float's bit pattern — Float_t is only partly
// visible here; verify against its definition.
// NOTE(review): this span comes from a rendered diff and shows two signature lines
// (default ulp = 5 and ulp = 4) and two body variants; the real file can only contain
// one of each. Confirm against the checked-in header.
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 5) {
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
Float_t fa{a}, fb{b};
// variant 1: silent comparison
return std::abs(fa.i - fb.i) <= ulp;
// variant 2: same comparison, but reports the mismatch on std::cout before failing
auto d = std::abs(fa.i - fb.i);
if (d > ulp) {
// `d` is streamed before std::hex, so it prints in the stream's current base; ia/ib print in hex.
// NOTE(review): std::hex is sticky — std::cout stays in hex for later integer output
// unless the caller restores std::dec.
std::cout << "*** float compare: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
return false;
}
return true;
}
// Combined float comparison: accepts the pair if the relative-epsilon check passes,
// and only falls back to the ULP-distance check when it does not.
inline bool almost_equal(float a, float b) {
  return almost_equal_eps(a, b) || almost_equal_ulp(a, b);
}
@@ -158,8 +170,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -186,8 +198,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -214,8 +226,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -242,8 +254,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -270,8 +282,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -298,8 +310,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -326,8 +338,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -354,8 +366,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -384,8 +396,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -412,8 +424,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -442,7 +454,7 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
int q = 1.0f + (i % 64);
float q = 1.0f + (i % 64);
a[i] = q;
b[i] = q;
}
@@ -471,8 +483,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n/2 - i) * (1.0f/n);
b[i] = (n/2 - i) * (1.0f/n);
a[i] = fround((n/2 - i) * (1.0f/n));
b[i] = fround((n/2 - i) * (1.0f/n));
}
}
@@ -500,8 +512,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = i * (1.0f/n);
b[i] = i * (1.0f/n);
a[i] = fround(i * (1.0f/n));
b[i] = fround(i * (1.0f/n));
}
}