fixed l3cache hang using memory arbiter in afu
This commit is contained in:
@@ -4,20 +4,21 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
CFLAGS += -I../../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
@@ -59,7 +60,7 @@ VL_FLAGS += verilator.vlt
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs --trace-threads 1 $(DBG_FLAGS)
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
@@ -78,7 +79,7 @@ VL_FLAGS += -DNOPAE
|
||||
CFLAGS += -DNOPAE
|
||||
|
||||
# use DPI FPU
|
||||
#VL_FLAGS += -DFPU_FAST
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
|
||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||
|
||||
@@ -94,7 +95,7 @@ $(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json
|
||||
|
||||
$(PROJECT): $(SRCS) $(SCOPE_VH)
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh
|
||||
|
||||
@@ -9,15 +9,16 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
@@ -70,7 +71,7 @@ all: $(PROJECT)
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir
|
||||
|
||||
@@ -14,17 +14,29 @@ union Float_t {
|
||||
} parts;
|
||||
};
|
||||
|
||||
inline float fround(float x, int32_t precision = 4) {
|
||||
auto power_of_10 = std::pow(10, precision);
|
||||
return std::round(x * power_of_10) / power_of_10;
|
||||
}
|
||||
|
||||
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
|
||||
auto tolerance = std::max(fabs(a), fabs(b)) * eps;
|
||||
auto tolerance = std::min(fabs(a), fabs(b)) * eps;
|
||||
return fabs(a - b) <= tolerance;
|
||||
}
|
||||
|
||||
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 5) {
|
||||
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
|
||||
Float_t fa{a}, fb{b};
|
||||
return std::abs(fa.i - fb.i) <= ulp;
|
||||
auto d = std::abs(fa.i - fb.i);
|
||||
if (d > ulp) {
|
||||
std::cout << "*** float compare: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool almost_equal(float a, float b) {
|
||||
if (almost_equal_eps(a, b))
|
||||
return true;
|
||||
return almost_equal_ulp(a, b);
|
||||
}
|
||||
|
||||
@@ -158,8 +170,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -186,8 +198,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -214,8 +226,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -242,8 +254,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -270,8 +282,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -298,8 +310,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -326,8 +338,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -354,8 +366,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -384,8 +396,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -412,8 +424,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -442,7 +454,7 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int q = 1.0f + (i % 64);
|
||||
float q = 1.0f + (i % 64);
|
||||
a[i] = q;
|
||||
b[i] = q;
|
||||
}
|
||||
@@ -471,8 +483,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n/2 - i) * (1.0f/n);
|
||||
b[i] = (n/2 - i) * (1.0f/n);
|
||||
a[i] = fround((n/2 - i) * (1.0f/n));
|
||||
b[i] = fround((n/2 - i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -500,8 +512,8 @@ public:
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = i * (1.0f/n);
|
||||
b[i] = i * (1.0f/n);
|
||||
a[i] = fround(i * (1.0f/n));
|
||||
b[i] = fround(i * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user