merging perf counters
This commit is contained in:
@@ -58,6 +58,12 @@ ifdef SCOPE
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
PERF_ENABLE = PERF=1
|
||||
endif
|
||||
|
||||
all: vlsim
|
||||
|
||||
# AFU info from JSON file, including AFU UUID
|
||||
@@ -71,7 +77,7 @@ scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
scope: scope-defs.h
|
||||
|
||||
vlsim-hw: $(SCOPE_H)
|
||||
$(SCOPE_ENABLE) $(MAKE) -C vlsim
|
||||
$(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C vlsim
|
||||
|
||||
fpga: $(SRCS) $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||
@@ -94,7 +100,6 @@ $(ASE_DIR):
|
||||
clean:
|
||||
rm -rf $(PROJECT) $(PROJECT_ASE) $(PROJECT_VLSIM) *.o .depend
|
||||
$(MAKE) -C vlsim clean
|
||||
$(MAKE) -C ase clean
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
|
||||
@@ -43,8 +43,9 @@ RTL_DIR=../../../hw/rtl
|
||||
SRCS = fpga.cpp opae_sim.cpp
|
||||
SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
|
||||
VL_FLAGS += -Wno-DECLFILENAME
|
||||
@@ -70,6 +71,12 @@ ifdef SCOPE
|
||||
CFLAGS += -DSCOPE
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CFLAGS += -DNOPAE
|
||||
@@ -77,8 +84,6 @@ CFLAGS += -DNOPAE
|
||||
# use DPI FPU
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
|
||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
@@ -244,27 +244,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
||||
#endif
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
// Dump perf stats
|
||||
if (device->num_cores > 1) {
|
||||
uint64_t total_instrs = 0, total_cycles = 0;
|
||||
for (unsigned core_id = 0; core_id < device->num_cores; ++core_id) {
|
||||
uint64_t instrs, cycles;
|
||||
int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles);
|
||||
assert(ret == 0);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
fprintf(stdout, "[VXDRV] PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
||||
total_instrs += instrs;
|
||||
total_cycles = std::max<uint64_t>(total_cycles, cycles);
|
||||
}
|
||||
float IPC = (float)(double(total_instrs) / double(total_cycles));
|
||||
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
||||
} else {
|
||||
uint64_t instrs, cycles;
|
||||
int ret = vx_get_perf(hdevice, 0, &instrs, &cycles);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
assert(ret == 0);
|
||||
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
}
|
||||
vx_dump_perf(device, stdout);
|
||||
#endif
|
||||
|
||||
fpgaClose(device->fpga);
|
||||
|
||||
Reference in New Issue
Block a user