merging perf counters

This commit is contained in:
Blaise Tine
2020-12-08 21:02:39 -08:00
27 changed files with 1047 additions and 230 deletions

View File

@@ -58,6 +58,12 @@ ifdef SCOPE
SCOPE_H = scope-defs.h
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
PERF_ENABLE = PERF=1
endif
all: vlsim
# AFU info from JSON file, including AFU UUID
@@ -71,7 +77,7 @@ scope-defs.h: $(SCRIPT_DIR)/scope.json
scope: scope-defs.h
vlsim-hw: $(SCOPE_H)
$(SCOPE_ENABLE) $(MAKE) -C vlsim
$(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C vlsim
fpga: $(SRCS) $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
@@ -94,7 +100,6 @@ $(ASE_DIR):
clean:
rm -rf $(PROJECT) $(PROJECT_ASE) $(PROJECT_VLSIM) *.o .depend
$(MAKE) -C vlsim clean
$(MAKE) -C ase clean
ifneq ($(MAKECMDGOALS),clean)
-include .depend

View File

@@ -43,8 +43,9 @@ RTL_DIR=../../../hw/rtl
SRCS = fpga.cpp opae_sim.cpp
SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/svdpi -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
VL_FLAGS += -Wno-DECLFILENAME
@@ -70,6 +71,12 @@ ifdef SCOPE
CFLAGS += -DSCOPE
endif
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CFLAGS += -DPERF_ENABLE
endif
# use our OPAE shim
VL_FLAGS += -DNOPAE
CFLAGS += -DNOPAE
@@ -77,8 +84,6 @@ CFLAGS += -DNOPAE
# use DPI FPU
VL_FLAGS += -DFPU_FAST
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
PROJECT = libopae-c-vlsim.so
all: $(PROJECT)

View File

@@ -244,27 +244,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
#endif
#ifdef DUMP_PERF_STATS
// Dump perf stats
if (device->num_cores > 1) {
uint64_t total_instrs = 0, total_cycles = 0;
for (unsigned core_id = 0; core_id < device->num_cores; ++core_id) {
uint64_t instrs, cycles;
int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles);
assert(ret == 0);
float IPC = (float)(double(instrs) / double(cycles));
fprintf(stdout, "[VXDRV] PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
total_instrs += instrs;
total_cycles = std::max<uint64_t>(total_cycles, cycles);
}
float IPC = (float)(double(total_instrs) / double(total_cycles));
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
} else {
uint64_t instrs, cycles;
int ret = vx_get_perf(hdevice, 0, &instrs, &cycles);
float IPC = (float)(double(instrs) / double(cycles));
assert(ret == 0);
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
}
vx_dump_perf(device, stdout);
#endif
fpgaClose(device->fpga);