merge fixes
This commit is contained in:
281
benchmarks/opencl/spmv/perfmon.c
Normal file
281
benchmarks/opencl/spmv/perfmon.c
Normal file
@@ -0,0 +1,281 @@
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <assert.h>
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include "perfmon.h"
|
||||
|
||||
static const char* mxpa_profile_log = "mxpa_profile_%d.log";
|
||||
static int enabled = 0;
|
||||
|
||||
// quartet's CPU supports these. Check check_events at libpfm4
|
||||
// to make this adaptable.
|
||||
static const char *gen_events_all[]={
|
||||
"snb_ep::L3_LAT_CACHE:MISS",
|
||||
"snb_ep::L3_LAT_CACHE:REFERENCE",
|
||||
|
||||
"snb_ep::L2_RQSTS:ALL_DEMAND_DATA_RD",
|
||||
"snb_ep::L2_RQSTS:ALL_DEMAND_RD_HIT",
|
||||
|
||||
"perf::PERF_COUNT_HW_CACHE_L1D:ACCESS",
|
||||
"perf::PERF_COUNT_HW_CACHE_L1D:MISS",
|
||||
|
||||
"perf::PERF_COUNT_HW_CACHE_L1D:PREFETCH",
|
||||
"perf::L1-DCACHE-PREFETCH-MISSES",
|
||||
|
||||
"perf::PERF_COUNT_HW_CACHE_L1I:READ",
|
||||
"perf::PERF_COUNT_HW_CACHE_L1I:MISS",
|
||||
|
||||
"perf::ITLB-LOADS",
|
||||
"perf::ITLB-LOAD-MISSES",
|
||||
"perf::DTLB-LOADS",
|
||||
"perf::DTLB-LOAD-MISSES",
|
||||
"perf::CONTEXT-SWITCHES",
|
||||
"perf::CPU-MIGRATIONS",
|
||||
"perf::CYCLES",
|
||||
"snb_ep::RESOURCE_STALLS:ANY",
|
||||
|
||||
"perf::INSTRUCTIONS",
|
||||
"perf::BRANCH-INSTRUCTIONS",
|
||||
"perf::BRANCHES",
|
||||
"perf::BRANCH-MISSES",
|
||||
NULL
|
||||
};
|
||||
|
||||
#define NUM_MAX_THREAD 256
|
||||
|
||||
static perf_event_desc_t *g_fds[NUM_MAX_THREAD];
|
||||
static int g_nthreads;
|
||||
static int num_fds = 0;
|
||||
|
||||
/* note: unsafe for multithreading */
|
||||
static uint64_t* begins;
|
||||
|
||||
static void
|
||||
fetch_counts(perf_event_desc_t *fds, int num_fds)
|
||||
{
|
||||
if (begins == 0) {
|
||||
begins = (uint64_t*) malloc(num_fds * sizeof(uint64_t));
|
||||
memset(begins, 0, num_fds * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
uint64_t val;
|
||||
uint64_t values[3];
|
||||
double ratio;
|
||||
int i;
|
||||
ssize_t ret;
|
||||
|
||||
/*
|
||||
* now read the results. We use pfp_event_count because
|
||||
* libpfm guarantees that counters for the events always
|
||||
* come first.
|
||||
*/
|
||||
memset(values, 0, sizeof(values));
|
||||
|
||||
for (i = 0; i < num_fds; i++) {
|
||||
ret = read(fds[i].fd, values, sizeof(values));
|
||||
if (ret < (ssize_t)sizeof(values)) {
|
||||
if (ret == -1)
|
||||
fprintf(stderr, "cannot read results: %s", strerror(errno));
|
||||
else
|
||||
warnx("could not read event%d", i);
|
||||
}
|
||||
/*
|
||||
* scaling is systematic because we may be sharing the PMU and
|
||||
* thus may be multiplexed
|
||||
*/
|
||||
int valid = 0;
|
||||
val = perf_scale_valid(values, &valid);
|
||||
if (valid == 0) printf ("@i=%d, v0=%llu, v1=%llu, v2=%llu, val=%llu\n", i, values[0], values[1], values[2], val);
|
||||
ratio = perf_scale_ratio(values);
|
||||
|
||||
begins[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_counts(perf_event_desc_t *fds, int num_fds, const char *msg, FILE* fp)
|
||||
{
|
||||
uint64_t val;
|
||||
uint64_t values[3];
|
||||
double ratio;
|
||||
int i;
|
||||
ssize_t ret;
|
||||
|
||||
#if 0
|
||||
fprintf(fp, "%s ------------------------------------\n", msg);
|
||||
#else
|
||||
fprintf(fp, "method=%s", msg);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* now read the results. We use pfp_event_count because
|
||||
* libpfm guarantees that counters for the events always
|
||||
* come first.
|
||||
*/
|
||||
memset(values, 0, sizeof(values));
|
||||
|
||||
for (i = 0; i < num_fds; i++) {
|
||||
|
||||
ret = read(fds[i].fd, values, sizeof(values));
|
||||
if (ret < (ssize_t)sizeof(values)) {
|
||||
if (ret == -1)
|
||||
fprintf(stderr, "cannot read results: %s", strerror(errno));
|
||||
else
|
||||
warnx("could not read event%d", i);
|
||||
}
|
||||
/*
|
||||
* scaling is systematic because we may be sharing the PMU and
|
||||
* thus may be multiplexed
|
||||
*/
|
||||
int valid;
|
||||
val = perf_scale_valid(values, &valid);
|
||||
if (valid == 0) printf ("!i=%d, v0=%llu, v1=%llu, v2=%llu, val=%llu\n", i, values[0], values[1], values[2], val);
|
||||
ratio = perf_scale_ratio(values);
|
||||
|
||||
#if 0
|
||||
fprintf(fp, "%s %'20"PRIu64" %s (%.2f%% scaling, raw=%'"PRIu64", ena=%'"PRIu64", run=%'"PRIu64")\n",
|
||||
"-", // msg,
|
||||
val,
|
||||
fds[i].name,
|
||||
(1.0-ratio)*100.0,
|
||||
values[0],
|
||||
values[1],
|
||||
values[2]);
|
||||
#else
|
||||
fprintf (fp, " %s=%llu", fds[i].name, val); // valid ? val : 0);
|
||||
#endif
|
||||
}
|
||||
fprintf (fp, "\n");
|
||||
}
|
||||
|
||||
FILE* open_log_file(char* fname) {
|
||||
FILE* fp;
|
||||
if (fp = fopen(fname, "r")) {
|
||||
fclose(fp);
|
||||
return fopen(fname, "a");
|
||||
}
|
||||
fp = fopen(fname, "a");
|
||||
return fp;
|
||||
}
|
||||
|
||||
void perf_init() {
|
||||
static int init = 0;
|
||||
|
||||
if (init) return;
|
||||
init = 1;
|
||||
|
||||
char* prof_envvar = getenv("MXPA_PROFILE");
|
||||
if (prof_envvar) {
|
||||
enabled = 1;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
pfm_initialize();
|
||||
}
|
||||
|
||||
static void get_tids(int* tids, int* number) {
|
||||
char path[32];
|
||||
int pid = getpid();
|
||||
sprintf (path, "/proc/%d/task", pid);
|
||||
struct dirent *de=NULL;
|
||||
DIR *d=NULL;
|
||||
d=opendir(path);
|
||||
assert(d != NULL && "Null for opendir");
|
||||
// Loop while not NULL
|
||||
char pid_str[8];
|
||||
char last[8];
|
||||
sprintf (pid_str, "%d", pid);
|
||||
int n = 0;
|
||||
while(de = readdir(d)) {
|
||||
if (!strcmp(de->d_name, ".")) continue;
|
||||
if (!strcmp(de->d_name, "..")) continue;
|
||||
if (!strcmp(de->d_name, pid_str)) continue;
|
||||
*tids++ = atoi(de->d_name);
|
||||
n++;
|
||||
}
|
||||
*number = n;
|
||||
// printf ("Sampling thread %d\n", tid);
|
||||
closedir(d);
|
||||
}
|
||||
|
||||
void perf_start(const char* kname) {
|
||||
if (!enabled) return;
|
||||
|
||||
char* prof_envvar = getenv("MXPA_PROFILE");
|
||||
|
||||
int tids[32];
|
||||
int ntid;
|
||||
get_tids(tids, &ntid);
|
||||
g_nthreads = ntid;
|
||||
|
||||
int n;
|
||||
for (n = 0; n < ntid; n++) {
|
||||
int ret;
|
||||
ret = perf_setup_list_events(prof_envvar, &(g_fds[n]), &num_fds);
|
||||
perf_event_desc_t *fds = g_fds[n];
|
||||
int cpu = -1;
|
||||
int group_fd = -1;
|
||||
int pid = tids[n];
|
||||
fds[0].fd = -1;
|
||||
int i;
|
||||
for(i=0; i < num_fds; i++) {
|
||||
fds[i].hw.read_format = PERF_FORMAT_SCALE;
|
||||
fds[i].hw.disabled = 1; /* do not start now */
|
||||
fds[i].hw.inherit = 1; /* XXX child process will inherit, when forked only? */
|
||||
|
||||
/* each event is in an independent group (multiplexing likely) */
|
||||
fds[i].fd = perf_event_open(&fds[i].hw, pid, cpu, group_fd, 0);
|
||||
if (fds[i].fd == -1) {
|
||||
fprintf(stderr, "cannot open event %d\n", i);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
prctl(PR_TASK_PERF_EVENTS_ENABLE);
|
||||
}
|
||||
|
||||
void perf_end(const char* kname) {
|
||||
if (!enabled) return;
|
||||
int i, n;
|
||||
prctl(PR_TASK_PERF_EVENTS_DISABLE);
|
||||
|
||||
static int first_time = 1;
|
||||
if (first_time) {
|
||||
first_time = 0;
|
||||
char name[128];
|
||||
for (n = 0; n < g_nthreads; n++) {
|
||||
sprintf (name, mxpa_profile_log, n);
|
||||
FILE* fp = fopen(name, "w");
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
char name[128];
|
||||
for (n = 0; n < g_nthreads; n++) {
|
||||
sprintf (name, mxpa_profile_log, n);
|
||||
FILE* fp = open_log_file(name);
|
||||
perf_event_desc_t *fds = g_fds[n];
|
||||
print_counts(fds, num_fds, kname, fp);
|
||||
for (i = 0; i < num_fds; i++) close(fds[i].fd);
|
||||
perf_free_fds(fds, num_fds);
|
||||
g_fds[n] = fds = NULL;
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
void pin_trace_enable(char* n) {
|
||||
perf_start((const char*)n);
|
||||
}
|
||||
|
||||
void pin_trace_disable(char* n) {
|
||||
perf_end((const char*)n);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user