perflab added
This commit is contained in:
229
perflab/matrix/clock.c
Normal file
229
perflab/matrix/clock.c
Normal file
@@ -0,0 +1,229 @@
|
||||
/* clock.c
|
||||
* Retrofitted to use thread-specific timers
|
||||
* and to get clock information from /proc/cpuinfo
|
||||
* (C) R. E. Bryant, 2010
|
||||
*
|
||||
*/
|
||||
|
||||
/* When this constant is not defined, uses time stamp counter */
|
||||
#define USE_POSIX 0
|
||||
|
||||
/* Choice to use cpu_gettime call or Intel time stamp counter directly */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <intrin.h>
|
||||
//#include <intrinsics.h>
|
||||
#include <windows.h>
|
||||
#include <time.h>
|
||||
#include "clock.h"
|
||||
|
||||
/* Use x86 cycle counter */
|
||||
|
||||
/* Initialize the cycle counter */
|
||||
static unsigned cyc_hi = 0;
|
||||
static unsigned cyc_lo = 0;
|
||||
|
||||
/* Read the processor time stamp counter and split it into 32-bit halves.
 *
 *   hi - receives the upper 32 bits of the counter
 *   lo - receives the lower 32 bits of the counter
 *
 * The counter value is obtained through the __rdtsc() compiler intrinsic.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    unsigned long long tsc = (unsigned long long) __rdtsc();

    *lo = (unsigned int) tsc;
    *hi = (unsigned int) (tsc >> 32);
}
|
||||
|
||||
|
||||
/* Record the current value of the cycle counter. */
|
||||
void start_counter()
|
||||
{
|
||||
access_counter(&cyc_hi, &cyc_lo);
|
||||
}
|
||||
|
||||
/* Return the number of cycles since the last call to start_counter. */
|
||||
double get_counter()
|
||||
{
|
||||
unsigned ncyc_hi, ncyc_lo;
|
||||
unsigned hi, lo, borrow;
|
||||
double result;
|
||||
|
||||
/* Get cycle counter */
|
||||
access_counter(&ncyc_hi, &ncyc_lo);
|
||||
|
||||
/* Do double precision subtraction */
|
||||
lo = ncyc_lo - cyc_lo;
|
||||
borrow = cyc_lo > ncyc_lo;
|
||||
hi = ncyc_hi - cyc_hi - borrow;
|
||||
result = (double) hi * (1 << 30) * 4 + lo;
|
||||
return result;
|
||||
}
|
||||
/* Spin until at least 1,000,000,000 counter ticks have elapsed.
 *
 * Restarts the cycle counter (so any measurement in progress is lost) and
 * then polls get_counter() in a tight loop.  The tick variables are volatile
 * so the polling loop is not optimised away.
 */
void make_CPU_busy(void)
{
    volatile double first_tick, latest_tick;

    start_counter();
    first_tick = get_counter();
    do {
        latest_tick = get_counter();
    } while (latest_tick - first_tick < 1000000000);
}
|
||||
|
||||
//CPU的频率
|
||||
double mhz(int verbose)
|
||||
{
|
||||
LARGE_INTEGER lFrequency;
|
||||
LARGE_INTEGER lPerformanceCount_Start;
|
||||
LARGE_INTEGER lPerformanceCount_End;
|
||||
double mhz;
|
||||
double fTime;
|
||||
__int64 _i64StartCpuCounter;
|
||||
__int64 _i64EndCpuCounter;
|
||||
//On a multiprocessor machine, it should not matter which processor is called.
|
||||
//However, you can get different results on different processors due to bugs in
|
||||
//the BIOS or the HAL. To specify processor affinity for a thread, use the SetThreadAffinityMask function.
|
||||
HANDLE hThread=GetCurrentThread();
|
||||
SetThreadAffinityMask(hThread,0x1);
|
||||
|
||||
//主板上高精度定时器的晶振频率
|
||||
//这个定时器应该就是一片8253或者8254
|
||||
//在intel ich7中集成了8254
|
||||
QueryPerformanceFrequency(&lFrequency);
|
||||
// if (verbose>0)
|
||||
// printf("高精度定时器的晶振频率:%1.0fHz.\n",(double)lFrequency.QuadPart);
|
||||
|
||||
//这个定时器每经过一个时钟周期,其计数器会+1
|
||||
QueryPerformanceCounter(&lPerformanceCount_Start);
|
||||
|
||||
//RDTSC指令:获取CPU经历的时钟周期数
|
||||
_i64StartCpuCounter=__rdtsc();
|
||||
|
||||
//延时长一点,误差会小一点
|
||||
//int nTemp=100000;
|
||||
//while (--nTemp);
|
||||
Sleep(200);
|
||||
|
||||
QueryPerformanceCounter(&lPerformanceCount_End);
|
||||
|
||||
_i64EndCpuCounter=__rdtsc();
|
||||
|
||||
//f=1/T => f=计数次数/(计数次数*T)
|
||||
//这里的“计数次数*T”就是时间差
|
||||
fTime=((double)lPerformanceCount_End.QuadPart-(double)lPerformanceCount_Start.QuadPart)
|
||||
/(double)lFrequency.QuadPart;
|
||||
|
||||
mhz = (_i64EndCpuCounter-_i64StartCpuCounter)/(fTime*1000000.0);
|
||||
if (verbose>0)
|
||||
printf("CPU频率为:%1.6fMHz.\n",mhz);
|
||||
return mhz;
|
||||
}
|
||||
|
||||
double CPU_Factor1(void)
|
||||
{
|
||||
double result;
|
||||
int i,j,k,ii,jj,kk;
|
||||
LARGE_INTEGER lStart,lEnd;
|
||||
LARGE_INTEGER lFrequency;
|
||||
HANDLE hThread;
|
||||
double fTime;
|
||||
|
||||
QueryPerformanceFrequency(&lFrequency);
|
||||
|
||||
ii = 43273;
|
||||
kk = 1238;
|
||||
result = 1;
|
||||
jj = 1244;
|
||||
|
||||
hThread=GetCurrentThread();
|
||||
SetThreadAffinityMask(hThread,0x1);
|
||||
QueryPerformanceCounter(&lStart);
|
||||
//_asm("cpuid");
|
||||
start_counter();
|
||||
for (i=0;i<100;i++)
|
||||
for (j=0;j<1000;j++)
|
||||
for (k=0;k<1000;k++)
|
||||
kk += kk*ii+jj;
|
||||
|
||||
result = get_counter();
|
||||
QueryPerformanceCounter(&lEnd);
|
||||
fTime=((double)lEnd.QuadPart-(double)lStart.QuadPart);
|
||||
printf("CPU运行时间为%f",result);
|
||||
printf("\t %f\n",fTime);
|
||||
return result;
|
||||
}
|
||||
|
||||
double CPU_Factor(void)
|
||||
{
|
||||
double frequency;
|
||||
double multiplier = 1000 * 1000 * 1000;//nano
|
||||
LARGE_INTEGER lFrequency;
|
||||
LARGE_INTEGER start,stop;
|
||||
HANDLE hThread;
|
||||
int i;
|
||||
const int gigahertz= 1000*1000*1000;
|
||||
const int known_instructions_per_loop = 27317;
|
||||
|
||||
int iterations = 100000000;
|
||||
int g = 0;
|
||||
double normal_ticks_per_second;
|
||||
double ticks;
|
||||
double time;
|
||||
double loops_per_sec;
|
||||
double instructions_per_loop;
|
||||
double ratio;
|
||||
double actual_freq;
|
||||
|
||||
QueryPerformanceFrequency(&lFrequency);
|
||||
frequency = (double)lFrequency.QuadPart;
|
||||
|
||||
hThread=GetCurrentThread();
|
||||
SetThreadAffinityMask(hThread,0x1);
|
||||
QueryPerformanceCounter(&start);
|
||||
for( i = 0; i < iterations; i++)
|
||||
{
|
||||
g++;
|
||||
g++;
|
||||
g++;
|
||||
g++;
|
||||
}
|
||||
QueryPerformanceCounter(&stop);
|
||||
|
||||
//normal ticks differs from the WMI data, i.e 3125, when WMI 3201, and CPUZ 3199
|
||||
normal_ticks_per_second = frequency * 1000;
|
||||
ticks = (double)((double)stop.QuadPart - (double)start.QuadPart);
|
||||
time = (ticks * multiplier) /frequency;
|
||||
loops_per_sec = iterations / (time/multiplier);
|
||||
instructions_per_loop = normal_ticks_per_second / loops_per_sec;
|
||||
|
||||
ratio = (instructions_per_loop / known_instructions_per_loop);
|
||||
actual_freq = normal_ticks_per_second / ratio;
|
||||
/*
|
||||
actual_freq = normal_ticks_per_second / ratio;
|
||||
actual_freq = known_instructions_per_loop*iterations*multiplier/time;
|
||||
|
||||
2293 = x/time;
|
||||
|
||||
2292.599713*1191533038.809362=known_instructions_per_loop*100000000*1000
|
||||
loops_per_sec = iterations*frequency / ticks
|
||||
|
||||
instructions_per_loop = / loops_per_sec;
|
||||
*/
|
||||
printf("Perf counter freq: %f\n", normal_ticks_per_second);
|
||||
printf("Loops per sec: %f\n", loops_per_sec);
|
||||
printf("Perf counter freq div loops per sec: %f\n", instructions_per_loop);
|
||||
printf("Presumed freq: %f\n", actual_freq);
|
||||
printf("ratio: %f\n", ratio);
|
||||
printf("time=%f\n",time);
|
||||
return ratio;
|
||||
}
|
||||
12
perflab/matrix/clock.h
Normal file
12
perflab/matrix/clock.h
Normal file
@@ -0,0 +1,12 @@
|
||||
/* Routines for using cycle counter */
|
||||
|
||||
/* Start the counter */
|
||||
void start_counter(void);
|
||||
|
||||
/* Get # cycles since counter started, as a double. (This implementation performs no timing-anomaly detection.) */
|
||||
double get_counter(void);
|
||||
void make_CPU_busy(void);
|
||||
|
||||
double mhz(int verbose);
|
||||
double CPU_Factor(void);
|
||||
//double GetCpuClock(void);
|
||||
117
perflab/matrix/cpe.c
Normal file
117
perflab/matrix/cpe.c
Normal file
@@ -0,0 +1,117 @@
|
||||
/* Compute CPE for function */
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "fcyc.h"
|
||||
#include "cpe.h"
|
||||
#include "lsquare.h"
|
||||
#include "clock.h"
|
||||
|
||||
/* Find number of cycles taken by function.
|
||||
Do this by running number of trials until best two within TOL of
|
||||
each other
|
||||
*/
|
||||
double measure_function(elem_fun_t f, int cnt)
|
||||
{
|
||||
/* Need to fudge fact that fcyc wants a function taking an
|
||||
long int *, while our function takes an long int */
|
||||
test_funct tf = (test_funct) f;
|
||||
return fcyc(tf, (int *) (int) cnt);
|
||||
}
|
||||
|
||||
#define MAXCNT 100
|
||||
|
||||
#define LIM RAND_MAX
|
||||
|
||||
/* LCM of unrolling degree */
|
||||
#ifdef USE_UNI
|
||||
#define UNROLL 32
|
||||
#else /* USE_UNI */
|
||||
#define UNROLL 1
|
||||
#endif
|
||||
|
||||
/* Choose the element count for sample number `index`.
 *
 *   index   - sample number, 0 .. samples-1
 *   samples - total number of samples being taken
 *   maxcnt  - largest count allowed
 *   smethod - UNI_SAMPLE: evenly spaced by index; RAN_SAMPLE: drawn with rand()
 *   bias    - the smallest count considered is bias*maxcnt
 *
 * Returns a count between bias*maxcnt and maxcnt, rounded down to a
 * multiple of UNROLL.  Exits the program on an unknown sampling method.
 */
static long int get_cnt(long int index, long int samples,
                        long int maxcnt, sample_t smethod, double bias)
{
    long int mincnt = (long int) (bias*maxcnt);
    double weight;
    long int val;

    switch (smethod) {
    case UNI_SAMPLE:
        /* With a single sample there is no spacing to compute; dividing by
           (samples - 1) would give 0/0 = NaN and the later conversion of
           NaN to an integer is undefined.  Use the low end of the range. */
        if (samples > 1)
            weight = (double) index/(samples - 1);
        else
            weight = 0.0;
        break;
    case RAN_SAMPLE:
        weight = (double) (rand() % LIM) / (double) (LIM-1);
        break;
    default:
        fprintf(stderr, "Undefined sampling method %d\n", (int) smethod);
        exit(1);
    }
    val = (long int) (mincnt + weight*(maxcnt-mincnt));
    return UNROLL * (val/UNROLL);
}
|
||||
|
||||
#define SEED 31415
|
||||
|
||||
/* Find cpe for function f, which allows cnt up to maxcnt, using
|
||||
specified number of sample points.
|
||||
If data_file, then print data so that can plot points with Excel
|
||||
smethod determines method for generating samples
|
||||
*/
|
||||
double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file,
|
||||
sample_t smethod, double bias, long int verbose)
|
||||
{
|
||||
long int i;
|
||||
long int cnt;
|
||||
double cpe;
|
||||
double overhead = 0;
|
||||
double *cnt_val = calloc(samples, sizeof(double));
|
||||
double *cycle_val = calloc(samples, sizeof(double));
|
||||
/* Do the samples */
|
||||
|
||||
srand(SEED);
|
||||
for (i = 0; i < samples; i++) {
|
||||
cnt = get_cnt(i, samples, maxcnt, smethod, bias);
|
||||
cnt_val[i] = cnt;
|
||||
cycle_val[i] = measure_function(f, cnt);
|
||||
if (cycle_val[i] < 1.0) {
|
||||
fprintf(stderr, "Got %.2f cycles for count %ld\n", cycle_val[i], cnt);
|
||||
}
|
||||
}
|
||||
/* Fit data */
|
||||
cpe = ls_slope(cnt_val, cycle_val, samples);
|
||||
if (data_file)
|
||||
overhead = ls_intercept(cnt_val, cycle_val, samples);
|
||||
if (data_file && verbose > 1) {
|
||||
/* Print x values */
|
||||
fprintf(data_file, "Cnt\t0");
|
||||
for (i = 0; i < samples; i++)
|
||||
fprintf(data_file, "\t%.0f", cnt_val[i]);
|
||||
fprintf(data_file, "\n");
|
||||
/* Print y values */
|
||||
fprintf(data_file, "Cycs.\t");
|
||||
for (i = 0; i < samples; i++)
|
||||
fprintf(data_file, "\t%.2f", cycle_val[i]);
|
||||
fprintf(data_file, "\n");
|
||||
/* Print ax*b values */
|
||||
fprintf(data_file, "Interp.\t%.2f", overhead);
|
||||
for (i = 0; i < samples; i++)
|
||||
fprintf(data_file, "\t%.2f", cpe*cnt_val[i]+overhead);
|
||||
fprintf(data_file, "\n");
|
||||
}
|
||||
if (data_file && verbose) {
|
||||
/* Print results */
|
||||
fprintf(data_file, "cpe\t%.2f\tovhd\t%.2f\tavgerr\t\\%.3f\tmaxerr\t\\%.3f\n",
|
||||
cpe, overhead,
|
||||
ls_error(cnt_val, cycle_val, samples, LS_AVG),
|
||||
ls_error(cnt_val, cycle_val, samples, LS_MAX));
|
||||
}
|
||||
free(cnt_val);
|
||||
free(cycle_val);
|
||||
return cpe;
|
||||
}
|
||||
|
||||
/* Use default parameters */
|
||||
double find_cpe(elem_fun_t f, int maxcnt)
|
||||
{
|
||||
return find_cpe_full(f, maxcnt, 100, stdout, RAN_SAMPLE, 0.3, 0);
|
||||
}
|
||||
31
perflab/matrix/cpe.h
Normal file
31
perflab/matrix/cpe.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/* Compute CPE for function */
|
||||
|
||||
/* Compute for function that is linear in some parameter cnt */
|
||||
typedef void (*elem_fun_t)(int);
|
||||
|
||||
/* Different ways of finding samples
|
||||
UNI_SAMPLE: samples uniformly spaced between bias*maxcnt and maxcnt
|
||||
RAN_SAMPLE: samples randomly selected between bias*maxcnt and maxcnt
|
||||
*/
|
||||
|
||||
typedef enum {UNI_SAMPLE, RAN_SAMPLE}
|
||||
sample_t;
|
||||
|
||||
/* Find cpe for function f, which allows cnt up to maxcnt.
|
||||
Uses default parameters
|
||||
*/
|
||||
double find_cpe(elem_fun_t f, int maxcnt);
|
||||
|
||||
/* Find cpe for function f, which allows cnt up to maxcnt, using
|
||||
specified number of sample points.
|
||||
If data_file, then print data so that can plot points with Excel
|
||||
smethod determines method for generating samples
|
||||
*/
|
||||
double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file,
|
||||
sample_t smethod, double bias, long int verbose);
|
||||
|
||||
/* Find number of cycles taken by function.
|
||||
Do this by running number of trials until best two within TOL (2%) of
|
||||
each other
|
||||
*/
|
||||
double measure_function(elem_fun_t f, int cnt);
|
||||
223
perflab/matrix/fcyc.c
Normal file
223
perflab/matrix/fcyc.c
Normal file
@@ -0,0 +1,223 @@
|
||||
/* Compute time used by function f */
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "clock.h"
|
||||
#include "fcyc.h"
|
||||
|
||||
#define K 3
|
||||
#define MAXSAMPLES 20
|
||||
#define EPSILON 0.01
|
||||
#define COMPENSATE 0
|
||||
#define CLEAR_CACHE 0
|
||||
#define CACHE_BYTES (1<<19)
|
||||
#define CACHE_BLOCK 32
|
||||
#define MAX_ITER_TIMES 10
|
||||
|
||||
static long int kbest = K;
|
||||
static long int compensate = COMPENSATE;
|
||||
static long int clear_cache = CLEAR_CACHE;
|
||||
static long int maxsamples = MAXSAMPLES;
|
||||
static double epsilon = EPSILON;
|
||||
static long int cache_bytes = CACHE_BYTES;
|
||||
static long int cache_block = CACHE_BLOCK;
|
||||
|
||||
static long int *cache_buf = NULL;
|
||||
|
||||
static double *values = NULL;
|
||||
static long int samplecount = 0;
|
||||
|
||||
#define KEEP_VALS 0
|
||||
#define KEEP_SAMPLES 0
|
||||
|
||||
#if KEEP_SAMPLES
|
||||
static double *samples = NULL;
|
||||
#endif
|
||||
|
||||
/* Start new sampling process */
|
||||
static void init_sampler(void)
|
||||
{
|
||||
if (values)
|
||||
free(values);
|
||||
values = calloc(kbest, sizeof(double));
|
||||
#if KEEP_SAMPLES
|
||||
if (samples)
|
||||
free(samples);
|
||||
/* Allocate extra for wraparound analysis */
|
||||
samples = calloc(maxsamples+kbest, sizeof(double));
|
||||
#endif
|
||||
samplecount = 0;
|
||||
}
|
||||
|
||||
/* Add new sample. */
|
||||
static void add_sample(double val)
|
||||
{
|
||||
long int pos = 0;
|
||||
if (samplecount < kbest) {
|
||||
pos = samplecount;
|
||||
values[pos] = val;
|
||||
} else if (val < values[kbest-1]) {
|
||||
pos = kbest-1;
|
||||
values[pos] = val;
|
||||
}
|
||||
#if KEEP_SAMPLES
|
||||
samples[samplecount] = val;
|
||||
#endif
|
||||
samplecount++;
|
||||
/* Insertion sort */
|
||||
while (pos > 0 && values[pos-1] > values[pos]) {
|
||||
double temp = values[pos-1];
|
||||
values[pos-1] = values[pos];
|
||||
values[pos] = temp;
|
||||
pos--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Have kbest minimum measurements converged within epsilon? */
|
||||
static long int has_converged(void)
|
||||
{
|
||||
return
|
||||
(samplecount >= kbest) &&
|
||||
((1 + epsilon)*values[0] >= values[kbest-1]);
|
||||
}
|
||||
|
||||
/* Code to clear cache */
|
||||
|
||||
|
||||
static volatile long int sink = 0;
|
||||
|
||||
static void clear(void)
|
||||
{
|
||||
long int x = sink;
|
||||
long int *cptr, *cend;
|
||||
long int incr = cache_block/sizeof(long int);
|
||||
if (!cache_buf) {
|
||||
cache_buf = malloc(cache_bytes);
|
||||
if (!cache_buf) {
|
||||
fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
cptr = (long int *) cache_buf;
|
||||
cend = cptr + cache_bytes/sizeof(long int);
|
||||
while (cptr < cend) {
|
||||
x += *cptr;
|
||||
cptr += incr;
|
||||
}
|
||||
sink = x;
|
||||
}
|
||||
|
||||
/* Measure the number of cycles used by f(params).
 *
 * Repeats the measurement, keeping the kbest smallest results, until those
 * results agree to within epsilon or maxsamples samples have been recorded.
 * When `compensate` is set each sample times a single call; otherwise each
 * sample times MAX_ITER_TIMES back-to-back calls and records the average.
 * If `clear_cache` is set, clear() is run before every sample.
 *
 *   f      - function under test
 *   params - passed through to f unchanged
 *
 * Returns the smallest sample recorded.
 */
double fcyc(test_funct f, int *params)
{
    int i;
    double result;

    init_sampler();
    if (compensate) {
        do {
            double cyc;
            if (clear_cache)
                clear();
            start_counter();
            /* test_funct takes long int *; convert explicitly rather than
               relying on an implicit incompatible-pointer conversion */
            f((long int *) params);
            cyc = get_counter();
            if (cyc > 0.0)
                add_sample(cyc);
        } while (!has_converged() && samplecount < maxsamples);
    } else {
        do {
            double cyc;
            if (clear_cache)
                clear();
            start_counter();
            for (i = 0; i < MAX_ITER_TIMES; i++)
                f((long int *) params);
            cyc = get_counter()/MAX_ITER_TIMES;
            if (cyc > 0.0)
                add_sample(cyc);
        } while (!has_converged() && samplecount < maxsamples);
    }
#ifdef DEBUG
    {
        long int idx;
        printf(" %ld smallest values: [", kbest);
        for (idx = 0; idx < kbest; idx++)
            printf("%.0f%s", values[idx], idx==kbest-1 ? "]\n" : ", ");
    }
#endif
    result = values[0];
#if !KEEP_VALS
    free(values);
    values = NULL;
#endif
    return result;
}
|
||||
|
||||
|
||||
/***********************************************************/
|
||||
/* Set the various parameters used by measurement routines */
|
||||
|
||||
|
||||
/* When set, will run code to clear cache before each measurement
|
||||
Default = 0
|
||||
*/
|
||||
void set_fcyc_clear_cache(long int clear)
|
||||
{
|
||||
clear_cache = clear;
|
||||
}
|
||||
|
||||
/* Set size of cache to use when clearing cache
|
||||
Default = 1<<19 (512KB)
|
||||
*/
|
||||
void set_fcyc_cache_size(long int bytes)
|
||||
{
|
||||
if (bytes != cache_bytes) {
|
||||
cache_bytes = bytes;
|
||||
if (cache_buf) {
|
||||
free(cache_buf);
|
||||
cache_buf = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Set size of cache block
|
||||
Default = 32
|
||||
*/
|
||||
void set_fcyc_cache_block(long int bytes) {
|
||||
cache_block = bytes;
|
||||
}
|
||||
|
||||
|
||||
/* When set, will attempt to compensate for timer interrupt overhead
|
||||
Default = 0
|
||||
*/
|
||||
void set_fcyc_compensate(long int compensate_arg)
|
||||
{
|
||||
compensate = compensate_arg;
|
||||
}
|
||||
|
||||
/* Value of K in K-best
|
||||
Default = 3
|
||||
*/
|
||||
void set_fcyc_k(long int k)
|
||||
{
|
||||
kbest = k;
|
||||
}
|
||||
|
||||
/* Maximum number of samples attempting to find K-best within some tolerance.
|
||||
When exceeded, just return best sample found.
|
||||
Default = 20
|
||||
*/
|
||||
void set_fcyc_maxsamples(long int maxsamples_arg)
|
||||
{
|
||||
maxsamples = maxsamples_arg;
|
||||
}
|
||||
|
||||
/* Tolerance required for K-best
|
||||
Default = 0.01
|
||||
*/
|
||||
void set_fcyc_epsilon(double epsilon_arg)
|
||||
{
|
||||
epsilon = epsilon_arg;
|
||||
}
|
||||
52
perflab/matrix/fcyc.h
Normal file
52
perflab/matrix/fcyc.h
Normal file
@@ -0,0 +1,52 @@
|
||||
|
||||
/* Fcyc measures the speed of any "test function." Such a function
|
||||
is passed a list of integer parameters, which it may interpret
|
||||
in any way it chooses.
|
||||
*/
|
||||
|
||||
typedef void (*test_funct)(long int *);
|
||||
|
||||
/* Compute number of cycles used by function f on given set of parameters */
|
||||
double fcyc(test_funct f, int* params);
|
||||
|
||||
/***********************************************************/
|
||||
/* Set the various parameters used by measurement routines */
|
||||
|
||||
|
||||
/* When set, will run code to clear cache before each measurement
|
||||
Default = 0
|
||||
*/
|
||||
void set_fcyc_clear_cache(long int clear);
|
||||
|
||||
/* Set size of cache to use when clearing cache
|
||||
Default = 1<<19 (512KB)
|
||||
*/
|
||||
void set_fcyc_cache_size(long int bytes);
|
||||
|
||||
/* Set size of cache block
|
||||
Default = 32
|
||||
*/
|
||||
void set_fcyc_cache_block(long int bytes);
|
||||
|
||||
/* When set, will attempt to compensate for timer interrupt overhead
|
||||
Default = 0
|
||||
*/
|
||||
void set_fcyc_compensate(long int compensate);
|
||||
|
||||
/* Value of K in K-best
|
||||
Default = 3
|
||||
*/
|
||||
void set_fcyc_k(long int k);
|
||||
|
||||
/* Maximum number of samples attempting to find K-best within some tolerance.
|
||||
When exceeded, just return best sample found.
|
||||
Default = 20
|
||||
*/
|
||||
void set_fcyc_maxsamples(long int maxsamples);
|
||||
|
||||
/* Tolerance required for K-best
|
||||
Default = 0.01
|
||||
*/
|
||||
void set_fcyc_epsilon(double epsilon);
|
||||
|
||||
|
||||
94
perflab/matrix/lsquare.c
Normal file
94
perflab/matrix/lsquare.c
Normal file
@@ -0,0 +1,94 @@
|
||||
/* Compute least squares fit of set of data points */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "lsquare.h"
|
||||
|
||||
/* Running sums needed for a least-squares line fit */
typedef struct {
    double sum_x;    /* sum of x          */
    double sum_y;    /* sum of y          */
    double sum_xx;   /* sum of x squared  */
    double sum_xy;   /* sum of x times y  */
} ls_stat_t;

/* Accumulate the sums of the first cnt points (xval[i], yval[i]) into
 * *statp.  All four sums are 0.0 when cnt <= 0. */
static void ls_stats(double *xval, double *yval, int cnt, ls_stat_t *statp)
{
    double sx = 0.0, sy = 0.0, sxx = 0.0, sxy = 0.0;
    int k;

    for (k = 0; k < cnt; k++) {
        sx += xval[k];
        sy += yval[k];
        sxx += xval[k] * xval[k];
        sxy += xval[k] * yval[k];
    }
    statp->sum_x = sx;
    statp->sum_y = sy;
    statp->sum_xx = sxx;
    statp->sum_xy = sxy;
}
|
||||
|
||||
double ls_slope(double *xval, double *yval, int cnt)
|
||||
{
|
||||
double slope;
|
||||
ls_stat_t stat;
|
||||
ls_stats(xval, yval, cnt, &stat);
|
||||
slope = (cnt * stat.sum_xy - stat.sum_x * stat.sum_y)/
|
||||
(cnt * stat.sum_xx - stat.sum_x*stat.sum_x);
|
||||
return slope;
|
||||
}
|
||||
|
||||
double ls_intercept(double *xval, double *yval, int cnt)
|
||||
{
|
||||
double intercept;
|
||||
ls_stat_t stat;
|
||||
ls_stats(xval, yval, cnt, &stat);
|
||||
intercept = (stat.sum_xx * stat.sum_y - stat.sum_xy * stat.sum_x)/
|
||||
(cnt * stat.sum_xx - stat.sum_x*stat.sum_x);
|
||||
return intercept;
|
||||
}
|
||||
|
||||
/* Relative error of the point (x, y) against the line y = slope*x + intercept.
 *
 * Returns |y - predicted| / |predicted|.  When the prediction is exactly 0
 * the absolute error |y - predicted| is returned instead, to avoid dividing
 * by zero.  The result is never negative.
 */
static double rel_err(double x, double y, double slope, double intercept)
{
    double pred_y = slope*x + intercept;
    double offset = y - pred_y;
    double scale = pred_y;

    if (offset < 0)
        offset = -offset;
    if (pred_y == 0)
        return offset;
    /* Divide by the magnitude of the prediction: dividing by a negative
       prediction would yield a negative "error" that cancels real errors
       in the average and is ignored by the maximum. */
    if (scale < 0)
        scale = -scale;
    return offset/scale;
}
|
||||
|
||||
/* Measure how well the least-squares line fits the cnt points
 * (xval[i], yval[i]).
 *
 *   etype - LS_AVG: return the mean of the per-point relative errors
 *           LS_MAX: return the largest per-point relative error
 *
 * Any other etype prints a message to stderr and exits the program (the
 * check happens while processing the first point).
 */
double ls_error(double *xval, double *yval, int cnt, ls_err_t etype)
{
    ls_stat_t sums;
    double det;
    double m, b;
    double acc = 0;
    double divisor = 0;
    int k;

    ls_stats(xval, yval, cnt, &sums);
    /* Same fit as ls_slope() / ls_intercept(); both share this denominator */
    det = cnt * sums.sum_xx - sums.sum_x*sums.sum_x;
    m = (cnt * sums.sum_xy - sums.sum_x * sums.sum_y)/det;
    b = (sums.sum_xx * sums.sum_y - sums.sum_xy * sums.sum_x)/det;

    for (k = 0; k < cnt; k++) {
        double e = rel_err(xval[k], yval[k], m, b);

        if (etype == LS_AVG) {
            acc += e;
            divisor++;
        } else if (etype == LS_MAX) {
            if (acc < e)
                acc = e;
            divisor = 1;
        } else {
            fprintf(stderr, "Invalid error type: %d\n", etype);
            exit(1);
        }
    }
    return acc/divisor;
}
|
||||
11
perflab/matrix/lsquare.h
Normal file
11
perflab/matrix/lsquare.h
Normal file
@@ -0,0 +1,11 @@
|
||||
/* Compute least squares fit of set of data points */
|
||||
|
||||
/* Fit is of form y = mx + b. m is slope, b is intercept */
|
||||
double ls_slope(double *xval, double *yval, int cnt);
|
||||
double ls_intercept(double *xval, double *yval, int cnt);
|
||||
|
||||
typedef enum {LS_AVG, LS_MAX} ls_err_t;
|
||||
|
||||
/* Determine error (either maximum or average relative error) of least squares fit */
|
||||
double ls_error(double *xval, double *yval, int cnt, ls_err_t etype);
|
||||
|
||||
28
perflab/matrix/matrix/matrix.sln
Normal file
28
perflab/matrix/matrix/matrix.sln
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 14
|
||||
VisualStudioVersion = 14.0.25420.1
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix", "matrix.vcxproj", "{15DC376D-CB40-4A27-BCF8-BCE93039E478}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Debug|x64.Build.0 = Debug|x64
|
||||
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Debug|x86.Build.0 = Debug|Win32
|
||||
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Release|x64.ActiveCfg = Release|x64
|
||||
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Release|x64.Build.0 = Release|x64
|
||||
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Release|x86.ActiveCfg = Release|Win32
|
||||
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Release|x86.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
123
perflab/matrix/matrix/matrix.vcxproj
Normal file
123
perflab/matrix/matrix/matrix.vcxproj
Normal file
@@ -0,0 +1,123 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{15DC376D-CB40-4A27-BCF8-BCE93039E478}</ProjectGuid>
|
||||
<RootNamespace>matrix</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\clock.c" />
|
||||
<ClCompile Include="..\cpe.c" />
|
||||
<ClCompile Include="..\fcyc.c" />
|
||||
<ClCompile Include="..\lsquare.c" />
|
||||
<ClCompile Include="..\rowcol.c" />
|
||||
<ClCompile Include="..\rowcol_test.c" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
37
perflab/matrix/matrix/matrix.vcxproj.filters
Normal file
37
perflab/matrix/matrix/matrix.vcxproj.filters
Normal file
@@ -0,0 +1,37 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="源文件">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="头文件">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="资源文件">
|
||||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\clock.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\cpe.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\fcyc.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\lsquare.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\rowcol.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\rowcol_test.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
77
perflab/matrix/rowcol.c
Normal file
77
perflab/matrix/rowcol.c
Normal file
@@ -0,0 +1,77 @@
|
||||
/**************************************************************************
|
||||
行/列求和函数。按下面的要求编辑此文件:
|
||||
1. 将你的学号、姓名,以注释的方式写到下面;
|
||||
2. 实现不同版本的行列求和函数;
|
||||
3. 编辑rc_fun_rec rc_fun_tab数组,将你的最好的答案
|
||||
(最好的行和列求和、最好的列求和)作为数组的前两项
|
||||
***************************************************************************/
|
||||
|
||||
/*
|
||||
学号:201209054233
|
||||
姓名:夜半加班狂
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "rowcol.h"
|
||||
#include <math.h>
|
||||
|
||||
/* 参考的列求和函数实现 */
|
||||
/* 计算矩阵中的每一列的和。请注意对于行和列求和来说,调用参数是
|
||||
一样的,只是第2个参数不会用到而已
|
||||
*/
|
||||
|
||||
void c_sum(matrix_t M, vector_t rowsum, vector_t colsum)
|
||||
{
|
||||
int i,j;
|
||||
for (j = 0; j < N; j++) {
|
||||
colsum[j] = 0;
|
||||
for (i = 0; i < N; i++)
|
||||
colsum[j] += M[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* 参考的列和行求和函数实现 */
|
||||
/* 计算矩阵中的每一行、每一列的和。 */
|
||||
|
||||
void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum)
|
||||
{
|
||||
int i,j;
|
||||
for (i = 0; i < N; i++) {
|
||||
rowsum[i] = colsum[i] = 0;
|
||||
for (j = 0; j < N; j++) {
|
||||
rowsum[i] += M[i][j];
|
||||
colsum[i] += M[j][i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
这个表格包含多个数组元素,每一组元素(函数名字, COL/ROWCOL, "描述字符串")
|
||||
COL表示该函数仅仅计算每一列的和
|
||||
ROWCOL表示该函数计算每一行、每一列的和
|
||||
将你认为最好的两个实现,放在最前面。
|
||||
比如:
|
||||
{my_c_sum1, "超级垃圾列求和实现"},
|
||||
{my_rc_sum2, "好一点的行列求和实现"},
|
||||
*/
|
||||
|
||||
/* Table of summation functions to test; the entry with a NULL function
   pointer marks the end of the table. */
rc_fun_rec rc_fun_tab[] =
{

    /* First entry: should be the best column-sum implementation you wrote */
    {c_sum, COL, "Best column sum"},
    /* Second entry: should be the best row-and-column-sum implementation you wrote */
    {rc_sum, ROWCOL, "Best row and column sum"},

    {c_sum, COL, "Column sum, reference implementation"},

    {rc_sum, ROWCOL, "Row and column sum, reference implementation"},

    /* Do not modify or delete the entry below: it marks the end of the table */
    {NULL,ROWCOL,NULL}
};
|
||||
35
perflab/matrix/rowcol.h
Normal file
35
perflab/matrix/rowcol.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/* Matrix row and/or column summation code */
|
||||
|
||||
/* Size of matrices */
|
||||
/* $begin rcdecl */
|
||||
#define N 512
|
||||
/* $end rcdecl */
|
||||
|
||||
/* Data types */
|
||||
|
||||
/* Pointer type for vectors */
|
||||
typedef int *vecp_t;
|
||||
/* $begin rcdecl */
|
||||
/* N x N matrix */
|
||||
typedef int matrix_t[N][N];
|
||||
|
||||
/* Vector of length N */
|
||||
typedef int vector_t[N];
|
||||
/* $end rcdecl */
|
||||
|
||||
/* Different sum/product function types */
|
||||
typedef enum { COL, ROWCOL } rc_comp_t;
|
||||
|
||||
/* Summation function */
|
||||
typedef void (*rc_fun)(matrix_t, vector_t, vector_t);
|
||||
|
||||
/* One entry of the table of summation functions to test */
typedef struct {
    rc_fun f;            /* Function under test */
    rc_comp_t rc_type;   /* What computation does it perform? */
    char *descr;         /* Human-readable description of the function */
} rc_fun_rec, *rc_fun_ptr;
|
||||
|
||||
/* Table of functions to test. Null terminated */
|
||||
extern rc_fun_rec rc_fun_tab[];
|
||||
|
||||
|
||||
173
perflab/matrix/rowcol_test.c
Normal file
173
perflab/matrix/rowcol_test.c
Normal file
@@ -0,0 +1,173 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
//#include <random.h>
|
||||
#include "rowcol.h"
|
||||
#include "fcyc.h"
|
||||
#include "clock.h"
|
||||
|
||||
#define MAX_ITER_COUNT 100
|
||||
|
||||
/* Define performance standards.
   Entry [0] is used for the column-sum score and entry [1] for the
   row-and-column-sum score; main() passes both fields to compute_score()
   together with the measured cycles per element. */
static struct {
    double cref;  /* Cycles taken by reference solution */
    double cbest; /* Cycles taken by our best implementation */
} cstandard[2] =
{{7.7, 6.40}, /* Column Sum */
 {9.75, 6.60} /* Row & Column Sum */
};
|
||||
|
||||
/* Put in code to align matrix so that it starts on a cache block boundary.
|
||||
This makes the cache performance of the code a bit more predictable
|
||||
*/
|
||||
|
||||
/* Words per cache block. OK if this is an estimate as long as it
|
||||
is a multiple of the actual value
|
||||
*/
|
||||
#define WPB 16
|
||||
|
||||
int verbose = 1;
|
||||
int data[N*N+WPB];
|
||||
int *mstart;
|
||||
|
||||
typedef vector_t *row_t;
|
||||
|
||||
/* Reference row and column sums */
|
||||
vector_t rsref, csref, rcomp, ccomp;
|
||||
|
||||
static void init_tests(void);
|
||||
extern void make_CPU_busy(void);
|
||||
|
||||
static void init_tests(void)
|
||||
{
|
||||
int i, j;
|
||||
size_t bytes_per_block = sizeof(int) * WPB;
|
||||
/* round mstart up to nearest block boundary */
|
||||
mstart = (int *)
|
||||
(((size_t) data + bytes_per_block-1) / bytes_per_block * bytes_per_block);
|
||||
for (i = 0; i < N; i++) {
|
||||
rsref[i] = csref[i] = 0;
|
||||
}
|
||||
for (i = 0; i < N; i++) {
|
||||
for (j = 0; j < N; j++) {
|
||||
int val = rand();
|
||||
mstart[i*N+j] = val;
|
||||
rsref[i] += val;
|
||||
csref[j] += val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Test function on all values */
|
||||
int test_rc(rc_fun f, FILE *rpt, rc_comp_t rc_type) {
|
||||
int i;
|
||||
int ok = 1;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
rcomp[i] = ccomp[i] = 0xDEADBEEF;
|
||||
f((row_t)mstart, rcomp, ccomp);
|
||||
|
||||
for (i = 0; ok && i < N; i++) {
|
||||
if (rc_type == ROWCOL
|
||||
&& rsref[i] != rcomp[i]) {
|
||||
ok = 0;
|
||||
if (rpt)
|
||||
fprintf(rpt,
|
||||
"对第%d行的计算出错!正确结果是%d,但是计算得到%d\n",
|
||||
i, rsref[i], rcomp[i]);
|
||||
}
|
||||
if ((rc_type == ROWCOL || rc_type == COL)
|
||||
&& csref[i] != ccomp[i]) {
|
||||
ok = 0;
|
||||
if (rpt)
|
||||
fprintf(rpt,
|
||||
"对第%d列的计算出错!正确结果是%d,但是计算得到%d\n",
|
||||
i, csref[i], ccomp[i]);
|
||||
}
|
||||
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* Kludgy way to interface to cycle measuring code */
|
||||
void do_test(int *intf)
|
||||
{
|
||||
rc_fun f = (rc_fun) intf;
|
||||
f((row_t)mstart, rcomp, ccomp);
|
||||
}
|
||||
|
||||
/* Validate and time one summation function.
 * f is first checked with test_rc(); if the check fails nothing is timed
 * and *cycp is left untouched. Otherwise fcyc() is run MAX_ITER_COUNT
 * times, the results are averaged, and the cycles per matrix element are
 * stored through cycp when it is non-NULL. Progress is printed when the
 * global verbose flag is set.
 */
void time_rc(rc_fun f, rc_comp_t rc_type, char *descr, double *cycp)
{
    int rep;
    int *packed_fn = (int *) f;   /* unpacked again by do_test() */
    double total = 0;
    double per_call, per_elem;

    if (verbose)
        printf("函数:%s\n", descr);

    if (!test_rc(f, stdout, rc_type))
        return;

    make_CPU_busy();
    for (rep = 0; rep < MAX_ITER_COUNT; rep++)
        total += fcyc(do_test, packed_fn);

    per_call = total/MAX_ITER_COUNT;
    per_elem = per_call/(N*N);
    if (verbose)
        printf(" 总周期数 = %.2f, 平均周期/元素 = %.2f\n",
               per_call, per_elem);
    if (cycp)
        *cycp = per_elem;
}
|
||||
|
||||
/* Compute the grade achieved by a function.
 * cmeas, cref and cbest are the cycle counts of the measured, reference
 * and best-known implementations. Speedups are taken relative to cref.
 * The grade is 0 below 10% of the best speedup gain, 120 above 110% of
 * it, and linear in between (100 at exactly the best speedup plus the
 * 10-point offset).
 */
static double compute_score(double cmeas, double cref, double cbest)
{
    double best_speedup = cref/cbest;
    double meas_speedup = cref/cmeas;
    double low_cutoff = 0.1*(best_speedup-1)+1;
    double high_cutoff = 1.1*(best_speedup-1)+1;

    if (meas_speedup < low_cutoff) {
        return 0;
    }
    if (meas_speedup > high_cutoff) {
        return 120;
    }
    return 100*((meas_speedup-1.0)/(best_speedup-1.0) + 0.1);
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
double cme;
|
||||
double cme_c,cme_rc;
|
||||
int EnableScore=0;
|
||||
|
||||
if (argc == 3)
|
||||
{
|
||||
EnableScore = 1;
|
||||
verbose = 0;
|
||||
}
|
||||
init_tests();
|
||||
set_fcyc_clear_cache(1); /* Set so that clears cache between runs */
|
||||
for (i = 0; rc_fun_tab[i].f != NULL; i++) {
|
||||
cme = 100.0;
|
||||
time_rc(rc_fun_tab[i].f,
|
||||
rc_fun_tab[i].rc_type, rc_fun_tab[i].descr, &cme);
|
||||
if (i == 0)
|
||||
{
|
||||
cme_c = cme;
|
||||
if (EnableScore==0)
|
||||
{
|
||||
printf(" 最高\"列求和\"得分 ======================== %.0f\n",
|
||||
compute_score(cme, cstandard[0].cref, cstandard[0].cbest));
|
||||
}
|
||||
}
|
||||
if (i == 1)
|
||||
{
|
||||
cme_rc = cme;
|
||||
if (EnableScore==0)
|
||||
{
|
||||
printf(" 最高\"行和列求和\"得分 ====================== %.0f\n",
|
||||
compute_score(cme, cstandard[1].cref, cstandard[1].cbest));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (EnableScore)
|
||||
printf("%.2f\t %.0f\t %.2f\t %.0f\t 0\t 0\n",cme_c,compute_score(cme_c, cstandard[0].cref, cstandard[0].cbest),
|
||||
cme_rc,compute_score(cme_rc, cstandard[1].cref, cstandard[1].cbest));
|
||||
return 0;
|
||||
}
|
||||
229
perflab/poly/clock.c
Normal file
229
perflab/poly/clock.c
Normal file
@@ -0,0 +1,229 @@
|
||||
/* clock.c
|
||||
* Retrofitted to use thread-specific timers
|
||||
* and to get clock information from /proc/cpuinfo
|
||||
* (C) R. E. Bryant, 2010
|
||||
*
|
||||
*/
|
||||
|
||||
/* When this constant is not defined, uses time stamp counter */
|
||||
#define USE_POSIX 0
|
||||
|
||||
/* Choice to use cpu_gettime call or Intel time stamp counter directly */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <intrin.h>
|
||||
//#include <intrinsics.h>
|
||||
#include <windows.h>
|
||||
#include <time.h>
|
||||
#include "clock.h"
|
||||
|
||||
/* Use x86 cycle counter */
|
||||
|
||||
/* Initialize the cycle counter */
|
||||
static unsigned cyc_hi = 0;
|
||||
static unsigned cyc_lo = 0;
|
||||
|
||||
/* Set *hi and *lo to the high- and low-order 32 bits of the processor's
   time stamp counter, read through the __rdtsc() intrinsic. */
void access_counter(unsigned *hi, unsigned *lo)
{
    /* Hold the 64-bit reading in an unsigned type: right-shifting a
       negative signed value is implementation-defined. */
    unsigned long long counter = __rdtsc();

    *hi = (unsigned int) (counter >> 32);
    *lo = (unsigned int) counter;
}
|
||||
|
||||
|
||||
/* Record the current value of the cycle counter. */
|
||||
void start_counter()
|
||||
{
|
||||
access_counter(&cyc_hi, &cyc_lo);
|
||||
}
|
||||
|
||||
/* Return the number of cycles since the last call to start_counter. */
|
||||
double get_counter()
|
||||
{
|
||||
unsigned ncyc_hi, ncyc_lo;
|
||||
unsigned hi, lo, borrow;
|
||||
double result;
|
||||
|
||||
/* Get cycle counter */
|
||||
access_counter(&ncyc_hi, &ncyc_lo);
|
||||
|
||||
/* Do double precision subtraction */
|
||||
lo = ncyc_lo - cyc_lo;
|
||||
borrow = cyc_lo > ncyc_lo;
|
||||
hi = ncyc_hi - cyc_hi - borrow;
|
||||
result = (double) hi * (1 << 30) * 4 + lo;
|
||||
return result;
|
||||
}
|
||||
/* Spin until at least 1000000000 counter ticks have elapsed.
   Restarts the cycle counter as a side effect. */
void make_CPU_busy(void)
{
    volatile double first_tick, latest_tick;

    start_counter();
    first_tick = get_counter();
    do {
        latest_tick = get_counter();
    } while (latest_tick - first_tick < 1000000000);
}
|
||||
|
||||
//CPU的频率
|
||||
double mhz(int verbose)
|
||||
{
|
||||
LARGE_INTEGER lFrequency;
|
||||
LARGE_INTEGER lPerformanceCount_Start;
|
||||
LARGE_INTEGER lPerformanceCount_End;
|
||||
double mhz;
|
||||
double fTime;
|
||||
__int64 _i64StartCpuCounter;
|
||||
__int64 _i64EndCpuCounter;
|
||||
//On a multiprocessor machine, it should not matter which processor is called.
|
||||
//However, you can get different results on different processors due to bugs in
|
||||
//the BIOS or the HAL. To specify processor affinity for a thread, use the SetThreadAffinityMask function.
|
||||
HANDLE hThread=GetCurrentThread();
|
||||
SetThreadAffinityMask(hThread,0x1);
|
||||
|
||||
//主板上高精度定时器的晶振频率
|
||||
//这个定时器应该就是一片8253或者8254
|
||||
//在intel ich7中集成了8254
|
||||
QueryPerformanceFrequency(&lFrequency);
|
||||
// if (verbose>0)
|
||||
// printf("高精度定时器的晶振频率:%1.0fHz.\n",(double)lFrequency.QuadPart);
|
||||
|
||||
//这个定时器每经过一个时钟周期,其计数器会+1
|
||||
QueryPerformanceCounter(&lPerformanceCount_Start);
|
||||
|
||||
//RDTSC指令:获取CPU经历的时钟周期数
|
||||
_i64StartCpuCounter=__rdtsc();
|
||||
|
||||
//延时长一点,误差会小一点
|
||||
//int nTemp=100000;
|
||||
//while (--nTemp);
|
||||
Sleep(200);
|
||||
|
||||
QueryPerformanceCounter(&lPerformanceCount_End);
|
||||
|
||||
_i64EndCpuCounter=__rdtsc();
|
||||
|
||||
//f=1/T => f=计数次数/(计数次数*T)
|
||||
//这里的“计数次数*T”就是时间差
|
||||
fTime=((double)lPerformanceCount_End.QuadPart-(double)lPerformanceCount_Start.QuadPart)
|
||||
/(double)lFrequency.QuadPart;
|
||||
|
||||
mhz = (_i64EndCpuCounter-_i64StartCpuCounter)/(fTime*1000000.0);
|
||||
if (verbose>0)
|
||||
printf("CPU频率为:%1.6fMHz.\n",mhz);
|
||||
return mhz;
|
||||
}
|
||||
|
||||
double CPU_Factor1(void)
|
||||
{
|
||||
double result;
|
||||
int i,j,k,ii,jj,kk;
|
||||
LARGE_INTEGER lStart,lEnd;
|
||||
LARGE_INTEGER lFrequency;
|
||||
HANDLE hThread;
|
||||
double fTime;
|
||||
|
||||
QueryPerformanceFrequency(&lFrequency);
|
||||
|
||||
ii = 43273;
|
||||
kk = 1238;
|
||||
result = 1;
|
||||
jj = 1244;
|
||||
|
||||
hThread=GetCurrentThread();
|
||||
SetThreadAffinityMask(hThread,0x1);
|
||||
QueryPerformanceCounter(&lStart);
|
||||
//_asm("cpuid");
|
||||
start_counter();
|
||||
for (i=0;i<100;i++)
|
||||
for (j=0;j<1000;j++)
|
||||
for (k=0;k<1000;k++)
|
||||
kk += kk*ii+jj;
|
||||
|
||||
result = get_counter();
|
||||
QueryPerformanceCounter(&lEnd);
|
||||
fTime=((double)lEnd.QuadPart-(double)lStart.QuadPart);
|
||||
printf("CPU运行时间为%f",result);
|
||||
printf("\t %f\n",fTime);
|
||||
return result;
|
||||
}
|
||||
|
||||
double CPU_Factor(void)
|
||||
{
|
||||
double frequency;
|
||||
double multiplier = 1000 * 1000 * 1000;//nano
|
||||
LARGE_INTEGER lFrequency;
|
||||
LARGE_INTEGER start,stop;
|
||||
HANDLE hThread;
|
||||
int i;
|
||||
const int gigahertz= 1000*1000*1000;
|
||||
const int known_instructions_per_loop = 27317;
|
||||
|
||||
int iterations = 100000000;
|
||||
int g = 0;
|
||||
double normal_ticks_per_second;
|
||||
double ticks;
|
||||
double time;
|
||||
double loops_per_sec;
|
||||
double instructions_per_loop;
|
||||
double ratio;
|
||||
double actual_freq;
|
||||
|
||||
QueryPerformanceFrequency(&lFrequency);
|
||||
frequency = (double)lFrequency.QuadPart;
|
||||
|
||||
hThread=GetCurrentThread();
|
||||
SetThreadAffinityMask(hThread,0x1);
|
||||
QueryPerformanceCounter(&start);
|
||||
for( i = 0; i < iterations; i++)
|
||||
{
|
||||
g++;
|
||||
g++;
|
||||
g++;
|
||||
g++;
|
||||
}
|
||||
QueryPerformanceCounter(&stop);
|
||||
|
||||
//normal ticks differs from the WMI data, i.e 3125, when WMI 3201, and CPUZ 3199
|
||||
normal_ticks_per_second = frequency * 1000;
|
||||
ticks = (double)((double)stop.QuadPart - (double)start.QuadPart);
|
||||
time = (ticks * multiplier) /frequency;
|
||||
loops_per_sec = iterations / (time/multiplier);
|
||||
instructions_per_loop = normal_ticks_per_second / loops_per_sec;
|
||||
|
||||
ratio = (instructions_per_loop / known_instructions_per_loop);
|
||||
actual_freq = normal_ticks_per_second / ratio;
|
||||
/*
|
||||
actual_freq = normal_ticks_per_second / ratio;
|
||||
actual_freq = known_instructions_per_loop*iterations*multiplier/time;
|
||||
|
||||
2293 = x/time;
|
||||
|
||||
2292.599713*1191533038.809362=known_instructions_per_loop*100000000*1000
|
||||
loops_per_sec = iterations*frequency / ticks
|
||||
|
||||
instructions_per_loop = / loops_per_sec;
|
||||
*/
|
||||
printf("Perf counter freq: %f\n", normal_ticks_per_second);
|
||||
printf("Loops per sec: %f\n", loops_per_sec);
|
||||
printf("Perf counter freq div loops per sec: %f\n", instructions_per_loop);
|
||||
printf("Presumed freq: %f\n", actual_freq);
|
||||
printf("ratio: %f\n", ratio);
|
||||
printf("time=%f\n",time);
|
||||
return ratio;
|
||||
}
|
||||
12
perflab/poly/clock.h
Normal file
12
perflab/poly/clock.h
Normal file
@@ -0,0 +1,12 @@
|
||||
/* Routines for using cycle counter */
|
||||
|
||||
/* Start the counter */
|
||||
void start_counter(void);
|
||||
|
||||
/* Get # cycles since counter started (this implementation performs no timing-anomaly detection) */
|
||||
double get_counter(void);
|
||||
void make_CPU_busy(void);
|
||||
|
||||
double mhz(int verbose);
|
||||
double CPU_Factor(void);
|
||||
//double GetCpuClock(void);
|
||||
117
perflab/poly/cpe.c
Normal file
117
perflab/poly/cpe.c
Normal file
@@ -0,0 +1,117 @@
|
||||
/* Compute CPE for function */
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "fcyc.h"
|
||||
#include "cpe.h"
|
||||
#include "lsquare.h"
|
||||
#include "clock.h"
|
||||
|
||||
/* Find number of cycles taken by function.
|
||||
Do this by running number of trials until best two within TOL of
|
||||
each other
|
||||
*/
|
||||
double measure_function(elem_fun_t f, int cnt)
|
||||
{
|
||||
/* Need to fudge fact that fcyc wants a function taking an
|
||||
long int *, while our function takes an long int */
|
||||
test_funct tf = (test_funct) f;
|
||||
return fcyc(tf, (int *) (int) cnt);
|
||||
}
|
||||
|
||||
#define MAXCNT 100
|
||||
|
||||
#define LIM RAND_MAX
|
||||
|
||||
/* LCM of unrolling degree */
|
||||
#ifdef USE_UNI
|
||||
#define UNROLL 32
|
||||
#else /* USE_UNI */
|
||||
#define UNROLL 1
|
||||
#endif
|
||||
|
||||
/* Choose the element count for sample number index.
 * The count lies between bias*maxcnt and maxcnt: evenly spaced by index
 * for UNI_SAMPLE, drawn with rand() for RAN_SAMPLE. The result is rounded
 * down to a multiple of UNROLL. Any other sampling method is reported on
 * stderr and terminates the program.
 */
static long int get_cnt(long int index, long int samples,
                        long int maxcnt, sample_t smethod, double bias)
{
    long int lo_cnt = (long int) (bias*maxcnt);
    double frac;
    long int raw;

    if (smethod == UNI_SAMPLE) {
        frac = (double) index/(samples - 1);
    } else if (smethod == RAN_SAMPLE) {
        frac = (double) (rand() % LIM) / (double) (LIM-1);
    } else {
        fprintf(stderr, "Undefined sampling method %d\n", smethod);
        exit(1);
    }
    /* Interpolate between the lowest count and maxcnt */
    raw = lo_cnt + frac*(maxcnt-lo_cnt);
    return UNROLL * (raw/UNROLL);
}
|
||||
|
||||
#define SEED 31415
|
||||
|
||||
/* Find cpe for function f, which allows cnt up to maxcnt, using
|
||||
specified number of sample points.
|
||||
If data_file, then print data so that can plot points with Excel
|
||||
smethod determines method for generating samples
|
||||
*/
|
||||
double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file,
|
||||
sample_t smethod, double bias, long int verbose)
|
||||
{
|
||||
long int i;
|
||||
long int cnt;
|
||||
double cpe;
|
||||
double overhead = 0;
|
||||
double *cnt_val = calloc(samples, sizeof(double));
|
||||
double *cycle_val = calloc(samples, sizeof(double));
|
||||
/* Do the samples */
|
||||
|
||||
srand(SEED);
|
||||
for (i = 0; i < samples; i++) {
|
||||
cnt = get_cnt(i, samples, maxcnt, smethod, bias);
|
||||
cnt_val[i] = cnt;
|
||||
cycle_val[i] = measure_function(f, cnt);
|
||||
if (cycle_val[i] < 1.0) {
|
||||
fprintf(stderr, "Got %.2f cycles for count %ld\n", cycle_val[i], cnt);
|
||||
}
|
||||
}
|
||||
/* Fit data */
|
||||
cpe = ls_slope(cnt_val, cycle_val, samples);
|
||||
if (data_file)
|
||||
overhead = ls_intercept(cnt_val, cycle_val, samples);
|
||||
if (data_file && verbose > 1) {
|
||||
/* Print x values */
|
||||
fprintf(data_file, "Cnt\t0");
|
||||
for (i = 0; i < samples; i++)
|
||||
fprintf(data_file, "\t%.0f", cnt_val[i]);
|
||||
fprintf(data_file, "\n");
|
||||
/* Print y values */
|
||||
fprintf(data_file, "Cycs.\t");
|
||||
for (i = 0; i < samples; i++)
|
||||
fprintf(data_file, "\t%.2f", cycle_val[i]);
|
||||
fprintf(data_file, "\n");
|
||||
/* Print ax*b values */
|
||||
fprintf(data_file, "Interp.\t%.2f", overhead);
|
||||
for (i = 0; i < samples; i++)
|
||||
fprintf(data_file, "\t%.2f", cpe*cnt_val[i]+overhead);
|
||||
fprintf(data_file, "\n");
|
||||
}
|
||||
if (data_file && verbose) {
|
||||
/* Print results */
|
||||
fprintf(data_file, "cpe\t%.2f\tovhd\t%.2f\tavgerr\t\\%.3f\tmaxerr\t\\%.3f\n",
|
||||
cpe, overhead,
|
||||
ls_error(cnt_val, cycle_val, samples, LS_AVG),
|
||||
ls_error(cnt_val, cycle_val, samples, LS_MAX));
|
||||
}
|
||||
free(cnt_val);
|
||||
free(cycle_val);
|
||||
return cpe;
|
||||
}
|
||||
|
||||
/* Use default parameters */
|
||||
double find_cpe(elem_fun_t f, int maxcnt)
|
||||
{
|
||||
return find_cpe_full(f, maxcnt, 100, stdout, RAN_SAMPLE, 0.3, 0);
|
||||
}
|
||||
31
perflab/poly/cpe.h
Normal file
31
perflab/poly/cpe.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/* Compute CPE for function */
|
||||
|
||||
/* Compute for function that is linear in some parameter cnt */
|
||||
typedef void (*elem_fun_t)(int);
|
||||
|
||||
/* Different ways of finding samples
|
||||
UNI_SAMPLE: samples uniformly spaced between bias*maxcnt and maxcnt
|
||||
RAN_SAMPLE: samples randomly selected between bias*maxcnt and maxcnt
|
||||
*/
|
||||
|
||||
typedef enum {UNI_SAMPLE, RAN_SAMPLE}
|
||||
sample_t;
|
||||
|
||||
/* Find cpe for function f, which allows cnt up to maxcnt.
|
||||
Uses default parameters
|
||||
*/
|
||||
double find_cpe(elem_fun_t f, int maxcnt);
|
||||
|
||||
/* Find cpe for function f, which allows cnt up to maxcnt, using
|
||||
specified number of sample points.
|
||||
If data_file, then print data so that can plot points with Excel
|
||||
smethod determines method for generating samples
|
||||
*/
|
||||
double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file,
|
||||
sample_t smethod, double bias, long int verbose);
|
||||
|
||||
/* Find number of cycles taken by function.
|
||||
Do this by running number of trials until best two within TOL (2%) of
|
||||
each other
|
||||
*/
|
||||
double measure_function(elem_fun_t f, int cnt);
|
||||
223
perflab/poly/fcyc.c
Normal file
223
perflab/poly/fcyc.c
Normal file
@@ -0,0 +1,223 @@
|
||||
/* Compute time used by function f */
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "clock.h"
|
||||
#include "fcyc.h"
|
||||
|
||||
#define K 3
|
||||
#define MAXSAMPLES 20
|
||||
#define EPSILON 0.01
|
||||
#define COMPENSATE 0
|
||||
#define CLEAR_CACHE 0
|
||||
#define CACHE_BYTES (1<<19)
|
||||
#define CACHE_BLOCK 32
|
||||
#define MAX_ITER_TIMES 10
|
||||
|
||||
static long int kbest = K;
|
||||
static long int compensate = COMPENSATE;
|
||||
static long int clear_cache = CLEAR_CACHE;
|
||||
static long int maxsamples = MAXSAMPLES;
|
||||
static double epsilon = EPSILON;
|
||||
static long int cache_bytes = CACHE_BYTES;
|
||||
static long int cache_block = CACHE_BLOCK;
|
||||
|
||||
static long int *cache_buf = NULL;
|
||||
|
||||
static double *values = NULL;
|
||||
static long int samplecount = 0;
|
||||
|
||||
#define KEEP_VALS 0
|
||||
#define KEEP_SAMPLES 0
|
||||
|
||||
#if KEEP_SAMPLES
|
||||
static double *samples = NULL;
|
||||
#endif
|
||||
|
||||
/* Start new sampling process */
|
||||
static void init_sampler(void)
|
||||
{
|
||||
if (values)
|
||||
free(values);
|
||||
values = calloc(kbest, sizeof(double));
|
||||
#if KEEP_SAMPLES
|
||||
if (samples)
|
||||
free(samples);
|
||||
/* Allocate extra for wraparound analysis */
|
||||
samples = calloc(maxsamples+kbest, sizeof(double));
|
||||
#endif
|
||||
samplecount = 0;
|
||||
}
|
||||
|
||||
/* Add new sample. */
|
||||
static void add_sample(double val)
|
||||
{
|
||||
long int pos = 0;
|
||||
if (samplecount < kbest) {
|
||||
pos = samplecount;
|
||||
values[pos] = val;
|
||||
} else if (val < values[kbest-1]) {
|
||||
pos = kbest-1;
|
||||
values[pos] = val;
|
||||
}
|
||||
#if KEEP_SAMPLES
|
||||
samples[samplecount] = val;
|
||||
#endif
|
||||
samplecount++;
|
||||
/* Insertion sort */
|
||||
while (pos > 0 && values[pos-1] > values[pos]) {
|
||||
double temp = values[pos-1];
|
||||
values[pos-1] = values[pos];
|
||||
values[pos] = temp;
|
||||
pos--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Have kbest minimum measurements converged within epsilon? */
|
||||
static long int has_converged(void)
|
||||
{
|
||||
return
|
||||
(samplecount >= kbest) &&
|
||||
((1 + epsilon)*values[0] >= values[kbest-1]);
|
||||
}
|
||||
|
||||
/* Code to clear cache */
|
||||
|
||||
|
||||
static volatile long int sink = 0;
|
||||
|
||||
static void clear(void)
|
||||
{
|
||||
long int x = sink;
|
||||
long int *cptr, *cend;
|
||||
long int incr = cache_block/sizeof(long int);
|
||||
if (!cache_buf) {
|
||||
cache_buf = malloc(cache_bytes);
|
||||
if (!cache_buf) {
|
||||
fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
cptr = (long int *) cache_buf;
|
||||
cend = cptr + cache_bytes/sizeof(long int);
|
||||
while (cptr < cend) {
|
||||
x += *cptr;
|
||||
cptr += incr;
|
||||
}
|
||||
sink = x;
|
||||
}
|
||||
|
||||
/* Compute the number of cycles used by function f on the given parameters.
 * f is measured repeatedly until the kbest smallest measurements agree
 * within epsilon or maxsamples samples have been collected; the smallest
 * measurement is returned. With compensate set, each sample times a
 * single call; otherwise each sample times MAX_ITER_TIMES calls and
 * records the average. The cache is cleared before every sample when
 * clear_cache is set.
 */
double fcyc(test_funct f, int *params)
{
    int i;
    double result;
    /* test_funct takes a long int *, so the int * received here must be
       converted explicitly; passing it as-is is an incompatible pointer
       type that newer compilers reject. */
    long int *fparams = (long int *) params;

    init_sampler();
    if (compensate) {
        do {
            double cyc;
            if (clear_cache)
                clear();
            start_counter();
            f(fparams);
            cyc = get_counter();
            /* Non-positive readings are discarded */
            if (cyc > 0.0)
                add_sample(cyc);
        } while (!has_converged() && samplecount < maxsamples);
    } else {
        do {
            double cyc;
            if (clear_cache)
                clear();
            start_counter();
            for (i=0;i<MAX_ITER_TIMES;i++)
                f(fparams);
            cyc = get_counter()/MAX_ITER_TIMES;
            /* Non-positive readings are discarded */
            if (cyc > 0.0)
                add_sample(cyc);
        } while (!has_converged() && samplecount < maxsamples);
    }
#ifdef DEBUG
    {
        long int i;
        printf(" %ld smallest values: [", kbest);
        for (i = 0; i < kbest; i++)
            printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", ");
    }
#endif
    result = values[0];
#if !KEEP_VALS
    free(values);
    values = NULL;
#endif
    return result;
}
|
||||
|
||||
|
||||
/***********************************************************/
|
||||
/* Set the various parameters used by measurement routines */
|
||||
|
||||
|
||||
/* When set, will run code to clear cache before each measurement
|
||||
Default = 0
|
||||
*/
|
||||
void set_fcyc_clear_cache(long int clear)
|
||||
{
|
||||
clear_cache = clear;
|
||||
}
|
||||
|
||||
/* Set size of cache to use when clearing cache
|
||||
Default = 1<<19 (512KB)
|
||||
*/
|
||||
void set_fcyc_cache_size(long int bytes)
|
||||
{
|
||||
if (bytes != cache_bytes) {
|
||||
cache_bytes = bytes;
|
||||
if (cache_buf) {
|
||||
free(cache_buf);
|
||||
cache_buf = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Set size of cache block
|
||||
Default = 32
|
||||
*/
|
||||
void set_fcyc_cache_block(long int bytes) {
|
||||
cache_block = bytes;
|
||||
}
|
||||
|
||||
|
||||
/* When set, will attempt to compensate for timer interrupt overhead
|
||||
Default = 0
|
||||
*/
|
||||
void set_fcyc_compensate(long int compensate_arg)
|
||||
{
|
||||
compensate = compensate_arg;
|
||||
}
|
||||
|
||||
/* Value of K in K-best
|
||||
Default = 3
|
||||
*/
|
||||
void set_fcyc_k(long int k)
|
||||
{
|
||||
kbest = k;
|
||||
}
|
||||
|
||||
/* Maximum number of samples attempting to find K-best within some tolerance.
|
||||
When exceeded, just return best sample found.
|
||||
Default = 20
|
||||
*/
|
||||
void set_fcyc_maxsamples(long int maxsamples_arg)
|
||||
{
|
||||
maxsamples = maxsamples_arg;
|
||||
}
|
||||
|
||||
/* Tolerance required for K-best
|
||||
Default = 0.01
|
||||
*/
|
||||
void set_fcyc_epsilon(double epsilon_arg)
|
||||
{
|
||||
epsilon = epsilon_arg;
|
||||
}
|
||||
52
perflab/poly/fcyc.h
Normal file
52
perflab/poly/fcyc.h
Normal file
@@ -0,0 +1,52 @@
|
||||
|
||||
/* Fcyc measures the speed of any "test function." Such a function
|
||||
is passed a list of integer parameters, which it may interpret
|
||||
in any way it chooses.
|
||||
*/
|
||||
|
||||
typedef void (*test_funct)(long int *);
|
||||
|
||||
/* Compute number of cycles used by function f on given set of parameters */
|
||||
double fcyc(test_funct f, int* params);
|
||||
|
||||
/***********************************************************/
|
||||
/* Set the various parameters used by measurement routines */
|
||||
|
||||
|
||||
/* When set, will run code to clear cache before each measurement
|
||||
Default = 0
|
||||
*/
|
||||
void set_fcyc_clear_cache(long int clear);
|
||||
|
||||
/* Set size of cache to use when clearing cache
|
||||
Default = 1<<19 (512KB)
|
||||
*/
|
||||
void set_fcyc_cache_size(long int bytes);
|
||||
|
||||
/* Set size of cache block
|
||||
Default = 32
|
||||
*/
|
||||
void set_fcyc_cache_block(long int bytes);
|
||||
|
||||
/* When set, will attempt to compensate for timer interrupt overhead
|
||||
Default = 0
|
||||
*/
|
||||
void set_fcyc_compensate(long int compensate);
|
||||
|
||||
/* Value of K in K-best
|
||||
Default = 3
|
||||
*/
|
||||
void set_fcyc_k(long int k);
|
||||
|
||||
/* Maximum number of samples attempting to find K-best within some tolerance.
|
||||
When exceeded, just return best sample found.
|
||||
Default = 20
|
||||
*/
|
||||
void set_fcyc_maxsamples(long int maxsamples);
|
||||
|
||||
/* Tolerance required for K-best
|
||||
Default = 0.01
|
||||
*/
|
||||
void set_fcyc_epsilon(double epsilon);
|
||||
|
||||
|
||||
94
perflab/poly/lsquare.c
Normal file
94
perflab/poly/lsquare.c
Normal file
@@ -0,0 +1,94 @@
|
||||
/* Compute least squares fit of set of data points */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "lsquare.h"
|
||||
|
||||
/* Sums over the data points, filled in by ls_stats() */
typedef struct {
    double sum_x;   /* sum of x values */
    double sum_y;   /* sum of y values */
    double sum_xx;  /* sum of x*x */
    double sum_xy;  /* sum of x*y */
} ls_stat_t;
|
||||
|
||||
/* Accumulate various sums of the data */
|
||||
static void ls_stats(double *xval, double *yval, int cnt, ls_stat_t *statp)
|
||||
{
|
||||
int i;
|
||||
statp->sum_x = 0.0;
|
||||
statp->sum_y = 0.0;
|
||||
statp->sum_xx = 0.0;
|
||||
statp->sum_xy = 0.0;
|
||||
for (i = 0; i < cnt; i++) {
|
||||
double x = xval[i];
|
||||
double y = yval[i];
|
||||
statp->sum_x += x;
|
||||
statp->sum_y += y;
|
||||
statp->sum_xx += x * x;
|
||||
statp->sum_xy += x * y;
|
||||
}
|
||||
}
|
||||
|
||||
double ls_slope(double *xval, double *yval, int cnt)
|
||||
{
|
||||
double slope;
|
||||
ls_stat_t stat;
|
||||
ls_stats(xval, yval, cnt, &stat);
|
||||
slope = (cnt * stat.sum_xy - stat.sum_x * stat.sum_y)/
|
||||
(cnt * stat.sum_xx - stat.sum_x*stat.sum_x);
|
||||
return slope;
|
||||
}
|
||||
|
||||
double ls_intercept(double *xval, double *yval, int cnt)
|
||||
{
|
||||
double intercept;
|
||||
ls_stat_t stat;
|
||||
ls_stats(xval, yval, cnt, &stat);
|
||||
intercept = (stat.sum_xx * stat.sum_y - stat.sum_xy * stat.sum_x)/
|
||||
(cnt * stat.sum_xx - stat.sum_x*stat.sum_x);
|
||||
return intercept;
|
||||
}
|
||||
|
||||
/* Error of the point (x, y) relative to the line slope*x + intercept.
   Returns |y - predicted| / predicted, or just |y - predicted| when the
   predicted value is exactly zero. The divisor keeps its sign. */
static double rel_err(double x, double y, double slope, double intercept)
{
    double predicted = slope*x + intercept;
    double miss = y - predicted;
    double abs_miss = (miss < 0) ? -miss : miss;

    return (predicted == 0) ? abs_miss : abs_miss/predicted;
}
|
||||
|
||||
/* Error of the least squares fit through the cnt points.
   Fits the line, then combines the per-point relative errors: their
   average for LS_AVG, their maximum for LS_MAX. Any other etype is
   reported on stderr and terminates the program when the first point is
   processed. */
double ls_error(double *xval, double *yval, int cnt, ls_err_t etype)
{
    ls_stat_t s;
    double slope;
    double intercept;
    double num = 0;
    double denom = 0;
    int k;

    ls_stats(xval, yval, cnt, &s);
    slope = (cnt * s.sum_xy - s.sum_x * s.sum_y)/
        (cnt * s.sum_xx - s.sum_x*s.sum_x);
    intercept = (s.sum_xx * s.sum_y - s.sum_xy * s.sum_x)/
        (cnt * s.sum_xx - s.sum_x*s.sum_x);

    for (k = 0; k < cnt; k++) {
        double e = rel_err(xval[k], yval[k], slope, intercept);

        if (etype == LS_AVG) {
            /* running total and point count */
            num += e;
            denom++;
        } else if (etype == LS_MAX) {
            /* largest error so far, divided by 1 at the end */
            if (num < e)
                num = e;
            denom = 1;
        } else {
            fprintf(stderr, "Invalid error type: %d\n", etype);
            exit(1);
        }
    }
    return num/denom;
}
|
||||
11
perflab/poly/lsquare.h
Normal file
11
perflab/poly/lsquare.h
Normal file
@@ -0,0 +1,11 @@
|
||||
/* Compute least squares fit of set of data points */
|
||||
|
||||
/* Fit is of form y = mx + b. m is slope, b is intercept */
|
||||
double ls_slope(double *xval, double *yval, int cnt);
|
||||
double ls_intercept(double *xval, double *yval, int cnt);
|
||||
|
||||
typedef enum {LS_AVG, LS_MAX} ls_err_t;
|
||||
|
||||
/* Determine error (either maximum or average relative error) of least squares fit */
|
||||
double ls_error(double *xval, double *yval, int cnt, ls_err_t etype);
|
||||
|
||||
125
perflab/poly/poly.c
Normal file
125
perflab/poly/poly.c
Normal file
@@ -0,0 +1,125 @@
|
||||
/**************************************************************************
|
||||
多项式计算函数。按下面的要求编辑此文件:
|
||||
1. 将你的学号、姓名,以注释的方式写到下面;
|
||||
2. 实现不同版本的多项式计算函数;
|
||||
3. 编辑peval_fun_rec peval_fun_tab数组,将你的最好的答案
|
||||
(最小CPE、最小C10)作为数组的前两项
|
||||
***************************************************************************/
|
||||
|
||||
/*
|
||||
学号:201209054233
|
||||
姓名:夜半加班狂
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
typedef int (*peval_fun)(int*, int, int);
|
||||
|
||||
typedef struct {
|
||||
peval_fun f;
|
||||
char *descr;
|
||||
} peval_fun_rec, *peval_fun_ptr;
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
Edit this comment to indicate your name and Andrew ID
|
||||
#ifdef ASSIGN
|
||||
Submission by Harry Q. Bovik, bovik@andrew.cmu.edu
|
||||
#else
|
||||
Instructor's version.
|
||||
Created by Randal E. Bryant, Randy.Bryant@cs.cmu.edu, 10/07/02
|
||||
#endif
|
||||
***************************************************************************/
|
||||
|
||||
/*
 * Evaluate the fixed cubic  21 + 90*x + 42*x^2 + 88*x^3.
 *
 * The coefficients are the ones assigned by the test driver (it prints
 * them at start-up; run the program once to learn which polynomial has to
 * be implemented).  The first two parameters are unused and exist only so
 * that the function matches the peval_fun signature.
 *
 * Multiplications by the constant coefficients are replaced by shifts and
 * adds:
 *     90 = 64 + 32 - 4 - 2
 *     42 = 32 + 8 + 2
 *     88 = 64 + 16 + 8
 * and the powers of x are folded in with Horner's rule.
 *
 * All arithmetic is done on unsigned values: left-shifting a negative int
 * and overflowing a signed int are both undefined behaviour in C, whereas
 * unsigned arithmetic wraps modulo 2^N.  The final conversion back to int
 * gives the usual two's-complement result.
 */
int const_poly_eval(int *not_use, int not_use2, int x)
{
    unsigned ux = (unsigned)x;
    unsigned x64 = ux << 6;
    unsigned x32 = ux << 5;
    unsigned x16 = ux << 4;
    unsigned x8 = ux << 3;
    unsigned x4 = ux << 2;
    unsigned x2 = ux << 1;
    unsigned result;

    result = 21u + x64+x32-x4-x2 + ((x32+x8+x2) + (x64+x16+x8)*ux)*ux;
    return (int)result;
}
|
||||
|
||||
|
||||
|
||||
/* 多项式计算函数。注意:这个只是一个参考实现,你需要实现自己的版本 */
|
||||
|
||||
/*
|
||||
友情提示:lcc支持ATT格式的嵌入式汇编,例如
|
||||
|
||||
_asm("movl %eax,%ebx");
|
||||
_asm("pushl %edx");
|
||||
|
||||
可以在lcc中project->configuration->Compiler->Code Generation->Generate .asm,
|
||||
将其选中后,可以在lcc目录下面生成对应程序的汇编代码实现。通过查看汇编文件,
|
||||
你可以了解编译器是如何实现你的代码的。有些实现可能非常低效。
|
||||
你可以在适当的地方加入嵌入式汇编,来大幅度提高计算性能。
|
||||
*/
|
||||
|
||||
/*
 * Reference polynomial evaluation.
 * Returns a[0] + a[1]*x + ... + a[degree]*x^degree, reduced modulo 2^N
 * (N = width of int).  A negative degree yields 0.
 *
 * The sums and powers overflow int for realistic inputs, and signed
 * overflow is undefined behaviour, so the computation is carried out on
 * unsigned values (which wrap) and converted back to int at the end.
 */
int poly_eval(int *a, int degree, int x)
{
    unsigned result = 0;
    unsigned xpwr = 1;          /* current power of x */
    unsigned ux = (unsigned)x;
    int i;

    for (i = 0; i <= degree; i++) {
        result += (unsigned)a[i]*xpwr;
        xpwr *= ux;
    }
    return (int)result;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
这个表格包含多个数组元素,每一组元素(函数名字, "描述字符串")
|
||||
将你认为最好的两个实现,放在最前面。
|
||||
比如:
|
||||
{my_poly_eval1, "超级垃圾实现"},
|
||||
{my_poly_eval2, "好一点的实现"},
|
||||
*/
|
||||
|
||||
peval_fun_rec peval_fun_tab[] =
|
||||
{
|
||||
|
||||
/* 第一项,应当是你写的最好CPE的函数实现 */
|
||||
{poly_eval, "夜半加班狂的CPE"},
|
||||
/* 第二项,应当是你写的在10阶时具有最好性能的实现 */
|
||||
{poly_eval, "夜半加班狂的10阶实现"},
|
||||
|
||||
{poly_eval, "poly_eval: 参考实现"},
|
||||
|
||||
/* 下面的代码不能修改或者删除!!表明数组列表结束 */
|
||||
{NULL, ""}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
17
perflab/poly/poly.h
Normal file
17
perflab/poly/poly.h
Normal file
@@ -0,0 +1,17 @@
|
||||
/*
|
||||
Integer polynomial evaluation.
|
||||
Polynomial given by array of coefficients a[0] ... a[degree].
|
||||
Want to compute SUM(i=0,degree) a[i] * x^i
|
||||
*/
|
||||
|
||||
/* Type declaration for a polynomial evaluation function */
|
||||
typedef int (*peval_fun)(int*, int, int);
|
||||
|
||||
typedef struct {
|
||||
peval_fun f;
|
||||
char *descr;
|
||||
} peval_fun_rec, *peval_fun_ptr;
|
||||
|
||||
/* Table of polynomial functions to test. Null terminated */
|
||||
extern peval_fun_rec peval_fun_tab[];
|
||||
|
||||
28
perflab/poly/poly/poly.sln
Normal file
28
perflab/poly/poly/poly.sln
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 14
|
||||
VisualStudioVersion = 14.0.25420.1
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "poly", "poly.vcxproj", "{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Debug|x64.Build.0 = Debug|x64
|
||||
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Debug|x86.Build.0 = Debug|Win32
|
||||
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Release|x64.ActiveCfg = Release|x64
|
||||
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Release|x64.Build.0 = Release|x64
|
||||
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Release|x86.ActiveCfg = Release|Win32
|
||||
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Release|x86.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
123
perflab/poly/poly/poly.vcxproj
Normal file
123
perflab/poly/poly/poly.vcxproj
Normal file
@@ -0,0 +1,123 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}</ProjectGuid>
|
||||
<RootNamespace>poly</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\clock.c" />
|
||||
<ClCompile Include="..\cpe.c" />
|
||||
<ClCompile Include="..\fcyc.c" />
|
||||
<ClCompile Include="..\lsquare.c" />
|
||||
<ClCompile Include="..\poly.c" />
|
||||
<ClCompile Include="..\poly_test.c" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
37
perflab/poly/poly/poly.vcxproj.filters
Normal file
37
perflab/poly/poly/poly.vcxproj.filters
Normal file
@@ -0,0 +1,37 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="源文件">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="头文件">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="资源文件">
|
||||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\clock.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\cpe.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\fcyc.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\lsquare.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\poly.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\poly_test.c">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
302
perflab/poly/poly_test.c
Normal file
302
perflab/poly/poly_test.c
Normal file
@@ -0,0 +1,302 @@
|
||||
/* Test setup for polynomial evaluation. Do not change this. */
|
||||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
//#include <random.h>
#include "poly.h"
#include "cpe.h"
#include "clock.h"
|
||||
|
||||
double CPU_Mhz;
|
||||
|
||||
/* Degree for fixed evaluation */
|
||||
#define FIXDEGREE 10
|
||||
/* Largest degree polynomial tested */
|
||||
#define MAXDEGREE 2000
|
||||
static int coeff[MAXDEGREE+1];
|
||||
|
||||
#define MAX_ITER_COUNT 100
|
||||
|
||||
#define REF_CPU_MHZ 2292.6 // 这是我的处理器主频
|
||||
|
||||
/* Performance standards used for scoring, one row per measurement:
     [0] CPE (cycles per element)
     [1] C(10), cycles for the fixed degree-10 evaluation
     [2] constant-coefficient polynomial evaluation */
static struct {
    double cref;   /* Cycles taken by reference solution */
    double cbest;  /* Cycles taken by our best implementation */
} cstandard[3] = {
    { .cref = 4.00, .cbest = 1.75 },
    { .cref = 50,   .cbest = 43 },
    { .cref = 57,   .cbest = 31 }
};
|
||||
|
||||
int coeff_const[4];
|
||||
|
||||
/* Should I print extra information? */
|
||||
int verbose = 0;
|
||||
|
||||
/* Standard value for polynomial evaluation */
|
||||
static int xval;
|
||||
|
||||
/* How many degrees should I compute reference value for? */
|
||||
#define DCNT 20
|
||||
|
||||
/* Correct value of polynomial evaluation for range of different degrees */
|
||||
/* pval[i] contains evaluation for degree MAXDEGREE-i */
|
||||
static int pval[DCNT];
|
||||
/* fixval contains evaluation for degree FIXDEGREE */
|
||||
static int fixval;
|
||||
static int fixval_const;
|
||||
|
||||
static void init_const_poly(void);
|
||||
static void init(void);
|
||||
extern int const_poly_eval(int *not_use, int not_use2, int x);
|
||||
void run_fun_const(int degree);
|
||||
static double compute_score(double cmeas, double cref, double cbest);
|
||||
unsigned long rand1_h,rand1_l,rand_div;
|
||||
void rand_step(unsigned long divv);
|
||||
void GenerateRandomNumber(unsigned long divv);
|
||||
extern void make_CPU_busy(void);
|
||||
double run_poly_perf_test(void);
|
||||
|
||||
/* Reference implementation.
   Returns a[0] + a[1]*x + ... + a[degree]*x^degree reduced modulo 2^N
   (N = width of int); a negative degree yields 0.
   The test data (rand() values, degrees up to MAXDEGREE) overflow int,
   and signed overflow is undefined behaviour, so the work is done on
   unsigned values, which wrap, and converted back to int at the end. */
static int ref_poly_eval(int *a, int degree, int x)
{
    unsigned result = 0;
    unsigned xpwr = 1;          /* Successive powers of x */
    unsigned ux = (unsigned)x;
    int i;

    for (i = 0; i <= degree; i++) {
        result += (unsigned)a[i]*xpwr;
        xpwr *= ux;
    }
    return (int)result;
}
|
||||
|
||||
/* Initialize polynomial to constant values and compute reference values */
|
||||
static void init_const_poly(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0;i<4;i++)
|
||||
{
|
||||
GenerateRandomNumber(90);
|
||||
coeff_const[i] = rand_div+10;
|
||||
}
|
||||
|
||||
printf("你需要修改poly.c的const_poly_eval函数,实现下面的常数多项式计算!\n");
|
||||
printf("\tresult=%d+%d*x+%d*x^2+%d*x^3\n",coeff_const[0],coeff_const[1],coeff_const[2],coeff_const[3]);
|
||||
|
||||
fixval_const = ref_poly_eval(coeff_const, 3, xval);
|
||||
// printf("x=%d, fixval_const=%d\n",xval,fixval_const);
|
||||
|
||||
}
|
||||
|
||||
void test_const_poly(void)
|
||||
{
|
||||
int i;
|
||||
double fix_time=0;
|
||||
int my_cal = const_poly_eval(coeff_const, 3, xval);
|
||||
if (fixval_const != my_cal)
|
||||
{
|
||||
printf("常系数多项式计算const_poly_eval实现错误(x=%d),预期结果是%d,但是计算得到的是%d\n",xval,fixval_const,my_cal);
|
||||
exit(0);
|
||||
}
|
||||
fix_time = 0;
|
||||
for (i=0;i<MAX_ITER_COUNT;i++)
|
||||
fix_time += measure_function(run_fun_const, 3);
|
||||
fix_time = fix_time / MAX_ITER_COUNT;
|
||||
printf(" 常系数多项式计算时间 = %.1f\n", fix_time);
|
||||
printf(" 最高的常系数多项式计算得分 ============== %.0f\n",
|
||||
compute_score(fix_time, cstandard[2].cref, cstandard[2].cbest));
|
||||
}
|
||||
|
||||
/* Initialize polynomial to random values and compute reference values */
|
||||
static void init(void)
|
||||
{
|
||||
int i;
|
||||
xval = rand();
|
||||
for (i = 0; i <= MAXDEGREE; i++)
|
||||
coeff[i] = rand();
|
||||
for (i = 0; i < DCNT; i++)
|
||||
pval[i] = ref_poly_eval(coeff, MAXDEGREE-i, xval);
|
||||
fixval = ref_poly_eval(coeff, FIXDEGREE, xval);
|
||||
}
|
||||
|
||||
/* Test function on standard test cases. */
|
||||
int test_poly(peval_fun f, FILE *rpt) {
|
||||
int i;
|
||||
int v;
|
||||
int ok = 1;
|
||||
for (i = 0; i < DCNT; i++) {
|
||||
v = f(coeff, MAXDEGREE-i, xval);
|
||||
if (v != pval[i]) {
|
||||
ok = 0;
|
||||
if (rpt) {
|
||||
fprintf(rpt,
|
||||
"错误!多项式计算不对!阶=%d时,计算的值是%d,而正确值是%d\n",
|
||||
MAXDEGREE-i, v, pval[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
v = f(coeff, FIXDEGREE, xval);
|
||||
if (v != fixval) {
|
||||
ok = 0;
|
||||
if (rpt) {
|
||||
fprintf(rpt,
|
||||
"错误!多项式计算不对!阶=%d时,计算的值是%d,而正确值是%d\n",
|
||||
FIXDEGREE, v, fixval);
|
||||
}
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* Fit into framework of cpe measuring code */
|
||||
static peval_fun pfun;
|
||||
|
||||
volatile int sink;
|
||||
/* Run pfun for given degree */
|
||||
void run_fun(int degree)
|
||||
{
|
||||
sink = pfun(coeff, degree, xval);
|
||||
}
|
||||
|
||||
volatile int sink_const;
|
||||
/* Run pfun for given degree */
|
||||
void run_fun_const(int degree)
|
||||
{
|
||||
sink_const = const_poly_eval(coeff_const, degree, xval);
|
||||
}
|
||||
|
||||
|
||||
/* Test and measure polynomial evaluation function. Set values
|
||||
of CPE and CFIX */
|
||||
void run_poly(peval_fun f, char *descr, double *cpep, double *cfixp)
|
||||
{
|
||||
int i;
|
||||
double cpe=0;
|
||||
double fix_time=0;
|
||||
pfun = f;
|
||||
printf("函数:%s\n", descr);
|
||||
if (test_poly(f, stdout)) {
|
||||
cpe = 0;
|
||||
for (i=0;i<MAX_ITER_COUNT;i++)
|
||||
cpe += find_cpe(run_fun, MAXDEGREE);
|
||||
cpe = cpe/MAX_ITER_COUNT;
|
||||
fix_time = 0;
|
||||
for (i=0;i<MAX_ITER_COUNT;i++)
|
||||
fix_time += measure_function(run_fun, FIXDEGREE);
|
||||
fix_time = fix_time/MAX_ITER_COUNT;
|
||||
printf(" CPE = %.2f\tC(%d) = %.1f\n", cpe,
|
||||
FIXDEGREE, fix_time);
|
||||
if (cpep)
|
||||
*cpep = cpe;
|
||||
if (cfixp)
|
||||
*cfixp = fix_time;
|
||||
}
|
||||
}
|
||||
|
||||
/* Grade a measured cycle count.
   cmeas is the measurement, cref the reference solution's cycles and
   cbest the cycles of the best known solution.  Speedups are taken
   relative to cref.  A measured speedup that covers less than 10% of the
   way from 1 to the best speedup scores 0; one that covers more than 110%
   scores 120; in between the score grows linearly, with the best speedup
   itself worth 110. */
static double compute_score(double cmeas, double cref, double cbest)
{
    double best_speedup = cref/cbest;
    double speedup = cref/cmeas;
    double floor_speedup = 0.1*(best_speedup-1)+1;
    double cap_speedup = 1.1*(best_speedup-1)+1;

    if (speedup < floor_speedup)
        return 0;
    if (speedup > cap_speedup)
        return 120;
    return 100*((speedup-1.0)/(best_speedup-1.0) + 0.1);
}
|
||||
|
||||
/* 产生一个0~divv-1之间的随机数,同时更新随机数种子 */
|
||||
void GenerateRandomNumber(unsigned long divv)
|
||||
{
|
||||
unsigned long long x = rand1_h;
|
||||
x *= 0x6AC690C5;
|
||||
x += rand1_l;
|
||||
|
||||
rand1_h = (unsigned long)x;
|
||||
rand1_l = (unsigned long)(x>>32);
|
||||
if (divv==0) return;
|
||||
|
||||
rand_div = rand1_h % divv;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
double cpe = cstandard[0].cref;
|
||||
double cfix = cstandard[1].cref;
|
||||
verbose = 0;
|
||||
srand((unsigned int)time(NULL));
|
||||
|
||||
// CPU_Factor();
|
||||
// GetCpuClock();
|
||||
printf("\t2015多项式优化实验,欢迎你!\n");
|
||||
printf("============================\n");
|
||||
|
||||
if (argc == 1)
|
||||
{
|
||||
printf("使用方法:%s 学号后6位 [学号后6位] [学号后6位] ...\n",argv[0]);
|
||||
printf("你需要依据提示改写poly.c程序,实现一个常系数多项式的计算,尽可能快哦....\n");
|
||||
printf("另外,你需要改写poly.c程序,实现任意阶的多项式计算和10阶的多项式计算,要快!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*依据学号,初始化一个随机数发生器*/
|
||||
rand1_h = (unsigned long)atoi(argv[1]);
|
||||
rand1_l=0x29A;
|
||||
GenerateRandomNumber(0);
|
||||
for (i=2;i<argc;i++)
|
||||
{
|
||||
rand1_l = (unsigned long)atoi(argv[i]);
|
||||
GenerateRandomNumber(0);
|
||||
}
|
||||
|
||||
GenerateRandomNumber(50);
|
||||
//srand(rand_div);
|
||||
|
||||
//make_CPU_busy();
|
||||
//CPU_Mhz=mhz(1);
|
||||
init();
|
||||
init_const_poly();
|
||||
printf("============================\n");
|
||||
//make_CPU_busy();
|
||||
//run_poly_perf_test();
|
||||
test_const_poly();
|
||||
for (i = 0; peval_fun_tab[i].f != NULL; i++) {
|
||||
//make_CPU_busy();
|
||||
run_poly(peval_fun_tab[i].f, peval_fun_tab[i].descr, &cpe, &cfix);
|
||||
if (i == 0)
|
||||
printf(" 最高的CPE得分 =========================== %.0f\n",
|
||||
compute_score(cpe, cstandard[0].cref, cstandard[0].cbest));
|
||||
if (i == 1)
|
||||
printf(" 最高的C(10)得分 ========================= %.0f\n",
|
||||
compute_score(cfix, cstandard[1].cref, cstandard[1].cbest));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Straightforward polynomial evaluation timed by run_poly_perf_test.
   Returns a[0] + a[1]*x + ... + a[degree]*x^degree reduced modulo 2^N
   (N = width of int); a negative degree yields 0.
   Unsigned arithmetic is used because the intermediate values overflow
   int, and signed overflow is undefined behaviour. */
int poly_eval_perf_test(int *a, int degree, int x)
{
    unsigned result = 0;
    unsigned xpwr = 1;          /* Successive powers of x */
    unsigned ux = (unsigned)x;
    int i;

    for (i = 0; i <= degree; i++) {
        result += (unsigned)a[i] * xpwr;
        xpwr *= ux;
    }
    return (int)result;
}
|
||||
|
||||
double run_poly_perf_test(void)
|
||||
{
|
||||
int i;
|
||||
double fix_time=0;
|
||||
pfun = poly_eval_perf_test;
|
||||
for (i=0;i<MAX_ITER_COUNT;i++)
|
||||
fix_time += measure_function(run_fun, FIXDEGREE);
|
||||
fix_time = fix_time/MAX_ITER_COUNT;
|
||||
printf("fix_time=%f\n",fix_time);
|
||||
return fix_time;
|
||||
}
|
||||
BIN
perflab/╝╞╦у╗·╧╡═│╘н└э-╩╡╤щ6.pptx
Normal file
BIN
perflab/╝╞╦у╗·╧╡═│╘н└э-╩╡╤щ6.pptx
Normal file
Binary file not shown.
BIN
perflab/╩╡╤щ6.docx
Normal file
BIN
perflab/╩╡╤щ6.docx
Normal file
Binary file not shown.
Reference in New Issue
Block a user