perflab added
This commit is contained in:
229
perflab/matrix/clock.c
Normal file
229
perflab/matrix/clock.c
Normal file
@ -0,0 +1,229 @@
|
|||||||
|
/* clock.c
|
||||||
|
* Retrofitted to use thread-specific timers
|
||||||
|
* and to get clock information from /proc/cpuinfo
|
||||||
|
* (C) R. E. Bryant, 2010
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* When this constant is not defined, uses time stamp counter */
|
||||||
|
#define USE_POSIX 0
|
||||||
|
|
||||||
|
/* Choice to use cpu_gettime call or Intel time stamp counter directly */
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <intrin.h>
|
||||||
|
//#include <intrinsics.h>
|
||||||
|
#include <windows.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "clock.h"
|
||||||
|
|
||||||
|
/* Use x86 cycle counter */
|
||||||
|
|
||||||
|
/* Initialize the cycle counter */
|
||||||
|
static unsigned cyc_hi = 0;
|
||||||
|
static unsigned cyc_lo = 0;
|
||||||
|
|
||||||
|
/* Read the processor time stamp counter and split it into two 32-bit halves.
 *
 * hi: receives bits 63..32 of the counter.
 * lo: receives bits 31..0 of the counter.
 *
 * The value is obtained with the __rdtsc() compiler intrinsic.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    /* Keep the counter unsigned so the right shift below is a plain logical
       shift rather than an implementation-defined shift of a signed value. */
    unsigned long long counter = __rdtsc();

    *hi = (unsigned int)(counter >> 32);
    *lo = (unsigned int)counter;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Record the current value of the cycle counter. */
|
||||||
|
void start_counter()
|
||||||
|
{
|
||||||
|
access_counter(&cyc_hi, &cyc_lo);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return the number of cycles since the last call to start_counter. */
|
||||||
|
double get_counter()
|
||||||
|
{
|
||||||
|
unsigned ncyc_hi, ncyc_lo;
|
||||||
|
unsigned hi, lo, borrow;
|
||||||
|
double result;
|
||||||
|
|
||||||
|
/* Get cycle counter */
|
||||||
|
access_counter(&ncyc_hi, &ncyc_lo);
|
||||||
|
|
||||||
|
/* Do double precision subtraction */
|
||||||
|
lo = ncyc_lo - cyc_lo;
|
||||||
|
borrow = cyc_lo > ncyc_lo;
|
||||||
|
hi = ncyc_hi - cyc_hi - borrow;
|
||||||
|
result = (double) hi * (1 << 30) * 4 + lo;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
/* Busy-wait until the cycle counter has advanced by at least 1000000000
   ticks.  The counter is restarted with start_counter() as a side effect. */
void make_CPU_busy(void)
{
    volatile double first_tick, latest_tick;

    start_counter();
    first_tick = get_counter();

    /* Keep polling the counter; the loop body is intentionally empty */
    for (latest_tick = get_counter();
         latest_tick - first_tick < 1000000000;
         latest_tick = get_counter())
        ;
}
|
||||||
|
|
||||||
|
//CPU<50><55>Ƶ<EFBFBD><C6B5>
|
||||||
|
double mhz(int verbose)
|
||||||
|
{
|
||||||
|
LARGE_INTEGER lFrequency;
|
||||||
|
LARGE_INTEGER lPerformanceCount_Start;
|
||||||
|
LARGE_INTEGER lPerformanceCount_End;
|
||||||
|
double mhz;
|
||||||
|
double fTime;
|
||||||
|
__int64 _i64StartCpuCounter;
|
||||||
|
__int64 _i64EndCpuCounter;
|
||||||
|
//On a multiprocessor machine, it should not matter which processor is called.
|
||||||
|
//However, you can get different results on different processors due to bugs in
|
||||||
|
//the BIOS or the HAL. To specify processor affinity for a thread, use the SetThreadAffinityMask function.
|
||||||
|
HANDLE hThread=GetCurrentThread();
|
||||||
|
SetThreadAffinityMask(hThread,0x1);
|
||||||
|
|
||||||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD>ϸ߾<CFB8><DFBE>ȶ<EFBFBD>ʱ<EFBFBD><CAB1><EFBFBD>ľ<EFBFBD><C4BE><EFBFBD>Ƶ<EFBFBD><C6B5>
|
||||||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1>Ӧ<EFBFBD>þ<EFBFBD><C3BE><EFBFBD>һƬ8253<35><33><EFBFBD><EFBFBD>8254
|
||||||
|
//<2F><>intel ich7<68>м<EFBFBD><D0BC><EFBFBD><EFBFBD><EFBFBD>8254
|
||||||
|
QueryPerformanceFrequency(&lFrequency);
|
||||||
|
// if (verbose>0)
|
||||||
|
// printf("<22>߾<EFBFBD><DFBE>ȶ<EFBFBD>ʱ<EFBFBD><CAB1><EFBFBD>ľ<EFBFBD><C4BE><EFBFBD>Ƶ<EFBFBD>ʣ<EFBFBD>%1.0fHz.\n",(double)lFrequency.QuadPart);
|
||||||
|
|
||||||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1>ÿ<EFBFBD><C3BF><EFBFBD><EFBFBD>һ<EFBFBD><D2BB>ʱ<EFBFBD><CAB1><EFBFBD><EFBFBD><EFBFBD>ڣ<EFBFBD><DAA3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>+1
|
||||||
|
QueryPerformanceCounter(&lPerformanceCount_Start);
|
||||||
|
|
||||||
|
//RDTSCָ<43><D6B8>:<3A><>ȡCPU<50><55><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||||
|
_i64StartCpuCounter=__rdtsc();
|
||||||
|
|
||||||
|
//<2F><>ʱ<EFBFBD><CAB1>һ<EFBFBD><D2BB>,<2C><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Сһ<D0A1><D2BB>
|
||||||
|
//int nTemp=100000;
|
||||||
|
//while (--nTemp);
|
||||||
|
Sleep(200);
|
||||||
|
|
||||||
|
QueryPerformanceCounter(&lPerformanceCount_End);
|
||||||
|
|
||||||
|
_i64EndCpuCounter=__rdtsc();
|
||||||
|
|
||||||
|
//f=1/T => f=<3D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>/(<28><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*T)
|
||||||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD>ġ<EFBFBD><C4A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*T<><54><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1><EFBFBD><EFBFBD>
|
||||||
|
fTime=((double)lPerformanceCount_End.QuadPart-(double)lPerformanceCount_Start.QuadPart)
|
||||||
|
/(double)lFrequency.QuadPart;
|
||||||
|
|
||||||
|
mhz = (_i64EndCpuCounter-_i64StartCpuCounter)/(fTime*1000000.0);
|
||||||
|
if (verbose>0)
|
||||||
|
printf("CPUƵ<EFBFBD><EFBFBD>Ϊ:%1.6fMHz.\n",mhz);
|
||||||
|
return mhz;
|
||||||
|
}
|
||||||
|
|
||||||
|
double CPU_Factor1(void)
|
||||||
|
{
|
||||||
|
double result;
|
||||||
|
int i,j,k,ii,jj,kk;
|
||||||
|
LARGE_INTEGER lStart,lEnd;
|
||||||
|
LARGE_INTEGER lFrequency;
|
||||||
|
HANDLE hThread;
|
||||||
|
double fTime;
|
||||||
|
|
||||||
|
QueryPerformanceFrequency(&lFrequency);
|
||||||
|
|
||||||
|
ii = 43273;
|
||||||
|
kk = 1238;
|
||||||
|
result = 1;
|
||||||
|
jj = 1244;
|
||||||
|
|
||||||
|
hThread=GetCurrentThread();
|
||||||
|
SetThreadAffinityMask(hThread,0x1);
|
||||||
|
QueryPerformanceCounter(&lStart);
|
||||||
|
//_asm("cpuid");
|
||||||
|
start_counter();
|
||||||
|
for (i=0;i<100;i++)
|
||||||
|
for (j=0;j<1000;j++)
|
||||||
|
for (k=0;k<1000;k++)
|
||||||
|
kk += kk*ii+jj;
|
||||||
|
|
||||||
|
result = get_counter();
|
||||||
|
QueryPerformanceCounter(&lEnd);
|
||||||
|
fTime=((double)lEnd.QuadPart-(double)lStart.QuadPart);
|
||||||
|
printf("CPU<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD>Ϊ%f",result);
|
||||||
|
printf("\t %f\n",fTime);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
double CPU_Factor(void)
|
||||||
|
{
|
||||||
|
double frequency;
|
||||||
|
double multiplier = 1000 * 1000 * 1000;//nano
|
||||||
|
LARGE_INTEGER lFrequency;
|
||||||
|
LARGE_INTEGER start,stop;
|
||||||
|
HANDLE hThread;
|
||||||
|
int i;
|
||||||
|
const int gigahertz= 1000*1000*1000;
|
||||||
|
const int known_instructions_per_loop = 27317;
|
||||||
|
|
||||||
|
int iterations = 100000000;
|
||||||
|
int g = 0;
|
||||||
|
double normal_ticks_per_second;
|
||||||
|
double ticks;
|
||||||
|
double time;
|
||||||
|
double loops_per_sec;
|
||||||
|
double instructions_per_loop;
|
||||||
|
double ratio;
|
||||||
|
double actual_freq;
|
||||||
|
|
||||||
|
QueryPerformanceFrequency(&lFrequency);
|
||||||
|
frequency = (double)lFrequency.QuadPart;
|
||||||
|
|
||||||
|
hThread=GetCurrentThread();
|
||||||
|
SetThreadAffinityMask(hThread,0x1);
|
||||||
|
QueryPerformanceCounter(&start);
|
||||||
|
for( i = 0; i < iterations; i++)
|
||||||
|
{
|
||||||
|
g++;
|
||||||
|
g++;
|
||||||
|
g++;
|
||||||
|
g++;
|
||||||
|
}
|
||||||
|
QueryPerformanceCounter(&stop);
|
||||||
|
|
||||||
|
//normal ticks differs from the WMI data, i.e 3125, when WMI 3201, and CPUZ 3199
|
||||||
|
normal_ticks_per_second = frequency * 1000;
|
||||||
|
ticks = (double)((double)stop.QuadPart - (double)start.QuadPart);
|
||||||
|
time = (ticks * multiplier) /frequency;
|
||||||
|
loops_per_sec = iterations / (time/multiplier);
|
||||||
|
instructions_per_loop = normal_ticks_per_second / loops_per_sec;
|
||||||
|
|
||||||
|
ratio = (instructions_per_loop / known_instructions_per_loop);
|
||||||
|
actual_freq = normal_ticks_per_second / ratio;
|
||||||
|
/*
|
||||||
|
actual_freq = normal_ticks_per_second / ratio;
|
||||||
|
actual_freq = known_instructions_per_loop*iterations*multiplier/time;
|
||||||
|
|
||||||
|
2293 = x/time;
|
||||||
|
|
||||||
|
2292.599713*1191533038.809362=known_instructions_per_loop*100000000*1000
|
||||||
|
loops_per_sec = iterations*frequency / ticks
|
||||||
|
|
||||||
|
instructions_per_loop = / loops_per_sec;
|
||||||
|
*/
|
||||||
|
printf("Perf counter freq: %f\n", normal_ticks_per_second);
|
||||||
|
printf("Loops per sec: %f\n", loops_per_sec);
|
||||||
|
printf("Perf counter freq div loops per sec: %f\n", instructions_per_loop);
|
||||||
|
printf("Presumed freq: %f\n", actual_freq);
|
||||||
|
printf("ratio: %f\n", ratio);
|
||||||
|
printf("time=%f\n",time);
|
||||||
|
return ratio;
|
||||||
|
}
|
||||||
12
perflab/matrix/clock.h
Normal file
12
perflab/matrix/clock.h
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
/* Routines for using cycle counter */
#ifndef CLOCK_H
#define CLOCK_H

/* Start the counter */
void start_counter(void);

/* Get # cycles since counter started */
double get_counter(void);

/* Busy-wait until the cycle counter has advanced by a fixed amount */
void make_CPU_busy(void);

/* Estimate the CPU clock rate in MHz; also prints it when verbose > 0 */
double mhz(int verbose);

/* Time a fixed loop and return its ratio to a built-in reference value */
double CPU_Factor(void);

#endif /* CLOCK_H */
|
||||||
117
perflab/matrix/cpe.c
Normal file
117
perflab/matrix/cpe.c
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
/* Compute CPE for function */
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "fcyc.h"
|
||||||
|
#include "cpe.h"
|
||||||
|
#include "lsquare.h"
|
||||||
|
#include "clock.h"
|
||||||
|
|
||||||
|
/* Find number of cycles taken by function.
|
||||||
|
Do this by running number of trials until best two within TOL of
|
||||||
|
each other
|
||||||
|
*/
|
||||||
|
double measure_function(elem_fun_t f, int cnt)
|
||||||
|
{
|
||||||
|
/* Need to fudge fact that fcyc wants a function taking an
|
||||||
|
long int *, while our function takes an long int */
|
||||||
|
test_funct tf = (test_funct) f;
|
||||||
|
return fcyc(tf, (int *) (int) cnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MAXCNT 100
|
||||||
|
|
||||||
|
#define LIM RAND_MAX
|
||||||
|
|
||||||
|
/* LCM of unrolling degree */
|
||||||
|
#ifdef USE_UNI
|
||||||
|
#define UNROLL 32
|
||||||
|
#else /* USE_UNI */
|
||||||
|
#define UNROLL 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Pick the element count to use for sample number `index`.
 *
 * index:   zero-based sample number
 * samples: total number of samples being taken
 * maxcnt:  largest count allowed
 * smethod: UNI_SAMPLE derives the position in the range from index;
 *          RAN_SAMPLE draws it with rand()
 * bias:    the low end of the range is bias*maxcnt
 *
 * Returns the chosen count rounded down to a multiple of UNROLL.
 * Prints a message and exits on an unknown sampling method.
 */
static long int get_cnt(long int index, long int samples,
                        long int maxcnt, sample_t smethod, double bias)
{
    long int mincnt = (long int) (bias*maxcnt);
    double weight;
    long int val;

    switch (smethod) {
    case UNI_SAMPLE:
        /* With a single sample, index/(samples-1) would be 0/0 (NaN) and the
           conversion to long int below would be undefined; fall back to the
           low end of the range instead. */
        weight = samples > 1 ? (double) index/(samples - 1) : 0.0;
        break;
    case RAN_SAMPLE:
        weight = (double) (rand() % LIM) / (double) (LIM-1);
        break;
    default:
        fprintf(stderr, "Undefined sampling method %d\n", smethod);
        exit(1);
    }

    /* Interpolate between mincnt and maxcnt, then truncate to long int */
    val = mincnt + weight*(maxcnt-mincnt);
    return UNROLL * (val/UNROLL);
}
|
||||||
|
|
||||||
|
#define SEED 31415
|
||||||
|
|
||||||
|
/* Find cpe for function f, which allows cnt up to maxcnt, using
|
||||||
|
specified number of sample points.
|
||||||
|
If data_file, then print data so that can plot points with Excel
|
||||||
|
smethod determines method for generating samples
|
||||||
|
*/
|
||||||
|
double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file,
|
||||||
|
sample_t smethod, double bias, long int verbose)
|
||||||
|
{
|
||||||
|
long int i;
|
||||||
|
long int cnt;
|
||||||
|
double cpe;
|
||||||
|
double overhead = 0;
|
||||||
|
double *cnt_val = calloc(samples, sizeof(double));
|
||||||
|
double *cycle_val = calloc(samples, sizeof(double));
|
||||||
|
/* Do the samples */
|
||||||
|
|
||||||
|
srand(SEED);
|
||||||
|
for (i = 0; i < samples; i++) {
|
||||||
|
cnt = get_cnt(i, samples, maxcnt, smethod, bias);
|
||||||
|
cnt_val[i] = cnt;
|
||||||
|
cycle_val[i] = measure_function(f, cnt);
|
||||||
|
if (cycle_val[i] < 1.0) {
|
||||||
|
fprintf(stderr, "Got %.2f cycles for count %ld\n", cycle_val[i], cnt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Fit data */
|
||||||
|
cpe = ls_slope(cnt_val, cycle_val, samples);
|
||||||
|
if (data_file)
|
||||||
|
overhead = ls_intercept(cnt_val, cycle_val, samples);
|
||||||
|
if (data_file && verbose > 1) {
|
||||||
|
/* Print x values */
|
||||||
|
fprintf(data_file, "Cnt\t0");
|
||||||
|
for (i = 0; i < samples; i++)
|
||||||
|
fprintf(data_file, "\t%.0f", cnt_val[i]);
|
||||||
|
fprintf(data_file, "\n");
|
||||||
|
/* Print y values */
|
||||||
|
fprintf(data_file, "Cycs.\t");
|
||||||
|
for (i = 0; i < samples; i++)
|
||||||
|
fprintf(data_file, "\t%.2f", cycle_val[i]);
|
||||||
|
fprintf(data_file, "\n");
|
||||||
|
/* Print ax*b values */
|
||||||
|
fprintf(data_file, "Interp.\t%.2f", overhead);
|
||||||
|
for (i = 0; i < samples; i++)
|
||||||
|
fprintf(data_file, "\t%.2f", cpe*cnt_val[i]+overhead);
|
||||||
|
fprintf(data_file, "\n");
|
||||||
|
}
|
||||||
|
if (data_file && verbose) {
|
||||||
|
/* Print results */
|
||||||
|
fprintf(data_file, "cpe\t%.2f\tovhd\t%.2f\tavgerr\t\\%.3f\tmaxerr\t\\%.3f\n",
|
||||||
|
cpe, overhead,
|
||||||
|
ls_error(cnt_val, cycle_val, samples, LS_AVG),
|
||||||
|
ls_error(cnt_val, cycle_val, samples, LS_MAX));
|
||||||
|
}
|
||||||
|
free(cnt_val);
|
||||||
|
free(cycle_val);
|
||||||
|
return cpe;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Use default parameters */
|
||||||
|
double find_cpe(elem_fun_t f, int maxcnt)
|
||||||
|
{
|
||||||
|
return find_cpe_full(f, maxcnt, 100, stdout, RAN_SAMPLE, 0.3, 0);
|
||||||
|
}
|
||||||
31
perflab/matrix/cpe.h
Normal file
31
perflab/matrix/cpe.h
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/* Compute CPE for function */
#ifndef CPE_H
#define CPE_H

/* FILE is used in the prototypes below, so the header pulls in its own
   definition instead of relying on the includer. */
#include <stdio.h>

/* Compute for function that is linear in some parameter cnt */
typedef void (*elem_fun_t)(int);

/* Different ways of finding samples
   UNI_SAMPLE: samples uniformly spaced between bias*maxcnt and maxcnt
   RAN_SAMPLE: samples randomly selected between bias*maxcnt and maxcnt
*/
typedef enum {UNI_SAMPLE, RAN_SAMPLE}
sample_t;

/* Find cpe for function f, which allows cnt up to maxcnt.
   Uses default parameters
*/
double find_cpe(elem_fun_t f, int maxcnt);

/* Find cpe for function f, which allows cnt up to maxcnt, using
   specified number of sample points.
   If data_file is non-NULL, print data there so the points can be plotted
   with Excel.
   smethod determines the method for generating samples.
*/
double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file,
                     sample_t smethod, double bias, long int verbose);

/* Find number of cycles taken by function.
   Do this by running number of trials until best two within TOL (2%) of
   each other
*/
double measure_function(elem_fun_t f, int cnt);

#endif /* CPE_H */
|
||||||
223
perflab/matrix/fcyc.c
Normal file
223
perflab/matrix/fcyc.c
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
/* Compute time used by function f */
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "clock.h"
|
||||||
|
#include "fcyc.h"
|
||||||
|
|
||||||
|
#define K 3
|
||||||
|
#define MAXSAMPLES 20
|
||||||
|
#define EPSILON 0.01
|
||||||
|
#define COMPENSATE 0
|
||||||
|
#define CLEAR_CACHE 0
|
||||||
|
#define CACHE_BYTES (1<<19)
|
||||||
|
#define CACHE_BLOCK 32
|
||||||
|
#define MAX_ITER_TIMES 10
|
||||||
|
|
||||||
|
static long int kbest = K;
|
||||||
|
static long int compensate = COMPENSATE;
|
||||||
|
static long int clear_cache = CLEAR_CACHE;
|
||||||
|
static long int maxsamples = MAXSAMPLES;
|
||||||
|
static double epsilon = EPSILON;
|
||||||
|
static long int cache_bytes = CACHE_BYTES;
|
||||||
|
static long int cache_block = CACHE_BLOCK;
|
||||||
|
|
||||||
|
static long int *cache_buf = NULL;
|
||||||
|
|
||||||
|
static double *values = NULL;
|
||||||
|
static long int samplecount = 0;
|
||||||
|
|
||||||
|
#define KEEP_VALS 0
|
||||||
|
#define KEEP_SAMPLES 0
|
||||||
|
|
||||||
|
#if KEEP_SAMPLES
|
||||||
|
static double *samples = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Start new sampling process */
|
||||||
|
static void init_sampler(void)
|
||||||
|
{
|
||||||
|
if (values)
|
||||||
|
free(values);
|
||||||
|
values = calloc(kbest, sizeof(double));
|
||||||
|
#if KEEP_SAMPLES
|
||||||
|
if (samples)
|
||||||
|
free(samples);
|
||||||
|
/* Allocate extra for wraparound analysis */
|
||||||
|
samples = calloc(maxsamples+kbest, sizeof(double));
|
||||||
|
#endif
|
||||||
|
samplecount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add new sample. */
|
||||||
|
static void add_sample(double val)
|
||||||
|
{
|
||||||
|
long int pos = 0;
|
||||||
|
if (samplecount < kbest) {
|
||||||
|
pos = samplecount;
|
||||||
|
values[pos] = val;
|
||||||
|
} else if (val < values[kbest-1]) {
|
||||||
|
pos = kbest-1;
|
||||||
|
values[pos] = val;
|
||||||
|
}
|
||||||
|
#if KEEP_SAMPLES
|
||||||
|
samples[samplecount] = val;
|
||||||
|
#endif
|
||||||
|
samplecount++;
|
||||||
|
/* Insertion sort */
|
||||||
|
while (pos > 0 && values[pos-1] > values[pos]) {
|
||||||
|
double temp = values[pos-1];
|
||||||
|
values[pos-1] = values[pos];
|
||||||
|
values[pos] = temp;
|
||||||
|
pos--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Have kbest minimum measurements converged within epsilon? */
|
||||||
|
static long int has_converged(void)
|
||||||
|
{
|
||||||
|
return
|
||||||
|
(samplecount >= kbest) &&
|
||||||
|
((1 + epsilon)*values[0] >= values[kbest-1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Code to clear cache */
|
||||||
|
|
||||||
|
|
||||||
|
static volatile long int sink = 0;
|
||||||
|
|
||||||
|
static void clear(void)
|
||||||
|
{
|
||||||
|
long int x = sink;
|
||||||
|
long int *cptr, *cend;
|
||||||
|
long int incr = cache_block/sizeof(long int);
|
||||||
|
if (!cache_buf) {
|
||||||
|
cache_buf = malloc(cache_bytes);
|
||||||
|
if (!cache_buf) {
|
||||||
|
fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cptr = (long int *) cache_buf;
|
||||||
|
cend = cptr + cache_bytes/sizeof(long int);
|
||||||
|
while (cptr < cend) {
|
||||||
|
x += *cptr;
|
||||||
|
cptr += incr;
|
||||||
|
}
|
||||||
|
sink = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Call f(params) `reps` times back to back and return the cycles per call.
   The cast makes the int * to long int * conversion required by test_funct
   explicit; the pointer value is passed through unchanged. */
static double time_calls(test_funct f, int *params, int reps)
{
    int i;

    start_counter();
    for (i = 0; i < reps; i++)
        f((long int *) params);
    return get_counter()/reps;
}

/* Compute the number of cycles used by function f on the given parameters.
 *
 * Samples are taken until the kbest smallest ones agree within epsilon or
 * maxsamples samples have been recorded.  When `compensate` is set each
 * sample times a single call; otherwise it is the average over
 * MAX_ITER_TIMES consecutive calls.  When `clear_cache` is set, clear() is
 * run before every sample.  Samples that are not positive are ignored.
 *
 * Returns the smallest sample recorded.
 */
double fcyc(test_funct f, int *params)
{
    int reps = compensate ? 1 : MAX_ITER_TIMES;
    double result;

    init_sampler();
    do {
        double cyc;
        if (clear_cache)
            clear();
        cyc = time_calls(f, params, reps);
        if (cyc > 0.0)
            add_sample(cyc);
    } while (!has_converged() && samplecount < maxsamples);
#ifdef DEBUG
    {
        long int i;
        printf(" %ld smallest values: [", kbest);
        for (i = 0; i < kbest; i++)
            printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", ");
    }
#endif
    result = values[0];
#if !KEEP_VALS
    free(values);
    values = NULL;
#endif
    return result;
}
|
||||||
|
|
||||||
|
|
||||||
|
/***********************************************************/
|
||||||
|
/* Set the various parameters used by measurement routines */
|
||||||
|
|
||||||
|
|
||||||
|
/* When set, will run code to clear cache before each measurement
|
||||||
|
Default = 0
|
||||||
|
*/
|
||||||
|
void set_fcyc_clear_cache(long int clear)
|
||||||
|
{
|
||||||
|
clear_cache = clear;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set size of cache to use when clearing cache
|
||||||
|
Default = 1<<19 (512KB)
|
||||||
|
*/
|
||||||
|
void set_fcyc_cache_size(long int bytes)
|
||||||
|
{
|
||||||
|
if (bytes != cache_bytes) {
|
||||||
|
cache_bytes = bytes;
|
||||||
|
if (cache_buf) {
|
||||||
|
free(cache_buf);
|
||||||
|
cache_buf = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set size of cache block
|
||||||
|
Default = 32
|
||||||
|
*/
|
||||||
|
void set_fcyc_cache_block(long int bytes) {
|
||||||
|
cache_block = bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* When set, will attempt to compensate for timer interrupt overhead
|
||||||
|
Default = 0
|
||||||
|
*/
|
||||||
|
void set_fcyc_compensate(long int compensate_arg)
|
||||||
|
{
|
||||||
|
compensate = compensate_arg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Value of K in K-best
|
||||||
|
Default = 3
|
||||||
|
*/
|
||||||
|
void set_fcyc_k(long int k)
|
||||||
|
{
|
||||||
|
kbest = k;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Maximum number of samples attempting to find K-best within some tolerance.
|
||||||
|
When exceeded, just return best sample found.
|
||||||
|
Default = 20
|
||||||
|
*/
|
||||||
|
void set_fcyc_maxsamples(long int maxsamples_arg)
|
||||||
|
{
|
||||||
|
maxsamples = maxsamples_arg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Tolerance required for K-best
|
||||||
|
Default = 0.01
|
||||||
|
*/
|
||||||
|
void set_fcyc_epsilon(double epsilon_arg)
|
||||||
|
{
|
||||||
|
epsilon = epsilon_arg;
|
||||||
|
}
|
||||||
52
perflab/matrix/fcyc.h
Normal file
52
perflab/matrix/fcyc.h
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
|
||||||
|
/* Fcyc measures the speed of any "test function."  Such a function
   is passed a list of integer parameters, which it may interpret
   in any way it chooses.
*/
#ifndef FCYC_H
#define FCYC_H

typedef void (*test_funct)(long int *);

/* Compute number of cycles used by function f on given set of parameters.
   NOTE(review): test_funct takes long int * while params is declared as
   int * here - confirm which of the two is intended. */
double fcyc(test_funct f, int* params);

/***********************************************************/
/* Set the various parameters used by measurement routines */

/* When set, will run code to clear cache before each measurement
   Default = 0
*/
void set_fcyc_clear_cache(long int clear);

/* Set size of cache to use when clearing cache
   Default = 1<<19 (512KB)
*/
void set_fcyc_cache_size(long int bytes);

/* Set size of cache block
   Default = 32
*/
void set_fcyc_cache_block(long int bytes);

/* When set, will attempt to compensate for timer interrupt overhead
   Default = 0
*/
void set_fcyc_compensate(long int compensate);

/* Value of K in K-best
   Default = 3
*/
void set_fcyc_k(long int k);

/* Maximum number of samples attempting to find K-best within some tolerance.
   When exceeded, just return best sample found.
   Default = 20
*/
void set_fcyc_maxsamples(long int maxsamples);

/* Tolerance required for K-best
   Default = 0.01
*/
void set_fcyc_epsilon(double epsilon);

#endif /* FCYC_H */
|
||||||
|
|
||||||
|
|
||||||
94
perflab/matrix/lsquare.c
Normal file
94
perflab/matrix/lsquare.c
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
/* Compute least squares fit of set of data points */
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "lsquare.h"
|
||||||
|
|
||||||
|
/* Sums over a set of (x, y) points, as needed for a least squares fit */
typedef struct {
    double sum_x;    /* sum of x     */
    double sum_y;    /* sum of y     */
    double sum_xx;   /* sum of x * x */
    double sum_xy;   /* sum of x * y */
} ls_stat_t;

/* Accumulate the sums of x, y, x*x and x*y over the first cnt points of
   xval/yval and store them in *statp.  All four sums are 0.0 when cnt is
   not positive. */
static void ls_stats(double *xval, double *yval, int cnt, ls_stat_t *statp)
{
    double sx = 0.0, sy = 0.0, sxx = 0.0, sxy = 0.0;
    const double *xp = xval;
    const double *yp = yval;
    int remaining;

    for (remaining = cnt; remaining > 0; remaining--) {
        double x = *xp++;
        double y = *yp++;
        sx += x;
        sy += y;
        sxx += x * x;
        sxy += x * y;
    }

    statp->sum_x = sx;
    statp->sum_y = sy;
    statp->sum_xx = sxx;
    statp->sum_xy = sxy;
}
|
||||||
|
|
||||||
|
double ls_slope(double *xval, double *yval, int cnt)
|
||||||
|
{
|
||||||
|
double slope;
|
||||||
|
ls_stat_t stat;
|
||||||
|
ls_stats(xval, yval, cnt, &stat);
|
||||||
|
slope = (cnt * stat.sum_xy - stat.sum_x * stat.sum_y)/
|
||||||
|
(cnt * stat.sum_xx - stat.sum_x*stat.sum_x);
|
||||||
|
return slope;
|
||||||
|
}
|
||||||
|
|
||||||
|
double ls_intercept(double *xval, double *yval, int cnt)
|
||||||
|
{
|
||||||
|
double intercept;
|
||||||
|
ls_stat_t stat;
|
||||||
|
ls_stats(xval, yval, cnt, &stat);
|
||||||
|
intercept = (stat.sum_xx * stat.sum_y - stat.sum_xy * stat.sum_x)/
|
||||||
|
(cnt * stat.sum_xx - stat.sum_x*stat.sum_x);
|
||||||
|
return intercept;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Relative error of the point (x, y) against the line slope*x + intercept.
 *
 * Returns |y - predicted| / |predicted|, or the absolute error
 * |y - predicted| when the prediction is exactly zero.  The magnitude of the
 * prediction is used as the divisor so the result is never negative, even
 * when the fitted line is below zero at x.
 */
static double rel_err(double x, double y, double slope, double intercept)
{
    double pred_y = slope*x + intercept;
    double offset = y - pred_y;
    double scale = pred_y < 0 ? -pred_y : pred_y;

    if (offset < 0)
        offset = -offset;
    if (scale == 0)
        return offset;
    return offset/scale;
}
|
||||||
|
|
||||||
|
/* Determine the error of the least squares fit through the first cnt points.
 *
 * etype: LS_AVG returns the mean of the per-point relative errors,
 *        LS_MAX returns the largest per-point relative error.
 *
 * Prints a message and exits if etype is neither of the two (this is only
 * detected while processing a point, i.e. when cnt > 0).
 */
double ls_error(double *xval, double *yval, int cnt, ls_err_t etype)
{
    /* Same fit as ls_slope()/ls_intercept() return for these points */
    double slope = ls_slope(xval, yval, cnt);
    double intercept = ls_intercept(xval, yval, cnt);
    double total = 0;
    double divisor = 0;
    int i;

    for (i = 0; i < cnt; i++) {
        double e = rel_err(xval[i], yval[i], slope, intercept);

        if (etype == LS_AVG) {
            /* Running sum and count for the mean */
            total += e;
            divisor++;
        } else if (etype == LS_MAX) {
            /* Running maximum; divisor 1 leaves it unchanged at the end */
            if (total < e)
                total = e;
            divisor = 1;
        } else {
            fprintf(stderr, "Invalid error type: %d\n", etype);
            exit(1);
        }
    }
    return total/divisor;
}
|
||||||
11
perflab/matrix/lsquare.h
Normal file
11
perflab/matrix/lsquare.h
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
/* Compute least squares fit of set of data points */
#ifndef LSQUARE_H
#define LSQUARE_H

/* Fit is of form y = mx + b.  m is slope, b is intercept */
double ls_slope(double *xval, double *yval, int cnt);
double ls_intercept(double *xval, double *yval, int cnt);

/* LS_AVG: average relative error; LS_MAX: maximum relative error */
typedef enum {LS_AVG, LS_MAX} ls_err_t;

/* Determine error (either maximum or average) of least squares fit */
double ls_error(double *xval, double *yval, int cnt, ls_err_t etype);

#endif /* LSQUARE_H */
|
||||||
|
|
||||||
28
perflab/matrix/matrix/matrix.sln
Normal file
28
perflab/matrix/matrix/matrix.sln
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio 14
|
||||||
|
VisualStudioVersion = 14.0.25420.1
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix", "matrix.vcxproj", "{15DC376D-CB40-4A27-BCF8-BCE93039E478}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|x64 = Debug|x64
|
||||||
|
Debug|x86 = Debug|x86
|
||||||
|
Release|x64 = Release|x64
|
||||||
|
Release|x86 = Release|x86
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Debug|x64.ActiveCfg = Debug|x64
|
||||||
|
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Debug|x64.Build.0 = Debug|x64
|
||||||
|
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Debug|x86.ActiveCfg = Debug|Win32
|
||||||
|
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Debug|x86.Build.0 = Debug|Win32
|
||||||
|
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Release|x64.ActiveCfg = Release|x64
|
||||||
|
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Release|x64.Build.0 = Release|x64
|
||||||
|
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Release|x86.ActiveCfg = Release|Win32
|
||||||
|
{15DC376D-CB40-4A27-BCF8-BCE93039E478}.Release|x86.Build.0 = Release|Win32
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
||||||
123
perflab/matrix/matrix/matrix.vcxproj
Normal file
123
perflab/matrix/matrix/matrix.vcxproj
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
|
<PropertyGroup Label="Globals">
|
||||||
|
<ProjectGuid>{15DC376D-CB40-4A27-BCF8-BCE93039E478}</ProjectGuid>
|
||||||
|
<RootNamespace>matrix</RootNamespace>
|
||||||
|
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v140</PlatformToolset>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v140</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v140</PlatformToolset>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v140</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
|
<ImportGroup Label="ExtensionSettings">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="Shared">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<PropertyGroup Label="UserMacros" />
|
||||||
|
<PropertyGroup />
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>Disabled</Optimization>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>Disabled</Optimization>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>MaxSpeed</Optimization>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>MaxSpeed</Optimization>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="..\clock.c" />
|
||||||
|
<ClCompile Include="..\cpe.c" />
|
||||||
|
<ClCompile Include="..\fcyc.c" />
|
||||||
|
<ClCompile Include="..\lsquare.c" />
|
||||||
|
<ClCompile Include="..\rowcol.c" />
|
||||||
|
<ClCompile Include="..\rowcol_test.c" />
|
||||||
|
</ItemGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
</ImportGroup>
|
||||||
|
</Project>
|
||||||
37
perflab/matrix/matrix/matrix.vcxproj.filters
Normal file
37
perflab/matrix/matrix/matrix.vcxproj.filters
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup>
|
||||||
|
<Filter Include="源文件">
|
||||||
|
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||||
|
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="头文件">
|
||||||
|
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||||
|
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="资源文件">
|
||||||
|
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||||
|
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||||
|
</Filter>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="..\clock.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\cpe.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\fcyc.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\lsquare.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\rowcol.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\rowcol_test.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
||||||
77
perflab/matrix/rowcol.c
Normal file
77
perflab/matrix/rowcol.c
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
/**************************************************************************
|
||||||
|
<09><>/<2F><><EFBFBD><EFBFBD><EFBFBD>ͺ<EFBFBD><CDBA><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><D2AA><EFBFBD>༭<EFBFBD><E0BCAD><EFBFBD>ļ<EFBFBD><C4BC><EFBFBD>
|
||||||
|
1. <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ѧ<EFBFBD>š<EFBFBD><C5A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ע<EFBFBD>͵ķ<CDB5>ʽд<CABD><D0B4><EFBFBD><EFBFBD><EFBFBD>棻
|
||||||
|
2. ʵ<>ֲ<EFBFBD>ͬ<EFBFBD>汾<EFBFBD><E6B1BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͺ<EFBFBD><CDBA><EFBFBD><EFBFBD><EFBFBD>
|
||||||
|
3. <20>༭rc_fun_rec rc_fun_tab<61><62><EFBFBD>飬<EFBFBD><E9A3AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>õĴ<C3B5><C4B4><EFBFBD>
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD>õ<EFBFBD><C3B5>к<EFBFBD><D0BA><EFBFBD><EFBFBD><EFBFBD><EFBFBD>͡<EFBFBD><CDA1><EFBFBD><EFBFBD>õ<EFBFBD><C3B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͣ<EFBFBD><CDA3><EFBFBD>Ϊ<EFBFBD><CEAA><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0><EFBFBD><EFBFBD>
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
Student ID: 201209054233
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ҹ<EFBFBD><D2B9><EFBFBD>Ӱ<EFBFBD><D3B0><EFBFBD>
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "rowcol.h"
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
/* <20>ο<EFBFBD><CEBF><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͺ<EFBFBD><CDBA><EFBFBD>ʵ<EFBFBD><CAB5> */
|
||||||
|
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>е<EFBFBD>ÿһ<C3BF>еĺ͡<C4BA><CDA1><EFBFBD>ע<EFBFBD><D7A2><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>к<EFBFBD><D0BA><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>˵<EFBFBD><CBB5><EFBFBD><EFBFBD><EFBFBD>ò<EFBFBD><C3B2><EFBFBD><EFBFBD><EFBFBD>
|
||||||
|
һ<><D2BB><EFBFBD>ģ<EFBFBD>ֻ<EFBFBD>ǵ<EFBFBD>2<EFBFBD><32><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>õ<EFBFBD><C3B5><EFBFBD><EFBFBD><EFBFBD>
|
||||||
|
*/
|
||||||
|
|
||||||
|
void c_sum(matrix_t M, vector_t rowsum, vector_t colsum)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
for (j = 0; j < N; j++) {
|
||||||
|
colsum[j] = 0;
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
colsum[j] += M[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* <20>ο<EFBFBD><CEBF><EFBFBD><EFBFBD>к<EFBFBD><D0BA><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͺ<EFBFBD><CDBA><EFBFBD>ʵ<EFBFBD><CAB5> */
|
||||||
|
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>е<EFBFBD>ÿһ<C3BF>С<EFBFBD>ÿһ<C3BF>еĺ͡<C4BA> */
|
||||||
|
|
||||||
|
void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
for (i = 0; i < N; i++) {
|
||||||
|
rowsum[i] = colsum[i] = 0;
|
||||||
|
for (j = 0; j < N; j++) {
|
||||||
|
rowsum[i] += M[i][j];
|
||||||
|
colsum[i] += M[j][i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ԫ<EFBFBD>أ<EFBFBD>ÿһ<C3BF><D2BB>Ԫ<EFBFBD>أ<EFBFBD><D8A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, COL/ROWCOL, "<22><><EFBFBD><EFBFBD><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD>"<22><>
|
||||||
|
COL<4F><4C>ʾ<EFBFBD>ú<EFBFBD><C3BA><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ÿһ<C3BF>еĺ<D0B5>
|
||||||
|
ROWCOL<4F><4C>ʾ<EFBFBD>ú<EFBFBD><C3BA><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ÿһ<C3BF>С<EFBFBD>ÿһ<C3BF>еĺ<D0B5>
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊ<EFBFBD><CEAA><EFBFBD>õ<EFBFBD><C3B5><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD>֣<EFBFBD><D6A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD>档
|
||||||
|
<09><><EFBFBD>磺
|
||||||
|
{my_c_sum1, "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD><CAB5>"},
|
||||||
|
{my_rc_sum2, "<22><>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD><CAB5>"},
|
||||||
|
*/
|
||||||
|
|
||||||
|
rc_fun_rec rc_fun_tab[] =
|
||||||
|
{
|
||||||
|
|
||||||
|
/* <20><>һ<EFBFBD>Ӧ<EEA3AC><D3A6><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><D0B4><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>͵ĺ<CDB5><C4BA><EFBFBD>ʵ<EFBFBD><CAB5> */
|
||||||
|
{c_sum, COL, "Best column sum"},
|
||||||
|
/* <20>ڶ<EFBFBD><DAB6>Ӧ<EEA3AC><D3A6><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><D0B4><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>͵ĺ<CDB5><C4BA><EFBFBD>ʵ<EFBFBD><CAB5> */
|
||||||
|
{rc_sum, ROWCOL, "Best row and column sum"},
|
||||||
|
|
||||||
|
{c_sum, COL, "Column sum, reference implementation"},
|
||||||
|
|
||||||
|
{rc_sum, ROWCOL, "Row and column sum, reference implementation"},
|
||||||
|
|
||||||
|
/* <20><><EFBFBD><EFBFBD><EFBFBD>Ĵ<EFBFBD><C4B4>벻<EFBFBD><EBB2BB><EFBFBD>Ļ<DEB8><C4BB><EFBFBD>ɾ<EFBFBD><C9BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>б<EFBFBD><D0B1><EFBFBD><EFBFBD><EFBFBD> */
|
||||||
|
{NULL,ROWCOL,NULL}
|
||||||
|
};
|
||||||
35
perflab/matrix/rowcol.h
Normal file
35
perflab/matrix/rowcol.h
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
/* Matrix row and/or column summation code */
|
||||||
|
|
||||||
|
/* Size of matrices */
|
||||||
|
/* $begin rcdecl */
|
||||||
|
#define N 512
|
||||||
|
/* $end rcdecl */
|
||||||
|
|
||||||
|
/* Data types */
|
||||||
|
|
||||||
|
/* Pointer type for vectors */
|
||||||
|
typedef int *vecp_t;
|
||||||
|
/* $begin rcdecl */
|
||||||
|
/* N x N matrix */
|
||||||
|
typedef int matrix_t[N][N];
|
||||||
|
|
||||||
|
/* Vector of length N */
|
||||||
|
typedef int vector_t[N];
|
||||||
|
/* $end rcdecl */
|
||||||
|
|
||||||
|
/* Different sum/product function types */
|
||||||
|
typedef enum { COL, ROWCOL } rc_comp_t;
|
||||||
|
|
||||||
|
/* Summation function */
|
||||||
|
typedef void (*rc_fun)(matrix_t, vector_t, vector_t);
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
rc_fun f;
|
||||||
|
rc_comp_t rc_type; /* What computation does it perform? */
|
||||||
|
char *descr;
|
||||||
|
} rc_fun_rec, *rc_fun_ptr;
|
||||||
|
|
||||||
|
/* Table of functions to test. Null terminated */
|
||||||
|
extern rc_fun_rec rc_fun_tab[];
|
||||||
|
|
||||||
|
|
||||||
173
perflab/matrix/rowcol_test.c
Normal file
173
perflab/matrix/rowcol_test.c
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
//#include <random.h>
|
||||||
|
#include "rowcol.h"
|
||||||
|
#include "fcyc.h"
|
||||||
|
#include "clock.h"
|
||||||
|
|
||||||
|
#define MAX_ITER_COUNT 100
|
||||||
|
|
||||||
|
/* Define performance standards */
|
||||||
|
static struct {
|
||||||
|
double cref; /* Cycles taken by reference solution */
|
||||||
|
double cbest; /* Cycles taken by our best implementation */
|
||||||
|
} cstandard[2] =
|
||||||
|
{{7.7, 6.40}, /* Column Sum */
|
||||||
|
{9.75, 6.60} /* Row & Column Sum */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Put in code to align matrix so that it starts on a cache block boundary.
|
||||||
|
This makes the cache performance of the code a bit more predictable
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Words per cache block. OK if this is an estimate as long as it
|
||||||
|
is a multiple of the actual value
|
||||||
|
*/
|
||||||
|
#define WPB 16
|
||||||
|
|
||||||
|
int verbose = 1;
|
||||||
|
int data[N*N+WPB];
|
||||||
|
int *mstart;
|
||||||
|
|
||||||
|
typedef vector_t *row_t;
|
||||||
|
|
||||||
|
/* Reference row and column sums */
|
||||||
|
vector_t rsref, csref, rcomp, ccomp;
|
||||||
|
|
||||||
|
static void init_tests(void);
|
||||||
|
extern void make_CPU_busy(void);
|
||||||
|
|
||||||
|
static void init_tests(void)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
size_t bytes_per_block = sizeof(int) * WPB;
|
||||||
|
/* round mstart up to nearest block boundary */
|
||||||
|
mstart = (int *)
|
||||||
|
(((size_t) data + bytes_per_block-1) / bytes_per_block * bytes_per_block);
|
||||||
|
for (i = 0; i < N; i++) {
|
||||||
|
rsref[i] = csref[i] = 0;
|
||||||
|
}
|
||||||
|
for (i = 0; i < N; i++) {
|
||||||
|
for (j = 0; j < N; j++) {
|
||||||
|
int val = rand();
|
||||||
|
mstart[i*N+j] = val;
|
||||||
|
rsref[i] += val;
|
||||||
|
csref[j] += val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Test function on all values */
|
||||||
|
int test_rc(rc_fun f, FILE *rpt, rc_comp_t rc_type) {
|
||||||
|
int i;
|
||||||
|
int ok = 1;
|
||||||
|
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
rcomp[i] = ccomp[i] = 0xDEADBEEF;
|
||||||
|
f((row_t)mstart, rcomp, ccomp);
|
||||||
|
|
||||||
|
for (i = 0; ok && i < N; i++) {
|
||||||
|
if (rc_type == ROWCOL
|
||||||
|
&& rsref[i] != rcomp[i]) {
|
||||||
|
ok = 0;
|
||||||
|
if (rpt)
|
||||||
|
fprintf(rpt,
|
||||||
|
"<EFBFBD>Ե<EFBFBD>%d<>еļ<D0B5><C4BC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȷ<EFBFBD><C8B7><EFBFBD><EFBFBD><EFBFBD><EFBFBD>%d<><64><EFBFBD><EFBFBD><EFBFBD>Ǽ<EFBFBD><C7BC><EFBFBD><EFBFBD>õ<EFBFBD>%d\n",
|
||||||
|
i, rsref[i], rcomp[i]);
|
||||||
|
}
|
||||||
|
if ((rc_type == ROWCOL || rc_type == COL)
|
||||||
|
&& csref[i] != ccomp[i]) {
|
||||||
|
ok = 0;
|
||||||
|
if (rpt)
|
||||||
|
fprintf(rpt,
|
||||||
|
"<EFBFBD>Ե<EFBFBD>%d<>еļ<D0B5><C4BC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȷ<EFBFBD><C8B7><EFBFBD><EFBFBD><EFBFBD><EFBFBD>%d<><64><EFBFBD><EFBFBD><EFBFBD>Ǽ<EFBFBD><C7BC><EFBFBD><EFBFBD>õ<EFBFBD>%d\n",
|
||||||
|
i, csref[i], ccomp[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Kludgy way to interface to cycle measuring code */
|
||||||
|
void do_test(int *intf)
|
||||||
|
{
|
||||||
|
rc_fun f = (rc_fun) intf;
|
||||||
|
f((row_t)mstart, rcomp, ccomp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Verify and then time one summation function.
 *
 * f        function under test
 * rc_type  which sums f produces (passed on to test_rc)
 * descr    description printed when verbose is set
 * cycp     if non-NULL, receives the measured cycles per matrix element
 *
 * The function is first checked with test_rc (mismatches are reported on
 * stdout).  If the check fails nothing is timed and *cycp is left
 * untouched.  Otherwise the result of fcyc is averaged over
 * MAX_ITER_COUNT calls and divided by N*N.
 *
 * NOTE(review): the non-ASCII parts of the printf literals look
 * mis-encoded; they are kept exactly as found.
 */
void time_rc(rc_fun f, rc_comp_t rc_type, char *descr, double *cycp)
{
    double total = 0;
    double per_elem;
    int run;

    if (verbose) printf("<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>%s\n", descr);

    if (!test_rc(f, stdout, rc_type))
        return;

    make_CPU_busy();
    for (run = 0; run < MAX_ITER_COUNT; run++)
        total += fcyc(do_test, (int *) f);

    total = total/MAX_ITER_COUNT;
    per_elem = total/(N*N);
    if (verbose) printf(" <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> = %.2f, ƽ<><C6BD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>/Ԫ<><D4AA> = %.2f\n",
                        total, per_elem);
    if (cycp)
        *cycp = per_elem;
}
|
||||||
|
|
||||||
|
/*
 * Convert a measured cycle count into a grade.
 *
 * cmeas  measured cycles for the function being graded
 * cref   cycles taken by the reference solution
 * cbest  cycles taken by the best known solution
 *
 * Speedups are taken relative to the reference (cref/cbest for the best
 * solution, cref/cmeas for the measurement).  Achieving less than 10% of
 * the best solution's gain over the reference scores 0, more than 110% of
 * it scores 120, and anything in between is scaled linearly so that
 * exactly matching the best solution gives 110.
 */
static double compute_score(double cmeas, double cref, double cbest)
{
    const double best_speedup = cref/cbest;
    const double meas_speedup = cref/cmeas;
    const double best_gain = best_speedup - 1;
    double score;

    if (meas_speedup < 0.1*best_gain + 1)
        score = 0;
    else if (meas_speedup > 1.1*best_gain + 1)
        score = 120;
    else
        score = 100*((meas_speedup - 1.0)/best_gain + 0.1);

    return score;
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
double cme;
|
||||||
|
double cme_c,cme_rc;
|
||||||
|
int EnableScore=0;
|
||||||
|
|
||||||
|
if (argc == 3)
|
||||||
|
{
|
||||||
|
EnableScore = 1;
|
||||||
|
verbose = 0;
|
||||||
|
}
|
||||||
|
init_tests();
|
||||||
|
set_fcyc_clear_cache(1); /* Set so that clears cache between runs */
|
||||||
|
for (i = 0; rc_fun_tab[i].f != NULL; i++) {
|
||||||
|
cme = 100.0;
|
||||||
|
time_rc(rc_fun_tab[i].f,
|
||||||
|
rc_fun_tab[i].rc_type, rc_fun_tab[i].descr, &cme);
|
||||||
|
if (i == 0)
|
||||||
|
{
|
||||||
|
cme_c = cme;
|
||||||
|
if (EnableScore==0)
|
||||||
|
{
|
||||||
|
printf(" <20><><EFBFBD><EFBFBD>\"<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>\"<EFBFBD>÷<EFBFBD> ======================== %.0f\n",
|
||||||
|
compute_score(cme, cstandard[0].cref, cstandard[0].cbest));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (i == 1)
|
||||||
|
{
|
||||||
|
cme_rc = cme;
|
||||||
|
if (EnableScore==0)
|
||||||
|
{
|
||||||
|
printf(" <20><><EFBFBD><EFBFBD>\"<EFBFBD>к<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>\"<EFBFBD>÷<EFBFBD> ====================== %.0f\n",
|
||||||
|
compute_score(cme, cstandard[1].cref, cstandard[1].cbest));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EnableScore)
|
||||||
|
printf("%.2f\t %.0f\t %.2f\t %.0f\t 0\t 0\n",cme_c,compute_score(cme_c, cstandard[0].cref, cstandard[0].cbest),
|
||||||
|
cme_rc,compute_score(cme_rc, cstandard[1].cref, cstandard[1].cbest));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
229
perflab/poly/clock.c
Normal file
229
perflab/poly/clock.c
Normal file
@ -0,0 +1,229 @@
|
|||||||
|
/* clock.c
|
||||||
|
* Retrofitted to use thread-specific timers
|
||||||
|
* and to get clock information from /proc/cpuinfo
|
||||||
|
* (C) R. E. Bryant, 2010
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* When this constant is not defined, uses time stamp counter */
|
||||||
|
#define USE_POSIX 0
|
||||||
|
|
||||||
|
/* Choice to use cpu_gettime call or Intel time stamp counter directly */
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <intrin.h>
|
||||||
|
//#include <intrinsics.h>
|
||||||
|
#include <windows.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "clock.h"
|
||||||
|
|
||||||
|
/* Use x86 cycle counter */
|
||||||
|
|
||||||
|
/* Initialize the cycle counter */
|
||||||
|
static unsigned cyc_hi = 0;
|
||||||
|
static unsigned cyc_lo = 0;
|
||||||
|
|
||||||
|
/*
 * Read the processor's time stamp counter via the __rdtsc() intrinsic and
 * split the 64-bit value into two 32-bit halves.
 *
 * hi  receives bits 63..32 of the counter
 * lo  receives bits 31..0 of the counter
 *
 * The value is held in an unsigned 64-bit variable so that the shift that
 * extracts the upper half is well defined for every counter value.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    const unsigned long long counter = __rdtsc();

    *hi = (unsigned int) (counter >> 32);
    *lo = (unsigned int) counter;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Record the current value of the cycle counter. */
|
||||||
|
void start_counter()
|
||||||
|
{
|
||||||
|
access_counter(&cyc_hi, &cyc_lo);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return the number of cycles since the last call to start_counter. */
|
||||||
|
double get_counter()
|
||||||
|
{
|
||||||
|
unsigned ncyc_hi, ncyc_lo;
|
||||||
|
unsigned hi, lo, borrow;
|
||||||
|
double result;
|
||||||
|
|
||||||
|
/* Get cycle counter */
|
||||||
|
access_counter(&ncyc_hi, &ncyc_lo);
|
||||||
|
|
||||||
|
/* Do double precision subtraction */
|
||||||
|
lo = ncyc_lo - cyc_lo;
|
||||||
|
borrow = cyc_lo > ncyc_lo;
|
||||||
|
hi = ncyc_hi - cyc_hi - borrow;
|
||||||
|
result = (double) hi * (1 << 30) * 4 + lo;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
/*
 * Spin until the cycle counter has advanced by at least 1000000000 ticks.
 *
 * Note that this calls start_counter(), so any measurement that was in
 * progress through start_counter()/get_counter() is reset.
 */
void make_CPU_busy(void)
{
    volatile double first_tick, last_tick;

    start_counter();
    first_tick = get_counter();
    do {
        last_tick = get_counter();
    } while (last_tick - first_tick < 1000000000);
}
|
||||||
|
|
||||||
|
//CPU<50><55>Ƶ<EFBFBD><C6B5>
|
||||||
|
double mhz(int verbose)
|
||||||
|
{
|
||||||
|
LARGE_INTEGER lFrequency;
|
||||||
|
LARGE_INTEGER lPerformanceCount_Start;
|
||||||
|
LARGE_INTEGER lPerformanceCount_End;
|
||||||
|
double mhz;
|
||||||
|
double fTime;
|
||||||
|
__int64 _i64StartCpuCounter;
|
||||||
|
__int64 _i64EndCpuCounter;
|
||||||
|
//On a multiprocessor machine, it should not matter which processor is called.
|
||||||
|
//However, you can get different results on different processors due to bugs in
|
||||||
|
//the BIOS or the HAL. To specify processor affinity for a thread, use the SetThreadAffinityMask function.
|
||||||
|
HANDLE hThread=GetCurrentThread();
|
||||||
|
SetThreadAffinityMask(hThread,0x1);
|
||||||
|
|
||||||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD>ϸ߾<CFB8><DFBE>ȶ<EFBFBD>ʱ<EFBFBD><CAB1><EFBFBD>ľ<EFBFBD><C4BE><EFBFBD>Ƶ<EFBFBD><C6B5>
|
||||||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1>Ӧ<EFBFBD>þ<EFBFBD><C3BE><EFBFBD>һƬ8253<35><33><EFBFBD><EFBFBD>8254
|
||||||
|
//<2F><>intel ich7<68>м<EFBFBD><D0BC><EFBFBD><EFBFBD><EFBFBD>8254
|
||||||
|
QueryPerformanceFrequency(&lFrequency);
|
||||||
|
// if (verbose>0)
|
||||||
|
// printf("<22>߾<EFBFBD><DFBE>ȶ<EFBFBD>ʱ<EFBFBD><CAB1><EFBFBD>ľ<EFBFBD><C4BE><EFBFBD>Ƶ<EFBFBD>ʣ<EFBFBD>%1.0fHz.\n",(double)lFrequency.QuadPart);
|
||||||
|
|
||||||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1>ÿ<EFBFBD><C3BF><EFBFBD><EFBFBD>һ<EFBFBD><D2BB>ʱ<EFBFBD><CAB1><EFBFBD><EFBFBD><EFBFBD>ڣ<EFBFBD><DAA3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>+1
|
||||||
|
QueryPerformanceCounter(&lPerformanceCount_Start);
|
||||||
|
|
||||||
|
//RDTSCָ<43><D6B8>:<3A><>ȡCPU<50><55><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||||
|
_i64StartCpuCounter=__rdtsc();
|
||||||
|
|
||||||
|
//<2F><>ʱ<EFBFBD><CAB1>һ<EFBFBD><D2BB>,<2C><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Сһ<D0A1><D2BB>
|
||||||
|
//int nTemp=100000;
|
||||||
|
//while (--nTemp);
|
||||||
|
Sleep(200);
|
||||||
|
|
||||||
|
QueryPerformanceCounter(&lPerformanceCount_End);
|
||||||
|
|
||||||
|
_i64EndCpuCounter=__rdtsc();
|
||||||
|
|
||||||
|
//f=1/T => f=<3D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>/(<28><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*T)
|
||||||
|
//<2F><><EFBFBD><EFBFBD><EFBFBD>ġ<EFBFBD><C4A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*T<><54><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1><EFBFBD><EFBFBD>
|
||||||
|
fTime=((double)lPerformanceCount_End.QuadPart-(double)lPerformanceCount_Start.QuadPart)
|
||||||
|
/(double)lFrequency.QuadPart;
|
||||||
|
|
||||||
|
mhz = (_i64EndCpuCounter-_i64StartCpuCounter)/(fTime*1000000.0);
|
||||||
|
if (verbose>0)
|
||||||
|
printf("CPUƵ<EFBFBD><EFBFBD>Ϊ:%1.6fMHz.\n",mhz);
|
||||||
|
return mhz;
|
||||||
|
}
|
||||||
|
|
||||||
|
double CPU_Factor1(void)
|
||||||
|
{
|
||||||
|
double result;
|
||||||
|
int i,j,k,ii,jj,kk;
|
||||||
|
LARGE_INTEGER lStart,lEnd;
|
||||||
|
LARGE_INTEGER lFrequency;
|
||||||
|
HANDLE hThread;
|
||||||
|
double fTime;
|
||||||
|
|
||||||
|
QueryPerformanceFrequency(&lFrequency);
|
||||||
|
|
||||||
|
ii = 43273;
|
||||||
|
kk = 1238;
|
||||||
|
result = 1;
|
||||||
|
jj = 1244;
|
||||||
|
|
||||||
|
hThread=GetCurrentThread();
|
||||||
|
SetThreadAffinityMask(hThread,0x1);
|
||||||
|
QueryPerformanceCounter(&lStart);
|
||||||
|
//_asm("cpuid");
|
||||||
|
start_counter();
|
||||||
|
for (i=0;i<100;i++)
|
||||||
|
for (j=0;j<1000;j++)
|
||||||
|
for (k=0;k<1000;k++)
|
||||||
|
kk += kk*ii+jj;
|
||||||
|
|
||||||
|
result = get_counter();
|
||||||
|
QueryPerformanceCounter(&lEnd);
|
||||||
|
fTime=((double)lEnd.QuadPart-(double)lStart.QuadPart);
|
||||||
|
printf("CPU<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD>Ϊ%f",result);
|
||||||
|
printf("\t %f\n",fTime);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
double CPU_Factor(void)
|
||||||
|
{
|
||||||
|
double frequency;
|
||||||
|
double multiplier = 1000 * 1000 * 1000;//nano
|
||||||
|
LARGE_INTEGER lFrequency;
|
||||||
|
LARGE_INTEGER start,stop;
|
||||||
|
HANDLE hThread;
|
||||||
|
int i;
|
||||||
|
const int gigahertz= 1000*1000*1000;
|
||||||
|
const int known_instructions_per_loop = 27317;
|
||||||
|
|
||||||
|
int iterations = 100000000;
|
||||||
|
int g = 0;
|
||||||
|
double normal_ticks_per_second;
|
||||||
|
double ticks;
|
||||||
|
double time;
|
||||||
|
double loops_per_sec;
|
||||||
|
double instructions_per_loop;
|
||||||
|
double ratio;
|
||||||
|
double actual_freq;
|
||||||
|
|
||||||
|
QueryPerformanceFrequency(&lFrequency);
|
||||||
|
frequency = (double)lFrequency.QuadPart;
|
||||||
|
|
||||||
|
hThread=GetCurrentThread();
|
||||||
|
SetThreadAffinityMask(hThread,0x1);
|
||||||
|
QueryPerformanceCounter(&start);
|
||||||
|
for( i = 0; i < iterations; i++)
|
||||||
|
{
|
||||||
|
g++;
|
||||||
|
g++;
|
||||||
|
g++;
|
||||||
|
g++;
|
||||||
|
}
|
||||||
|
QueryPerformanceCounter(&stop);
|
||||||
|
|
||||||
|
//normal ticks differs from the WMI data, i.e 3125, when WMI 3201, and CPUZ 3199
|
||||||
|
normal_ticks_per_second = frequency * 1000;
|
||||||
|
ticks = (double)((double)stop.QuadPart - (double)start.QuadPart);
|
||||||
|
time = (ticks * multiplier) /frequency;
|
||||||
|
loops_per_sec = iterations / (time/multiplier);
|
||||||
|
instructions_per_loop = normal_ticks_per_second / loops_per_sec;
|
||||||
|
|
||||||
|
ratio = (instructions_per_loop / known_instructions_per_loop);
|
||||||
|
actual_freq = normal_ticks_per_second / ratio;
|
||||||
|
/*
|
||||||
|
actual_freq = normal_ticks_per_second / ratio;
|
||||||
|
actual_freq = known_instructions_per_loop*iterations*multiplier/time;
|
||||||
|
|
||||||
|
2293 = x/time;
|
||||||
|
|
||||||
|
2292.599713*1191533038.809362=known_instructions_per_loop*100000000*1000
|
||||||
|
loops_per_sec = iterations*frequency / ticks
|
||||||
|
|
||||||
|
instructions_per_loop = / loops_per_sec;
|
||||||
|
*/
|
||||||
|
printf("Perf counter freq: %f\n", normal_ticks_per_second);
|
||||||
|
printf("Loops per sec: %f\n", loops_per_sec);
|
||||||
|
printf("Perf counter freq div loops per sec: %f\n", instructions_per_loop);
|
||||||
|
printf("Presumed freq: %f\n", actual_freq);
|
||||||
|
printf("ratio: %f\n", ratio);
|
||||||
|
printf("time=%f\n",time);
|
||||||
|
return ratio;
|
||||||
|
}
|
||||||
12
perflab/poly/clock.h
Normal file
12
perflab/poly/clock.h
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
/* Routines for using cycle counter */
|
||||||
|
|
||||||
|
/* Start the counter */
|
||||||
|
void start_counter(void);
|
||||||
|
|
||||||
|
/* Get # cycles since counter started. Returns 1e20 if detect timing anomaly */
|
||||||
|
double get_counter(void);
|
||||||
|
void make_CPU_busy(void);
|
||||||
|
|
||||||
|
double mhz(int verbose);
|
||||||
|
double CPU_Factor(void);
|
||||||
|
//double GetCpuClock(void);
|
||||||
117
perflab/poly/cpe.c
Normal file
117
perflab/poly/cpe.c
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
/* Compute CPE for function */
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "fcyc.h"
|
||||||
|
#include "cpe.h"
|
||||||
|
#include "lsquare.h"
|
||||||
|
#include "clock.h"
|
||||||
|
|
||||||
|
/* Find number of cycles taken by function.
|
||||||
|
Do this by running number of trials until best two within TOL of
|
||||||
|
each other
|
||||||
|
*/
|
||||||
|
double measure_function(elem_fun_t f, int cnt)
|
||||||
|
{
|
||||||
|
/* Need to fudge fact that fcyc wants a function taking an
|
||||||
|
long int *, while our function takes an long int */
|
||||||
|
test_funct tf = (test_funct) f;
|
||||||
|
return fcyc(tf, (int *) (int) cnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MAXCNT 100
|
||||||
|
|
||||||
|
#define LIM RAND_MAX
|
||||||
|
|
||||||
|
/* LCM of unrolling degree */
|
||||||
|
#ifdef USE_UNI
|
||||||
|
#define UNROLL 32
|
||||||
|
#else /* USE_UNI */
|
||||||
|
#define UNROLL 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Choose the element count for sample number `index` out of `samples`.
 *
 * Counts lie between bias*maxcnt and maxcnt:
 *   UNI_SAMPLE - evenly spaced according to index,
 *   RAN_SAMPLE - placed using rand().
 * The result is rounded down to a multiple of UNROLL.  An unknown
 * sampling method is reported on stderr and terminates the program.
 */
static long int get_cnt(long int index, long int samples,
                        long int maxcnt, sample_t smethod, double bias)
{
    long int mincnt = (long int) (bias*maxcnt);
    double weight = 0.0;
    long int val;

    switch (smethod) {
    case UNI_SAMPLE:
        /* With a single sample there is no spacing to compute: dividing
           by (samples - 1) would be 0/0.  Stay at the low end instead. */
        if (samples > 1)
            weight = (double) index/(samples - 1);
        break;
    case RAN_SAMPLE:
        weight = (double) (rand() % LIM) / (double) (LIM-1);
        break;
    default:
        fprintf(stderr, "Undefined sampling method %d\n", smethod);
        exit(1);
    }
    val = (long int) (mincnt + weight*(maxcnt-mincnt));
    return UNROLL * (val/UNROLL);
}
|
||||||
|
|
||||||
|
#define SEED 31415
|
||||||
|
|
||||||
|
/* Find cpe for function f, which allows cnt up to maxcnt, using
|
||||||
|
specified number of sample points.
|
||||||
|
If data_file, then print data so that can plot points with Excel
|
||||||
|
smethod determines method for generating samples
|
||||||
|
*/
|
||||||
|
double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file,
|
||||||
|
sample_t smethod, double bias, long int verbose)
|
||||||
|
{
|
||||||
|
long int i;
|
||||||
|
long int cnt;
|
||||||
|
double cpe;
|
||||||
|
double overhead = 0;
|
||||||
|
double *cnt_val = calloc(samples, sizeof(double));
|
||||||
|
double *cycle_val = calloc(samples, sizeof(double));
|
||||||
|
/* Do the samples */
|
||||||
|
|
||||||
|
srand(SEED);
|
||||||
|
for (i = 0; i < samples; i++) {
|
||||||
|
cnt = get_cnt(i, samples, maxcnt, smethod, bias);
|
||||||
|
cnt_val[i] = cnt;
|
||||||
|
cycle_val[i] = measure_function(f, cnt);
|
||||||
|
if (cycle_val[i] < 1.0) {
|
||||||
|
fprintf(stderr, "Got %.2f cycles for count %ld\n", cycle_val[i], cnt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Fit data */
|
||||||
|
cpe = ls_slope(cnt_val, cycle_val, samples);
|
||||||
|
if (data_file)
|
||||||
|
overhead = ls_intercept(cnt_val, cycle_val, samples);
|
||||||
|
if (data_file && verbose > 1) {
|
||||||
|
/* Print x values */
|
||||||
|
fprintf(data_file, "Cnt\t0");
|
||||||
|
for (i = 0; i < samples; i++)
|
||||||
|
fprintf(data_file, "\t%.0f", cnt_val[i]);
|
||||||
|
fprintf(data_file, "\n");
|
||||||
|
/* Print y values */
|
||||||
|
fprintf(data_file, "Cycs.\t");
|
||||||
|
for (i = 0; i < samples; i++)
|
||||||
|
fprintf(data_file, "\t%.2f", cycle_val[i]);
|
||||||
|
fprintf(data_file, "\n");
|
||||||
|
/* Print ax*b values */
|
||||||
|
fprintf(data_file, "Interp.\t%.2f", overhead);
|
||||||
|
for (i = 0; i < samples; i++)
|
||||||
|
fprintf(data_file, "\t%.2f", cpe*cnt_val[i]+overhead);
|
||||||
|
fprintf(data_file, "\n");
|
||||||
|
}
|
||||||
|
if (data_file && verbose) {
|
||||||
|
/* Print results */
|
||||||
|
fprintf(data_file, "cpe\t%.2f\tovhd\t%.2f\tavgerr\t\\%.3f\tmaxerr\t\\%.3f\n",
|
||||||
|
cpe, overhead,
|
||||||
|
ls_error(cnt_val, cycle_val, samples, LS_AVG),
|
||||||
|
ls_error(cnt_val, cycle_val, samples, LS_MAX));
|
||||||
|
}
|
||||||
|
free(cnt_val);
|
||||||
|
free(cycle_val);
|
||||||
|
return cpe;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Use default parameters */
|
||||||
|
double find_cpe(elem_fun_t f, int maxcnt)
|
||||||
|
{
|
||||||
|
return find_cpe_full(f, maxcnt, 100, stdout, RAN_SAMPLE, 0.3, 0);
|
||||||
|
}
|
||||||
31
perflab/poly/cpe.h
Normal file
31
perflab/poly/cpe.h
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/* Compute CPE for function */
|
||||||
|
|
||||||
|
/* Compute for function that is linear in some parameter cnt */
|
||||||
|
typedef void (*elem_fun_t)(int);
|
||||||
|
|
||||||
|
/* Different ways of finding samples
|
||||||
|
UNI_SAMPLE: samples uniformly spaced between bias*maxcnt and maxcnt
|
||||||
|
RAN_SAMPLE: samples randomly selected between bias*maxcnt and maxcnt
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef enum {UNI_SAMPLE, RAN_SAMPLE}
|
||||||
|
sample_t;
|
||||||
|
|
||||||
|
/* Find cpe for function f, which allows cnt up to maxcnt.
|
||||||
|
Uses default parameters
|
||||||
|
*/
|
||||||
|
double find_cpe(elem_fun_t f, int maxcnt);
|
||||||
|
|
||||||
|
/* Find cpe for function f, which allows cnt up to maxcnt, using
|
||||||
|
specified number of sample points.
|
||||||
|
If data_file, then print data so that can plot points with Excel
|
||||||
|
smethod determines method for generating samples
|
||||||
|
*/
|
||||||
|
double find_cpe_full(elem_fun_t f, long int maxcnt, long int samples, FILE *data_file,
|
||||||
|
sample_t smethod, double bias, long int verbose);
|
||||||
|
|
||||||
|
/* Find the number of cycles taken by a function.
   Trials are repeated by fcyc() until the K smallest measurements agree
   within its tolerance (defaults in fcyc.c: K = 3, epsilon = 0.01) or
   the sample limit is reached.
*/
|
||||||
|
double measure_function(elem_fun_t f, int cnt);
|
||||||
223
perflab/poly/fcyc.c
Normal file
223
perflab/poly/fcyc.c
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
/* Compute time used by function f */
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "clock.h"
|
||||||
|
#include "fcyc.h"
|
||||||
|
|
||||||
|
#define K 3
|
||||||
|
#define MAXSAMPLES 20
|
||||||
|
#define EPSILON 0.01
|
||||||
|
#define COMPENSATE 0
|
||||||
|
#define CLEAR_CACHE 0
|
||||||
|
#define CACHE_BYTES (1<<19)
|
||||||
|
#define CACHE_BLOCK 32
|
||||||
|
#define MAX_ITER_TIMES 10
|
||||||
|
|
||||||
|
static long int kbest = K;
|
||||||
|
static long int compensate = COMPENSATE;
|
||||||
|
static long int clear_cache = CLEAR_CACHE;
|
||||||
|
static long int maxsamples = MAXSAMPLES;
|
||||||
|
static double epsilon = EPSILON;
|
||||||
|
static long int cache_bytes = CACHE_BYTES;
|
||||||
|
static long int cache_block = CACHE_BLOCK;
|
||||||
|
|
||||||
|
static long int *cache_buf = NULL;
|
||||||
|
|
||||||
|
static double *values = NULL;
|
||||||
|
static long int samplecount = 0;
|
||||||
|
|
||||||
|
#define KEEP_VALS 0
|
||||||
|
#define KEEP_SAMPLES 0
|
||||||
|
|
||||||
|
#if KEEP_SAMPLES
|
||||||
|
static double *samples = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Start new sampling process */
|
||||||
|
static void init_sampler(void)
|
||||||
|
{
|
||||||
|
if (values)
|
||||||
|
free(values);
|
||||||
|
values = calloc(kbest, sizeof(double));
|
||||||
|
#if KEEP_SAMPLES
|
||||||
|
if (samples)
|
||||||
|
free(samples);
|
||||||
|
/* Allocate extra for wraparound analysis */
|
||||||
|
samples = calloc(maxsamples+kbest, sizeof(double));
|
||||||
|
#endif
|
||||||
|
samplecount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add new sample. */
|
||||||
|
static void add_sample(double val)
|
||||||
|
{
|
||||||
|
long int pos = 0;
|
||||||
|
if (samplecount < kbest) {
|
||||||
|
pos = samplecount;
|
||||||
|
values[pos] = val;
|
||||||
|
} else if (val < values[kbest-1]) {
|
||||||
|
pos = kbest-1;
|
||||||
|
values[pos] = val;
|
||||||
|
}
|
||||||
|
#if KEEP_SAMPLES
|
||||||
|
samples[samplecount] = val;
|
||||||
|
#endif
|
||||||
|
samplecount++;
|
||||||
|
/* Insertion sort */
|
||||||
|
while (pos > 0 && values[pos-1] > values[pos]) {
|
||||||
|
double temp = values[pos-1];
|
||||||
|
values[pos-1] = values[pos];
|
||||||
|
values[pos] = temp;
|
||||||
|
pos--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Have kbest minimum measurements converged within epsilon? */
|
||||||
|
static long int has_converged(void)
|
||||||
|
{
|
||||||
|
return
|
||||||
|
(samplecount >= kbest) &&
|
||||||
|
((1 + epsilon)*values[0] >= values[kbest-1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Code to clear cache */
|
||||||
|
|
||||||
|
|
||||||
|
static volatile long int sink = 0;
|
||||||
|
|
||||||
|
static void clear(void)
|
||||||
|
{
|
||||||
|
long int x = sink;
|
||||||
|
long int *cptr, *cend;
|
||||||
|
long int incr = cache_block/sizeof(long int);
|
||||||
|
if (!cache_buf) {
|
||||||
|
cache_buf = malloc(cache_bytes);
|
||||||
|
if (!cache_buf) {
|
||||||
|
fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cptr = (long int *) cache_buf;
|
||||||
|
cend = cptr + cache_bytes/sizeof(long int);
|
||||||
|
while (cptr < cend) {
|
||||||
|
x += *cptr;
|
||||||
|
cptr += incr;
|
||||||
|
}
|
||||||
|
sink = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Measure the cycle count of f(params).
 *
 * Measurements are repeated until the kbest smallest ones agree within
 * epsilon (see has_converged) or maxsamples of them have been recorded.
 * When `compensate` is set each measurement times a single call;
 * otherwise MAX_ITER_TIMES back-to-back calls are timed and averaged.
 * If `clear_cache` is set, clear() runs before every measurement.
 *
 * Measurements that are not positive are discarded.  At most maxsamples
 * discards are tolerated, so a counter that keeps returning non-positive
 * readings can no longer make this loop spin forever.
 *
 * Returns the smallest recorded measurement, or 0 if none was recorded
 * (the table set up by init_sampler starts out zeroed).
 */
double fcyc(test_funct f, int *params)
{
    int i;
    long int rejected = 0;
    double result;

    init_sampler();
    do {
        double cyc;
        if (clear_cache)
            clear();
        start_counter();
        if (compensate) {
            f(params);
            cyc = get_counter();
        } else {
            for (i = 0; i < MAX_ITER_TIMES; i++)
                f(params);
            cyc = get_counter()/MAX_ITER_TIMES;
        }
        if (cyc > 0.0)
            add_sample(cyc);
        else
            rejected++;
    } while (!has_converged() && samplecount < maxsamples
             && rejected < maxsamples);
#ifdef DEBUG
    {
        long int j;
        printf(" %ld smallest values: [", kbest);
        for (j = 0; j < kbest; j++)
            printf("%.0f%s", values[j], j==kbest-1 ? "]\n" : ", ");
    }
#endif
    result = values[0];
#if !KEEP_VALS
    free(values);
    values = NULL;
#endif
    return result;
}
|
||||||
|
|
||||||
|
|
||||||
|
/***********************************************************/
|
||||||
|
/* Set the various parameters used by measurement routines */
|
||||||
|
|
||||||
|
|
||||||
|
/* When set, will run code to clear cache before each measurement
|
||||||
|
Default = 0
|
||||||
|
*/
|
||||||
|
void set_fcyc_clear_cache(long int clear)
|
||||||
|
{
|
||||||
|
clear_cache = clear;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set size of cache to use when clearing cache
|
||||||
|
Default = 1<<19 (512KB)
|
||||||
|
*/
|
||||||
|
void set_fcyc_cache_size(long int bytes)
|
||||||
|
{
|
||||||
|
if (bytes != cache_bytes) {
|
||||||
|
cache_bytes = bytes;
|
||||||
|
if (cache_buf) {
|
||||||
|
free(cache_buf);
|
||||||
|
cache_buf = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set size of cache block
|
||||||
|
Default = 32
|
||||||
|
*/
|
||||||
|
void set_fcyc_cache_block(long int bytes) {
|
||||||
|
cache_block = bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* When set, will attempt to compensate for timer interrupt overhead
|
||||||
|
Default = 0
|
||||||
|
*/
|
||||||
|
void set_fcyc_compensate(long int compensate_arg)
|
||||||
|
{
|
||||||
|
compensate = compensate_arg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Value of K in K-best
|
||||||
|
Default = 3
|
||||||
|
*/
|
||||||
|
void set_fcyc_k(long int k)
|
||||||
|
{
|
||||||
|
kbest = k;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Maximum number of samples attempting to find K-best within some tolerance.
|
||||||
|
When exceeded, just return best sample found.
|
||||||
|
Default = 20
|
||||||
|
*/
|
||||||
|
void set_fcyc_maxsamples(long int maxsamples_arg)
|
||||||
|
{
|
||||||
|
maxsamples = maxsamples_arg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Tolerance required for K-best
|
||||||
|
Default = 0.01
|
||||||
|
*/
|
||||||
|
void set_fcyc_epsilon(double epsilon_arg)
|
||||||
|
{
|
||||||
|
epsilon = epsilon_arg;
|
||||||
|
}
|
||||||
52
perflab/poly/fcyc.h
Normal file
52
perflab/poly/fcyc.h
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
|
||||||
|
/* Fcyc measures the speed of any "test function." Such a function
|
||||||
|
is passed a list of integer parameters, which it may interpret
|
||||||
|
in any way it chooses.
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef void (*test_funct)(long int *);
|
||||||
|
|
||||||
|
/* Compute number of cycles used by function f on given set of parameters */
|
||||||
|
double fcyc(test_funct f, int* params);
|
||||||
|
|
||||||
|
/***********************************************************/
|
||||||
|
/* Set the various parameters used by measurement routines */
|
||||||
|
|
||||||
|
|
||||||
|
/* When set, will run code to clear cache before each measurement
|
||||||
|
Default = 0
|
||||||
|
*/
|
||||||
|
void set_fcyc_clear_cache(long int clear);
|
||||||
|
|
||||||
|
/* Set size of cache to use when clearing cache
|
||||||
|
Default = 1<<19 (512KB)
|
||||||
|
*/
|
||||||
|
void set_fcyc_cache_size(long int bytes);
|
||||||
|
|
||||||
|
/* Set size of cache block
|
||||||
|
Default = 32
|
||||||
|
*/
|
||||||
|
void set_fcyc_cache_block(long int bytes);
|
||||||
|
|
||||||
|
/* When set, will attempt to compensate for timer interrupt overhead
|
||||||
|
Default = 0
|
||||||
|
*/
|
||||||
|
void set_fcyc_compensate(long int compensate);
|
||||||
|
|
||||||
|
/* Value of K in K-best
|
||||||
|
Default = 3
|
||||||
|
*/
|
||||||
|
void set_fcyc_k(long int k);
|
||||||
|
|
||||||
|
/* Maximum number of samples attempting to find K-best within some tolerance.
|
||||||
|
When exceeded, just return best sample found.
|
||||||
|
Default = 20
|
||||||
|
*/
|
||||||
|
void set_fcyc_maxsamples(long int maxsamples);
|
||||||
|
|
||||||
|
/* Tolerance required for K-best
|
||||||
|
Default = 0.01
|
||||||
|
*/
|
||||||
|
void set_fcyc_epsilon(double epsilon);
|
||||||
|
|
||||||
|
|
||||||
94
perflab/poly/lsquare.c
Normal file
94
perflab/poly/lsquare.c
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
/* Compute least squares fit of set of data points */
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "lsquare.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
double sum_x;
|
||||||
|
double sum_y;
|
||||||
|
double sum_xx;
|
||||||
|
double sum_xy;
|
||||||
|
} ls_stat_t;
|
||||||
|
|
||||||
|
/* Accumulate various sums of the data */
|
||||||
|
static void ls_stats(double *xval, double *yval, int cnt, ls_stat_t *statp)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
statp->sum_x = 0.0;
|
||||||
|
statp->sum_y = 0.0;
|
||||||
|
statp->sum_xx = 0.0;
|
||||||
|
statp->sum_xy = 0.0;
|
||||||
|
for (i = 0; i < cnt; i++) {
|
||||||
|
double x = xval[i];
|
||||||
|
double y = yval[i];
|
||||||
|
statp->sum_x += x;
|
||||||
|
statp->sum_y += y;
|
||||||
|
statp->sum_xx += x * x;
|
||||||
|
statp->sum_xy += x * y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double ls_slope(double *xval, double *yval, int cnt)
|
||||||
|
{
|
||||||
|
double slope;
|
||||||
|
ls_stat_t stat;
|
||||||
|
ls_stats(xval, yval, cnt, &stat);
|
||||||
|
slope = (cnt * stat.sum_xy - stat.sum_x * stat.sum_y)/
|
||||||
|
(cnt * stat.sum_xx - stat.sum_x*stat.sum_x);
|
||||||
|
return slope;
|
||||||
|
}
|
||||||
|
|
||||||
|
double ls_intercept(double *xval, double *yval, int cnt)
|
||||||
|
{
|
||||||
|
double intercept;
|
||||||
|
ls_stat_t stat;
|
||||||
|
ls_stats(xval, yval, cnt, &stat);
|
||||||
|
intercept = (stat.sum_xx * stat.sum_y - stat.sum_xy * stat.sum_x)/
|
||||||
|
(cnt * stat.sum_xx - stat.sum_x*stat.sum_x);
|
||||||
|
return intercept;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Relative error of the point (x, y) against the line
   y = slope*x + intercept: |y - predicted| / |predicted|.
   When the prediction is exactly zero the absolute error is returned
   instead, to avoid dividing by zero.  The result is never negative. */
static double rel_err(double x, double y, double slope, double intercept)
{
    double pred_y = slope*x + intercept;
    double offset = y - pred_y;
    double scale = pred_y;

    if (offset < 0)
        offset = -offset;
    if (pred_y == 0)
        return offset;
    /* Divide by the magnitude of the prediction: with a negative
       prediction the signed quotient would be a negative "error". */
    if (scale < 0)
        scale = -scale;
    return offset/scale;
}
|
||||||
|
|
||||||
|
/* Measure how well the least squares line fits the cnt data points.
   For every point the error reported by rel_err() is taken;
   LS_AVG returns the mean of those errors, LS_MAX the largest one.
   An unknown etype is reported on stderr and terminates the program.
   With cnt <= 0 nothing is accumulated and the result is 0.0/0.0. */
double ls_error(double *xval, double *yval, int cnt, ls_err_t etype)
{
    ls_stat_t s;
    double fit_denom;
    double slope;
    double intercept;
    double acc = 0;
    double divisor = 0;
    int k;

    /* Fit the line (same formulas as ls_slope and ls_intercept). */
    ls_stats(xval, yval, cnt, &s);
    fit_denom = cnt * s.sum_xx - s.sum_x*s.sum_x;
    slope = (cnt * s.sum_xy - s.sum_x * s.sum_y)/fit_denom;
    intercept = (s.sum_xx * s.sum_y - s.sum_xy * s.sum_x)/fit_denom;

    for (k = 0; k < cnt; k++) {
        double e = rel_err(xval[k], yval[k], slope, intercept);

        if (etype == LS_AVG) {
            acc += e;
            divisor++;
        } else if (etype == LS_MAX) {
            if (acc < e)
                acc = e;
            divisor = 1;
        } else {
            fprintf(stderr, "Invalid error type: %d\n", etype);
            exit(1);
        }
    }
    return acc/divisor;
}
|
||||||
11
perflab/poly/lsquare.h
Normal file
11
perflab/poly/lsquare.h
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
/* Compute least squares fit of set of data points */
|
||||||
|
|
||||||
|
/* Fit is of form y = mx + b. m is slope, b is intercept */
|
||||||
|
double ls_slope(double *xval, double *yval, int cnt);
|
||||||
|
double ls_intercept(double *xval, double *yval, int cnt);
|
||||||
|
|
||||||
|
typedef enum {LS_AVG, LS_MAX} ls_err_t;
|
||||||
|
|
||||||
|
/* Determine the relative error (LS_AVG: average, LS_MAX: maximum) of the least squares fit */
|
||||||
|
double ls_error(double *xval, double *yval, int cnt, ls_err_t etype);
|
||||||
|
|
||||||
125
perflab/poly/poly.c
Normal file
125
perflab/poly/poly.c
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
/**************************************************************************
|
||||||
|
<09><><EFBFBD><EFBFBD>ʽ<EFBFBD><CABD><EFBFBD>㺯<EFBFBD><E3BAAF><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><D2AA><EFBFBD>༭<EFBFBD><E0BCAD><EFBFBD>ļ<EFBFBD><C4BC><EFBFBD>
|
||||||
|
1. <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ѧ<EFBFBD>š<EFBFBD><C5A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ע<EFBFBD>͵ķ<CDB5>ʽд<CABD><D0B4><EFBFBD><EFBFBD><EFBFBD>棻
|
||||||
|
2. ʵ<>ֲ<EFBFBD>ͬ<EFBFBD>汾<EFBFBD>Ķ<EFBFBD><C4B6><EFBFBD>ʽ<EFBFBD><CABD><EFBFBD>㺯<EFBFBD><E3BAAF><EFBFBD><EFBFBD>
|
||||||
|
3. <20>༭peval_fun_rec peval_fun_tab<61><62><EFBFBD>飬<EFBFBD><E9A3AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>õĴ<C3B5><C4B4><EFBFBD>
|
||||||
|
<09><><EFBFBD><EFBFBD>СCPE<50><45><EFBFBD><EFBFBD>СC10<31><30><EFBFBD><EFBFBD>Ϊ<EFBFBD><CEAA><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0><EFBFBD><EFBFBD>
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
ѧ<>ţ<EFBFBD>201209054233
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ҹ<EFBFBD><D2B9><EFBFBD>Ӱ<EFBFBD><D3B0><EFBFBD>
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
typedef int (*peval_fun)(int*, int, int);
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
peval_fun f;
|
||||||
|
char *descr;
|
||||||
|
} peval_fun_rec, *peval_fun_ptr;
|
||||||
|
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
Edit this comment to indicate your name and Andrew ID
|
||||||
|
#ifdef ASSIGN
|
||||||
|
Submission by Harry Q. Bovik, bovik@andrew.cmu.edu
|
||||||
|
#else
|
||||||
|
Instructor's version.
|
||||||
|
Created by Randal E. Bryant, Randy.Bryant@cs.cmu.edu, 10/07/02
|
||||||
|
#endif
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
ʵ<><CAB5>һ<EFBFBD><D2BB>ָ<EFBFBD><D6B8><EFBFBD>ij<EFBFBD>ϵ<EFBFBD><CFB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʽ<EFBFBD><CABD><EFBFBD><EFBFBD>
|
||||||
|
<09><>һ<EFBFBD>Σ<EFBFBD><CEA3><EFBFBD>ֱ<EFBFBD><D6B1><EFBFBD><EFBFBD><EFBFBD>г<EFBFBD><D0B3><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ա<EFBFBD><D4B1><EFBFBD>֪<EFBFBD><D6AA><EFBFBD><EFBFBD>Ҫʵ<D2AA>ֵij<D6B5>ϵ<EFBFBD><CFB5><EFBFBD><EFBFBD>ɶ
|
||||||
|
*/
|
||||||
|
/* Evaluate the fixed cubic 21 + 90*x + 42*x^2 + 88*x^3.
   The first two parameters are ignored; they are present so that the
   function matches the peval_fun signature. */
int const_poly_eval(int *not_use, int not_use2, int x)
{
    (void) not_use;
    (void) not_use2;

    /* Horner form.  Plain constant multiplications replace the earlier
       hand-written shifts (x << 6 etc.): left-shifting a negative int is
       undefined behaviour in C. */
    return 21 + (90 + (42 + 88*x)*x)*x;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* <20><><EFBFBD><EFBFBD>ʽ<EFBFBD><CABD><EFBFBD>㺯<EFBFBD><E3BAAF><EFBFBD><EFBFBD>ע<EFBFBD>⣺<EFBFBD><E2A3BA><EFBFBD><EFBFBD>ֻ<EFBFBD><D6BB>һ<EFBFBD><D2BB><EFBFBD>ο<EFBFBD>ʵ<EFBFBD>֣<EFBFBD><D6A3><EFBFBD><EFBFBD><EFBFBD>Ҫʵ<D2AA><CAB5><EFBFBD>Լ<EFBFBD><D4BC>İ汾 */
|
||||||
|
|
||||||
|
/*
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʾ<EFBFBD><CABE>lcc֧<63><D6A7>ATT<54><54>ʽ<EFBFBD><CABD>Ƕ<EFBFBD><C7B6>ʽ<EFBFBD><CABD><EFBFBD>࣬<EFBFBD><E0A3AC><EFBFBD><EFBFBD>
|
||||||
|
|
||||||
|
_asm("movl %eax,%ebx");
|
||||||
|
_asm("pushl %edx");
|
||||||
|
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>lcc<63><63>project->configuration->Compiler->Code Generation->Generate .asm<73><6D>
|
||||||
|
<09><><EFBFBD><EFBFBD>ѡ<EFBFBD>к<D0BA><F3A3ACBF><EFBFBD><EFBFBD><EFBFBD>lccĿ¼<C4BF><C2BC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ɶ<EFBFBD>Ӧ<EFBFBD><D3A6><EFBFBD><EFBFBD><EFBFBD>Ļ<EFBFBD><C4BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD>֡<EFBFBD>ͨ<EFBFBD><CDA8><EFBFBD>鿴<EFBFBD><E9BFB4><EFBFBD><EFBFBD><EFBFBD>ļ<EFBFBD><C4BC><EFBFBD>
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>˽<EFBFBD><CBBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD><CAB5><EFBFBD><EFBFBD><EFBFBD>Ĵ<EFBFBD><C4B4><EFBFBD><EFBFBD>ġ<EFBFBD><C4A1><EFBFBD>Щʵ<D0A9>ֿ<EFBFBD><D6BF>ܷdz<DCB7><C7B3><EFBFBD>Ч<EFBFBD><D0A7>
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD><CAB5>ĵط<C4B5><D8B7><EFBFBD><EFBFBD><EFBFBD>Ƕ<EFBFBD><C7B6>ʽ<EFBFBD><CABD><EFBFBD>࣬<EFBFBD><E0A3AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><DFBC><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ܡ<EFBFBD>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Evaluate the polynomial a[0] + a[1]*x + ... + a[degree]*x^degree by
   accumulating each term with a running power of x.
   Returns 0 when degree is negative (no terms are added). */
int poly_eval(int *a, int degree, int x)
{
    int sum = 0;
    int power = 1;      /* x raised to the index of the current term */
    int k = 0;

    while (k <= degree) {
        sum += a[k]*power;
        power *= x;
        k++;
    }
    return sum;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ԫ<EFBFBD>أ<EFBFBD>ÿһ<C3BF><D2BB>Ԫ<EFBFBD>أ<EFBFBD><D8A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, "<22><><EFBFBD><EFBFBD><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD>"<22><>
|
||||||
|
<09><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊ<EFBFBD><CEAA><EFBFBD>õ<EFBFBD><C3B5><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD>֣<EFBFBD><D6A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD>档
|
||||||
|
<09><><EFBFBD>磺
|
||||||
|
{my_poly_eval1, "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD><CAB5>"},
|
||||||
|
{my_poly_eval2, "<22><>һ<EFBFBD><D2BB><EFBFBD><EFBFBD>ʵ<EFBFBD><CAB5>"},
|
||||||
|
*/
|
||||||
|
|
||||||
|
peval_fun_rec peval_fun_tab[] =
|
||||||
|
{
|
||||||
|
|
||||||
|
/* <20><>һ<EFBFBD>Ӧ<EEA3AC><D3A6><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><D0B4><EFBFBD><EFBFBD><EFBFBD><EFBFBD>CPE<50>ĺ<EFBFBD><C4BA><EFBFBD>ʵ<EFBFBD><CAB5> */
|
||||||
|
{poly_eval, "ҹ<EFBFBD><EFBFBD><EFBFBD>Ӱ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>CPE"},
|
||||||
|
/* <20>ڶ<EFBFBD><DAB6>Ӧ<EEA3AC><D3A6><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><D0B4><EFBFBD><EFBFBD>10<31><30>ʱ<EFBFBD><CAB1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ܵ<EFBFBD>ʵ<EFBFBD><CAB5> */
|
||||||
|
{poly_eval, "ҹ<EFBFBD><EFBFBD><EFBFBD>Ӱ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>10<EFBFBD><EFBFBD>ʵ<EFBFBD><EFBFBD>"},
|
||||||
|
|
||||||
|
{poly_eval, "poly_eval: <20>ο<EFBFBD>ʵ<EFBFBD><CAB5>"},
|
||||||
|
|
||||||
|
/* <20><><EFBFBD><EFBFBD><EFBFBD>Ĵ<EFBFBD><C4B4>벻<EFBFBD><EBB2BB><EFBFBD>Ļ<DEB8><C4BB><EFBFBD>ɾ<EFBFBD><C9BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>б<EFBFBD><D0B1><EFBFBD><EFBFBD><EFBFBD> */
|
||||||
|
{NULL, ""}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
17
perflab/poly/poly.h
Normal file
17
perflab/poly/poly.h
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
/*
|
||||||
|
Integer polynomial evaluation.
|
||||||
|
Polynomial given by array of coefficients a[0] ... a[degree].
|
||||||
|
Want to compute SUM(i=0,degree) a[i] * x^i
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Type declaration for a polynomial evaluation function */
|
||||||
|
typedef int (*peval_fun)(int*, int, int);
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
peval_fun f;
|
||||||
|
char *descr;
|
||||||
|
} peval_fun_rec, *peval_fun_ptr;
|
||||||
|
|
||||||
|
/* Table of polynomial functions to test. Null terminated */
|
||||||
|
extern peval_fun_rec peval_fun_tab[];
|
||||||
|
|
||||||
28
perflab/poly/poly/poly.sln
Normal file
28
perflab/poly/poly/poly.sln
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio 14
|
||||||
|
VisualStudioVersion = 14.0.25420.1
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "poly", "poly.vcxproj", "{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|x64 = Debug|x64
|
||||||
|
Debug|x86 = Debug|x86
|
||||||
|
Release|x64 = Release|x64
|
||||||
|
Release|x86 = Release|x86
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Debug|x64.ActiveCfg = Debug|x64
|
||||||
|
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Debug|x64.Build.0 = Debug|x64
|
||||||
|
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Debug|x86.ActiveCfg = Debug|Win32
|
||||||
|
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Debug|x86.Build.0 = Debug|Win32
|
||||||
|
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Release|x64.ActiveCfg = Release|x64
|
||||||
|
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Release|x64.Build.0 = Release|x64
|
||||||
|
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Release|x86.ActiveCfg = Release|Win32
|
||||||
|
{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}.Release|x86.Build.0 = Release|Win32
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
||||||
123
perflab/poly/poly/poly.vcxproj
Normal file
123
perflab/poly/poly/poly.vcxproj
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
|
<PropertyGroup Label="Globals">
|
||||||
|
<ProjectGuid>{E1020F7E-007A-4A1B-8E21-FE8F513E2A3B}</ProjectGuid>
|
||||||
|
<RootNamespace>poly</RootNamespace>
|
||||||
|
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v140</PlatformToolset>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v140</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v140</PlatformToolset>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v140</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
|
<ImportGroup Label="ExtensionSettings">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="Shared">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<PropertyGroup Label="UserMacros" />
|
||||||
|
<PropertyGroup />
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>Disabled</Optimization>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>Disabled</Optimization>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>MaxSpeed</Optimization>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>MaxSpeed</Optimization>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="..\clock.c" />
|
||||||
|
<ClCompile Include="..\cpe.c" />
|
||||||
|
<ClCompile Include="..\fcyc.c" />
|
||||||
|
<ClCompile Include="..\lsquare.c" />
|
||||||
|
<ClCompile Include="..\poly.c" />
|
||||||
|
<ClCompile Include="..\poly_test.c" />
|
||||||
|
</ItemGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
</ImportGroup>
|
||||||
|
</Project>
|
||||||
37
perflab/poly/poly/poly.vcxproj.filters
Normal file
37
perflab/poly/poly/poly.vcxproj.filters
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup>
|
||||||
|
<Filter Include="源文件">
|
||||||
|
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||||
|
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="头文件">
|
||||||
|
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||||
|
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="资源文件">
|
||||||
|
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||||
|
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||||
|
</Filter>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="..\clock.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\cpe.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\fcyc.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\lsquare.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\poly.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\poly_test.c">
|
||||||
|
<Filter>源文件</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
||||||
302
perflab/poly/poly_test.c
Normal file
302
perflab/poly/poly_test.c
Normal file
@ -0,0 +1,302 @@
|
|||||||
|
/* Test setup for polynomial evaluation. Do not change this. */
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
//#include <random.h>
|
||||||
|
#include "poly.h"
|
||||||
|
#include "cpe.h"
|
||||||
|
#include "clock.h"
|
||||||
|
|
||||||
|
double CPU_Mhz;
|
||||||
|
|
||||||
|
/* Degree for fixed evaluation */
|
||||||
|
#define FIXDEGREE 10
|
||||||
|
/* Largest degree polynomial tested */
|
||||||
|
#define MAXDEGREE 2000
|
||||||
|
static int coeff[MAXDEGREE+1];
|
||||||
|
|
||||||
|
#define MAX_ITER_COUNT 100
|
||||||
|
|
||||||
|
#define REF_CPU_MHZ 2292.6 // reference CPU clock frequency in MHz (original comment was mis-encoded; presumably the author's machine)
|
||||||
|
|
||||||
|
/* Define performance standards */
|
||||||
|
static struct {
|
||||||
|
double cref; /* Cycles taken by reference solution */
|
||||||
|
double cbest; /* Cycles taken by our best implementation */
|
||||||
|
} cstandard[3] =
|
||||||
|
{{4.00, 1.75}, /* CPE */
|
||||||
|
{50, 43}, /* C(10) */
|
||||||
|
{57,31} /* constant-coefficient polynomial (const_poly_eval) */
|
||||||
|
};
|
||||||
|
|
||||||
|
int coeff_const[4];
|
||||||
|
|
||||||
|
/* Should I print extra information? */
|
||||||
|
int verbose = 0;
|
||||||
|
|
||||||
|
/* Standard value for polynomial evaluation */
|
||||||
|
static int xval;
|
||||||
|
|
||||||
|
/* How many degrees should I compute reference value for? */
|
||||||
|
#define DCNT 20
|
||||||
|
|
||||||
|
/* Correct value of polynomial evaluation for range of different degrees */
|
||||||
|
/* pval[i] contains evaluation for degree MAXDEGREE-i */
|
||||||
|
static int pval[DCNT];
|
||||||
|
/* fixval contains evaluation for degree FIXDEGREE */
|
||||||
|
static int fixval;
|
||||||
|
static int fixval_const;
|
||||||
|
|
||||||
|
static void init_const_poly(void);
|
||||||
|
static void init(void);
|
||||||
|
extern int const_poly_eval(int *not_use, int not_use2, int x);
|
||||||
|
void run_fun_const(int degree);
|
||||||
|
static double compute_score(double cmeas, double cref, double cbest);
|
||||||
|
unsigned long rand1_h,rand1_l,rand_div;
|
||||||
|
void rand_step(unsigned long divv);
|
||||||
|
void GenerateRandomNumber(unsigned long divv);
|
||||||
|
extern void make_CPU_busy(void);
|
||||||
|
double run_poly_perf_test(void);
|
||||||
|
|
||||||
|
/* Reference implementation: sums a[k] * x^k for k = 0..degree, carrying a
   running power of x from one term to the next. */
static int ref_poly_eval(int *a, int degree, int x)
{
    int sum = 0;
    int power = 1;   /* x^k for the term about to be added */
    int k = 0;

    while (k <= degree) {
        sum += a[k] * power;
        power *= x;
        k++;
    }
    return sum;
}
|
||||||
|
|
||||||
|
/* Initialize polynomial to constant values and compute reference values */
|
||||||
|
static void init_const_poly(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i=0;i<4;i++)
|
||||||
|
{
|
||||||
|
GenerateRandomNumber(90);
|
||||||
|
coeff_const[i] = rand_div+10;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><EFBFBD>poly.c<><63>const_poly_eval<61><6C><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD><CAB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ij<EFBFBD><C4B3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʽ<EFBFBD><CABD><EFBFBD>㣡\n");
|
||||||
|
printf("\tresult=%d+%d*x+%d*x^2+%d*x^3\n",coeff_const[0],coeff_const[1],coeff_const[2],coeff_const[3]);
|
||||||
|
|
||||||
|
fixval_const = ref_poly_eval(coeff_const, 3, xval);
|
||||||
|
// printf("x=%d, fixval_const=%d\n",xval,fixval_const);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_const_poly(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
double fix_time=0;
|
||||||
|
int my_cal = const_poly_eval(coeff_const, 3, xval);
|
||||||
|
if (fixval_const != my_cal)
|
||||||
|
{
|
||||||
|
printf("<EFBFBD><EFBFBD>ϵ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʽ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>const_poly_evalʵ<EFBFBD>ִ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>x=%d<><64><EFBFBD><EFBFBD>Ԥ<EFBFBD>ڽ<EFBFBD><DABD><EFBFBD><EFBFBD><EFBFBD>%d<><64><EFBFBD><EFBFBD><EFBFBD>Ǽ<EFBFBD><C7BC><EFBFBD><EFBFBD>õ<EFBFBD><C3B5><EFBFBD><EFBFBD><EFBFBD>%d\n",xval,fixval_const,my_cal);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
fix_time = 0;
|
||||||
|
for (i=0;i<MAX_ITER_COUNT;i++)
|
||||||
|
fix_time += measure_function(run_fun_const, 3);
|
||||||
|
fix_time = fix_time / MAX_ITER_COUNT;
|
||||||
|
printf(" <20><>ϵ<EFBFBD><CFB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʽ<EFBFBD><CABD><EFBFBD><EFBFBD>ʱ<EFBFBD><CAB1> = %.1f\n", fix_time);
|
||||||
|
printf(" <20><><EFBFBD>ߵij<DFB5>ϵ<EFBFBD><CFB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʽ<EFBFBD><CABD><EFBFBD><EFBFBD><EFBFBD>÷<EFBFBD> ============== %.0f\n",
|
||||||
|
compute_score(fix_time, cstandard[2].cref, cstandard[2].cbest));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize polynomial to random values and compute reference values */
|
||||||
|
static void init(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
xval = rand();
|
||||||
|
for (i = 0; i <= MAXDEGREE; i++)
|
||||||
|
coeff[i] = rand();
|
||||||
|
for (i = 0; i < DCNT; i++)
|
||||||
|
pval[i] = ref_poly_eval(coeff, MAXDEGREE-i, xval);
|
||||||
|
fixval = ref_poly_eval(coeff, FIXDEGREE, xval);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test function on standard test cases. */
|
||||||
|
int test_poly(peval_fun f, FILE *rpt) {
|
||||||
|
int i;
|
||||||
|
int v;
|
||||||
|
int ok = 1;
|
||||||
|
for (i = 0; i < DCNT; i++) {
|
||||||
|
v = f(coeff, MAXDEGREE-i, xval);
|
||||||
|
if (v != pval[i]) {
|
||||||
|
ok = 0;
|
||||||
|
if (rpt) {
|
||||||
|
fprintf(rpt,
|
||||||
|
"<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʽ<EFBFBD><EFBFBD><EFBFBD>㲻<EFBFBD>ԣ<EFBFBD><EFBFBD><EFBFBD>=%dʱ<64><CAB1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5>%d<><64><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȷֵ<C8B7><D6B5>%d\n",
|
||||||
|
MAXDEGREE-i, v, pval[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
v = f(coeff, FIXDEGREE, xval);
|
||||||
|
if (v != fixval) {
|
||||||
|
ok = 0;
|
||||||
|
if (rpt) {
|
||||||
|
fprintf(rpt,
|
||||||
|
"<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʽ<EFBFBD><EFBFBD><EFBFBD>㲻<EFBFBD>ԣ<EFBFBD><EFBFBD><EFBFBD>=%dʱ<64><CAB1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5>%d<><64><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȷֵ<C8B7><D6B5>%d\n",
|
||||||
|
FIXDEGREE, v, fixval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fit into framework of cpe measuring code */
|
||||||
|
static peval_fun pfun;
|
||||||
|
|
||||||
|
volatile int sink;
|
||||||
|
/* Run pfun for given degree */
|
||||||
|
void run_fun(int degree)
|
||||||
|
{
|
||||||
|
sink = pfun(coeff, degree, xval);
|
||||||
|
}
|
||||||
|
|
||||||
|
volatile int sink_const;
|
||||||
|
/* Run pfun for given degree */
|
||||||
|
void run_fun_const(int degree)
|
||||||
|
{
|
||||||
|
sink_const = const_poly_eval(coeff_const, degree, xval);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Test and measure polynomial evaluation function. Set values
|
||||||
|
of CPE and CFIX */
|
||||||
|
void run_poly(peval_fun f, char *descr, double *cpep, double *cfixp)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
double cpe=0;
|
||||||
|
double fix_time=0;
|
||||||
|
pfun = f;
|
||||||
|
printf("<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>%s\n", descr);
|
||||||
|
if (test_poly(f, stdout)) {
|
||||||
|
cpe = 0;
|
||||||
|
for (i=0;i<MAX_ITER_COUNT;i++)
|
||||||
|
cpe += find_cpe(run_fun, MAXDEGREE);
|
||||||
|
cpe = cpe/MAX_ITER_COUNT;
|
||||||
|
fix_time = 0;
|
||||||
|
for (i=0;i<MAX_ITER_COUNT;i++)
|
||||||
|
fix_time += measure_function(run_fun, FIXDEGREE);
|
||||||
|
fix_time = fix_time/MAX_ITER_COUNT;
|
||||||
|
printf(" CPE = %.2f\tC(%d) = %.1f\n", cpe,
|
||||||
|
FIXDEGREE, fix_time);
|
||||||
|
if (cpep)
|
||||||
|
*cpep = cpe;
|
||||||
|
if (cfixp)
|
||||||
|
*cfixp = fix_time;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute the grade achieved by a measurement.
   Speedups are taken relative to cref. A speedup below 10% of the way from
   1 to the best speedup scores 0; above 110% of the way it scores 120;
   in between the score is linear in the speedup. */
static double compute_score(double cmeas, double cref, double cbest)
{
    double best_speedup = cref / cbest;
    double speedup = cref / cmeas;
    double floor_speedup = 0.1 * (best_speedup - 1) + 1;
    double cap_speedup = 1.1 * (best_speedup - 1) + 1;

    if (speedup < floor_speedup)
        return 0;
    if (speedup > cap_speedup)
        return 120;
    return 100 * ((speedup - 1.0) / (best_speedup - 1.0) + 0.1);
}
|
||||||
|
|
||||||
|
/* <20><><EFBFBD><EFBFBD>һ<EFBFBD><D2BB>0~divv-1֮<31><D6AE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͬʱ<CDAC><CAB1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
|
||||||
|
void GenerateRandomNumber(unsigned long divv)
|
||||||
|
{
|
||||||
|
unsigned long long x = rand1_h;
|
||||||
|
x *= 0x6AC690C5;
|
||||||
|
x += rand1_l;
|
||||||
|
|
||||||
|
rand1_h = (unsigned long)x;
|
||||||
|
rand1_l = (unsigned long)(x>>32);
|
||||||
|
if (divv==0) return;
|
||||||
|
|
||||||
|
rand_div = rand1_h % divv;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
double cpe = cstandard[0].cref;
|
||||||
|
double cfix = cstandard[1].cref;
|
||||||
|
verbose = 0;
|
||||||
|
srand((unsigned int)time(NULL));
|
||||||
|
|
||||||
|
// CPU_Factor();
|
||||||
|
// GetCpuClock();
|
||||||
|
printf("\t2015<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʽ<EFBFBD>Ż<EFBFBD>ʵ<EFBFBD>飬<EFBFBD><EFBFBD>ӭ<EFBFBD>㣡\n");
|
||||||
|
printf("============================\n");
|
||||||
|
|
||||||
|
if (argc == 1)
|
||||||
|
{
|
||||||
|
printf("ʹ<EFBFBD>÷<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>%s ѧ<>ź<EFBFBD>6λ [ѧ<>ź<EFBFBD>6λ] [ѧ<>ź<EFBFBD>6λ] ...\n",argv[0]);
|
||||||
|
printf("<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʾ<EFBFBD><EFBFBD>дpoly.c<><63><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD><CAB5>һ<EFBFBD><D2BB><EFBFBD><EFBFBD>ϵ<EFBFBD><CFB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʽ<EFBFBD>ļ<EFBFBD><C4BC>㣬<EFBFBD><E3A3AC><EFBFBD><EFBFBD><EFBFBD>ܿ<EFBFBD>Ŷ....\n");
|
||||||
|
printf("<EFBFBD><EFBFBD><EFBFBD>⣬<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><EFBFBD>дpoly.c<><63><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʵ<EFBFBD><CAB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ķ<D7B5><C4B6><EFBFBD>ʽ<EFBFBD><CABD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>10<31>Ķ<D7B5><C4B6><EFBFBD>ʽ<EFBFBD><CABD><EFBFBD>㣬Ҫ<E3A3AC>죡\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*<2A><><EFBFBD><EFBFBD>ѧ<EFBFBD>ţ<EFBFBD><C5A3><EFBFBD>ʼ<EFBFBD><CABC>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*/
|
||||||
|
rand1_h = (unsigned long)atoi(argv[1]);
|
||||||
|
rand1_l=0x29A;
|
||||||
|
GenerateRandomNumber(0);
|
||||||
|
for (i=2;i<argc;i++)
|
||||||
|
{
|
||||||
|
rand1_l = (unsigned long)atoi(argv[i]);
|
||||||
|
GenerateRandomNumber(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
GenerateRandomNumber(50);
|
||||||
|
//srand(rand_div);
|
||||||
|
|
||||||
|
//make_CPU_busy();
|
||||||
|
//CPU_Mhz=mhz(1);
|
||||||
|
init();
|
||||||
|
init_const_poly();
|
||||||
|
printf("============================\n");
|
||||||
|
//make_CPU_busy();
|
||||||
|
//run_poly_perf_test();
|
||||||
|
test_const_poly();
|
||||||
|
for (i = 0; peval_fun_tab[i].f != NULL; i++) {
|
||||||
|
//make_CPU_busy();
|
||||||
|
run_poly(peval_fun_tab[i].f, peval_fun_tab[i].descr, &cpe, &cfix);
|
||||||
|
if (i == 0)
|
||||||
|
printf(" <20><><EFBFBD>ߵ<EFBFBD>CPE<50>÷<EFBFBD> =========================== %.0f\n",
|
||||||
|
compute_score(cpe, cstandard[0].cref, cstandard[0].cbest));
|
||||||
|
if (i == 1)
|
||||||
|
printf(" <20><><EFBFBD>ߵ<EFBFBD>C(10)<29>÷<EFBFBD> ========================= %.0f\n",
|
||||||
|
compute_score(cfix, cstandard[1].cref, cstandard[1].cbest));
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Term-by-term polynomial evaluation used by run_poly_perf_test:
   sums a[k] * x^k for k = 0..degree. */
int poly_eval_perf_test(int *a, int degree, int x)
{
    int sum = 0;
    int power = 1;   /* x^k for the term about to be added */
    int k = 0;

    while (k <= degree) {
        sum += a[k] * power;
        power *= x;
        k++;
    }
    return sum;
}
|
||||||
|
|
||||||
|
double run_poly_perf_test(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
double fix_time=0;
|
||||||
|
pfun = poly_eval_perf_test;
|
||||||
|
for (i=0;i<MAX_ITER_COUNT;i++)
|
||||||
|
fix_time += measure_function(run_fun, FIXDEGREE);
|
||||||
|
fix_time = fix_time/MAX_ITER_COUNT;
|
||||||
|
printf("fix_time=%f\n",fix_time);
|
||||||
|
return fix_time;
|
||||||
|
}
|
||||||
BIN
perflab/╝╞╦у╗·╧╡═│╘н└э-╩╡╤щ6.pptx
Normal file
BIN
perflab/╝╞╦у╗·╧╡═│╘н└э-╩╡╤щ6.pptx
Normal file
Binary file not shown.
BIN
perflab/╩╡╤щ6.docx
Normal file
BIN
perflab/╩╡╤щ6.docx
Normal file
Binary file not shown.
BIN
profile/.cache/clangd/index/options.c.1B32B575634E7B86.idx
Normal file
BIN
profile/.cache/clangd/index/options.c.1B32B575634E7B86.idx
Normal file
Binary file not shown.
BIN
profile/.cache/clangd/index/options.h.CA19534CD060082F.idx
Normal file
BIN
profile/.cache/clangd/index/options.h.CA19534CD060082F.idx
Normal file
Binary file not shown.
BIN
profile/.cache/clangd/index/prog.c.269F0A6EE74D2C4B.idx
Normal file
BIN
profile/.cache/clangd/index/prog.c.269F0A6EE74D2C4B.idx
Normal file
Binary file not shown.
BIN
profile/Collage_20250411_085050.jpg
Normal file
BIN
profile/Collage_20250411_085050.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 655 KiB |
23
profile/Makefile
Normal file
23
profile/Makefile
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Makefile for word frequency analysis program
|
||||||
|
|
||||||
|
CC = icx
|
||||||
|
CFLAGS = -Ofast -pg
|
||||||
|
TARGET = prog
|
||||||
|
SOURCES = prog.c options.c
|
||||||
|
|
||||||
|
all: $(TARGET)
|
||||||
|
|
||||||
|
$(TARGET): $(SOURCES)
|
||||||
|
$(CC) $(CFLAGS) $(SOURCES) -o $(TARGET)
|
||||||
|
|
||||||
|
run: $(TARGET)
|
||||||
|
./$(TARGET) -file shakespeare.txt
|
||||||
|
|
||||||
|
profile: $(TARGET)
|
||||||
|
./$(TARGET) -file shakespeare.txt
|
||||||
|
gprof $(TARGET)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f $(TARGET) gmon.out
|
||||||
|
|
||||||
|
.PHONY: all run profile clean
|
||||||
6
profile/Readme.txt
Normal file
6
profile/Readme.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
gcc -Og -pg prog.c options.c -o prog
|
||||||
|
|
||||||
|
./prog -file shakespeare.txt
|
||||||
|
|
||||||
|
|
||||||
|
gprof prog
|
||||||
38
profile/compile_commands.json
Normal file
38
profile/compile_commands.json
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"arguments": [
|
||||||
|
"/usr/lib/ccache/bin/gcc",
|
||||||
|
"-c",
|
||||||
|
"-Og",
|
||||||
|
"-pg",
|
||||||
|
"-I",
|
||||||
|
"/usr/share/verilator/include",
|
||||||
|
"-I",
|
||||||
|
"/usr/share/verilator/include",
|
||||||
|
"-o",
|
||||||
|
"prog",
|
||||||
|
"prog.c"
|
||||||
|
],
|
||||||
|
"directory": "/home/gh0s7/project/csapp2025/profile",
|
||||||
|
"file": "/home/gh0s7/project/csapp2025/profile/prog.c",
|
||||||
|
"output": "/home/gh0s7/project/csapp2025/profile/prog"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"arguments": [
|
||||||
|
"/usr/lib/ccache/bin/gcc",
|
||||||
|
"-c",
|
||||||
|
"-Og",
|
||||||
|
"-pg",
|
||||||
|
"-I",
|
||||||
|
"/usr/share/verilator/include",
|
||||||
|
"-I",
|
||||||
|
"/usr/share/verilator/include",
|
||||||
|
"-o",
|
||||||
|
"prog",
|
||||||
|
"options.c"
|
||||||
|
],
|
||||||
|
"directory": "/home/gh0s7/project/csapp2025/profile",
|
||||||
|
"file": "/home/gh0s7/project/csapp2025/profile/options.c",
|
||||||
|
"output": "/home/gh0s7/project/csapp2025/profile/prog"
|
||||||
|
}
|
||||||
|
]
|
||||||
BIN
profile/gmon.out
Normal file
BIN
profile/gmon.out
Normal file
Binary file not shown.
226
profile/options.c
Normal file
226
profile/options.c
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
/*
|
||||||
|
* Code to process options from command line arguments.
|
||||||
|
* Option values can be integers,
|
||||||
|
* floats, or strings. Allow prefix of option name, as long as
|
||||||
|
* unambiguous. Also support printing of usage information.
|
||||||
|
*/
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "options.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum {INT_OPTION, DOUBLE_OPTION, STRING_OPTION} option_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char *name;
|
||||||
|
option_t type;
|
||||||
|
union {
|
||||||
|
int *i;
|
||||||
|
double *d;
|
||||||
|
char **s;
|
||||||
|
} valp;
|
||||||
|
} option_entry;
|
||||||
|
|
||||||
|
#define MAX_OPTION 100
|
||||||
|
static option_entry options[MAX_OPTION];
|
||||||
|
|
||||||
|
static int option_count = 0;
|
||||||
|
|
||||||
|
/* Determine length of string match: the number of leading positions at which
   s and t agree. When the strings are identical the terminating '\0' is
   counted too, so an exact match scores one more than a proper prefix. */
static int match_length(char *s, char *t)
{
    int matched = 0;

    for (;; s++, t++) {
        if (*s != *t)
            return matched;
        matched++;
        if (*s == '\0')
            return matched;
    }
}
|
||||||
|
|
||||||
|
void usage(char *prog)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
fprintf(stderr, "Usage: %s", prog);
|
||||||
|
for (j = 0; j < option_count; j++) {
|
||||||
|
switch(options[j].type) {
|
||||||
|
case INT_OPTION:
|
||||||
|
fprintf(stderr, " [-%s (%d)]", options[j].name, *(options[j].valp.i));
|
||||||
|
break;
|
||||||
|
case DOUBLE_OPTION:
|
||||||
|
fprintf(stderr, " [-%s (%.2f)]", options[j].name, *(options[j].valp.d));
|
||||||
|
break;
|
||||||
|
case STRING_OPTION:
|
||||||
|
fprintf(stderr, " [-%s (%s)]", options[j].name, *(options[j].valp.s));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Determine which option is best match. */
|
||||||
|
static int find_option(char *prog, char *name)
|
||||||
|
{
|
||||||
|
int sofar = -1;
|
||||||
|
int sofar_length = 0;
|
||||||
|
int i;
|
||||||
|
int ambiguous = 0;
|
||||||
|
for (i = 0; i < option_count; i++) {
|
||||||
|
int length = match_length(options[i].name, name);
|
||||||
|
if (length > sofar_length) {
|
||||||
|
sofar = i;
|
||||||
|
sofar_length = length;
|
||||||
|
ambiguous = 0;
|
||||||
|
} else if (length > 0 && length == sofar_length) {
|
||||||
|
ambiguous = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (sofar_length == 0) {
|
||||||
|
fprintf(stderr, "No match found to option '%s'\n", name);
|
||||||
|
usage(prog);
|
||||||
|
} else if (ambiguous) {
|
||||||
|
fprintf(stderr, "Ambiguous option: '%s'\n", name);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
return sofar;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void add_int_option(char *name, int *var)
|
||||||
|
{
|
||||||
|
options[option_count].name = name;
|
||||||
|
options[option_count].type = INT_OPTION;
|
||||||
|
options[option_count].valp.i = var;
|
||||||
|
option_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_double_option(char *name, double *var)
|
||||||
|
{
|
||||||
|
options[option_count].name = name;
|
||||||
|
options[option_count].type = DOUBLE_OPTION;
|
||||||
|
options[option_count].valp.d = var;
|
||||||
|
option_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_string_option(char *name, char **var)
|
||||||
|
{
|
||||||
|
options[option_count].name = name;
|
||||||
|
options[option_count].type = STRING_OPTION;
|
||||||
|
options[option_count].valp.s = var;
|
||||||
|
option_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
int parse_options(int argc, char *argv[], char *otherargs[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
int ocount = 0;
|
||||||
|
float f;
|
||||||
|
char *prog = argv[0];
|
||||||
|
for (i = 1; i < argc; i++) {
|
||||||
|
/* Look for options */
|
||||||
|
if (*argv[i] != '-') {
|
||||||
|
/* Must be another class of argument */
|
||||||
|
if (otherargs)
|
||||||
|
otherargs[ocount] = argv[i];
|
||||||
|
ocount++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
j = find_option(prog, argv[i]+1);
|
||||||
|
i++; /* Move to next argument */
|
||||||
|
if (i >= argc) {
|
||||||
|
fprintf(stderr, "Missing value for option %s\n", options[j].name);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
switch(options[j].type) {
|
||||||
|
case INT_OPTION:
|
||||||
|
if (sscanf(argv[i], "%d", options[j].valp.i) != 1) {
|
||||||
|
fprintf(stderr, "Can't parse argument '%s' as integer\n", argv[i]);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case DOUBLE_OPTION:
|
||||||
|
if (sscanf(argv[i], "%f", &f) != 1) {
|
||||||
|
fprintf(stderr, "Can't parse argument '%s' as double\n", argv[i]);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
*options[j].valp.d = f;
|
||||||
|
break;
|
||||||
|
case STRING_OPTION:
|
||||||
|
*(options[j].valp.s) = argv[i];
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(stderr,
|
||||||
|
"Internal error. Don't know option type %d\n", options[j].type);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ocount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Return a heap-allocated copy of s. The caller owns the copy.
   Exits with an error message if memory cannot be allocated. */
static char *strsave(char *s)
{
    size_t size = strlen(s) + 1;
    char *result = malloc(size);

    if (!result) {
        fprintf(stderr, "Couldn't allocate space for string, exiting\n");
        exit(1);
    }
    memcpy(result, s, size);
    return result;
}
|
||||||
|
|
||||||
|
void parse_option_file(char *prog, FILE *option_file)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
float f;
|
||||||
|
char name[50], val[50];
|
||||||
|
while (fscanf(option_file, "%s %s", name, val) == 2) {
|
||||||
|
if (name[0] != '-') {
|
||||||
|
fprintf(stderr, "Need '-' before option '%s'\n", name);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
j = find_option(prog, name+1);
|
||||||
|
switch(options[j].type) {
|
||||||
|
case INT_OPTION:
|
||||||
|
if (sscanf(val, "%d", options[j].valp.i) != 1) {
|
||||||
|
fprintf(stderr, "Can't parse argument '%s' as integer\n", val);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case DOUBLE_OPTION:
|
||||||
|
if (sscanf(val, "%f", &f) != 1) {
|
||||||
|
fprintf(stderr, "Can't parse argument '%s' as double\n", val);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
*options[j].valp.d = f;
|
||||||
|
break;
|
||||||
|
case STRING_OPTION:
|
||||||
|
*(options[j].valp.s) = strsave(val);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(stderr,
|
||||||
|
"Internal error. Don't know option type %d\n", options[j].type);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void show_options(FILE *outfile)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < option_count; i++) {
|
||||||
|
switch(options[i].type) {
|
||||||
|
case INT_OPTION:
|
||||||
|
fprintf(outfile, "%s\t%d\n", options[i].name, *(options[i].valp.i));
|
||||||
|
break;
|
||||||
|
case DOUBLE_OPTION:
|
||||||
|
fprintf(outfile, "%s\t%f\n", options[i].name, *(options[i].valp.d));
|
||||||
|
break;
|
||||||
|
case STRING_OPTION:
|
||||||
|
if (*options[i].valp.s)
|
||||||
|
fprintf(outfile, "%s\t%s\n", options[i].name, *(options[i].valp.s));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
28
profile/options.h
Normal file
28
profile/options.h
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
/*
|
||||||
|
* Code to process options from
|
||||||
|
* command line arguments. Arguments can be integers,
|
||||||
|
* floats, or strings. Allow prefix of argument name, as long as
|
||||||
|
* unambiguous. Also support printing of usage information.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
void add_int_option(char *name, int *var);
|
||||||
|
void add_double_option(char *name, double *var);
|
||||||
|
void add_string_option(char *name, char **var);
|
||||||
|
|
||||||
|
/* Print usage information and exit */
|
||||||
|
void usage(char *prog);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Parse option from arguments. Print error message & exit if any problems
|
||||||
|
* If otherargs nonnull, fill it with any nonoption arguments.
|
||||||
|
* Return number of such arguments.
|
||||||
|
*/
|
||||||
|
int parse_options(int argc, char *argv[], char *otherargs[]);
|
||||||
|
|
||||||
|
/* Parse options from file */
|
||||||
|
void parse_option_file(char *prog, FILE *option_file);
|
||||||
|
|
||||||
|
/* Show which options are in effect */
|
||||||
|
void show_options(FILE *outfile);
|
||||||
BIN
profile/prog
Executable file
BIN
profile/prog
Executable file
Binary file not shown.
435
profile/prog.c
Normal file
435
profile/prog.c
Normal file
@ -0,0 +1,435 @@
|
|||||||
|
/* Example of Program for Profiling */
|
||||||
|
/* Create a dictionary of strings */
|
||||||
|
|
||||||
|
#include "options.h"
|
||||||
|
#include "string.h"
|
||||||
|
#include <omp.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#define USESTRLEN 1
|
||||||
|
size_t Strlen(const char *s);
|
||||||
|
|
||||||
|
/* Some statistics */
|
||||||
|
int wcnt = 0; /* Number of words */
|
||||||
|
int ucnt = 0; /* Number of unique words */
|
||||||
|
int mcnt = 0; /* Count of Most frequent word */
|
||||||
|
int scnt = 0; /* Count of number of singletons */
|
||||||
|
char *mstring = ""; /* Most frequent word */
|
||||||
|
int llen = 0; /* Length of the longest word */
|
||||||
|
char *lstring = ""; /* A longest string */
|
||||||
|
int lcnt = 0; /* Number of words having maximum length */
|
||||||
|
|
||||||
|
/* Use function pointers to keep track of which options we are using */
|
||||||
|
typedef void (*lower_fun_t)(char *s);
|
||||||
|
|
||||||
|
/* Lower case conversion routines */
|
||||||
|
|
||||||
|
/* Convert string to lower case in place: slow.
   The string length is recomputed on every loop test; this is the
   deliberately slow variant that lower2 is compared against. */
void lower1(char *s) {
    int pos = 0;

    while (pos < Strlen(s)) {
        if (s[pos] >= 'A' && s[pos] <= 'Z')
            s[pos] += ('a' - 'A');
        pos++;
    }
}
|
||||||
|
|
||||||
|
/* Convert string to lower case in place: faster.
   The string length is computed once, before the loop. */
void lower2(char *s) {
    int len = Strlen(s);
    int pos = 0;

    while (pos < len) {
        if (s[pos] >= 'A' && s[pos] <= 'Z')
            s[pos] += ('a' - 'A');
        pos++;
    }
}
|
||||||
|
|
||||||
|
/* Set of lower case functions */
|
||||||
|
#define LCNT 2
|
||||||
|
lower_fun_t lower_fun_set[LCNT] = {lower1, lower2};
|
||||||
|
char *lower_fun_names[LCNT] = {"lower1", "lower2"};
|
||||||
|
|
||||||
|
/* Compute length of string.
   Forwards to the library strlen when USESTRLEN is defined; otherwise walks
   the string by hand up to the terminating '\0'. */
size_t Strlen(const char *s) {
#ifdef USESTRLEN
    return strlen(s);
#else
    const char *end = s;

    while (*end != '\0')
        end++;
    return (size_t)(end - s);
#endif
}
|
||||||
|
|
||||||
|
/* The hash table */
|
||||||
|
|
||||||
|
typedef struct HELE {
|
||||||
|
char *word;
|
||||||
|
int freq;
|
||||||
|
struct HELE *next;
|
||||||
|
} h_rec, *h_ptr;
|
||||||
|
|
||||||
|
/* The hash table */
|
||||||
|
h_ptr *htable;
|
||||||
|
int tsize;
|
||||||
|
|
||||||
|
static void new_table(int size) {
|
||||||
|
tsize = size;
|
||||||
|
htable = (h_ptr *)calloc(size, sizeof(h_ptr));
|
||||||
|
if (!htable) {
|
||||||
|
fprintf(stderr, "Couldn't allocate hash array, exiting\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static h_ptr new_ele(char *s) {
|
||||||
|
h_ptr result = (h_ptr)malloc(sizeof(h_rec));
|
||||||
|
int wlen = Strlen(s);
|
||||||
|
if (wlen > llen) {
|
||||||
|
lstring = s;
|
||||||
|
llen = wlen;
|
||||||
|
lcnt = 1;
|
||||||
|
} else if (wlen == llen)
|
||||||
|
lcnt++;
|
||||||
|
if (!result) {
|
||||||
|
fprintf(stderr, "Couldn't allocate hash element, exiting\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
result->word = s;
|
||||||
|
result->freq = 1;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Some hash functions */
|
||||||
|
|
||||||
|
/* Division hashing */
|
||||||
|
|
||||||
|
typedef unsigned (*hash_fun_t)(char *s);
|
||||||
|
|
||||||
|
unsigned h_mod(char *s) {
|
||||||
|
unsigned val = 0;
|
||||||
|
int c;
|
||||||
|
while ((c = *s++))
|
||||||
|
val = (val * 128 + c) % tsize;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Simply add characters together */
|
||||||
|
unsigned h_add(char *s) {
|
||||||
|
unsigned val = 0;
|
||||||
|
int c;
|
||||||
|
while ((c = *s++))
|
||||||
|
val += c;
|
||||||
|
return val % tsize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Combine with Xors */
|
||||||
|
unsigned h_xor(char *s) {
|
||||||
|
unsigned val = 0;
|
||||||
|
int c;
|
||||||
|
while ((c = *s++))
|
||||||
|
val = ((val ^ c) << 4) | ((val >> 28) & 0xF);
|
||||||
|
return val % tsize;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define HCNT 3
|
||||||
|
hash_fun_t hash_fun_set[HCNT] = {h_mod, h_add, h_xor};
|
||||||
|
char *hash_fun_names[HCNT] = {"h_mod", "h_add", "h_xor"};
|
||||||
|
|
||||||
|
/* Return a heap-allocated copy of s. The caller owns the copy.
   Exits with an error message if allocation fails. */
char *save_string(char *s) {
    char *copy = (char *)malloc(Strlen(s) + 1);

    if (copy == NULL) {
        fprintf(stderr, "Couldn't allocate space for string, exiting\n");
        exit(1);
    }
    return strcpy(copy, s);
}
|
||||||
|
|
||||||
|
/* Recursively find string in list. Add to end if not found */
|
||||||
|
h_ptr find_ele_rec(h_ptr ls, char *s) {
|
||||||
|
if (!ls) {
|
||||||
|
/* Come to end of list. Insert this one */
|
||||||
|
ucnt++;
|
||||||
|
return new_ele(save_string(s));
|
||||||
|
}
|
||||||
|
if (strcmp(s, ls->word) == 0) {
|
||||||
|
ls->freq++;
|
||||||
|
if (ls->freq > mcnt) {
|
||||||
|
mcnt = ls->freq;
|
||||||
|
mstring = ls->word;
|
||||||
|
}
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
ls->next = find_ele_rec(ls->next, s);
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Iteratively find string in list. Add to front if not found */
|
||||||
|
h_ptr find_ele_iter_f(h_ptr ls, char *s) {
|
||||||
|
h_ptr ele = ls;
|
||||||
|
for (ele = ls; ele; ele = ele->next) {
|
||||||
|
char *word = ele->word;
|
||||||
|
if (strcmp(s, word) == 0) {
|
||||||
|
int freq = ++ele->freq;
|
||||||
|
if (freq > mcnt) {
|
||||||
|
mcnt = freq;
|
||||||
|
mstring = word;
|
||||||
|
}
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ele = new_ele(save_string(s));
|
||||||
|
ucnt++;
|
||||||
|
ele->next = ls;
|
||||||
|
return ele;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Iteratively find string in list. Add to end if not found */
|
||||||
|
h_ptr find_ele_iter_r(h_ptr ls, char *s) {
|
||||||
|
h_ptr ele = ls;
|
||||||
|
h_ptr last = NULL;
|
||||||
|
#pragma omp parallel shared(ls, s, last)
|
||||||
|
for (ele = ls; ele; ele = ele->next) {
|
||||||
|
char *word = ele->word;
|
||||||
|
if (strcmp(s, word) == 0) {
|
||||||
|
int freq = ++ele->freq;
|
||||||
|
if (freq > mcnt) {
|
||||||
|
mcnt = freq;
|
||||||
|
mstring = word;
|
||||||
|
}
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
last = ele;
|
||||||
|
}
|
||||||
|
ele = new_ele(save_string(s));
|
||||||
|
ucnt++;
|
||||||
|
ele->next = NULL;
|
||||||
|
if (last) {
|
||||||
|
last->next = ele;
|
||||||
|
return ls;
|
||||||
|
} else
|
||||||
|
return ele;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef h_ptr (*find_ele_fun_t)(h_ptr, char *);
|
||||||
|
|
||||||
|
#define FCNT 3
|
||||||
|
find_ele_fun_t find_ele_fun_set[FCNT] = {find_ele_iter_r, find_ele_iter_f,
|
||||||
|
find_ele_rec};
|
||||||
|
char *find_ele_fun_names[FCNT] = {"find_ele_iter_r", "find_ele_iter_f",
|
||||||
|
"find_ele_rec"};
|
||||||
|
|
||||||
|
/* Comparision function for sorting */
|
||||||
|
int compare_ele(const void *vele1, const void *vele2) {
|
||||||
|
h_ptr ele1 = *(h_ptr *)vele1;
|
||||||
|
h_ptr ele2 = *(h_ptr *)vele2;
|
||||||
|
return ele2->freq - ele1->freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sort hash table elements by frequency */
|
||||||
|
h_ptr sort_words(int quick) {
|
||||||
|
h_ptr ls = NULL;
|
||||||
|
h_ptr ele;
|
||||||
|
h_ptr *array = calloc(ucnt, sizeof(h_ptr));
|
||||||
|
int i, j;
|
||||||
|
int cnt = 0;
|
||||||
|
scnt = 0; /* Count singletons */
|
||||||
|
for (i = 0; i < tsize; i++)
|
||||||
|
for (ele = htable[i]; ele; ele = ele->next) {
|
||||||
|
if (ele->freq == 1)
|
||||||
|
scnt++;
|
||||||
|
if (quick)
|
||||||
|
array[cnt] = ele;
|
||||||
|
else {
|
||||||
|
for (j = cnt; j > 0 && ele->freq > array[j - 1]->freq; j--)
|
||||||
|
array[j] = array[j - 1];
|
||||||
|
array[j] = ele;
|
||||||
|
}
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
if (quick) {
|
||||||
|
qsort((void *)array, cnt, sizeof(h_ptr), compare_ele);
|
||||||
|
}
|
||||||
|
ls = array[0];
|
||||||
|
for (j = 0; j < cnt - 1; j++)
|
||||||
|
array[j]->next = array[j + 1];
|
||||||
|
array[cnt - 1]->next = NULL;
|
||||||
|
free((void *)array);
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
|
||||||
|
void insert_string(char *s, hash_fun_t hash_fun, lower_fun_t lower_fun,
|
||||||
|
find_ele_fun_t find_ele_fun) {
|
||||||
|
int index;
|
||||||
|
lower_fun(s);
|
||||||
|
index = hash_fun(s);
|
||||||
|
htable[index] = find_ele_fun(htable[index], s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Extract word from file */
|
||||||
|
#define BSIZE 1024
|
||||||
|
char buf[BSIZE];
|
||||||
|
int bufvalid = 0;
|
||||||
|
FILE *infile;
|
||||||
|
|
||||||
|
void init_token(FILE *in) {
|
||||||
|
bufvalid = 0;
|
||||||
|
infile = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Added some non-ASCII characters encountered in European parliament corpus */
|
||||||
|
/* Delimiter set handed to strtok by get_word.
   In this literal, "\0xc2\0xa0" is parsed as a NUL escape followed by the
   ordinary characters x, c, 2 (and likewise for a0), so the string that
   strtok sees ends right after the apostrophe.
   NOTE(review): "\xc2\xa0" (the UTF-8 bytes of a no-break space) was
   presumably intended.  Confirm before changing: strtok works per byte, and
   0xA0 also occurs as the second byte of other UTF-8 characters. */
static char *skipchar = " \t\n\r.,:;/<>()[]{}?!\"-'\0xc2\0xa0";
|
||||||
|
|
||||||
|
/* Keep getting tokens. Return NULL when no more */
|
||||||
|
char *get_word() {
|
||||||
|
char *s = NULL;
|
||||||
|
while (1) {
|
||||||
|
if (bufvalid) {
|
||||||
|
s = strtok(NULL, skipchar);
|
||||||
|
if (s)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!fgets(buf, BSIZE, infile))
|
||||||
|
return NULL;
|
||||||
|
bufvalid = 1;
|
||||||
|
s = strtok(buf, skipchar);
|
||||||
|
if (s)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
wcnt++;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MAXNG 10
|
||||||
|
|
||||||
|
char *get_token(int ngram) {
|
||||||
|
/* Buffer of last ngram-1 tokens */
|
||||||
|
static char token_buf[MAXNG][BSIZE];
|
||||||
|
static int first = 1;
|
||||||
|
static int bindex = 0; /* In which buffer to insert next token */
|
||||||
|
static char sbuf[BSIZE];
|
||||||
|
char *nextpos = sbuf;
|
||||||
|
int i;
|
||||||
|
int index;
|
||||||
|
|
||||||
|
if (ngram == 1)
|
||||||
|
return get_word();
|
||||||
|
if (first) {
|
||||||
|
/* Get ngram-1 tokens */
|
||||||
|
while (bindex < ngram - 1) {
|
||||||
|
char *word = get_word();
|
||||||
|
if (!word) {
|
||||||
|
return NULL; /* Document doesn't have enough tokens */
|
||||||
|
}
|
||||||
|
strcpy(token_buf[bindex++], word);
|
||||||
|
}
|
||||||
|
first = 0;
|
||||||
|
}
|
||||||
|
/* Get new token */
|
||||||
|
char *word = get_word();
|
||||||
|
if (!word) {
|
||||||
|
return NULL; /* No more ngrams */
|
||||||
|
}
|
||||||
|
strcpy(token_buf[bindex++], word);
|
||||||
|
if (bindex >= MAXNG)
|
||||||
|
bindex = 0;
|
||||||
|
/* Generate string of last ngram-1 tokens */
|
||||||
|
index = bindex - ngram;
|
||||||
|
if (index < 0)
|
||||||
|
index += MAXNG;
|
||||||
|
for (i = 0; i < ngram; i++) {
|
||||||
|
if (i != 0)
|
||||||
|
*nextpos++ = ' ';
|
||||||
|
word = token_buf[index];
|
||||||
|
strcpy(nextpos, word);
|
||||||
|
nextpos += Strlen(word);
|
||||||
|
index++;
|
||||||
|
if (index >= MAXNG)
|
||||||
|
index = 0;
|
||||||
|
}
|
||||||
|
#if 0
|
||||||
|
printf("Next n-gram = '%s'\n", sbuf);
|
||||||
|
#endif
|
||||||
|
return sbuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find statistics of word frequency in document */
|
||||||
|
void word_freq(FILE *src, int verbose, int ngram, int size, int quick,
|
||||||
|
hash_fun_t hash_fun, lower_fun_t lower_fun,
|
||||||
|
find_ele_fun_t find_ele_fun) {
|
||||||
|
char *s;
|
||||||
|
h_ptr ls;
|
||||||
|
|
||||||
|
init_token(src);
|
||||||
|
new_table(size);
|
||||||
|
|
||||||
|
while ((s = get_token(ngram))) {
|
||||||
|
insert_string(s, hash_fun, lower_fun, find_ele_fun);
|
||||||
|
}
|
||||||
|
if (verbose > 0) {
|
||||||
|
ls = sort_words(quick);
|
||||||
|
while (ls && verbose--) {
|
||||||
|
printf("%d\t'%s'\n", ls->freq, ls->word);
|
||||||
|
ls = ls->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("%d n-grams, %d unique, %d singletons. Most common (%d) = '%s'. "
|
||||||
|
"Longest (%d have length %d) = '%s'\n",
|
||||||
|
wcnt, ucnt, scnt, mcnt, mstring, lcnt, llen, lstring);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
int verbose = 1;
|
||||||
|
int size = 1024;
|
||||||
|
int hash_fun_index = 0;
|
||||||
|
int lower_fun_index = 0;
|
||||||
|
int find_fun_index = 0;
|
||||||
|
int ngram = 1;
|
||||||
|
int quick = 0;
|
||||||
|
char *fname = NULL;
|
||||||
|
FILE *infile = stdin;
|
||||||
|
add_int_option("verbose", &verbose);
|
||||||
|
add_int_option("size", &size);
|
||||||
|
add_int_option("hash", &hash_fun_index);
|
||||||
|
add_int_option("lower", &lower_fun_index);
|
||||||
|
add_int_option("find", &find_fun_index);
|
||||||
|
add_int_option("ngram", &ngram);
|
||||||
|
add_int_option("quicksort", &quick);
|
||||||
|
add_string_option("file", &fname);
|
||||||
|
parse_options(argc, argv, NULL);
|
||||||
|
show_options(stdout);
|
||||||
|
printf("N-gram size %d\n", ngram);
|
||||||
|
printf("Lower case function %s\n", lower_fun_names[lower_fun_index]);
|
||||||
|
printf("Hash function %s\n", hash_fun_names[hash_fun_index]);
|
||||||
|
printf("Find element function %s\n", find_ele_fun_names[find_fun_index]);
|
||||||
|
if ((unsigned)hash_fun_index >= HCNT) {
|
||||||
|
fprintf(stderr, "Invalid hash function index %d\n", hash_fun_index);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if ((unsigned)lower_fun_index >= LCNT) {
|
||||||
|
fprintf(stderr, "Invalid lower function index %d\n", lower_fun_index);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if ((unsigned)find_fun_index >= FCNT) {
|
||||||
|
fprintf(stderr, "Invalid find function index %d\n", find_fun_index);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (fname) {
|
||||||
|
infile = fopen(fname, "r");
|
||||||
|
if (!infile) {
|
||||||
|
fprintf(stderr, "Couldn't open file '%s'\n", fname);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
word_freq(infile, verbose, ngram, size, quick, hash_fun_set[hash_fun_index],
|
||||||
|
lower_fun_set[lower_fun_index], find_ele_fun_set[find_fun_index]);
|
||||||
|
printf("Total time = %f seconds\n", (double)clock() / CLOCKS_PER_SEC);
|
||||||
|
fclose(infile);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
172948
profile/shakespeare.txt
Normal file
172948
profile/shakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
BIN
resources/profile.tgz
Normal file
BIN
resources/profile.tgz
Normal file
Binary file not shown.
6
resources/profile/Readme.txt
Normal file
6
resources/profile/Readme.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
gcc -Og -pg prog.c options.c -o prog
|
||||||
|
|
||||||
|
./prog -file shakespeare.txt
|
||||||
|
|
||||||
|
|
||||||
|
gprof prog
|
||||||
226
resources/profile/options.c
Normal file
226
resources/profile/options.c
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
/*
|
||||||
|
* Code to process options from command line arguments.
|
||||||
|
* Option values can be integers,
|
||||||
|
* floats, or strings. Allow prefix of option name, as long as
|
||||||
|
* unambiguous. Also support printing of usage information.
|
||||||
|
*/
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "options.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum {INT_OPTION, DOUBLE_OPTION, STRING_OPTION} option_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char *name;
|
||||||
|
option_t type;
|
||||||
|
union {
|
||||||
|
int *i;
|
||||||
|
double *d;
|
||||||
|
char **s;
|
||||||
|
} valp;
|
||||||
|
} option_entry;
|
||||||
|
|
||||||
|
#define MAX_OPTION 100
|
||||||
|
static option_entry options[MAX_OPTION];
|
||||||
|
|
||||||
|
static int option_count = 0;
|
||||||
|
|
||||||
|
/* Count the leading characters on which s and t agree.  The terminating
   NUL is counted too when both strings end together, so an exact match
   scores one higher than a proper prefix of the same length. */
static int match_length(char *s, char *t)
{
    int matched = 0;
    for (;; s++, t++) {
        if (*s != *t)
            return matched;
        matched++;
        if (*s == '\0')
            return matched;
    }
}
|
||||||
|
|
||||||
|
void usage(char *prog)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
fprintf(stderr, "Usage: %s", prog);
|
||||||
|
for (j = 0; j < option_count; j++) {
|
||||||
|
switch(options[j].type) {
|
||||||
|
case INT_OPTION:
|
||||||
|
fprintf(stderr, " [-%s (%d)]", options[j].name, *(options[j].valp.i));
|
||||||
|
break;
|
||||||
|
case DOUBLE_OPTION:
|
||||||
|
fprintf(stderr, " [-%s (%.2f)]", options[j].name, *(options[j].valp.d));
|
||||||
|
break;
|
||||||
|
case STRING_OPTION:
|
||||||
|
fprintf(stderr, " [-%s (%s)]", options[j].name, *(options[j].valp.s));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Determine which option is best match. */
|
||||||
|
static int find_option(char *prog, char *name)
|
||||||
|
{
|
||||||
|
int sofar = -1;
|
||||||
|
int sofar_length = 0;
|
||||||
|
int i;
|
||||||
|
int ambiguous = 0;
|
||||||
|
for (i = 0; i < option_count; i++) {
|
||||||
|
int length = match_length(options[i].name, name);
|
||||||
|
if (length > sofar_length) {
|
||||||
|
sofar = i;
|
||||||
|
sofar_length = length;
|
||||||
|
ambiguous = 0;
|
||||||
|
} else if (length > 0 && length == sofar_length) {
|
||||||
|
ambiguous = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (sofar_length == 0) {
|
||||||
|
fprintf(stderr, "No match found to option '%s'\n", name);
|
||||||
|
usage(prog);
|
||||||
|
} else if (ambiguous) {
|
||||||
|
fprintf(stderr, "Ambiguous option: '%s'\n", name);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
return sofar;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void add_int_option(char *name, int *var)
|
||||||
|
{
|
||||||
|
options[option_count].name = name;
|
||||||
|
options[option_count].type = INT_OPTION;
|
||||||
|
options[option_count].valp.i = var;
|
||||||
|
option_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_double_option(char *name, double *var)
|
||||||
|
{
|
||||||
|
options[option_count].name = name;
|
||||||
|
options[option_count].type = DOUBLE_OPTION;
|
||||||
|
options[option_count].valp.d = var;
|
||||||
|
option_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_string_option(char *name, char **var)
|
||||||
|
{
|
||||||
|
options[option_count].name = name;
|
||||||
|
options[option_count].type = STRING_OPTION;
|
||||||
|
options[option_count].valp.s = var;
|
||||||
|
option_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
int parse_options(int argc, char *argv[], char *otherargs[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
int ocount = 0;
|
||||||
|
float f;
|
||||||
|
char *prog = argv[0];
|
||||||
|
for (i = 1; i < argc; i++) {
|
||||||
|
/* Look for options */
|
||||||
|
if (*argv[i] != '-') {
|
||||||
|
/* Must be another class of argument */
|
||||||
|
if (otherargs)
|
||||||
|
otherargs[ocount] = argv[i];
|
||||||
|
ocount++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
j = find_option(prog, argv[i]+1);
|
||||||
|
i++; /* Move to next argument */
|
||||||
|
if (i >= argc) {
|
||||||
|
fprintf(stderr, "Missing value for option %s\n", options[j].name);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
switch(options[j].type) {
|
||||||
|
case INT_OPTION:
|
||||||
|
if (sscanf(argv[i], "%d", options[j].valp.i) != 1) {
|
||||||
|
fprintf(stderr, "Can't parse argument '%s' as integer\n", argv[i]);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case DOUBLE_OPTION:
|
||||||
|
if (sscanf(argv[i], "%f", &f) != 1) {
|
||||||
|
fprintf(stderr, "Can't parse argument '%s' as double\n", argv[i]);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
*options[j].valp.d = f;
|
||||||
|
break;
|
||||||
|
case STRING_OPTION:
|
||||||
|
*(options[j].valp.s) = argv[i];
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(stderr,
|
||||||
|
"Internal error. Don't know option type %d\n", options[j].type);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ocount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Return a heap-allocated copy of s.  Prints a message and exits with
   status 1 if the allocation fails, instead of copying through NULL. */
static char *strsave(char *s)
{
    char *result = malloc(strlen(s) + 1);
    if (!result) {
        fprintf(stderr, "Couldn't allocate space for string, exiting\n");
        exit(1);
    }
    strcpy(result, s);
    return result;
}
|
||||||
|
|
||||||
|
void parse_option_file(char *prog, FILE *option_file)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
float f;
|
||||||
|
char name[50], val[50];
|
||||||
|
while (fscanf(option_file, "%s %s", name, val) == 2) {
|
||||||
|
if (name[0] != '-') {
|
||||||
|
fprintf(stderr, "Need '-' before option '%s'\n", name);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
j = find_option(prog, name+1);
|
||||||
|
switch(options[j].type) {
|
||||||
|
case INT_OPTION:
|
||||||
|
if (sscanf(val, "%d", options[j].valp.i) != 1) {
|
||||||
|
fprintf(stderr, "Can't parse argument '%s' as integer\n", val);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case DOUBLE_OPTION:
|
||||||
|
if (sscanf(val, "%f", &f) != 1) {
|
||||||
|
fprintf(stderr, "Can't parse argument '%s' as double\n", val);
|
||||||
|
usage(prog);
|
||||||
|
}
|
||||||
|
*options[j].valp.d = f;
|
||||||
|
break;
|
||||||
|
case STRING_OPTION:
|
||||||
|
*(options[j].valp.s) = strsave(val);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(stderr,
|
||||||
|
"Internal error. Don't know option type %d\n", options[j].type);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void show_options(FILE *outfile)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < option_count; i++) {
|
||||||
|
switch(options[i].type) {
|
||||||
|
case INT_OPTION:
|
||||||
|
fprintf(outfile, "%s\t%d\n", options[i].name, *(options[i].valp.i));
|
||||||
|
break;
|
||||||
|
case DOUBLE_OPTION:
|
||||||
|
fprintf(outfile, "%s\t%f\n", options[i].name, *(options[i].valp.d));
|
||||||
|
break;
|
||||||
|
case STRING_OPTION:
|
||||||
|
if (*options[i].valp.s)
|
||||||
|
fprintf(outfile, "%s\t%s\n", options[i].name, *(options[i].valp.s));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
29
resources/profile/options.h
Normal file
29
resources/profile/options.h
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
* Code to process options from
|
||||||
|
* command line arguments. Arguments can be integers,
|
||||||
|
* floats, or strings. Allow prefix of argument name, as long as
|
||||||
|
 * unambiguous.  Also support printing of usage information.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef OPTIONS_H
#define OPTIONS_H

#include <stdio.h> /* FILE, used by the prototypes below */

/*
 * Register an option of the given type.  name is the option name matched
 * on the command line (after the leading '-'); var points at the variable
 * that holds the current value and receives the parsed one.
 */
void add_int_option(char *name, int *var);
void add_double_option(char *name, double *var);
void add_string_option(char *name, char **var);

/* Print usage information and exit */
void usage(char *prog);

/*
 * Parse option from arguments.  Print error message & exit if any problems
 * If otherargs nonnull, fill it with any nonoption arguments.
 * Return number of such arguments.
 */
int parse_options(int argc, char *argv[], char *otherargs[]);

/* Parse options from file */
void parse_option_file(char *prog, FILE *option_file);

/* Show which options are in effect */
void show_options(FILE *outfile);

#endif /* OPTIONS_H */
|
||||||
|
|
||||||
|
|
||||||
458
resources/profile/prog.c
Normal file
458
resources/profile/prog.c
Normal file
@ -0,0 +1,458 @@
|
|||||||
|
/* Example of Program for Profiling */
|
||||||
|
/* Create a dictionary of strings */
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "options.h"
|
||||||
|
#include "string.h"
|
||||||
|
|
||||||
|
#define USESTRLEN 1
|
||||||
|
size_t Strlen(const char *s);
|
||||||
|
|
||||||
|
/* Some statistics */
|
||||||
|
int wcnt = 0; /* Number of words */
|
||||||
|
int ucnt = 0; /* Number of unique words */
|
||||||
|
int mcnt = 0; /* Count of Most frequent word */
|
||||||
|
int scnt = 0; /* Count of number of singletons */
|
||||||
|
char *mstring = ""; /* Most frequent word */
|
||||||
|
int llen = 0; /* Length of the longest word */
|
||||||
|
char *lstring = ""; /* A longest string */
|
||||||
|
int lcnt = 0; /* Number of words having maximum length */
|
||||||
|
|
||||||
|
/* Use function pointers to keep track of which options we are using */
|
||||||
|
typedef void (*lower_fun_t)(char *s);
|
||||||
|
|
||||||
|
/* Lower case conversion routines */
|
||||||
|
|
||||||
|
/* Convert string to lower case in place: slow variant.
   The string length is re-measured with Strlen on every loop test. */
void lower1(char *s)
{
    int pos = 0;

    while (pos < Strlen(s)) {
        char ch = s[pos];
        if ('A' <= ch && ch <= 'Z')
            s[pos] = ch - ('A' - 'a');
        pos++;
    }
}
|
||||||
|
|
||||||
|
/* Convert string to lower case in place: faster variant.
   Strlen is called exactly once, before the loop. */
void lower2(char *s)
{
    int len = Strlen(s);
    int pos;

    /* Each character is handled independently, so walk from the back */
    for (pos = len - 1; pos >= 0; pos--) {
        char ch = s[pos];
        if ('A' <= ch && ch <= 'Z')
            s[pos] = ch - ('A' - 'a');
    }
}
|
||||||
|
|
||||||
|
/* Set of lower case functions */
|
||||||
|
#define LCNT 2
|
||||||
|
lower_fun_t lower_fun_set[LCNT] = {lower1, lower2};
|
||||||
|
char *lower_fun_names[LCNT] = {"lower1", "lower2"};
|
||||||
|
|
||||||
|
/* Length of the NUL-terminated string s.
   Delegates to the library strlen when USESTRLEN is defined; otherwise
   scans for the terminator by hand. */
size_t Strlen(const char *s)
{
#ifdef USESTRLEN
    return strlen(s);
#else
    const char *end = s;
    while (*end != '\0')
        end++;
    return (size_t)(end - s);
#endif
}
|
||||||
|
|
||||||
|
/* The hash table */
|
||||||
|
|
||||||
|
typedef struct HELE {
|
||||||
|
char *word;
|
||||||
|
int freq;
|
||||||
|
struct HELE *next;
|
||||||
|
} h_rec, *h_ptr;
|
||||||
|
|
||||||
|
/* The hash table */
|
||||||
|
h_ptr *htable;
|
||||||
|
int tsize;
|
||||||
|
|
||||||
|
static void new_table(int size)
|
||||||
|
{
|
||||||
|
tsize = size;
|
||||||
|
htable = (h_ptr *) calloc(size, sizeof(h_ptr));
|
||||||
|
if (!htable) {
|
||||||
|
fprintf(stderr, "Couldn't allocate hash array, exiting\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static h_ptr new_ele(char *s)
|
||||||
|
{
|
||||||
|
h_ptr result = (h_ptr) malloc(sizeof(h_rec));
|
||||||
|
int wlen = Strlen(s);
|
||||||
|
if (wlen > llen) {
|
||||||
|
lstring = s;
|
||||||
|
llen = wlen;
|
||||||
|
lcnt = 1;
|
||||||
|
} else if (wlen == llen)
|
||||||
|
lcnt++;
|
||||||
|
if (!result) {
|
||||||
|
fprintf(stderr, "Couldn't allocate hash element, exiting\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
result->word = s;
|
||||||
|
result->freq = 1;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Some hash functions */
|
||||||
|
|
||||||
|
/* Division hashing */
|
||||||
|
|
||||||
|
typedef unsigned (*hash_fun_t)(char *s);
|
||||||
|
|
||||||
|
unsigned h_mod(char *s)
|
||||||
|
{
|
||||||
|
unsigned val = 0;
|
||||||
|
int c;
|
||||||
|
while ((c = *s++))
|
||||||
|
val = (val * 128 + c) % tsize;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Simply add characters together */
|
||||||
|
unsigned h_add(char *s)
|
||||||
|
{
|
||||||
|
unsigned val = 0;
|
||||||
|
int c;
|
||||||
|
while ((c = *s++))
|
||||||
|
val += c;
|
||||||
|
return val % tsize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Combine with Xors */
|
||||||
|
unsigned h_xor(char *s)
|
||||||
|
{
|
||||||
|
unsigned val = 0;
|
||||||
|
int c;
|
||||||
|
while ((c = *s++))
|
||||||
|
val = ((val ^ c)<<4) | ((val >> 28) & 0xF);
|
||||||
|
return val % tsize;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define HCNT 3
|
||||||
|
hash_fun_t hash_fun_set[HCNT] = {h_mod, h_add, h_xor};
|
||||||
|
char *hash_fun_names[HCNT] = {"h_mod", "h_add", "h_xor"};
|
||||||
|
|
||||||
|
/* Return a heap-allocated copy of s.  Prints a message and exits with
   status 1 if the allocation fails. */
char *save_string(char *s)
{
    char *copy = (char *) malloc(Strlen(s)+1);
    if (copy == NULL) {
        fprintf(stderr, "Couldn't allocate space for string, exiting\n");
        exit(1);
    }
    /* strcpy returns its destination, i.e. the new copy */
    return strcpy(copy, s);
}
|
||||||
|
|
||||||
|
/* Recursively find string in list. Add to end if not found */
|
||||||
|
h_ptr find_ele_rec(h_ptr ls, char *s)
|
||||||
|
{
|
||||||
|
if (!ls) {
|
||||||
|
/* Come to end of list. Insert this one */
|
||||||
|
ucnt++;
|
||||||
|
return new_ele(save_string(s));
|
||||||
|
}
|
||||||
|
if (strcmp(s,ls->word) == 0) {
|
||||||
|
ls->freq++;
|
||||||
|
if (ls->freq > mcnt) {
|
||||||
|
mcnt = ls->freq;
|
||||||
|
mstring = ls->word;
|
||||||
|
}
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
ls->next = find_ele_rec(ls->next, s);
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Iteratively find string in list. Add to front if not found */
|
||||||
|
h_ptr find_ele_iter_f(h_ptr ls, char *s)
|
||||||
|
{
|
||||||
|
h_ptr ele = ls;
|
||||||
|
for (ele = ls; ele; ele = ele->next) {
|
||||||
|
char *word = ele->word;
|
||||||
|
if (strcmp(s, word) == 0) {
|
||||||
|
int freq = ++ele->freq;
|
||||||
|
if (freq > mcnt) {
|
||||||
|
mcnt = freq;
|
||||||
|
mstring = word;
|
||||||
|
}
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ele = new_ele(save_string(s));
|
||||||
|
ucnt++;
|
||||||
|
ele->next = ls;
|
||||||
|
return ele;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Iteratively find string in list. Add to end if not found */
|
||||||
|
h_ptr find_ele_iter_r(h_ptr ls, char *s)
|
||||||
|
{
|
||||||
|
h_ptr ele = ls;
|
||||||
|
h_ptr last = NULL;
|
||||||
|
for (ele = ls; ele; ele = ele->next) {
|
||||||
|
char *word = ele->word;
|
||||||
|
if (strcmp(s, word) == 0) {
|
||||||
|
int freq = ++ele->freq;
|
||||||
|
if (freq > mcnt) {
|
||||||
|
mcnt = freq;
|
||||||
|
mstring = word;
|
||||||
|
}
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
last = ele;
|
||||||
|
}
|
||||||
|
ele = new_ele(save_string(s));
|
||||||
|
ucnt++;
|
||||||
|
ele->next = NULL;
|
||||||
|
if (last) {
|
||||||
|
last->next = ele;
|
||||||
|
return ls;
|
||||||
|
} else
|
||||||
|
return ele;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
typedef h_ptr (*find_ele_fun_t)(h_ptr, char *);
|
||||||
|
|
||||||
|
#define FCNT 3
|
||||||
|
find_ele_fun_t find_ele_fun_set[FCNT] =
|
||||||
|
{find_ele_rec, find_ele_iter_f, find_ele_iter_r};
|
||||||
|
char *find_ele_fun_names[FCNT] =
|
||||||
|
{"find_ele_rec", "find_ele_iter_f", "find_ele_iter_r"};
|
||||||
|
|
||||||
|
/* Comparision function for sorting */
|
||||||
|
int compare_ele(const void *vele1, const void *vele2) {
|
||||||
|
h_ptr ele1 = *(h_ptr *) vele1;
|
||||||
|
h_ptr ele2 = *(h_ptr *) vele2;
|
||||||
|
return ele2->freq - ele1->freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Sort hash table elements by frequency */
|
||||||
|
h_ptr sort_words(int quick)
|
||||||
|
{
|
||||||
|
h_ptr ls = NULL;
|
||||||
|
h_ptr ele;
|
||||||
|
h_ptr *array = calloc(ucnt, sizeof(h_ptr));
|
||||||
|
int i, j;
|
||||||
|
int cnt = 0;
|
||||||
|
scnt = 0; /* Count singletons */
|
||||||
|
for (i = 0; i < tsize; i++)
|
||||||
|
for (ele = htable[i]; ele; ele = ele->next) {
|
||||||
|
if (ele->freq == 1)
|
||||||
|
scnt++;
|
||||||
|
if (quick)
|
||||||
|
array[cnt] = ele;
|
||||||
|
else {
|
||||||
|
for (j = cnt; j > 0 && ele->freq > array[j-1]->freq; j--)
|
||||||
|
array[j] = array[j-1];
|
||||||
|
array[j] = ele;
|
||||||
|
}
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
if (quick) {
|
||||||
|
qsort((void *) array, cnt, sizeof(h_ptr), compare_ele);
|
||||||
|
}
|
||||||
|
ls = array[0];
|
||||||
|
for (j = 0; j < cnt-1; j++)
|
||||||
|
array[j]->next = array[j+1];
|
||||||
|
array[cnt-1]->next = NULL;
|
||||||
|
free ((void *) array);
|
||||||
|
return ls;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void insert_string(char *s,
|
||||||
|
hash_fun_t hash_fun, lower_fun_t lower_fun,
|
||||||
|
find_ele_fun_t find_ele_fun)
|
||||||
|
{
|
||||||
|
int index;
|
||||||
|
lower_fun(s);
|
||||||
|
index = hash_fun(s);
|
||||||
|
htable[index] = find_ele_fun(htable[index], s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Extract word from file */
|
||||||
|
#define BSIZE 1024
|
||||||
|
char buf[BSIZE];
|
||||||
|
int bufvalid = 0;
|
||||||
|
FILE *infile;
|
||||||
|
|
||||||
|
void init_token(FILE *in) {
|
||||||
|
bufvalid = 0;
|
||||||
|
infile = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Added some non-ASCII characters encountered in European parliament corpus */
|
||||||
|
/* Delimiter set handed to strtok by get_word.
   In this literal, "\0xc2\0xa0" is parsed as a NUL escape followed by the
   ordinary characters x, c, 2 (and likewise for a0), so the string that
   strtok sees ends right after the apostrophe.
   NOTE(review): "\xc2\xa0" (the UTF-8 bytes of a no-break space) was
   presumably intended.  Confirm before changing: strtok works per byte, and
   0xA0 also occurs as the second byte of other UTF-8 characters. */
static char *skipchar = " \t\n\r.,:;/<>()[]{}?!\"-'\0xc2\0xa0";
|
||||||
|
|
||||||
|
/* Keep getting tokens. Return NULL when no more */
|
||||||
|
char *get_word()
|
||||||
|
{
|
||||||
|
char *s = NULL;
|
||||||
|
while (1) {
|
||||||
|
if (bufvalid) {
|
||||||
|
s = strtok(NULL, skipchar);
|
||||||
|
if (s)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!fgets(buf, BSIZE, infile))
|
||||||
|
return NULL;
|
||||||
|
bufvalid = 1;
|
||||||
|
s = strtok(buf, skipchar);
|
||||||
|
if (s)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
wcnt++;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define MAXNG 10
|
||||||
|
|
||||||
|
char *get_token(int ngram)
|
||||||
|
{
|
||||||
|
/* Buffer of last ngram-1 tokens */
|
||||||
|
static char token_buf[MAXNG][BSIZE];
|
||||||
|
static int first = 1;
|
||||||
|
static int bindex = 0; /* In which buffer to insert next token */
|
||||||
|
static char sbuf[BSIZE];
|
||||||
|
char *nextpos = sbuf;
|
||||||
|
int i; int index;
|
||||||
|
|
||||||
|
if (ngram == 1)
|
||||||
|
return get_word();
|
||||||
|
if (first) {
|
||||||
|
/* Get ngram-1 tokens */
|
||||||
|
while (bindex < ngram-1) {
|
||||||
|
char *word = get_word();
|
||||||
|
if (!word) {
|
||||||
|
return NULL; /* Document doesn't have enough tokens */
|
||||||
|
}
|
||||||
|
strcpy(token_buf[bindex++], word);
|
||||||
|
}
|
||||||
|
first = 0;
|
||||||
|
}
|
||||||
|
/* Get new token */
|
||||||
|
char *word = get_word();
|
||||||
|
if (!word) {
|
||||||
|
return NULL; /* No more ngrams */
|
||||||
|
}
|
||||||
|
strcpy (token_buf[bindex++], word);
|
||||||
|
if (bindex >= MAXNG)
|
||||||
|
bindex = 0;
|
||||||
|
/* Generate string of last ngram-1 tokens */
|
||||||
|
index = bindex - ngram;
|
||||||
|
if (index < 0)
|
||||||
|
index += MAXNG;
|
||||||
|
for (i = 0; i < ngram; i++) {
|
||||||
|
if (i != 0)
|
||||||
|
*nextpos++ = ' ';
|
||||||
|
word = token_buf[index];
|
||||||
|
strcpy(nextpos, word);
|
||||||
|
nextpos += Strlen(word);
|
||||||
|
index++;
|
||||||
|
if (index >= MAXNG)
|
||||||
|
index = 0;
|
||||||
|
}
|
||||||
|
#if 0
|
||||||
|
printf("Next n-gram = '%s'\n", sbuf);
|
||||||
|
#endif
|
||||||
|
return sbuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Find statistics of word frequency in document */
|
||||||
|
void word_freq(FILE *src, int verbose, int ngram, int size, int quick,
|
||||||
|
hash_fun_t hash_fun, lower_fun_t lower_fun,
|
||||||
|
find_ele_fun_t find_ele_fun)
|
||||||
|
{
|
||||||
|
char *s;
|
||||||
|
h_ptr ls;
|
||||||
|
|
||||||
|
init_token(src);
|
||||||
|
new_table(size);
|
||||||
|
|
||||||
|
while ((s = get_token(ngram))) {
|
||||||
|
insert_string(s, hash_fun, lower_fun, find_ele_fun);
|
||||||
|
}
|
||||||
|
if (verbose > 0) {
|
||||||
|
ls = sort_words(quick);
|
||||||
|
while (ls && verbose--) {
|
||||||
|
printf("%d\t'%s'\n", ls->freq, ls->word);
|
||||||
|
ls = ls->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("%d n-grams, %d unique, %d singletons. Most common (%d) = '%s'. Longest (%d have length %d) = '%s'\n",
|
||||||
|
wcnt, ucnt, scnt, mcnt, mstring, lcnt, llen, lstring);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int verbose = 1;
|
||||||
|
int size = 1024;
|
||||||
|
int hash_fun_index = 0;
|
||||||
|
int lower_fun_index = 0;
|
||||||
|
int find_fun_index = 0;
|
||||||
|
int ngram = 1;
|
||||||
|
int quick = 0;
|
||||||
|
char *fname = NULL;
|
||||||
|
FILE *infile = stdin;
|
||||||
|
add_int_option("verbose", &verbose);
|
||||||
|
add_int_option("size", &size);
|
||||||
|
add_int_option("hash", &hash_fun_index);
|
||||||
|
add_int_option("lower", &lower_fun_index);
|
||||||
|
add_int_option("find", &find_fun_index);
|
||||||
|
add_int_option("ngram", &ngram);
|
||||||
|
add_int_option("quicksort", &quick);
|
||||||
|
add_string_option("file", &fname);
|
||||||
|
parse_options(argc, argv, NULL);
|
||||||
|
show_options(stdout);
|
||||||
|
printf("N-gram size %d\n", ngram);
|
||||||
|
printf("Lower case function %s\n", lower_fun_names[lower_fun_index]);
|
||||||
|
printf("Hash function %s\n", hash_fun_names[hash_fun_index]);
|
||||||
|
printf("Find element function %s\n", find_ele_fun_names[find_fun_index]);
|
||||||
|
if ((unsigned) hash_fun_index >= HCNT) {
|
||||||
|
fprintf(stderr, "Invalid hash function index %d\n", hash_fun_index);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if ((unsigned) lower_fun_index >= LCNT) {
|
||||||
|
fprintf(stderr, "Invalid lower function index %d\n", lower_fun_index);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if ((unsigned) find_fun_index >= FCNT) {
|
||||||
|
fprintf(stderr, "Invalid find function index %d\n", find_fun_index);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (fname) {
|
||||||
|
infile = fopen(fname, "r");
|
||||||
|
if (!infile) {
|
||||||
|
fprintf(stderr, "Couldn't open file '%s'\n", fname);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
word_freq(infile, verbose, ngram, size, quick,
|
||||||
|
hash_fun_set[hash_fun_index],
|
||||||
|
lower_fun_set[lower_fun_index],
|
||||||
|
find_ele_fun_set[find_fun_index]);
|
||||||
|
printf("Total time = %f seconds\n", (double) clock() / CLOCKS_PER_SEC);
|
||||||
|
fclose(infile);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
172948
resources/profile/shakespeare.txt
Normal file
172948
resources/profile/shakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
13
y86lab/Y86_instr.coe
Executable file
13
y86lab/Y86_instr.coe
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
MEMORY_INITIALIZATION_RADIX=16;
|
||||||
|
MEMORY_INITIALIZATION_VECTOR=
|
||||||
|
30f001000000
|
||||||
|
30f801000000
|
||||||
|
30f300000000
|
||||||
|
30f264000000
|
||||||
|
6102
|
||||||
|
720a000000
|
||||||
|
30f264000000
|
||||||
|
6003
|
||||||
|
6080
|
||||||
|
7004000000
|
||||||
|
403400000000
|
||||||
268
y86lab/Y86_toy.py
Executable file
268
y86lab/Y86_toy.py
Executable file
@ -0,0 +1,268 @@
|
|||||||
|
#encoding=utf-8
|
||||||
|
# Register file: every program register starts at zero.
reg = dict.fromkeys(
    (
        'rax', 'rcx', 'rdx', 'rbx',
        'rsp', 'rbp', 'rsi', 'rdi',
        'r8', 'r9', 'r10', 'r11',
        'r12', 'r13', 'r14',
    ),
    0,
)
|
||||||
|
|
||||||
|
# Read the executable file
def read_exe_file(path):
    """Return the instruction lines stored in the file at ``path``.

    The first two lines of the file are skipped; every remaining line is
    returned unmodified (trailing newline characters included).

    Raises whatever ``open`` raises, e.g. FileNotFoundError when the file
    does not exist.
    """
    # The context manager closes the handle even if reading fails.
    with open(path) as infile:
        return infile.readlines()[2:]
|
||||||
|
|
||||||
|
# Name conversion: register id (one hex digit) -> register name
def get_reg(s):
    """Translate a one-character hexadecimal register id into its name.

    The lookup is case-insensitive. Id 'F' yields None; any key outside
    0-9/A-F raises KeyError.
    """
    names = (
        'rax', 'rcx', 'rdx', 'rbx',
        'rsp', 'rbp', 'rsi', 'rdi',
        'r8', 'r9', 'r10', 'r11',
        'r12', 'r13', 'r14', None,
    )
    table = dict(zip('0123456789ABCDEF', names))
    return table[s.upper()]
|
||||||
|
|
||||||
|
# Little-endian data handling
def data_reverse(data):
    """Decode a little-endian hexadecimal string into a non-negative integer.

    ``data`` is cut into two-character pairs starting from the left, the
    pairs are reversed, and the joined text is parsed as base 16. Input of
    any length is accepted (previously everything past the first 18
    characters was silently ignored). A trailing newline that follows an
    even number of hex digits is tolerated because ``int`` ignores
    surrounding whitespace.

    Raises ValueError when the text is empty or is not valid hexadecimal.
    """
    byte_pairs = [data[i:i + 2] for i in range(0, len(data), 2)]
    return int(''.join(reversed(byte_pairs)), 16)
|
||||||
|
|
||||||
|
# Condition-code handling
def flags(result):
    """Set the global condition codes zf, sf and of from ``result``.

    zf becomes 1 when result equals zero, sf becomes 1 when result is
    negative, and of becomes 1 when the magnitude of result exceeds
    4294967296; each is 0 otherwise.
    """
    global zf, sf, of
    zf = int(result == 0)
    sf = int(result < 0)
    # overflow
    of = int(abs(result) > 4294967296)
|
||||||
|
|
||||||
|
# Condition codes
zf = sf = of = 0

pc = 0
stat = 0

# 101 zeroed memory cells; the stack pointer starts at the last one.
mem = [0] * 101
reg['rsp'] = 100
|
||||||
|
# Load the program from ./Y86_instr.coe; when that file does not exist,
# fall back to the copy of the program embedded below.
try:
    exe_file = read_exe_file("./Y86_instr.coe")
    if len(exe_file) < 1:
        exit()
except FileNotFoundError:
    # Plain string literal: the earlier f-string nested double quotes inside
    # a double-quoted literal, which is a SyntaxError before Python 3.12.
    print("FileNotFoundError: ./Y86_instr.coe not found, the progarm will use internal exe_file stored in this python code")
    #exe_file:
    #0 irmovq $1,%rax 30f001000000
    #1 irmovq $1,%r8 30f801000000
    #2 irmovq $0,%rbx 30f300000000
    #3 irmovq $100,%rdx 30f264000000
    #loop:
    #4 subq %rax,%rdx 6102
    #5 jl then 720a000000
    #6 irmovq $100,%rdx 30f264000000
    #7 addq %rax,%rbx 6003
    #8 addq %r8,%rax 6080
    #9 jmp loop 7004000000
    #then:
    #10 rmmovq %rbx,0(%rsp) 403400000000
    exe_file = ['30f001000000',
                '30f801000000',
                '30f300000000',
                '30f264000000',
                '6102',
                '720a000000',
                '30f264000000',
                '6003',
                '6080',
                '7004000000',
                '403400000000',
                ]
|
||||||
|
|
||||||
|
# Main loop
def _condition_met(fn):
    """Evaluate the condition selected by function code ``fn``.

    Reads the global condition codes zf, sf and of. Returns True or False
    for the codes '0' (unconditional) through '6', and None for any other
    code.
    """
    less = bool(sf ^ of)
    zero = bool(zf)
    outcomes = {
        '0': True,                   # unconditional
        '1': less or zero,           # le
        '2': less,                   # l
        '3': zero,                   # e
        '4': not zero,               # ne
        '5': not less,               # ge
        '6': not less and not zero,  # g
    }
    return outcomes.get(fn)


def cycle():
    """Execute the instruction at ``exe_file[pc]`` and update ``pc``.

    Returns True when execution may continue. Returns False when it must
    stop: on a halt instruction, an unrecognised opcode, or an unrecognised
    jump function code (the latter two previously left ``pc`` unchanged, so
    the driver loop never terminated).

    Conditions are evaluated with boolean logic; the former ``~zf`` /
    ``~(sf ^ of)`` expressions were bitwise NOT on 0/1 and therefore always
    truthy, which made cmovne/cmovge/cmovg and jne/jge/jg unconditional.
    """
    global pc
    s = exe_file[pc]
    # Normalise case so that lowercase hex opcodes ('a', 'b') are recognised.
    ins = s[0].upper()
    fn = s[1]

    if ins == '0':  # halt
        print("instr \'halt\' is not allowed to be used by application")
        return False
    elif ins == '1':  # nop
        pc += 1
    elif ins == '2':  # rrmovq / cmovXX
        ra = get_reg(s[2])
        rb = get_reg(s[3])
        # An unrecognised function code yields None: no move is performed.
        if _condition_met(fn):
            reg[rb] = reg[ra]
        pc += 1
    elif ins == '3':  # irmovq
        rb = get_reg(s[3])
        reg[rb] = data_reverse(s[4:])
        pc += 1
    elif ins == '4':  # rmmovq
        # NOTE(review): the displacement field s[4:] is not used; memory is
        # addressed by reg[rb] alone -- confirm whether D(rB) should apply.
        ra = get_reg(s[2])
        rb = get_reg(s[3])
        mem[reg[rb]] = reg[ra]
        pc += 1
    elif ins == '5':  # mrmovq
        # NOTE(review): displacement ignored here as well -- see rmmovq.
        ra = get_reg(s[2])
        rb = get_reg(s[3])
        reg[ra] = mem[reg[rb]]
        pc += 1
    elif ins == '6':  # OPq
        ra = get_reg(s[2])
        rb = get_reg(s[3])
        if fn == '0':
            reg[rb] = reg[rb] + reg[ra]
        elif fn == '1':
            reg[rb] = reg[rb] - reg[ra]
        elif fn == '2':
            reg[rb] = reg[rb] & reg[ra]
        elif fn == '3':
            reg[rb] = reg[rb] ^ reg[ra]
        flags(reg[rb])
        pc += 1
    elif ins == '7':  # jXX
        dst = data_reverse(s[2:])
        taken = _condition_met(fn)
        if taken is None:
            print("invalid jump function code '%s' at pc=%d" % (fn, pc))
            return False
        pc = dst if taken else pc + 1
    elif ins == '8':  # call
        dst = data_reverse(s[2:])
        reg['rsp'] -= 1
        mem[reg['rsp']] = pc + 1  # return address: the next instruction
        pc = dst
    elif ins == '9':  # ret
        pc = mem[reg['rsp']]
        reg['rsp'] += 1
    elif ins == 'A':  # pushq
        # The register id is the nibble right after the function code.
        ra = get_reg(s[2])
        reg['rsp'] -= 1
        mem[reg['rsp']] = reg[ra]
        pc += 1
    elif ins == 'B':  # popq
        ra = get_reg(s[2])
        reg[ra] = mem[reg['rsp']]
        reg['rsp'] += 1
        pc += 1
    else:
        print("invalid instruction code '%s' at pc=%d" % (ins, pc))
        return False
    return True
|
||||||
|
|
||||||
|
# Print the register file
def print_reg():
    """Print each register as ``name:value``, tab-separated, between banners.

    A line break is emitted after every fourth register and after 'r14'.
    """
    rule = '-' * 20
    print(rule + "reg file" + rule)
    for count, (name, value) in enumerate(reg.items(), start=1):
        print("%s:%d" % (name, value), end='\t')
        if name == 'r14' or count % 4 == 0:
            print()
    print(rule + "end" + rule)
|
||||||
|
def print_mem():
    """Print every memory cell as ``address:value`` between banner lines.

    Cells are separated by spaces, with a line break after every sixteenth
    cell.
    """
    rule = '-' * 20
    print(rule + "mem" + rule)
    print('address:value')
    for addr, value in enumerate(mem):
        print("%d:%d" % (addr, value), end=' ')
        if (addr + 1) % 16 == 0:
            print()
    print('\n' + rule + "end" + rule)
|
||||||
|
|
||||||
|
# start: run instructions until cycle() reports a stop or pc moves past the
# end of the program, then dump memory and registers
flag = True
while True:
    if not flag or pc >= len(exe_file):
        break
    flag = cycle()
print_mem()
print_reg()
|
||||||
Reference in New Issue
Block a user