From ba21f80f3b826b04953ffd7158c350fc51ee2b5f Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Sat, 12 Apr 2025 11:37:07 +0800 Subject: [PATCH] matrix fixed --- perflab/matrix/Makefile | 34 +++ perflab/matrix/clock.c | 425 ++++++++++++--------------- perflab/matrix/clock.c.bak | 229 +++++++++++++++ perflab/matrix/clock.o | Bin 0 -> 16992 bytes perflab/matrix/cpe.o | Bin 0 -> 14432 bytes perflab/matrix/fcyc.c | 4 +- perflab/matrix/fcyc.o | Bin 0 -> 13792 bytes perflab/matrix/lsquare.o | Bin 0 -> 13520 bytes perflab/matrix/matrix_test | Bin 0 -> 55496 bytes perflab/matrix/rowcol.c | 102 +++---- perflab/matrix/rowcol.c~ | 162 ++++++++++ perflab/matrix/rowcol.o | Bin 0 -> 6096 bytes perflab/matrix/rowcol.y~ | 240 +++++++++++++++ perflab/matrix/rowcol.z~ | 240 +++++++++++++++ perflab/matrix/rowcol_202302723005.c | 69 +++++ perflab/matrix/rowcol_202302723005.o | Bin 0 -> 6120 bytes perflab/matrix/rowcol_test.c | 195 ++++++------ perflab/matrix/rowcol_test.o | Bin 0 -> 16840 bytes perflab/poly/Makefile | 35 +++ perflab/poly/clock.c | 314 ++++++++++---------- perflab/poly/poly.cu | 325 ++++++++++++++++++++ perflab/poly/poly.o | Bin 0 -> 5872 bytes perflab/poly/poly_test.c | 35 +-- perflab/poly/poly_test.o | Bin 0 -> 30088 bytes 24 files changed, 1840 insertions(+), 569 deletions(-) create mode 100644 perflab/matrix/Makefile create mode 100644 perflab/matrix/clock.c.bak create mode 100644 perflab/matrix/clock.o create mode 100644 perflab/matrix/cpe.o create mode 100644 perflab/matrix/fcyc.o create mode 100644 perflab/matrix/lsquare.o create mode 100644 perflab/matrix/matrix_test create mode 100644 perflab/matrix/rowcol.c~ create mode 100644 perflab/matrix/rowcol.o create mode 100644 perflab/matrix/rowcol.y~ create mode 100644 perflab/matrix/rowcol.z~ create mode 100644 perflab/matrix/rowcol_202302723005.c create mode 100644 perflab/matrix/rowcol_202302723005.o create mode 100644 perflab/matrix/rowcol_test.o create mode 100644 perflab/poly/Makefile create mode 100644 perflab/poly/poly.cu create mode 100644 perflab/poly/poly.o create mode 100644 perflab/poly/poly_test.o diff --git a/perflab/matrix/Makefile b/perflab/matrix/Makefile new file mode 100644 index 0000000..2dc672d --- /dev/null +++ b/perflab/matrix/Makefile @@ -0,0 +1,34 @@ +CC = gcc +CFLAGS = -Wall -O1 -g +#LDFLAGS = -lm -lcudart -lcuda + +# Source files +SRCS = rowcol_test.c clock.c cpe.c fcyc.c lsquare.c rowcol_202302723005.c +#CUDA_SRCS = rowcol.cu +OBJS = $(SRCS:.c=.o) +#rowcol.o + +# Target executable +TARGET = matrix_test + +# Default target +all: $(TARGET) + +# Rule to build the executable +$(TARGET): $(OBJS) + $(CC) $(OBJS) -o $(TARGET) $(LDFLAGS) + +# Rule to build object files +%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ + +# Rule to build CUDA object files +#rowcol.o: rowcol.cu +# $(NVCC) $(CUDA_FLAGS) -c $< -o $@ + +# Clean rule +clean: + rm -f $(OBJS) $(TARGET) + +# Phony targets +.PHONY: all clean diff --git a/perflab/matrix/clock.c b/perflab/matrix/clock.c index a587590..b826af4 100644 --- a/perflab/matrix/clock.c +++ b/perflab/matrix/clock.c @@ -1,229 +1,196 @@ -/* clock.c - * Retrofitted to use thread-specific timers - * and to get clock information from /proc/cpuinfo - * (C) R. E. Bryant, 2010 - * - */ - -/* When this constant is not defined, uses time stamp counter */ -#define USE_POSIX 0 - -/* Choice to use cpu_gettime call or Intel time stamp counter directly */ - -#include -#include -#include -#include -//#include -#include -#include -#include "clock.h" - -/* Use x86 cycle counter */ - -/* Initialize the cycle counter */ -static unsigned cyc_hi = 0; -static unsigned cyc_lo = 0; - -/* Set *hi and *lo to the high and low order bits of the cycle counter. - Implementation requires assembly code to use the rdtsc instruction. */ -void access_counter(unsigned *hi, unsigned *lo) -{ - - long long counter; - - counter = __rdtsc(); - (*hi) = (unsigned int)(counter >> 32); - (*lo) = (unsigned int)counter; -/* - - LARGE_INTEGER lPerformanceCount; - - QueryPerformanceCounter(&lPerformanceCount); - (*hi) = (unsigned int)lPerformanceCount.HighPart; - (*lo) = (unsigned int)lPerformanceCount.LowPart; -// printf("%08X %08X\n",(*hi),(*lo)); -*/ -} - - -/* Record the current value of the cycle counter. */ -void start_counter() -{ - access_counter(&cyc_hi, &cyc_lo); -} - -/* Return the number of cycles since the last call to start_counter. */ -double get_counter() -{ - unsigned ncyc_hi, ncyc_lo; - unsigned hi, lo, borrow; - double result; - - /* Get cycle counter */ - access_counter(&ncyc_hi, &ncyc_lo); - - /* Do double precision subtraction */ - lo = ncyc_lo - cyc_lo; - borrow = cyc_lo > ncyc_lo; - hi = ncyc_hi - cyc_hi - borrow; - result = (double) hi * (1 << 30) * 4 + lo; - return result; -} -void make_CPU_busy(void) -{ - volatile double old_tick,new_tick; - start_counter(); - old_tick = get_counter(); - new_tick = get_counter(); - while (new_tick - old_tick < 1000000000) - new_tick = get_counter(); -} - -//CPU的频率 -double mhz(int verbose) -{ - LARGE_INTEGER lFrequency; - LARGE_INTEGER lPerformanceCount_Start; - LARGE_INTEGER lPerformanceCount_End; - double mhz; - double fTime; - __int64 _i64StartCpuCounter; - __int64 _i64EndCpuCounter; - //On a multiprocessor machine, it should not matter which processor is called. - //However, you can get different results on different processors due to bugs in - //the BIOS or the HAL. To specify processor affinity for a thread, use the SetThreadAffinityMask function. - HANDLE hThread=GetCurrentThread(); - SetThreadAffinityMask(hThread,0x1); - - //主板上高精度定时器的晶振频率 - //这个定时器应该就是一片8253或者8254 - //在intel ich7中集成了8254 - QueryPerformanceFrequency(&lFrequency); -// if (verbose>0) -// printf("高精度定时器的晶振频率:%1.0fHz.\n",(double)lFrequency.QuadPart); - - //这个定时器每经过一个时钟周期,其计数器会+1 - QueryPerformanceCounter(&lPerformanceCount_Start); - - //RDTSC指令:获取CPU经历的时钟周期数 - _i64StartCpuCounter=__rdtsc(); - - //延时长一点,误差会小一点 - //int nTemp=100000; - //while (--nTemp); - Sleep(200); - - QueryPerformanceCounter(&lPerformanceCount_End); - - _i64EndCpuCounter=__rdtsc(); - - //f=1/T => f=计数次数/(计数次数*T) - //这里的“计数次数*T”就是时间差 - fTime=((double)lPerformanceCount_End.QuadPart-(double)lPerformanceCount_Start.QuadPart) - /(double)lFrequency.QuadPart; - - mhz = (_i64EndCpuCounter-_i64StartCpuCounter)/(fTime*1000000.0); - if (verbose>0) - printf("CPU频率为:%1.6fMHz.\n",mhz); - return mhz; -} - -double CPU_Factor1(void) -{ - double result; - int i,j,k,ii,jj,kk; - LARGE_INTEGER lStart,lEnd; - LARGE_INTEGER lFrequency; - HANDLE hThread; - double fTime; - - QueryPerformanceFrequency(&lFrequency); - - ii = 43273; - kk = 1238; - result = 1; - jj = 1244; - - hThread=GetCurrentThread(); - SetThreadAffinityMask(hThread,0x1); - QueryPerformanceCounter(&lStart); - //_asm("cpuid"); - start_counter(); - for (i=0;i<100;i++) - for (j=0;j<1000;j++) - for (k=0;k<1000;k++) - kk += kk*ii+jj; - - result = get_counter(); - QueryPerformanceCounter(&lEnd); - fTime=((double)lEnd.QuadPart-(double)lStart.QuadPart); - printf("CPU运行时间为%f",result); - printf("\t %f\n",fTime); - return result; -} - -double CPU_Factor(void) -{ - double frequency; - double multiplier = 1000 * 1000 * 1000;//nano - LARGE_INTEGER lFrequency; - LARGE_INTEGER start,stop; - HANDLE hThread; - int i; - const int gigahertz= 1000*1000*1000; - const int known_instructions_per_loop = 27317; - - int iterations = 100000000; - int g = 0; - double normal_ticks_per_second; -double ticks; -double time; -double loops_per_sec; -double instructions_per_loop; -double ratio; -double actual_freq; - - QueryPerformanceFrequency(&lFrequency); - frequency = (double)lFrequency.QuadPart; - - hThread=GetCurrentThread(); - SetThreadAffinityMask(hThread,0x1); - QueryPerformanceCounter(&start); - for( i = 0; i < iterations; i++) - { - g++; - g++; - g++; - g++; - } - QueryPerformanceCounter(&stop); - - //normal ticks differs from the WMI data, i.e 3125, when WMI 3201, and CPUZ 3199 - normal_ticks_per_second = frequency * 1000; - ticks = (double)((double)stop.QuadPart - (double)start.QuadPart); - time = (ticks * multiplier) /frequency; - loops_per_sec = iterations / (time/multiplier); - instructions_per_loop = normal_ticks_per_second / loops_per_sec; - - ratio = (instructions_per_loop / known_instructions_per_loop); - actual_freq = normal_ticks_per_second / ratio; -/* - actual_freq = normal_ticks_per_second / ratio; - actual_freq = known_instructions_per_loop*iterations*multiplier/time; - - 2293 = x/time; - - 2292.599713*1191533038.809362=known_instructions_per_loop*100000000*1000 - loops_per_sec = iterations*frequency / ticks - - instructions_per_loop = / loops_per_sec; -*/ - printf("Perf counter freq: %f\n", normal_ticks_per_second); - printf("Loops per sec: %f\n", loops_per_sec); - printf("Perf counter freq div loops per sec: %f\n", instructions_per_loop); - printf("Presumed freq: %f\n", actual_freq); - printf("ratio: %f\n", ratio); - printf("time=%f\n",time); - return ratio; -} +/* clock.c + * Retrofitted to use thread-specific timers + * and to get clock information from /proc/cpuinfo + * (C) R. E. Bryant, 2010 + * Modified for cross-platform compatibility + */ + +#define _GNU_SOURCE // For sched_setaffinity on Linux +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +#else +#include +#include +#include +#include +typedef struct { + uint64_t QuadPart; +} LARGE_INTEGER; +typedef void *HANDLE; +#define __int64 long long +#define Sleep(ms) usleep((ms) * 1000) +#endif + +#include "clock.h" + +/* Use x86 cycle counter */ +static unsigned cyc_hi = 0; +static unsigned cyc_lo = 0; + +void access_counter(unsigned *hi, unsigned *lo) { + uint64_t counter = __rdtsc(); + *hi = (unsigned)(counter >> 32); + *lo = (unsigned)counter; +} + +void start_counter() { access_counter(&cyc_hi, &cyc_lo); } + +double get_counter() { + unsigned ncyc_hi, ncyc_lo; + access_counter(&ncyc_hi, &ncyc_lo); + uint64_t start = ((uint64_t)cyc_hi << 32) | cyc_lo; + uint64_t end = ((uint64_t)ncyc_hi << 32) | ncyc_lo; + return (double)(end - start); +} + +void make_CPU_busy(void) { + volatile double old_tick = get_counter(); + volatile double new_tick; + while ((new_tick - old_tick) < 1000000000) { + new_tick = get_counter(); + } +} + +#ifdef _WIN32 +#define GET_TIME(dest) QueryPerformanceCounter(dest) +#else +static inline void GET_TIME(LARGE_INTEGER *dest) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + dest->QuadPart = (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec; +} +#define QueryPerformanceFrequency(freq) ((freq)->QuadPart = 1000000000) +#endif + +double mhz(int verbose) { + LARGE_INTEGER lFrequency; + LARGE_INTEGER lPerformanceCount_Start; + LARGE_INTEGER lPerformanceCount_End; + double mhz; + double fTime; + __int64 _i64StartCpuCounter; + __int64 _i64EndCpuCounter; + +#ifdef _WIN32 + HANDLE hThread = GetCurrentThread(); + SetThreadAffinityMask(hThread, 0x1); +#else + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); +#endif + + QueryPerformanceFrequency(&lFrequency); + GET_TIME(&lPerformanceCount_Start); + _i64StartCpuCounter = __rdtsc(); + Sleep(200); + GET_TIME(&lPerformanceCount_End); + _i64EndCpuCounter = __rdtsc(); + + fTime = (lPerformanceCount_End.QuadPart - lPerformanceCount_Start.QuadPart) / + (double)lFrequency.QuadPart; + mhz = (_i64EndCpuCounter - _i64StartCpuCounter) / (fTime * 1000000.0); + + if (verbose > 0) { + printf("CPU棰戠巼涓: %.6fMHz.\n", mhz); + } + return mhz; +} + +double CPU_Factor1(void) { + double result; + int i, j, k; + LARGE_INTEGER lStart, lEnd; + LARGE_INTEGER lFrequency; + double fTime; + +#ifdef _WIN32 + HANDLE hThread = GetCurrentThread(); + SetThreadAffinityMask(hThread, 0x1); +#else + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); +#endif + + QueryPerformanceFrequency(&lFrequency); + GET_TIME(&lStart); + start_counter(); + + for (i = 0; i < 100; i++) + for (j = 0; j < 1000; j++) + for (k = 0; k < 1000; k++) + ; + + result = get_counter(); + GET_TIME(&lEnd); + + fTime = (lEnd.QuadPart - lStart.QuadPart) / (double)lFrequency.QuadPart; + printf("CPU璁$畻鏃堕暱涓: %f", result); + printf("\t %f\n", fTime); + return result; +} + +double CPU_Factor(void) { + double frequency; + double multiplier = 1000 * 1000 * 1000; // nano + LARGE_INTEGER lFrequency; + LARGE_INTEGER start, stop; + int i; + const int known_instructions_per_loop = 27317; + int iterations = 100000000; + int g = 0; + double normal_ticks_per_second; + double ticks; + double time; + double loops_per_sec; + double instructions_per_loop; + double ratio; + double actual_freq; + +#ifdef _WIN32 + HANDLE hThread = GetCurrentThread(); + SetThreadAffinityMask(hThread, 0x1); +#else + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); +#endif + + QueryPerformanceFrequency(&lFrequency); + frequency = (double)lFrequency.QuadPart; + GET_TIME(&start); + + for (i = 0; i < iterations; i++) { + g++; + g++; + g++; + g++; + } + + GET_TIME(&stop); + normal_ticks_per_second = frequency * 1000; + ticks = (double)(stop.QuadPart - start.QuadPart); + time = (ticks * multiplier) / frequency; + loops_per_sec = iterations / (time / multiplier); + instructions_per_loop = normal_ticks_per_second / loops_per_sec; + ratio = instructions_per_loop / known_instructions_per_loop; + actual_freq = normal_ticks_per_second / ratio; + + printf("Perf counter freq: %f\n", normal_ticks_per_second); + printf("Loops per sec: %f\n", loops_per_sec); + printf("Perf counter freq div loops per sec: %f\n", instructions_per_loop); + printf("Presumed freq: %f\n", actual_freq); + printf("ratio: %f\n", ratio); + printf("time=%f\n", time); + return ratio; +} diff --git a/perflab/matrix/clock.c.bak b/perflab/matrix/clock.c.bak new file mode 100644 index 0000000..3b2a198 --- /dev/null +++ b/perflab/matrix/clock.c.bak @@ -0,0 +1,229 @@ +/* clock.c + * Retrofitted to use thread-specific timers + * and to get clock information from /proc/cpuinfo + * (C) R. E. Bryant, 2010 + * + */ + +/* When this constant is not defined, uses time stamp counter */ +#define USE_POSIX 0 + +/* Choice to use cpu_gettime call or Intel time stamp counter directly */ + +#include +#include +#include +#include +//#include +//#include +#include +#include "clock.h" + +/* Use x86 cycle counter */ + +/* Initialize the cycle counter */ +static unsigned cyc_hi = 0; +static unsigned cyc_lo = 0; + +/* Set *hi and *lo to the high and low order bits of the cycle counter. + Implementation requires assembly code to use the rdtsc instruction. */ +void access_counter(unsigned *hi, unsigned *lo) +{ + + long long counter; + + counter = __rdtsc(); + (*hi) = (unsigned int)(counter >> 32); + (*lo) = (unsigned int)counter; +/* + + LARGE_INTEGER lPerformanceCount; + + QueryPerformanceCounter(&lPerformanceCount); + (*hi) = (unsigned int)lPerformanceCount.HighPart; + (*lo) = (unsigned int)lPerformanceCount.LowPart; +// printf("%08X %08X\n",(*hi),(*lo)); +*/ +} + + +/* Record the current value of the cycle counter. */ +void start_counter() +{ + access_counter(&cyc_hi, &cyc_lo); +} + +/* Return the number of cycles since the last call to start_counter. */ +double get_counter() +{ + unsigned ncyc_hi, ncyc_lo; + unsigned hi, lo, borrow; + double result; + + /* Get cycle counter */ + access_counter(&ncyc_hi, &ncyc_lo); + + /* Do double precision subtraction */ + lo = ncyc_lo - cyc_lo; + borrow = cyc_lo > ncyc_lo; + hi = ncyc_hi - cyc_hi - borrow; + result = (double) hi * (1 << 30) * 4 + lo; + return result; +} +void make_CPU_busy(void) +{ + volatile double old_tick,new_tick; + start_counter(); + old_tick = get_counter(); + new_tick = get_counter(); + while (new_tick - old_tick < 1000000000) + new_tick = get_counter(); +} + +//CPU的频率 +double mhz(int verbose) +{ + LARGE_INTEGER lFrequency; + LARGE_INTEGER lPerformanceCount_Start; + LARGE_INTEGER lPerformanceCount_End; + double mhz; + double fTime; + __int64 _i64StartCpuCounter; + __int64 _i64EndCpuCounter; + //On a multiprocessor machine, it should not matter which processor is called. + //However, you can get different results on different processors due to bugs in + //the BIOS or the HAL. To specify processor affinity for a thread, use the SetThreadAffinityMask function. + HANDLE hThread=GetCurrentThread(); + SetThreadAffinityMask(hThread,0x1); + + //主板上高精度定时器的晶振频率 + //这个定时器应该就是一片8253或者8254 + //在intel ich7中集成了8254 + QueryPerformanceFrequency(&lFrequency); +// if (verbose>0) +// printf("高精度定时器的晶振频率:%1.0fHz.\n",(double)lFrequency.QuadPart); + + //这个定时器每经过一个时钟周期,其计数器会+1 + QueryPerformanceCounter(&lPerformanceCount_Start); + + //RDTSC指令:获取CPU经历的时钟周期数 + _i64StartCpuCounter=__rdtsc(); + + //延时长一点,误差会小一点 + //int nTemp=100000; + //while (--nTemp); + Sleep(200); + + QueryPerformanceCounter(&lPerformanceCount_End); + + _i64EndCpuCounter=__rdtsc(); + + //f=1/T => f=计数次数/(计数次数*T) + //这里的“计数次数*T”就是时间差 + fTime=((double)lPerformanceCount_End.QuadPart-(double)lPerformanceCount_Start.QuadPart) + /(double)lFrequency.QuadPart; + + mhz = (_i64EndCpuCounter-_i64StartCpuCounter)/(fTime*1000000.0); + if (verbose>0) + printf("CPU频率为:%1.6fMHz.\n",mhz); + return mhz; +} + +double CPU_Factor1(void) +{ + double result; + int i,j,k,ii,jj,kk; + LARGE_INTEGER lStart,lEnd; + LARGE_INTEGER lFrequency; + HANDLE hThread; + double fTime; + + QueryPerformanceFrequency(&lFrequency); + + ii = 43273; + kk = 1238; + result = 1; + jj = 1244; + + hThread=GetCurrentThread(); + SetThreadAffinityMask(hThread,0x1); + QueryPerformanceCounter(&lStart); + //_asm("cpuid"); + start_counter(); + for (i=0;i<100;i++) + for (j=0;j<1000;j++) + for (k=0;k<1000;k++) + kk += kk*ii+jj; + + result = get_counter(); + QueryPerformanceCounter(&lEnd); + fTime=((double)lEnd.QuadPart-(double)lStart.QuadPart); + printf("CPU运行时间为%f",result); + printf("\t %f\n",fTime); + return result; +} + +double CPU_Factor(void) +{ + double frequency; + double multiplier = 1000 * 1000 * 1000;//nano + LARGE_INTEGER lFrequency; + LARGE_INTEGER start,stop; + HANDLE hThread; + int i; + const int gigahertz= 1000*1000*1000; + const int known_instructions_per_loop = 27317; + + int iterations = 100000000; + int g = 0; + double normal_ticks_per_second; +double ticks; +double time; +double loops_per_sec; +double instructions_per_loop; +double ratio; +double actual_freq; + + QueryPerformanceFrequency(&lFrequency); + frequency = (double)lFrequency.QuadPart; + + hThread=GetCurrentThread(); + SetThreadAffinityMask(hThread,0x1); + QueryPerformanceCounter(&start); + for( i = 0; i < iterations; i++) + { + g++; + g++; + g++; + g++; + } + QueryPerformanceCounter(&stop); + + //normal ticks differs from the WMI data, i.e 3125, when WMI 3201, and CPUZ 3199 + normal_ticks_per_second = frequency * 1000; + ticks = (double)((double)stop.QuadPart - (double)start.QuadPart); + time = (ticks * multiplier) /frequency; + loops_per_sec = iterations / (time/multiplier); + instructions_per_loop = normal_ticks_per_second / loops_per_sec; + + ratio = (instructions_per_loop / known_instructions_per_loop); + actual_freq = normal_ticks_per_second / ratio; +/* + actual_freq = normal_ticks_per_second / ratio; + actual_freq = known_instructions_per_loop*iterations*multiplier/time; + + 2293 = x/time; + + 2292.599713*1191533038.809362=known_instructions_per_loop*100000000*1000 + loops_per_sec = iterations*frequency / ticks + + instructions_per_loop = / loops_per_sec; +*/ + printf("Perf counter freq: %f\n", normal_ticks_per_second); + printf("Loops per sec: %f\n", loops_per_sec); + printf("Perf counter freq div loops per sec: %f\n", instructions_per_loop); + printf("Presumed freq: %f\n", actual_freq); + printf("ratio: %f\n", ratio); + printf("time=%f\n",time); + return ratio; +} diff --git a/perflab/matrix/clock.o b/perflab/matrix/clock.o new file mode 100644 index 0000000000000000000000000000000000000000..0e68ee3a8b323765cfb85a2617fab3cee78d1fc7 GIT binary patch literal 16992 zcmb`Od0bD+|Hn^#+O^}#UP_CULJ2oo^+^jwS&B-1O0=m&xTR8xqHI}0wAgOh*DFM& z5Fwi5@g9*=Wo=6PQ4_sn^pIdkT7Iz?OCPZAXoVMr2TCNRHh zgffih_+Mo+wrIu-VEQu(hWx^(s(k4Re$r)r;cL~TK1mW+{(!x)MUpt)|2nvqrPzF} z96rLAzQ)(7SSsVI@cvRWUt!p#E7<4l*KzeCeCf-CUT#*HiH^iIDK=uGW(6fNjb;T! zD-9{MmqkVQLR4TK`YMbljnF1Cp@Pz}T|l~uB~|>mBE>7_uI4W0(_N7Tcg> zLB;%-qD<6Nw>jqS=5x&Fc7FlhWcn&-7xGg~k(xR4^X%T_vL^`7Qg)n2;--U zw_GhYviG>;GM!?GbXN4WN}B_Ymvoo<6em@ ze#XksSixcX-JShf9}N}0Ke~FXPaNiA;duJv>?WsJ4Q2Q`RZDIDi1Hs=9ASLAzap#$ zUSo=ITyTj0kMQneJIs#5Qq~J+6rMpq7)1jdMH4ED2Hi(d_4g>Mk|=7QVV<;!D-5<` zgmJ?m`b}%4A=IMippmAHVxW-N3$??EgYcF^qYd4DbJ2!FjmAhlsy!h$Br4xPW%i~+ z3VRJjL~f%Jx|fi)8r24(lfyUl0DM!k1NV>6vT;N=AT{<_tkMsfS0uEY>hfxD+RJsTpPxB5PIaih zv9AMvnZ69>jrn_C=BGELpL(_VqVT9MBdt2rSB9A?2=!I<4qhA-E(lfi4HYaxmI&Ji z2Zw~Ih9DLu@E)hiCRQFQ2wNN|@Zm^9y~6#2g;wGIfdW%3W78k5Rek&;R0DqZ{8KHo z5tNsk7i`fR@HK|0R&Zt53kfw@^;wHX>DY_gXX=A^zfBhPhCz;x?gKN@NucbuLW z6n*M#guj2xn!LBp6<=kPH?NsD`eDDG(kV&BTLgA4qe~id4JV~2yj~zuyunk6XE^$w zidFS?wiQLIHH#!~_t{&hwR^>FE$3>P=N2n2Y2Ys!zG=!|nwDR} zmkixB@y+A@*Iq8~XI%GbM!)_i=gNGrI{H1{%<)H+?T_Q>>K(HqqNL3RmyI3&qE#j{ zzSsElsPst(p0`|QGCpLEog46cruV~Fx)l=fk$!Ki>aX;@u|c-APJN}0NWhRCv+eB9ZGW=q(wrszw2#Mdm#JzN`0n^k*cN> zk=9sVuf9W6rh4c3dln6s?xXGrkR+1@S?#p>*#L_qAOeSOaz)&_T5@x;kaJL$v7iY zZC9qZaezH<<`!eQ*D@dNU$;2V?EU!b?4An`U2yKWtn(@%JosrspJxf)4zg~WzrWe% z@8IMSo_S!&sYwbcEqJeZaF@jN zspb2b8DXauZS(D+aqPW%b)NTp{hRmGANaj7zwY-&_l9fAoasvYcW-|%V%F)x{D$Pm zP3waNZ#%D54UCvpl91ESe$Spwrj7Ta$}U*n(X*0M_fa|Mcv0r)Rj0e1Q~8pUFFzjs zuIxauAo$tGt1V9^6|^5~OzW_DdiZYEqWK=LC-qupAGfu)V0Z6Bi(Jn)eExT7v*y2z zpRVpZJg!{#%(1!RGZvV=TXryM$~Ev zKGDN#t>)*yoLRG@^{Vr3-?tMycHG^1Sf}|*Ki5|+o{d}d<*rWHkb7~{hxxKEyeo`5 zZ`l|JFZeVodBN00v&stkmj6?B;(}8{p6;4ECv&xDOK)@$IE8D zGA#9a$#1$oEKq!vi~iiTD)v@M{dP4Sl2(eotYF|Y(Qe|cjKFx;f11z?G3vP6n$(uM ze^ehxw&=O(dpdZXeXu=fX-V3($lW~@s+MJr`8qY5`A^5>szK^$!!KUFk7Z4$uTpe2 zie_658 zZjWnrb-$ItcZ!sF);CI$^<|>R93MVW=FXwnomPU(K&{8GqTGG2K3b{JbkDzj*r{`Q z`Sz86XYP1(h}YU{R?xAX)*d~r8z!|I4KebqufH?>Zo7!={qG-=$F#{hT+Y6EN-lY% zWMq9v;PO{_GylH+Na^k!-nx{7$DJG`CmfyTZ0o&GEzrI-xpLNaiB%1Hc@wYBs{2?P znHaEj&=j7h@*Rz5^%hH8>WeoI7OC61bK3h~*WtbxeO*#E-m;tS)a&Z4g7%E%@BfyN zxfVYz(fYRIdvnVaOpIp-U!ogl=wn@miNm;0VH``#9 zcPS>^E6_qry2d`EqoH8io(=CB3f_Ou*xml9LrFwx_y=WCX7{!$TT`z5r)3*`=jo~$ zABKo4n=NE68MV}w9-0ueRj%jSD-XOUTgJY~O)9q59#bAAC$9U^>hRIQ+UW`kIAPEZ^_op{%IBw_Do*p_Cz}nacfX@EAU1ye+g-iCpYhZBd-I$d zo`uVwL}gyyU8AlW^d#+Bd}^N|Kem5)E8`*mCA8?G!QlNLR+P5%Rdk7pxR5d>BI4ky zo8|E>f#nxQO`LIjN4ejw*%D1Z#xQGaj`@a9DAi?7-|*6i^h$HJZGATE?SdJ5q$Ww5 zO!hAF?w2*|PK0OT>XCLmvfpNQcx2{;?Br zCMFsx?j9F!H$4+6xRavZS){wHy_f6;yQOAEtKG8m_z68GCOYYyTQogL_jY{E!R!#n z@Q#_z>!*gFZ7CW1#o5J8=fLWjfjP>#`y)0r77lcA-aB|o)!C~jlO_tBj1&r`G>%>0 zyxPFyfsO6Xj0KE-q1%^PNwt~()~dc6q2v6hA;Ue*C@|>S;IvH=8*O(=)GT`_S!|KL zGB&Ynoo=C<_)4AR^y@D|HwG58Uab`$JpQYd)~ClE9?L6MmN}-0<{UBYd*4~hWTG4Y zQo_Qyy{@F>Me!K7YU?!@k4mJ)*6rG-#tcWJQIs8#Qo?Zwdn0y<`5R&@J<(pA5o*N_ zNR~*7l}J`dB1q;L?LWu&G}HOh%@u1STQ1xxNd528*%6vcik^q2DW;C%D<%*4YfIAB zPeWZ}8>aSfvMSOH$?mDKrXVvfC*^3L;tBJjT_a{!mo{B59`&QXS4-`Rmg99dns)Jb z#y7@Y*G9eKh;(cCMMV&Q5;%=r%L!;ilOk)tmRKCzMN%&X%^C zXcFVAvZpCqH0MIKvc1#Pb3I-RncS;%$hxEPwjynX(Y_ISVLe}#cppq)UKd0+NXYb8 z^UQF$5xln`_O?TE#`{*asDW~aj;-;RiM4Ys+vBbgEmbu+tD_(E_m1F?X|Z=zV?|K zeOsnVx!v8g$md~4JM5#xsZTRAzT9@>E{IQtr)_qeJ7+TDo6t~RMV36L<((()X zV{_+h7`Av`PDt+;V~?}UA!MeUeL!8^si zY6hNkIT6-BKIpEbR9t7ZRmbq6i8ed?Kk7TX#_Y$P3vHd@A&)AU3nqPYxAr+LcEb6H zTju5~dKu=q3acl-oxL;I(ZX!4(WcPKZ^O?Q9@@Gu)Avwx+_=EfDB}wQbKd(l-IlCt zo0EK0wymdcenQzj*98E zC^e^`)j-T~s^^TE_9&U`6c9)8) zzogXS;bZGq*wX)Cj{F9b1`)-)2ce5*XpA`3=KiudqfTpIM93EPQbQ}fe`hFmtakD$ zSYDE%CZ|*?{`!c(rDl|>{HaOeeO9<7?{<&Z)>3;mVDh>>7Ug?i?h|>t;ESC@Wn1cf zwa6Dzvm7OIKOSptD|QJUR}%0f^|@F|k4b80mMwTw^I%_Kyu$j91#YH=4=BIazElH;5;x7PedpxF zqpoUahTGqNvmi45^}iW%>)ma(%y_b;_r%)p@#`EC3qBU9_aC4ZTamV~m+Rfp_uWim z>_09_o)OwP`FXDO6(-BBNW;GHL9EOKt#e8(k$IO(8$De@-<{p(@I1X}m!o1zwSiYr zEq_ega>3JqO`krVotb{x`t12xk2V$0yYP9W$Bp2%vksTF{BwO*R?fWpJNQ>S>XsaO zxM0?e>Up2C#4NWbU(syt@wxrs5BuEO7Z;RTMtV(H;e7I1meB|^OJ*EYqmecYz z)%xh4Dj4u!WFMdN`;X*(IureQuEVN{TBGDdohqlU4sSYQc;Ec^trrQ2);G%z4^S{y z`623H?Ye?9-#EfRA>(Cf#^&|6gW?Qity?UU+^$vqkcjb4JgvC5Dy^|@hX1Q8C-#}n z`SQryuH?+K^_D!ZI;s01X;VrCoto3lwuQdae=O-U%r3*XN>gvew`uv#s?D;xLk4(U zF=;ii=%eiRoGL`qhiwyc_>Er?0FX^}RvjmQ8Kh-MnYn@f&+Iy!y}6 z%2ey*=8eX~Uz_UbpWjlo{_Cooz3Z;^x77>?%y-?tSZQPH z*jQSQQ`NR{bkR{Y9IZc6-%xd=!AK*6kp^RgHw2zmRndm$Wu!urw#X&S$a#xET^cc* zdI>W|WSKJ~vQ$x|zpSKGDiSLqJ^+rf?;B7VGXr4kdj?=uz^oz1K4(Ii&8_5BM5oD1 znIRSed=l=Eg}mZAQ44vM6rP2=YOFuvlHNh4?dDb&E-`Rct{v0hAnw2A~N!-h?#>C(&GnM6WCQ@J9I1o+r{y0lWDei^p%A9dX?p=J}VNUAg{*3}Y68w;RcnxsUF1eq# z0>1@%_C7}C47A~~OQ(DUy5i@%;a9rh*Sg^~h_mM=HyZDZ@Xi?T#^9X^-i^gOL!k+4W5^mA zj$*vLy#--m9`Kft2@CfM4gXp56Z|R%dMy-qpx2ci^A?9iGJ*4#F=5{G1wI~Of^aWi zU;iNg@JL~2J&+ZC*~lyo3lIoGn2=Eapm1Nt!vp#DUg+UHf1!u3mwy0$_31Im%R4+c z)R0rg=Zun;Fk=_-5E7QK?{-lMhY#~_ox;Svw&ieCWRnf#w0;F}JYNk)8}Wn33r<*% zXA#1)Fhq&gUt;N>`oVbOMG5PNppEz$&f)4DK83?IINY7Xx%!>J>3-&LI3CBOUG{`f z9qW}iTm}P_a6gBljp&s)9M4sRdvN#=4iDmRZa-r>IEE*pJ~KTFmRl4FMAw>J!VwmyvA3-z*4#+f09<91JhkqO1K zfeY6KzxwF|^T;GhKM3>=6u$@h=@j1$T=@L|<$ng~f+)Qb=))-f540Oi@ffJDrTAZP zzok>$3H)SHd@$$_Q=HfrP~0B;T&4IS=>J`cdqKT{;z>|{L-Fh2r=8|d7lY$uTv-Dr z@grOUxEiHD0d;MPlo5*1aMzU?+0;?r1(szucUY@w3|$E zay}a=ZUg=^DULq2U{gNDc~Cz`@nKLefgESp1xyRfElR%(>NS*J*eb(3r}Siei$R=7 z-1>p8KgIFyL7WUJE&+8{iq99y8S=cwZn1={FUj>G?V3V-k|;mq_do{46QQ2V;n){h zJ0GMtnVSxCIG&eOpni_xHc&6;aO~d*Z8+VgxSkL*_c7#|{sX}e zd$Ca&GrTPXE)I5>VgGZ%UWMWyXNGV%wjt|IO^Qc=0qGa#{~4D6dwtE z35R1pU%*cchhslvK8dHeGx%A<;no}b2=Phv3&ofY$g9(c8d%*wdh8qCKeZku?Xm>Qlj{vuz{5%K$ zb0|*Y5JT}WsBhtL{EQ;^^bu6Md`=uBq%9LM?$+pIUKi}3GI&HaO@`&^rI-g z9k>aHbLSZw;JDjZ$_KqWr6=>uLQ3xq`tWY}I*uRQPx3t5K=B{oCzJ9M3j89)j{z_3 zhCikJD?r>DC{EU6t(2ca@UH+jEr|~~-%%8g2fZ1^{{`+q@h0H&I2?BY|E9$$g2SIM$Q(RX)XUv1*1X22RH9Nr(^o$7ZC#9B&%HD>-`12LOM_;p}|@ z=k<)kv7S8dUsBu({4{Ym_VW(zqjrjub*C)+ASL}=2!0ePE&$$}!@2j-U=GK=$a7kq z;^eum&EeS3X|&-qlEbl|8_=#X#Y=&Y=Wy(&5<5Z3n!~Z5$Dp^Rcnxqz4#$4T`e6=m zGH#_Ho}rYU+}AObULEvny5ZRzKe(SSp+Ea6{sQ4tX89?z?;KP7pw|FRff`40z2LX5Ph6^Z8;+aHo(*JCV%fo$imcwC+ zLcL;`YaEWHx8Hc`l_N3Hrs9UL4Lhn!|BX9M(T^6z2g?JV?X3~We$hq;vmo;ptuU~GZZK9JT2_ME=25?#6bpLyI!-sMF;5aXX>otPX<9E0?P40$I z>xM7j_{aUsg?zVJhmT*nIN=Rk_z1g;63^k3)&tzWv~mK;C0|LdXuHr?=p z&|lmi{3ysL^C!j60+$4TMBf4H`@bkI0h8`Kiobz%$#;r>0p!8hssyfB@_9J09{o zT9e}QpgxS^8=$UF@ocD*`Hc8K5BzURUj}t^irQ2t`9tj;zq#9dt4G9vW{9x>B)BovB0r8+!8EN1&$e)kES@8 zfA|z9*U6pYWc?6Maq@fQ48_TM<^#pa?;1uwEHW_MYaYtOLxuT#QVJCWc zK3?HojQ+f^Fh)N#7+0{|P#=AIz^$TB6QJrH7LF^}fsbI`Vm}Z6AYb&AgRty1Z(gV% z;AeK<}kgYk}xfXd79KojlcU^ zdoJVvs^dh~wOh!E3h_^Ss4^>2p)32T@cd8bPEJTmHNz}NtgHS{HAV+enhNdX{^KPM zu_yiCj2P8LM9}YC{BT3bK$!j9)7n&Yq^tHb#|TY`J#HVbZ|L^35u<%#%RUNtk8)x5 zb5Co}q9ZILjzMVv_T+shaX`t~Z$-Q6e+Bfv6dJ_+S411JC-Eyr3`-2i0aV6mCI09K zrGedCKXM)wNYj;lW>40fAls77KtiQXW)w&LVcijo-4Nw1h2CV{L1 znQtmPxlF@zoy>V44A?cg$Teu-ioA3bPQwfw)24{IB6}STE~r73D+<+N{TlY!Hx zb_d5hncf$FnhGFZ6_x?VPNqi{4E`KT&UbQ{YO-xy-bZfgL>);|64KtuRMmwQ2Qo+B zkBQ|bo?|##-es;y1!zi=Sh$9hC}u=kNgTMmi(FAXx9Z_gE(2`7OcGU9b!!{ci>_2X z&^EYY0Q0HqWEx%sx%GuHUwDeeVtdgSZrWIAa#JV4%_+E&1gwDlBn>U_Ka&+KW1zM4y~Q&!p!7PD+OhT6awnaQN*QXku>SavgmJbAt#R=ZapG^#)az z!{FRBNj8!&lKv3_D(M5zFsh)bP|~6|r=VP-a}?b}Efjs5D|%g_gb%>Ta({{}3Fwq& z431Hfxb04&*Iar`uIQ#T=7s3?Ut)ruf%*R-jbDO-Ty;R?d9rPw(gx6lg!oK}KHRLayU>H$)OLHV=0kp8sDUPHYxv7#Xvn&9o zE4k%e!vpw%A>sT0R+L{@WN1iu5G#x?42}q3=>{lKlf#9ICX}Ks+a!>h9LI}dD?-C8 zka7$c@&%D>so@YIWI@W}#qmP7L(`AihAL z=%vdx4^#~Ei~V81>x!b{!KvKiNOq~WHs+GjVgq{(mf$|pEw*EGcWiNX@^V$bAbwWf z71-Wrviw6LvyYkj7r$K!=iLs>ntwU?xO2R6#zORH%` zD>Z0y`ko@jnSE^)AsUruI&E*s##?NbcNG^2PJ~{jB1YvoHOqL+vUZKWN=>gHF}A;) zhnw|5gBANG=ETZZ2Gs|hDEEyYy`_2Kl6wZ}U+2huw!S&k!aJ&OrOJ-U(?bSVc&}wl z%&}OUm)-P~J!ypZQAV$(s9|Zdo7Cq{%5#rM?ozwkcEeb`TY87w z4>tKP`{I#fsq2fZLq1v^YkQ$)IV`NysO+UtxMjCda%RpH_sK2;7c7f&Dkz$KzUYxx zS<2qi4_9XFt=cd&`}WJ?dc7-iG);3SS)`rY?=C^y0d3uVT$*9=o%Qr*|aZ{oJ;2%(H~95se%4K5fzH zny_6Vt?jYpzb0cQEY(=^Z10mB9d$=GKknP?_N`8Ix7fTqXwQe#>k1k23j!`K@XAgV z1$-YYyQgW|X7|6%4w;!Z2zQ%zy!2?Uvx+f_oKQE~JN44{-Jdn1@`@jJ*80@-{XSv$ z(}IP8#!UxgmVX?bvrGN$qWt1Seq6d-K*dF09iyn}U#m8nUs3K;Fvq}mCTmlovei+; z`--d1#>?MOimg)7sN0#*?m4>O>2_}`_lzqyA6{)6TQSy99Cwbp>fj(zdeiv{07-`Wbqxj}SfzWz|O5}+31CHOjbZhjXoCh;SeU}_G z>duQv;~m#2y@VO*c|=Z(Ujv|J5#3f0S?X;qdsd<*Fyuv_AHB z4ZRvvvLx`sl$Q$at}DjrHsx=;muY)fHs`bUKaU>WY0R7T>VvFdeYg9$bDu^P&fNE( z?Wqak_hANKG8xRLy2I{kM=QP92uB<|-aPC@jz3|W?#!sRRZSNA%`;fPV2$e?C8hm> zw{z{PlN4OeZ8~;{FnUE&YJqubTl`*9(nh&^@c%q#610c9n)(`_k#vf?d1df_AoZ{_c^|{Y1lh{ zUcAHp>Q`PP?{^KGo5>i^vS4$F=HWrQOEu>e>`R{2qHuFCCssUZVPW6e=mMQrQ?+!K zR#qOGp6TYP`-NSydfBpT(~Uo+3KvZaT_;-dseWpr^2N=1g3xc?k7KKK_Y8TjWIjKj zWmCn}k5^ZG-f69UvG3coArmh*Mk@?quUr47mxHO%m+d9|Eh5I{eJ&0m0}oDZ`0`5D z*2;66e70eB!lUrA>;0YWOiv%5x3A@cdWrFuZ-xn%78cuD-22ypZ~lDVhHuL8D;4g) z+?f0&V`b1p=FaYJ#p`zz8u;jh=+G4L$ z@AsM+7Ccl?aAjB|sLQR0SM=g39uQ?O=dk+&6=(`V~yXM z$y56jTVLU*u9 zS#o`Zxp(0;&aUz`ccK&g)-cykJJ=bq&9)}=ny3Bez40f_iz93shdGRSkTO8+=)>yd zKJDEpanJ639eJi})%Le`TW{al=Hc$9v5XtHJ5Xuw(VhDpk{H@s>Q-hdTJX;Wob=nCB{RemjD~I;laxXCL<%)GOMGchRq279Z-@O|mKT+KO{x!8dC#(nl z)0k4*zQwL>^-`6NFqRc3^~O=JMPFy!xn~=_%(y&!?B)?ZgSbVfTf$N+w!NNlUQwKQ zde5p0GLf};?W37N!+rTBMZF&MTji(ao|m!C%soqE*(aZzK+lBeO)6zC*Bop%{!2&2 zvBRMFV7GkXtbcYHf&P@iBv8#{72w@-s|l*l|>oV*?- zw&g9FNEIJ@D;vx3)G~iFEUsgq)oJna)9+_Kow_5j&+B?iz2QE)3wjrO4j&_U-8?%a zxb&@Orp|+vUTYMeUs8QE-g4;7j88453X)lBRC>c`AL@9w)}obT!K+ zk#Bmrg?&hJn2%X-U8}6se`azkq7-(&nN(=YS&{NhwtkMUjF$S`z_2-$?DMChIi{8? zSfBedZuE0#*7ww16})#*{OaVE+)m@;myR0u zH+hHAm@o3LZu@O2X{@dsb!?$UvM72;sO;?nedScAwv4k~)ta;H!}pNd)o$jyYuvBK z?YyLSpo#myzML^@mDP{?_404`D}R%7_#G3~q-XkN@iLx~JHks$){Rr$cymgk`rW?z z0)4e2Zo&}5ORxIOE_uY#TD+?}?#=eK&yP=)3*Df>)gNL>)q88m&m4V9{8eGx@T7R< zX-90sj2N};^5L266=7FzuH*Wi{dZrdBRl6(#=iBu%e4lZM?6Vr{<j`Zl6;)KZH}h28s*QpVP~}R(CvoeAe&psLa!5Bou{fjrR)~j%jS9<%VOVeP|>Lh2;^eNpU0$ku8yQiyUDgX7tv6KW^o+SXEL{|i z=(s2?`ks5TyYl^k3+EPn}orw8(w`gDM$CJ@#%MZKB=s7U|vQ(Ptwx1(=%OvO zzpbC02sxR}dy&Z;A3Lg3w*HFReVhIV=Fgfs?Rct2&4;b|O_nR)e|%9m{guYGX}YI( zUfNp7^Kd?xS!eoTOy(ng)y5AlE8_HyvmGOqzs}OVHMLge3}qZMN0xG!Jb403-(iNU z0n2m@+l+0>GBYt7Yhq?%A;}z$RxGfsHXR_Q_!k0*mBx5vU^+)bYO+cpkFU zJMo`1GC$7p*P}9$*7%h{ONN3U5q>W+XjmtY$WYQZ#osx4IV29qP$~qGnlco#2QF#p z-;T*p{eENe5+eUwrpkzu4${bySMy&E4U|~Fw}Y01_ojyaqLyA;@aobbxGvHg5?pHc zmi900*l47eUbR0DNUtq9$u<0wQk12{kf&7X5bWE4r5~-|aSP4@oYX>BSER=}$u$fZ z4=jJuFYJLY>VZf0z+-ygt9syTdf;h2@SGm_t{(WF9{7o~&uExQY1ze4XD@(XCHKD@zeo@f^zE5CuI8PW7 z5l#hy?>rRvXcXWB0s|ZxO7Z+cLnC-pl)x`MfQk|ZfbTg}V5A@ zXhb9*?f}Xc@c5BJDlkS6BIE;Qw2%jn0JjL>DBuHz;=^bW(L#8Bcr=1zC>*c8fv-fc zOoj(Rfm{*`&geq4bO8BXI`X#Nd1@ZQIIeZ*nC^n5rpK7yuqrNx1xGLAE!rXNn@ zfi!)88edM+)9c+rENf2GI0Qv^exU=h8TQhT!;KJ@9ojK7yv-i8!p6-YkVAI38!<41@kOmV7@4405=*BY!+2fDFz)jKKnn z^m#7T(q|=bjDiK;HmIv5wxMJ#)#EuKM0kA1_3EMfZ9Gj6kG(`Td=A5c^*fN>S&Dz~ zRAkR3_!^XyC&8;x9DjntpO0V(k#2s(--c}X9EJtwvkAqImf{~AnN$M7-y?k%!S5no zKyZAX@VN>LuGbssk4fr8IV!zDe-!DE?!Dm!a$AEy0hWcKb%~2`G*N zI*#+VMLKnYV}BOGpCj9d;0`FC@dU@?i#@@EP(HH=4#yN&ya-+)#Z)lCzabkx*YJ7q z5KkoZmdH*gIBu8i1V4%L+(YnjD4(MQ|AcJGkFh|5^O=Hp1EG&Yaasw!2es!Tf^S3o z4Z-pA8$UO3y6X<-?X&jyxp1)-hJQKyqrg0c27qxQ% zjl(!CNMA(oCdBvCIE*s{#W_ynFwRS)FD3X>#H$g9CJdp8cmquj=T>+exI^PGz6`4O z9>K*Z&U1nfMEaKm?}zwDf*T;-O>kYrd!b1`Yy+5&9pZ8Xw?>>vaQt4{pWrS?uR-JV zb{IzEuwDVu>k>R1aW=vCAZ|wRLd2~IehP6Lf}cR#9&y}{hN%5r2)+#QKpICwFgOc} zT0-M6H2lLFEa?P)CB;;B5BwP7xL>MKo~HewZS@fa=!$Q3dSYW|FA3zTEQ&78&qUj;R?`5_$4&xYt1(x|V z4)yUUPAtKSjYH{uq<={8Zp6g|XTo5h=%bf*e7<^!yAXU9 z;-NGS`;i5MfU=p!VU-?8e}Le=h}RH&1L6+|j^9&0(KwXic2!7m|h zPva2b_MC?}+`y#-=@$|_6Y*$*KL$Rq#1kCP4^s(_=OI;mPkLJ;JeX@`4AjGXL$s_fb?kClk_=yv`UWP_W+y^l2gOc4$K9HG)MMG zg8L%dkl=XU%O?0bWaD`{j*sWJl7Bw~4dy$L-j0Y}Qem`16=<$0W z#g2*#6Z-jstxzD{2ID;eKh%#c)Blns7~qMP8SCuH)X zgwTKi0{H&XLEv|zzz7gVvhU~bFW@i17WnNcG$cw0Bmb29fghuS_<%`01mQuyYrr3# z`B(t5!MhOqNNiq2SQtMXWFHxA_{LE}Ki(o_2ZcwoBL(1RErBo&)y)s~2^9E+ z;XXo5_W#E{^5`9Wj**y25WE(&m*y(sD|2z{jmUTZ=na0 z3Bhr@W1I?h{&fBu(SVQr;rYUlxPE;8@H+2?2q%W1PoSTf)9XwSdOSjx5MazHb7X0(`o?hPyRR2 z1Nk3mm>+A>{|@&5p^)yxqXz>1j*1WCdhzx#^2f*VHWxooFhE=Ehw%rn1Lsr3X;S&f zANN1jD pI2f=&fAEL5DLHlmMCqgP`#dyg1=dF|@HGGApMvrqNH`EC^*`etl(YZ< literal 0 HcmV?d00001 diff --git a/perflab/matrix/fcyc.c b/perflab/matrix/fcyc.c index dc2f735..a5f4077 100644 --- a/perflab/matrix/fcyc.c +++ b/perflab/matrix/fcyc.c @@ -119,7 +119,7 @@ double fcyc(test_funct f, int *params) if (clear_cache) clear(); start_counter(); - f(params); + f((long*)params); cyc = get_counter(); if (cyc > 0.0) add_sample(cyc); @@ -131,7 +131,7 @@ double fcyc(test_funct f, int *params) clear(); start_counter(); for (i=0;i 0.0) add_sample(cyc); diff --git a/perflab/matrix/fcyc.o b/perflab/matrix/fcyc.o new file mode 100644 index 0000000000000000000000000000000000000000..0943503f01f79c1283daead7142e69beb9122d42 GIT binary patch literal 13792 zcmbW7d0b7;|Hsemwo5l!x+2j5tV!prO5pxol%`W z6NSe5X<&V($a^cfEM^b6x?^_e#3|hKqLn{|r!iowjGT;uOZh%FrI|PqCAZ3y*i)xe zajkaTpJQ7=Cm9i3MC>c$2EEqS)>ozt*M79NijvC&$ysc5c|_K1CRn+gQz_;O+l*_< z^VVBzDM;}4U(|CIvBw*}e@DI!H&h~DuZ$WN$X&*`LRZP%EwF|5W8@Bwt0VWUzob{Z zn2)T}M!_t$ZRGp{a$X)=*sv8lM;*lWhbXNls}udasvwadqH_AokuxeqDXgYaWX zQv#qa>Z`7JXlV~3G&Yq#Yl9amM>CBLtrA5oM29@7vRQ)80p~~amfGtO$|w04Kbc>c zP%M!|O3Z}9@qS@pkpV)9SQaCR5C;k)V#30Nb3?=tLYX8!G$L3iixdWgiTxzP0Kb3` zG438Vy|b6+Mh7V8-@e%sf7Iqk6AA`* zzWO-3wsm;zh}NcK1y^|r<_1jvwR;q#7bjX>+7M8>a^Uz1^=#|8oi=D^>^iY>MBt_F z*F2iNHayTDIcIT7Pi4-@F;UX*ysxC&PQyrzYJOeD zq$m3E+SVg$To3e(G(0FA`q!O|tnMveU9xt~2@Ud$?y=OrznK$czVh%XOl% z9_pS_sx$i>=e1GcpY#1Le$_Fb@aEaN+_edgy_s2SCT?gRP&cr2V{yVnyQdoc>+%vT zH_ZIuTB{+AF}fU^qu{!xw`7K8RtW#?3a`D)9qHY?9?_R{8cyx09&P?~)PS=AtH+EA z{wJ|}f{~A{yTh#=3I@A~=-C#{K0zxpKCw9tD`-5}kC;R>3O^Ii8u_i=6+dq#QUe3$oGdeyn3 z4Ib@OuUcWIY`WoMVfRfJI~VCbsropjprhEk-tXl4lv7hvs~dPZTaMg#pY434sj0J9 z=JiK51=5_M1ARm7(_ieUZ85yl;wboRIOqKC8PB`C&h~yecf#!F%2i&SnP#v~gR^l!Bvb7F1d8 zblcOR>06%OSY0&*rIVE>|JM2VDL$`N^~;jlzHa-HHZi{4y1z`V5>Bh=P;=Mj!qy3r z*NfK5lw12;XjDI$*XR`Dxg~UEdD-#0Y`*dJ#Sh~igbekttK9XyBr!Pj;;9WTaid-d zWP6tT57Kv>wj=z_vz}>5_&Y-Y_g9gE{Eh=VE<1cp^@Iust>h}VE_(tlRjI#DRh@F^ zhmlp^%oSP|GpidWoxD@6{xRzIGI0vfq89VJ%4uQhqbQY)|y8wOV;?t zR=4EdqpW)>O%X!>q5-EXQ?6)?zZ&M3a9uF-ZeDb1{-ClkC8>Q&Y|OKxee+t6rd`{b zwrrW+^3rP6KSw2Mb!}P|D=|~a70mD$kUQ>x`r*f$V>e&0sy!9@D}i9qa=3?P~Iltla!+jJx0)Upm_0=K8C;M{K{RCLHNtZP)BIWaFYmWA=zV z$Is2b9@ct(@N2%>*Qow0DiuYeoKEixi1t)}>LR}B8DSW-w|mT>5f6<0^H0`&FwhGA z7FF zOh?D1&l6P(2dF42{!nOT`WhCyI#zf|%ADUB?l9Vwu)01hMeSI^7x(HLR;!mtqe>I% zv$faMo;+Z3Jm&y2t3?doqSnu3j`m$QtFxk8in%J+8%GE<&J1=p_h0=rBYd64Ta$F-hCAlEblpMUZa!!69g{f?WBZ7aWCOdJfMgDcIxJi&U1Iysfk)IHqBw|@9O)uJem+*QP$zMzO-W>rTWuZ8p}Uxq+c&P zUuEo-?fOj3_EF&4IaBAoatU4g*2mU5JeOFUBM-Pg=>RjWUIKhV>vs5$)Kce(e!46CaD>>QFFb~B@ka7({D?^BNrYt3|P zrakB-&A8Rta(s5r_~>4Xm_lX8SwF|u4i7eGyyLRPlOC+fGS|wP_(-M5#clKJOLkML zO+0SdeuxkJk7t^`(l%i8>pRmj$B8~E8D$NAZmDv{?N0y1JKe^3D66Dg8}NLI^3DB4 zJu)6>?R1IxvCGA3YO30dcjMReyigo&yQyN{V)xQf8aB=R_^FP&{P{6P3OCC$dWcnW zv^sc}m?-$K->3bm@Q3z(E#L6&pWj_`t|?PKdw=(`p)+rEf2rfHvT@#)s%aDU%f@+@ zZXMomD`$%Ru*arXmrk};(75UvYOb~~vTwQfqL7D`dj*Cy9eJ-}W~h0bIIQ`?B<~V~B z`5V_(2R?TRORs)BTRNxXHuvp}v-|qIn9z93>et&t{#2=|{5Uizbx*LT!BZE{NO7UT zwkhd0Eo=Opl#iu)GX07!byaF!_i1XCf!Xxh5skV2cddA9C%Wb<>N#0BIxVp=8l*E# zt!(?_`pH$N>rMOhyt=ZgdQ;a$-@eS8Km4R#LDRU;5|fJopPzd6@JVwTbm7Kyr`+wg z&TgCLUK07~;Ks_QGYmFAjv8M6v0&hLNo-@uxVh6q@>9=TP;0nw+dfNm`H-LU;*ES- zw5Uut(Bq#ZFkvwrW}_Y33BM>oek z&6j0LPQ-ic@Sik0D>m)Ib3@PNdACdj`XPOt5?jL#ZAiCt-*wWtpZCJU-xoSmF(g#Dc- zOfeQ(3^B7bvk+RETUwc0nh!-WqbTf?-yX95ZuLCL3_3Ke9&=hJP?IBQ>>_DU8(APvVc#6e>(o*VIR3 zq^9b~4y-ncXnWuV)lpL`9c!2IM`{X_6#05QR_&;%%C?_Pw6Lnyh0E=7f{&&9XL)%(C#Ca|Lh0gqlsRhQ~*Aa=)s2kBhmM#^tvQ|+7EsOg5wcV zdTjC;6+(Z__)Pn=fX}r3ApNBRpV9oq27}KCp!iG9XFC02gTZGyL$NKN&X$zhY$*7$ z1@`ASAT}4Xf0f(D8&1wf1sGdkIFJ4fkNXW@^c%hu@y@8HL(n+j#NO{HPeDg}?c#ae z9$)_(p7$Gm5OLh@GP3uBd2)vERfMza2V!>#KSSwX6MmWE_>~~`f0N=Ggg>OXknksj zx6P!WfcOBj0LCvc&`0VQ9u+2L^l}lDhot!U6`P`l2DnL5yyqf z7#so^$nYp}gwzj3&?e-Z4)=?bcY^fiX>pV^G%PX#4F{WG#lYQ(R~1-U;7-IGYv9?X zhsrpCUPEp&#v}(0eyk`C`k>!%JfCr*{j8B=mUH9D;c#yJ`4oqC4{&%lRQ`ruQsQ)! z!?AzR-{x@aAN186j`hHwa5&cE=L$}5Qv3 zT`=G9-U%n{!w4M^&qj`(>mPpKL%-p=IPIqNxNV#+ayV`m`sX@_WB>TTsfOZs4&n5S zaIC|71Bmk-hxg_1t~fxHU>u=cA%}C@HKsVUYe6{O?r09@wmY8Vr#osN;uLc@_66e= z&hf(?FBzqW=Ryj_VI0?RIPPcM22PuQ!?$rb)CyD5w9pL+k|9O-jVlvApK8*Bp7R6r@Kiese_un`b$g{TbiY5BP z6hB4mGK#}IxJ>a~#K!w2oZz~gNt{pQxQ+M5lN6BD4GfGU#HWfc9wp%BB%Y3hP$_0;L%g-%jH3qx``AfWF(X3F4nj+D)bO@H>7RrT3IqXAV-lC-HNb@&oS) zE>Zd+M1POsKM1d({J?vN50oDKv~oD!Q^9_VPhRoh>&3in|3)}&4tE)>J9->F-bcM9 z{n?Gu!@6ck>0OE5f#R_Kj-&j*`<~exJ&w~F9XLf$e6SodQVwVLdt_cM;Bf2*#%nRf zVV}2(;t9n6Ulflfd?SbBc3~cG<8a(A?4Nd0{1EZ8hr_!d2IF#)!?B;UL|;zvO2Ypk z9LBMP%%2++hkZ^R#o@WzOz~#oho>xW2>kQO_zDQe^9sk051e{&IF7%Hr3}-T;#UYC zK>1fE{)bWAity1CwNaTJ%3>s|aCzKPpw~l{n>%Ml7{;o_9p_0-y!|4!{K;7JCggT2Z!Tvg!Q%;#pVBf zz%czd9Q&~*>yZVAV?V9vz{!f@-{qLG;c)C{H0ifd9FG0KzE?zX6`~(cI5u%tS`qF| z@mYjNa(D-%hqRQ#u^(8kG6{!qF(UnzN9k7)M@K1rA2PnjDLu6NkfX=f3%^h5DBg?2 z|CZun!kZ`#@4s3I$1)rW>~qw}Mg^`5u2-AGaXgvCe;0~xAiM{M<8kal`llbo`DC69 zp!jN32PYx>SNm1g+n3@Gh%Kgg zJ+UK6PWQ8ra6G5*0SWpWINTQ^A^;BSn2h2u54Tbr_92HU4*R9c6o>s-9mQe4*GzHP zH>%1S3D~u!( z8(3~(HmuDIkjk)uYXpk@V}gA`BZ4APJ+|!U?=KO@{&E_9KM4z!%CNtmb^IiL5yALN zkGz^BBKUtbVWAOX5TF>Kh_MfA2SkR4i_xbhvxrEU*bM!zz(Dl*2YsxO+rberW>J#J zD6vEqPudoT_ykG(!eIu<`|tnzSBNU&u7>*_{5X&T{l8nlLDvlx@KQtbO!C7?{=e+< z-+{OtSZDN687I6>U{01XjM!vOKz{%vhx&j`Q2{f$z9#|ZAS5|m-vV*mz9prD`t}ru z`nV2G4(PyT_+5aCY|13O+qA%d9T|M7WF8_oXh3k}>0vzPh>H1;lM0@dj4v^P?IB|~G zVyF*%8Ddn3Y{h-#11~-Pw01F4x7U6YX&>t2_B7Bzx4#}S+9yKHL(+aPY;Y2--Gk1t z3>gTS@$|zT@}>=zwOR|BfPt+c$?3sEkux2YIfC7iLZZ z8C*v>I@ezPmX7jD+c|Y{k*zAMelG8$Z>7@)qFvh{p@>51~GcUwdui znp)((cf-z6!h1#heV49}rzy3>Ai^-dI&8+k_hGpuzg8&Yyc0UwtFL7yS7}h!PnpdB E0YY?z=l}o! literal 0 HcmV?d00001 diff --git a/perflab/matrix/lsquare.o b/perflab/matrix/lsquare.o new file mode 100644 index 0000000000000000000000000000000000000000..f36c57e72e1c3c332ca84e61af224a973a2dc4ee GIT binary patch literal 13520 zcmbuF2Rv3^{P?etReB^M4ML(}XH}9-k%Ux|y|Q{d3TcQ+L!?wf(YHjUJv0<5g|zn& z?UIZpDZg_bpQHOdUta(J>-Rga*LBY4eLw50d+&3vN4BJQZZZOvgq5$b!97OG=j6s(DAC3 z<8c|)HRyPX+SRQb&$3H&{Dw-LW&VG&e2rp#LdRuvJY%QUbHXgz-J$>FU*1Fh_1*cS zJg<|EHJpPZ+frePU8KsC0{`jL| zcCW+v+ZmVg$G;Pee>+mb{kV#1H0HC>;eqF)17tKm9&KnYej;{1z>=e0jvCKeEjb>S z&>zoITi-jjWiPw?@g12}priJSC1;r=UMNUQN)|U7Ol%WBiLI@&O&O0x+x3%h#b0Lc zaqA*pto#{u*-Qt$m2s4S4I@$ThK|5Duh3vqds5?XI6bHr@UGqfa6jh zz;Ue!;J956;Aq~B6XXe_?5kE)@(L5%1URc=Iy+If=U+o+sGCy!R2sIS3FK#vR4Xpo zg)j0B4fa#==W-*sO1#)ee-ovVeiAsxzomIHv{w*D_L`zt7a?*?_LYpOwyrZ|R(F=% zH2$&kKyB30hMKSK>Mz_vc9g^~7-TTH+`Vo0yWc-&48QWd{K2~Tb$!*J9QpXnUQ)f* zJL1UJ3xdDQzc3yNYO7rme)l^gC@L?}r@!u&zC(Q;sYGtiv&+rQbInc3v&qfLo1dFl zzsGFWqBSDc!W;d*2i5H^opGo=WJA5Y#piV+eM`c3iRAZbPuCLIKH&ZI$^E??y_C3u zrQ;H&crqUO018*!p;QP~F~A z`E6%4wLjP1D_sz)J3+o=`nD>S`<~M8c8vF$X|mZfb$?ZePxYe|g*4-TmhmzJW)7bC zD?EOY$fqUx(;_@CFN%FXF670;tYu^RKN-hl=l*aMdK@qD!~SE`%A@-f)HPfZobpcw z%gatH{QCSK50Qnb5<1CCT%;`yecF>=G+p7*l!bNY`ahg&rkfQxYL?+4=j3(`hiJ9U z>CSs@dFd&he(V+-Wj5}nvDx-=b*q+&OkKU11_s7`?t4`4%RD~EmUq6TX|L;+%)(Qb zU6v$$R<1oS&P%#pQc~G4>sDs*LA{!388d&}7dgN1>+0*9);+au;bxT7zca{Hlox70 zD!xGNyta~g&X=ldMiG)s$L0@K2BC2!d1+c&u9EHtLZn<Vwykq}LZ>9B}weqFQM4c|ZduXHXrZ`M#smQlJ zmLHB6Xa!z8_VR4ek&N=85<@x)HZ`pt=k9HKE6(F(>n}#?WlKCaTVYUvk9^h0SFZYL zhWD7cxub8(-oLnABcxWNGqIp8Ywm`Q!~-(*I>+lqU5FgL`DDfJ)y?jiBPO~n5Yw`k z@$3+-AKB^fQ}e<5Rj&7*pI9@?tX%bAzIKjGT*@A&W6$*seFY1*Jju~X43*B-SwD0@ z`0+=()b1s!b>Fit3ex*%-i)V?mt8a0AA}i%~CN?pxS;@S#*>ApsSgYip zgthJOM}2sg8Mc02{L`Eqy{Le#RgvdKcYfKR5_{9k(Mvh1?1u@Ht+{>Mrx>#ff)Bz~ z1$1h}Y-Z{eX$+oTX8t@~*uy(&#mllua)MW-H=cH4`pR1eTaG#TE@*MC@`ss4&GHvC z(#>MEa<^?gV7ThlpZR;sPl&GwN%HtPK+yEA>I1=$^0z{|Sr4s*vZbu<{LBfl{3S}R-A&I_$t?7+KW23<_~Ar3 zz2OB~&g+X`*NzxoDg1d2BeL+xJ@v4-pdAJq#!N73iW&0MWAg0@ODm#lRc-3Nrv@)m ztruLZ@?@X(shpU2zeI8U0Kw+I0ZVOayT|{oI6eONT^yzknITgW__kt$-3f)sbDkJE zEwCT#p&owr#CpwVV&a=qa`kocZ;Pa_G&d_7`%HL@y2{F|JyQFMTjg9LXB=z1#`OZoOzf1>vQ$_#Wy;Js!F6;=LF`?(;ha)w&f@{FH)$`WWKtjtj6-W zt_hW{j{{7?0xrg$Hd`67FrO>5)?`=VcvdeHeI_Vl_2&$Zq~xxuL><(7w=oG0Zd+`azn z1@FPxkQ;ARz88C^D%}0{V0q;`q1m%#cKVI08Ls!K;GT8yks|+bVI{5++x|IrYLw)! z!cVt~&&zEMJ=EvNX!)q4lWy0p9#d?6S=L}|%Hi8d?jNNcI#vgy7`>lXRLEIo=zRI` z{_i_G59-c(_xMMG^tj6np@&SagqWzWn^h2HA~!a}KXZZkN8uOQPJxCy=b7)QX)ml?eA zp zesx>5!NB{zS|4TZohU1FN^YMd)ERVbR;zMYu%SoWN;$bIPme>TQ(c@Iz4se^^xhcx zGNoBOH0epw-lHjFgy(KLrGKGZa`?`m&#$KIh!>isYm7Xex+TrIT(x3Da8|TRx9fzOV4wFmOP=OMLBE6}iv* z04H)x_Ih70FE9T(FGj#kyRx*jv^?#m?y7}@RgT$?QmHnn5K=dn9;>@^?3660%Q3T8 zs_E$-xg>1zSe}{dE$YH_??V%a``2$t`b9^|N zq1^uD;B1v8QES8Pev3KG8y3@G%;l^q_8O)1M$Y)Nw3O9=ka#OIy<-WsAEYx-PT-{$@^VbHLmm!uo#^q&={92|IiHf}2O+}IYbli25Nn?zKa@7p~s zotuZhUNUa-?p=8qBQ`t_*K_Ho^vd1RS{x~fK&?b^>(Rcx%> zTzF7!yUm-@G|tlejz8_9PNEiZTQVkS`<)eyUtQF%{h<7j;~V3Pt5qYE@`vx^-iWmD zes@o!e9Pj1JN0Th7EYtAhk1OQly+Sx8N%q5cbTT=9JvIYkkO`kzTwh^WD}4Lgl8;rCM=!s;;c1go-H3YU#+P3|p4)C~YNab_sdM?f{+yTd zSHD=+vHwEot^%O)5x1T@!if9tb?ae?PA287;xaOS@BXttGg1~vlxs^RMeUxZkvytEDMr-p&V5Vg zfr|~DBDB6Gth?ATUh#{URJEm1h2w()4@Vi9DIP^jA0BpE%Xu6-&C$3^{qSePiO+c>&vDCy{H>T2pJ>1yka*VffGggk{&t%Qy~=s;@dnj}z)8R?Gz$Z(rx zWY9TW1mc_-ffy-)A^k+f(vY$gVng|77zY1x#sxENev4sBp~gDsXFR|BgM$b$0{x}@ z`v3bX2nwtUs|qtMoh;PdL@v79?1m7(S8USN7GipiCd5!bzra+8>HA+NhzK#9Uig4s z_`qH`8P&h1yCRDI1<=A{$}l1z{jK;vetYPz0#cD?jSra{72rfJ1y^{!uzmmxDSwU- zGe$>OY*2*CtQT(E3ui*37Djt>{WX0V&1n|8_>T_$qlZq+j|z>5^v8|haGpQc*FTbn z1^zE&CW_~W8caYW7o7}X{9}T7IE9(;{EjO;W*z94H!dD5-jBr>usD8mK=ipRj%SbX zc;I+XNVE8A7RR$n^uM$Gu;Y>yhgTThYozesULXN;yvMG9J&uJ7=J+75(*|LP3gKN6 z!{B+w1#`S=+^~R(T35yABnOO$Kdg`AO2T|kz$Z~5oU8-$F5Jy6L4PX6Igr0I#Zy3k zHpR0*_M*57_z9%=HIPFo4k8mn@mTP)p5orXGbsKY^4v}F+aMpNI2qIP6eoI>6ki8% zA5dHY?)$GO9s~I_Q~VC_-xN0^GMp##Nc_rCJQrkDil2l$btt|axGBX&fKR1(*L8_u z+$p{f^7N^0s7SxR{>r}ak6h4DP9ix?G#@P z;~)z2M&_jxxGcpDz|RPZllzlC#XZ5#42mxYKXZU%HJlo`J};#7AAx=xi{qyTa$i|S zaeS}BC6UGPeZh}U8D;~EV?X$L0GBk1>vdr!m*PIa_fUKe@In^Haeo0n#^N~cX5gnN zPS(#k7RP?bez?No*bo1U0IDk~eggd8U~%jx0@lGJ7RP?bec>6!Yrs!Ei(@~@z#CW` z`w@nDZ=(2H(C?sl66Av(iunbvL+mF8IEUibfe)m31#o#5$9V?xy)uk4i(~)aKwpjG zO~A*nIDQV%0Y4L29Qzp#{V}7sJa9YUWSwV&e`l6H_HPCLeOMg(C+{-?C~gG)BPi|w zJc{B|fyYza3-}6(djele@d)546b}QQ&f@HTWwAK!7umPFC>{@f3Mjq__(6)V2Ywtl z8Rr!+&evERRQV(Qn8mRwd7f#ZIJqvh_rj&&A(*V&Ll9&T#rFeOq&V5PDikOCd@RLZ zgCA{**8w-Ccqi~l6#oU>n&P9OpY{}20zMNs8Rt^i??IG)DCmb$oUG$$if;n_Sc<0t zU(MopUu!`Ag%l^($rBVO*S(7ruZH|9DV_-H=N`qU1Ah*jj6*(uRWZz0N}t@n1n{Cq zh1_TTxU9^gJkT53)bSgFz-W*+)c=OX(+pyoll}KweJqT_7h?JPYJx7H9Ws6N}?!704MB zF9mrg)MVTkSXaj>{eCzIDyJzf0-X3Gyz}4^fwjs#o9Q*kMKXhFw{vNm|#YJ$ksQ6G^5cmR$D*=z7_)y?66gL4LPjLg_2`rAAq+jb; z9QVr)^fyv`4)ATj$+(#z8(a=j`gx##gyLDi&rrMqcnQTX1Fv9ljL7fLyA&tuu$JOv z9e$L}6=8qfp|}C?J~$vMblg>No^YaH(+el> z-)MbBxbG3}1NIXrPM$Lzd*R-_@W@{Ha*FSVxakx>4*W31$-Gqc!r%158MrTz{t`d^ zf#d52Zj$F8g4mTA zh3EFd4*@6RFb?Fiz3}Q@{7Az7B6*T^tV?kT=&uFE7k71&NuxMmrV*~M#6RJpaGjv} zh+g=FUbroAG7f5xhf6QqzZXBdDQ*RcUZuD_a2QVha~&)J{u%O(f$o7h zC-0eUD1I8`X%xQ#@(hY!2ic9{4?vzv@fRTbLru?%5^#K3gG(Eq=u@1`mo3G~^@`Dq ziVfp=`=FY~?W%)Fi|Zfit;zF`;W3(i-aKzc(zfW`^`rs50fxPhB-abBD|3&{g9U9>q8XU#L-v8QobG^d@{r}Cl;er2E3=IzV zC(cn?q>1r-zU&(j7Umz0x)C11^VdY5VaG-BynPpd92g$08OcRo#<;v#NWed6egM}y zj0`@E-T$UiLPmJ*L?~l3kj3HtLQWG-`FGWLJb0_#`SgfBb!zxFz}H z=T|C4M9@EuV`WsdyQ=QLv~DUo(^Gs~xDgP093S7q==cSQ_0<0uh)=G^NR3~JJ(2%G zjh^DiK>Qd`#PK=kAoisHC5ZJD{}#kgqB>0M!7cwidQb72A-)mV`qrE1-h!6aCRa+vCSWYKtR+?k8|T#n(r60e9U9uc@BwBO!l9 J%77A?{{c=4kLCaX literal 0 HcmV?d00001 diff --git a/perflab/matrix/matrix_test b/perflab/matrix/matrix_test new file mode 100644 index 0000000000000000000000000000000000000000..9fbc0a4ecbe7a259d29048a1c411f07d9786d4f3 GIT binary patch literal 55496 zcmeFa33yaR)<0Z#?d~kySs`KVK-ftLtLzZg4h97b8We~j3q(Q^lMdpJ1k@N&aAh16 zMh8bnM;T{%6~>Xp1$A^#XV7s$+=I)As59>Qe!r@FyE_T^pZ9ye=l#Ctd%SRO)j4(Q z)TvXaPMur#_RZxp3uoz?CfvgilLeuv$qF;oDG2@5$sn03#tJ{K{Y5`v12!d2l}}X( zO4X5Mv`@Vbo@mzly;I&X?cvb5sVgB<);rnpaGWY}Q<0=#*0Y#(;Ew4PT@{_1y5*7f z@>IF#kbdk_csGr$N8KEpq{=~R^Qm@2PAbgZ|@sn;>hcIe%dZSY{q^DpBFbxwm4D}c9oej4aF9IQWo^_2y`E`DSE>AyPr>AwUo8vW_FYyUB< zx@y^&QNyasbE>Oq8rS5k89OFt%&6Rk+T0P~Cj{%0pX@g?T9Qs&I?h2oukA7Jc#Au& zfN6gO0j9J^Nof8_@NWPgloahRZ1zNa0w=+*JPCgAN$}U51Yds={8cBxpLY`c*T4sH zc0A?)aiV_pI|=@#li4*f}fb&G{PCN;II`GLNQ%vQ+;c(|s8S=!R z+W{}is%y(umMpIbhpSdqh?0_qa4F!j6)Q_BORK6yWqn13s8~}K77b-9D#}Y5D#E3e zl~pxW;R{7$Lv=+(ooEP`*EWX5s?ut(0tZ-W8QIi_OIDRu)v&yxzFt(;)mPPoD@8+n zX-zrE%GQ*YfKh36)kPJeOjWL|YYdl($_whN!WDviXxwAO?81U+(@RF=j?5hu$wza; zPl+H$MzJGuM~jkzqPZn#ykdD(L%5>8Xzujt+M0@@(q+}`>GD;zHIAGmiu8np0&~m2 zyfQJjb<8c@`KNVAiRb*Y#Ltns{v@1Pg(+`~9S$z7sw$CQ;*iZ9T}`pufqySy zLpJo~Zs?@ToEMKV^T?z)u?ZNdrG=;D4rp&jY=_2{nD}4YgQ%rw9?+ zuqUjy9Sk)+?%gjLw~c-V=(c`;!8Oo(DsqHZuk3MHo{!mj;@vP4TQO*9j%k_HH5j89X(gV>j-lR zJ33Fo7ZK*tb#$tP>j`toI+`cp^9Xb4KN^(qGQwQCjwS&{`rkUJzu})0G{3Mk)ckg+ z>D^=Vie?VqGyLh$#z{-i>9+K9CJXUJWp1GNddU}fsppLr^%NTSRX820p9~_Z-q&^{ z(0eTj_d6-*H3*Eg91kS*9GxY^@xbC| zfq&t6U{QLGgo|IQ3=BJX!7vxwJBrjKKOwiQi~_vKUFzkv*Yis|i`z z6WGKBg)B|4D-r_s08%7u2eeUpz%ZW_R(ev&A|AWh2uVIe6!$+$4MG$dIIqGjemKJ&9z5bT>O<}$n6rii9zdPCHSMAAn1ZpR_gCrC1N0rwZFueS>7f?YnW#a{ zMp?}hp(`HW4zXREI{5&6v>gVs7v^zErfc2fA?-(ZxAsLM7nLhM-U(Li^mh*sN`IJG zMcD&8=r7n=e_#HKa|rrY8vG-{s>OH~02PLy!i?5Un3a#vvF>Vxi+z|EuD?yKTI07( zA8L)CG#%9%-)X%PWNH$`uv-pxg)NxIp(>1?bX>j*-NL05|d9~N(YX~}p<>Me$LhNOijbhQIZ8wec) zuI3Yj7(FB)we zBK$Te3Nstnfe~!2%gzgQj|jRLXpZZ{oyHaQuXvmTb|d?d zBb=AF5bam!Q1is>5TlO2hnk<8+5D7JW?^$-_P{@T(FihbOdH&L*0CVUjVJz7W#HDitZ=mOWf zAo5h+808qA15ps(bkSayOb%u7UNUcWNuD^K59j=PsO-y7%f#%~JF$pJ%wY^*zr|go ze?D@-8ir&AzYv>YCgIPmBgRP9M<$DHbGmjM;mf{!obQgfMSxMs*R&xLpdT#CCe$G zD`7h!r2>9b4-^KcoCdccIA$=UrXWvj_|w<1#)V^=tq~vim?+VF4NP=6tZT6^CI(g6|%%W40bdTOh}^P%ht`sG~Em@yY(ks3a9^IW#TDnEt!938@qi z6Du}2l7k|`e>=K{W9uq%2dFzLaw$xr^;>AlJ_lL89L7*f;tzRPpSVc1rZCWJ!mdH|cJaq^?)_s+dewZyNG;Z2@CuFFcj-Lj zmjXq5MD&Gw$bDL4*uyJP_5c^(;d{xXaZZ$E25 z1UG?^95&&+9-RYRNeTWVq$N6_5x|1|Nk_A0Y8>e-+a9 zg~lCh=z@_7HNEu}*Y}W-5z-C-TmNOKx#ukMfmZ2$TP7XLg`(p><;YqySb|%$U!!+h zz-|}rKqFJ8EdW9tgvu+l{?iFvn!hh-{sN5JPG{w|u+^G_BISEqCf!T+*8&0dZ;;|C z>lq|G(22L14X^%aP-_>M#YaP155nt1g20BKlc2>~hU%el*3Ur~*m#wyw1brzpKIL@ zuMT5w`+MsllMJkQCcN< z3=Q`?0n4Osf!rS&H>o8D-5eTfp0o}b)O`(j>MD~nq4oQZC5L?u2WjzG87IvJG1EsS zm*z=J99ZIEY1Tv{VrGQ9AGG1cz;)MBS!GxT-ZG>^=RyM?fdyya5uC5YY{A54Wn2TR zOz_~6X2UN3f%9$D_zbp*v-qD#vE5R<7x%H=(73&gFF4Bzkc?**y@T6|$B{mV^EK|r zK#P0jeoAcnA3aSe@9r$+yP*0Q=i@kEj}<);G+x6_7&_s64A0t^|$|#{3i9EK>cGgqZle4cnl)ql=_F7i;qzLf^mDK z{$Z(uzX1{Z@f1?%d=XCG-YxphfiXUdzs6X=k1hIp<&3~U+#N1@{rC*-fXxUj-4iN% z5wSz0AOAwNCvhJBd`951#410W(+q^82NSV#FrIdcP-ElhdVnzK^M<%_!_E&~5Ah^& zkaI5-*ub^vc%TGP4YvW4MvtLnw_*ULTsw}o_4V0OiR@+@pVl>e06^C@h8I{zuI=LjIzl6_(G zt1&IaWa+P4#%tqR#%hzXikPb1LX7j#0)Pcgd(8rJ2Pf+d)2lwCKue|ja*M4x@`ql@2@xcSP zzIWBGqc{ER(?4DD{%`L3;1>^ku=~dM@7VhOmIwQlfBL6u-+lB-WF5jk-}>IwJNuQ# zyfEhSd++L7|400S!S~nhesA+V?{D4q{>?jslY;$nM^p|CzW2!Q-n;!Og&+3bhNcgG zzuhTwWI(;GYmeTyr4Iyn|F`SjyW!eC?@`JtgTdgWlN{ikSJ|;X6oSf~xSW_pl*7D= z`pRHgZDS4IHx1(D+4F{_tw!m{d$hG1oF zy{aDUS53W^)m8Y|bzkl373C^dx_UX@r}ZuFmpii3x2kka1Yj3tm4-{JgFw~R=LUmw zY5K>yxVk75u%{nKsz8sdcP zjZ~OV_MF$eN?$M!|Kr5;c?*x;x%q?Zu6p<3T~Ji+n98}Ki*hAi+1&fL{_*I|kEzm1 zfy&TjC#5SD7q-_rEFh@J>nqSdsDtC9NbA`#mF0WTlgM>pO?d@gPzGH?Df&-!t*QvG zs4WlD;-}YyeR%>Sl2<9FUs%?V>k|bW^txO*N&r^l)#h?F6v6O?broQbdebTz!ZBJ? zaD69jQpeF_=~R_JHs!XT@p!~ZKbxZPi=6UMZE~IrgCh!SL77zzt z!)6Kc$6jx1GvO?9#VfR_#-{|QCr*Jkmh02<`~jH zBQ>$P%1xXMq+75ZGy~}$vFBBX^v_7wBmDyDR;1q{-HFu1PTJ#06Og`wG!5y;Nb`{L zhaQKJrXjrro2CPi&ckLk?gho|NEajJf$e2^XcsLI+L|P-dxFQi34JCWk7~uk&{cm@ z%<#y~scd9N;avMFWGCLAG|QhcClG&ucdeMxZNlJ@*?ov+{lz%fK*xQFnBh-a>i5d` z&cgtkfUhMUM+oQDI1dBY18s|G{-o>mY5t7sjA{PhMsu2f;I&r1KkrIA-#@m=Gs8bm ze?87W7MOhhKu~}ZHKzH!`SJJ#m=J%%`2fcBd8FjEk-V-o^8LXpP1I?kzP3$m3caYPgd6X);;U$Uv{CntWHQLJUQ{p|ow-{3~o5WmmFZlQG+W zL^JM171HO+QEXwdt2?8M=ITtgzZkTWv4f=~QaYg@7y;ZEwso#QsmPfV^b7N1Xes|z;AV8d?F4RY z2i)Vp4efw?1-Kw^?mP74_c3q-f#Z%T58?#;#V+9XFpq zRn-}VaseMT7YPrJLkPHWz^x|&cF?5j>)c8tXIo^)-;VLHaT5ON_(=mlY2g3=8gPFH z=Kk)>o%_<~qWcN13ZzpK0vqx22oF#x-uaP-`}?x)4($3?yvQdH_jhLWV?5mQFMm+s z#zFW%9!~fpymUl3oJ4%(l=EJNM;}K}zNpQEzK(~xrYlJOE|&S$(-j8Gj6CK!^|_|= zkk1E@;w2Y(tZ*E(i+|Y3xN>v7=i&NsxBYLU;gclA6G|CQ6+872981{MGeSTK4|dA& zMxlh=eyLXe_pk2n$2+z>#<7E0PI{)3RyyfwC%w{1Z*kJQo%D}R`lOQ{a?A)GQxCHhq7_iQh4 zeLlg#kWUvAVIhh>!?D@X?%e56N6}vmDn&onnS;EwInkl$kM(is*9H~UC+^(oP)X7E z!DB+je{Eky;B$3;Li&8XX{6{~6~EAd&IQM!rbyyms&n9WY= zhq~q?vAmwyOiWK9lAc~m^S+Alv2+i;h^5)DOUU{RNiWmFuZ>Z~DhI9fKE{^JF!M{% zwDGg<%o|&9LB)4aqc`)xKOy&uIl?<0MEW(rn4>(akO-Y}o2U40MB>#iB`{V3NjiV^ zVvdtQhF$<|nd2o8)Okm2PLRMrowpq3L{NwYrZj%@33SSf|qr%muPelYTn+6$Ms-;A)*O2bqf#uR^j( zzn{R;t{0$Wi~a-2N;0niuwCy#m7FW3yGQ?;WTn2-k?hp(CfTy|D?#?KzJx%T6m^f@ z0+dNWJP|rN-!~tbHce!E%n)*(bbn7^`%UNX3i_9!f(EMu#*^NQb`gh9v&ozu$GBfvJi`rjx|&v=?` zlCaq=PcNegMe(e58_t0ubp;YnR_gUg(v!4!?=#5g=ToqRxLni}dND9sq6EAm{%%08 z;*{#sL>Ij3E&K*+`A$a*JuLRzXKZquyHrFXX7eB_BEWkhAAe}k)B1*0jP2*M4diYD zvX{;7`HZy=(jHAYf~nI{%Xp7P&jZmnPG*g@RkEh=-G`FC$qusAL9QlchJ#$>AfHCs zx3`0wuaJYc!$tTe=?N5k2o1qbwRfXM4@XuEy%CJ;I4^Z^h}g$Kp@t#nufS>j6T5mq zvQlIHFGxY_$`D;`U@Igx0JrXu`d3AnGtl>oNdKoaTyV%e5v1DI=MY7ByRzI*0+uJ> z^s?qf-B`-|>r-&XRf&t`l&J==3}I1>hg^QFn` z3|#;xNx~(N^D@ck3xISJ!@fa`_K_o+=4Nh22@h?(F%&9G^uVc)LEV8k6O#Zuh|I4D zWCDQA3(mZ}YyhXHtEw3)5sCePOhb=Sa3&QwrQCqS1i!`s?*%v=4Qbj!ZMZN!cqNuc z5xntYiC(}?!ZAAqFG4r`qJpNC+MDmW{BCcqrY(#AWv7o%zq*#@eMMNpLH*aBYMN+a;c?5<{7<4GZ$H0)Zx&SSy z%!zhn)C*YK$}#dz!06Xylb{bqxd%=!{Yu>kq9y_yOBB6J-4vqc1DuO9!4InQrP-bb za2ZjfaH*H5Um?f2Ai6c-dd@#nk;U=6O3?OAK78(^Qk;`ckN6E}xG&|+LHRj-=og3J z%);rVdpu`2JF^7fLZXJ_a?Vt8xCXhkI5Ei<%enCgfZyUw@S)lgFCEn}fbWwgES5^y z`a!o{amH~XE=^>ot^>$m#_Z-yCmmBOhz4L1`f6O~>kTNKBO>U@XcC16+twxav47>3gNwmW8xIbrNDAR156Z7(ioxAGgUl$e=d(vxu3h)D*au6pDwNvRTpq--hYi9vBxMwh)P9vn9s+0iL@Y9vO=1rv@n0|h)q z2=1S;GefCfF96f?ZD4tHylY5Cd%B99brmn`DooZ@n5?VlSy$1ruFGh-w~Cfi81CpQ zWVSM(HY2AyrLx{Zv&Lrhwn_tOB;uB^KpG`1)GPN0Il!9U3p@8xcA^21>2NlLf|1tR%E z0_o`wAn!$0&opH8Bui)SlRS)1tWjX(4Q8@|%(dPDjD(Yui5ui+UWlxbNGfplAx%J7 z_po%ZF$Jg;;`Jo+Dl#<9@ksh|uIRdX7E!vn5yY8Zt;=sfn0y8Rt!u)QWGk!!P-q#( zttjynq9iMs8uA(Mo}VNMJU0W`$3Y%(kg0!%(7vEXB`|#Ueu#{hIpO~oE~yMK;#SJ? zZmNZr1KO)h2MBf6m==T^zNLZIZX7h->3>oi>ybu(AI^i@1Y}c_@jn4Lt zeDVNT+e!)3n{c_F2}wF;u4(3VW55{QHKG(Lev#N-6l~#0Q`!mO7Qx*MD@Ztn1R#CMf+PNYBIpF zL`?zec8OXFkb6I#OMtp7k=9{sK6qJ|yB*EJ_sWAt+Vu)B-$7-IfiI(AuR~)nhSTmd zyePKcMZvvb_$!=)SdefG7-RFE%MuSd^qe#HB%oG|yj^gfhwo=%;t6aivv4rd~`v#wTRn?Q6qPU9d5>tu6x1Kf^NTI=elwVvz92_uq6C8mP3)-}>v zrFC8G(CBW)eUy4Bf1V}n)<%63XVki`aVm8-S;_Mmr&x0{eH{U=WMC|Lrh{Kod5idg6DiAw$moH#mD6?zZ;iFtNgUk4fcxN-!x)mocpAVt zIDNmuAQ!0d=GNgHnT;d+Q{~_?&Rfa49PdJjY5{l^QSqoSH~t;8@FKwHi5dvhX%h7g zz_)Npt0;1;V$Wq=g3cWoisW$#EJs?!nNh2l;Lzx9#!-zp=OC!%2!4Y4AK`SYV&`Rv zA*WLN5!?w!mUI}Sve4z}W;m&MEY6fxv(hQo+>A4w#nKsQMz;AKGT*dobIp&pd8ON? zn?YrHlyo)SuFQZ!Zqf9fvG{*H~&i9D@}e8!fjSs6R*ToLcO z3TZS$AHoo7Q_>9fBja|_svaiF^RElw2@#y=JVh_s)h+EBW$ISzVege@nHaVmt%Ntgp9~jKL;AfyAdOy*J%uMjo}p4jq1}K4QX3xFFmOxZDCL9M=O#-9fClAbA@ti z>24-Qs1cESiE`BDy~KF9^WO4a!u3<`>gGOC7{{sY?87@7>FSmusO}eR6|&pv2E$f2 z7`D2>u+NIj<9hqn%r;58CR+vviz)f4|Y5Q)5P5Q(SZs(~r% zBp9TWG+qyPR}fA;* zsafGq1HvoU;x$A@FC;hFn6T(-{ST2)X8HP22SVY0oW&+7cb}j*gWJhgg_c0BuaJN$ zGpXExqU$!)ouVHlntBi!?*ZYvOZQVm3ogdwaLJ4Cs~v4N#G7pM4)S46Smd4A24&Q< zG#JDMhr4g7ZT&Tbt$9^3@~7`~?!r*Ages_)CXI{eEIK*bdXSrP6O6|k1NyF+(r zMc!bxH`t&1Ufyy|E9M=Jyn}GxHr2F68n#g~H-qU!O0m@O((;uyz}nVgv;Zk4W>Jc6 zXfh3_lwx8I&4f=Ix!vMhjlgZBccqC9j8jvsX)!fnS__PMSgN(pU@cKQfSR62%Xtvsy+mCG)C`F_0B|49LG1T(XI<0!qG7GTdkx4uRVa89 z6tCg*gaI5!=AQ&E2VfFST&&)x!Vj0XFHXIaf3s&6g&qjB5gcx{werb^)@!uU-Le!qji3C=iCn_(;I4>*$E#55tN&&3G~9j5iCmJN|6=7-exo+hibkXj`cr>06$I_jnqo^et~9^BPX+TRubP6P$62oayiQmQE`f zT9FTD(cQ; zkB~26qJnKx1>2?ywoMgmn=06r4iv#QYb7|rHuK%Q@_iQ&GuTE;(}3(A8)s|R05i%n z5m($G;NraRqQOH6=XDp2ue&&JyJ!Ul9JqWVC8+Ub7w26UjqkcRuexX)Y3EHBZ8P%D zi!Pdqvu^|@r2Q5=o!4A6zUJb*<)ZN|7bniv(n0CO*;*s=1yats9!{K1p%G_mtw^0X zTl)j@bK@_>b&YmAuJcls;JQ}h`z`YWE0JF(*H974XD7aa z>IUO%<8I{qTX12VEoTdmjI$H(B@N^3MEOz+<7|U*wxnh#D5>#MOK;?RF|hXB>L)&d zp|#~nx*d7rPF#Bb1{W_c!k~dzUt^Z0Bs1$X9-fB;hjliPsSYx!UOmCErlV4tgY>${ z22iFu$enKWyMgQ?9}&YHy2#%H+0~)k?jpwknc*O}xX9T+c1xj*K0~+-d4cSaR159< zjIW&MC-PCI<%8Asba)(dN-Pt|;%;-$8K2SWAcG9?ZG`xIp6itspQM59cKJ5elSI;R zMG}MPyCm|6i&Q09ya})$)Du4^o&zuXa7)GOk&dY^cOtx|ZQl;Aub1~_IRsD%{LgTZ z`uMuS>%Hjpv{GT82k38$9ZF6=9@^G(NXf?(f0OaGwour606kmOiITr!%3dk7XS+>j zO%Top;r_x-gxtdS1y~kgnFDC|j82sN;gfF{%d%abmjU{X-=Q2s8F6S^)6w5#QvOEr zO!mWz;?zevlvJOv$73XS*bXY%M?PusJ%GwP+-Cam4n?b#x@^soZ3u3V@X)rt1>L7q zirS3L;5Qb{jKG=vEEv=VS#}n{`8cuRe!iT17Xhfl8P84mdbyTNaAC=

54$)J=uHmr2S_DSPs4@x zq_UAKWFya`fxobkYaB}1$R8X)Hu9JQ6r-lw$kPsvcBdb40NF_Yp3vxVM_GoO8Krt4 zv1+N7OV>EtM7H0x%hz`X8JII9mrC zc7Xn<+5}oC2CwW4S8cNs!#%vUoGj8^!?{EDPTqA4{0KF))1;5^JB+48#O`tfO2@1X zD#UdWJQXXS&5!Aw3*->Ul!Y^{uC@W@$3B(=EW$bHg`ld1_(C4>6<|5b$7;8t{AQd8 zwGUH@0g2iWH*kPw=#EAhF+7i|-DJeT7ua|_s1kWSiiiZy5cxt$cNHcWDrhiNFwnzm zs2IUeF@m9j149J|h6*XrI<&eavmA-Xi4mALuf_#67$NjzzLy&x48{n__`n$Dk*byf zf^mwk2~=JgAsAyNkR(F{W1IvsWQ<^pmw*Zqj0qALD5C^pq6G3}m|#qjz*re47?UM1 zRR#)1z64aHU`&(1JQ*q&(wnoOl7q zRMcQB?YbN#TV&W^lw_U{V7rVPjB}-Q_sGD(DD}-k9Thnk%hDMGJuE{9qfE-JVh5wb zdofBL7ak{gxCxblPVf-N;K7`Xa+ByZjvl2-$CSoljzK<-!GpPsIR+1AHL0m2NzL#? zQl|>@dJ=NXvz!QG6Y?+~#-OmbX-N8bOimoj<9`NaUF2q37qyw@;bxlU>CFwizCq3Q zrC|bUqhQ~fCt)xu32i(9uLQi>d;&fRzsknyI2F7znR^hE@?M;Y_&Bu~K+GXP z2XPMIErm9DIQjGYdjBTXsi2y|PUj_ID&w@DLQ7Mn$O{3^CyFl<=1Wu!!1IV2hI-Qi z)Wt6VHsSPFA=V}DtQ6=Tr@y9gm6#!`RRQE{b@m`sn~}_Z+yL-eoCB``z_)Mw6{66` zlHI`m0q39%5WYu$U~p)6@KTug5GeIqQ1CJ+{*F`M2H-h2n`!BcjZQ@?vvKNg0H{P}83FEnT!GA`1U?6_6`3tKtsek9h0LQk zlWnMNYCpx%5dby`q%RFNjML%+_grL#;>2N6kvX+MY9j;8YcHXYUW8IE6e9;R z6#OihMM|K|N`4Sj1?3 zgN8_;D1(MKXf$m@n=g%Nh_nj6B{%`6G^U}_n63nP9?nea)NRM46UvcV{0O&BAK;W) zw7Vd~lUmF|rY8YvaUwEfaRyjZs&YA?B{-!j*E${Ok5V2|mG`3bE}T-8&m;2}oIDb^ zliiygU)xtA#N8@E$0?@lcyBrRoR0UF^A69o*zw+S!lC0-b(ZY-o8bH!PTBE3vg1SX zoG%Hd?6}*GS%HT7%8oZ@;1|+3WykMAW*Y%^{83~cA;6BmjLb_o1FR`KuHjkkzk!h* z-{W+kUB@SbVk}PC@iJseaq`G5rv3O6wM?X54zIr#N7T!~^$%q>g+26-;m~?#5<82u zsZ}D0s9~vVL>DS9^+M5A8j&#RGUS~g!-I{8k>X@{u-0*B+a1wf02PnE$>`ZgWa3qT zps4`CB5DE7gz>oG!Bnu<8& z?-P0%T%@eGarp{osz~F{6ZEXqPb8uI&5x!+8&rFp(yGu#Q=yHfUf0u9hy)~;ENEaF z63O)pR7s}>OwolcXyX#51LZUJ%maj@>jn%5p5K1rsCB(U^R|O@b47L+c6%L0oZpK# zE(3XXANKzj0QusH!R^oNkwpC-;7*)omFVZ;06Ys|KN07NesM%3bVK)Wii|7SZiej+ z=Um`;Xj{={MmpPE0=lzs$~H6P(}~pp>xp8U8S=&Is{yXZi8eE3o7(~0Ld1C@Q?@w{ z?@|nx?^9?6LeIDiwyq}%!#_TLLj1V+f>b^}F#;k!%Uh6&Y9Z}X?ebI&Su0z_$4`zo z{4j_vF%@6{mLRRyA~s7?eDtpWH4PKgH!2o6B~wRxalm;y62@y)dDVgk~cY-x8D`7xpLFzbU1RPO*Nmuei^9_cDAC&B=hM_Tw$Iabct&{l@^)P|=n&>o%^Pi+*(FN!}ao-Fg? zfhP*qoC_@|F*~Y!1hv|vfzL?hSaDP=bx`se9goR9hQdtDtnt&p@odeV8&Rt_+8Kjm zjIk3Ws%cIK+lrd3r+rn(gULdo_S55JQ^PfHObyHe)J}A*#UGief%b~Z(x9meYEp^^Q)a&a=pMMSWZc(|5ES zt!(fT{UZ92R_hvbf7wIFav_Qs&<=t5GB6_c!md$69u%)F#2gh|kMr^-|9IzB6I0w*LE zoQ_LRFgi`J>%~>5kMa_IJ1#xhvF=1IJ$Nn8HBqc8?fZI_=By0^A{H3Y0j3Hk+MpB8 zm%(bj3~_zp&_I77uFf!iKxFSZE)gG_B1LFAhzK{V4e)|)PBr01f#`VS+>P{;{ zY~TBz#6AZ^9p%8>NOxjAKW2bhKxL@qc6dN$5VLSd%oG3Yh+3){ zamTRPh!3_;WLfylt=lRGsC|5o*rmPIKoQtEQjvMQa{URe!O1z$$&n?x46-KEC%NH0 zMCs(418UYMx$`DD8veDnQx)Dj^|O4GJI$R4>8=a!;w}`iVMzjM@!Pl9#?hzD$oOH^ zR#6oe_ij;(UI^=El-@mR(LIzY_f)R3S4WHP-PxjpQB`Btqq9Z#=^U@a1ILE`+3ig_ zV(L-jjQCg1-~Q;D7|_m=l{*>e&hV4C->BIRacBOCTrhT^PU3)FlcFI@=ga6@Xi*E* zBNl471_Ey#u@Cb^_F=hWtZq%SbQO_A?@g6IchH<3wBAl;78e~>&S4H~d~{e7r2Sy@ z6FZNF-@SVZxVDw#I-%tD1C;_%+TB$lvVf6x9`mTVj>1-`dmqNd!MzQYv%S5mjfNC* zg>yX-THt3G_;(VPQQMmKtv`{8)V)OXM#r$hh;t>*OL5+eQ`dUyfw&yxguL8|jCLOj zeus;(iFzI!jlT=c)-1zrOtrmpENz%I%Qm_eOVp{UR-CQP zLE)*$Bw0NQ*IB7^=2%(lESuOBRx0SScDWGVgtdClv4+jD{OhcQjdS`~+InlWZOjNt zcHZTwX;w0Rxv6Qu`mM|*mOmV}SUVF0{;<^*g@)bW5Rp>0v)(=wwkKODs;U7=JVn+i zgp9;uyc&y4*y=0ci*{MVn7PC1&g6c}V=rPak{4M%dl51}sESKeH3&_uxw@Hs}T%=i=kzTwm*OH~SaelE|N~tNRly4$|b&9EFFSoqtnPvY{rqFEC9Hok^ zR2-Hqfg8}eX4$OcO+hR1b1LVcju8&pcids;EwWNsL^jDqR>Ccy4qFMR;UoHft3MLM ze#D_7&YlR`JjvCvo6sfJ^`<(q+#plv#Y#khRtUNx4t*cnz#FjIAq?#25H1b)B+Fh8 z@FpwK_~qp=5@JW8(y3u!A8^N#rjdCL$IpUfKQ@%IBep)c3kHH3_1M#`WbmUftmm;` zwvuEzNv4^yYlCCEHb)wXZl4uvB=@t|#gNJPo6xpdy$NdgF*9`mlVlpU;*LYU+Em%= z;eT ztSueFzme0Wf8e7zBZZf2T=ZOAOz9d%GdGzRYdHLa`)d`WFUK7(v1!J$0@*aGXp>2g zVf-F8(E|$e(Hw?Joxp!|vHMsVuxAeNe^hfhYgSt0(`8i{8S1Y7(YkOub~n(a!}9u9 zS~+v9gi0%J^2B%X^{GWxHwb{=N%ewc(%@(qZ~{TiN&_nRE;O&qOlrT_3fdKJiS`fJ zA)E?+J(I!_8^&pxnwpV{Aq>i~nwSc|EbYqP?CqOsr5WWEHT7w0AefI^C#|rXCKZ;3 z5~;6UN?$8sn`uUAl%8|V=&6X4U1CSMwOX1~?#PMOkn60|*IB+p*2MLehn94MG$idb z@4LZV>vUsHF}mcv%QC!$(tt4t-R!gN`y)Mo{FK={r<3s0q%iL(;WtU)Cu(QAGn=kf z&DnFn(Kt`D(q-SVKoG9Noa+SyI?;8GDpI;pQ%1LQVrT9|tUH`oIk7W$B4iv%Z^ovV zLpzTsKl7e74(c3O{4UVOlS0Gd6oixL27z*lsS0B~JsReb_ju6C8H<6x6GL~ly(!XZ zHFVDKov8a9yqnzSOZ2H$fAlPwL%&-(J(yXRoIA)YPqX1OQoYN)%dLQ&h!O2da1lVd3?IIm z)rSj_-+Ms%i3VK-4rQF_Ckh zv&r(mYxObwBuX&4YF1z4++xiBew!>f;B$6a@nDy^$@1L~_*4s@s^|tp%DZ!jvFt?v zKnG{Tl}xwav%Cfj+Ug=D+J)UMjGQr!Max01Rs!9AQHe0e(l=Yl##yM4&XLuB3xDFQ z8pT#T>w1r?MRpxHG@b3;X5WE{XY_(Ipc7dv#ly^Nl&gS_WdU*=LWx@Tb^A}=CJPVK zxET3hnKmq#i<05JUZKO5e)co5G;o`Xu!bt4vto%hM;SRpG$YJ$No#v!h%M_GOHX67 zwwEiT?&T2Z{kk?a(TWbp;9~VeM`$B_AVvf|?HO;RDa}Yx<9Vtx!g{|{X?FYw8spWa zu!;YM*~Q6st>vYI+3f`*WoOa=YiMjyi(4CHF^q5w#!9+<&BAJ_)2nmgM(_ip@lBL5 zgN48Zj+arIX3M!7W;Sf)FsZbpVv1=JsgjsQ z)o6sEWuVh9wlU3!3qmqX*;MJ|%y6;Qbu*)(q~j97XBMWt%hK}fftIBjv=DM*?Ln7p zWk8m_L%MImjEdrDGk)HX4GmCkgE6E+86F zAKI}Od3VESB;EnQu(wGSCd$RzN|TNUfi8M)S&O38vHV%~#ZnBYcAvOkT8m--nbv7% z&>mD+L`~u$C+-0-FpM;9d;6kAvYh5l4BE7dbL|XSwOfa(jfgyBtGZ@x!)>D5Ru(#A zIL7TRE;fQ&3J}MR!e2Nsu4)Hz8wqNuPvAgJR>IIMVEmQAT-%73gGCiGG0UQ2<|;9jqy_)`k`7mT^)&G=JbfL;Q0jJ_6;HQb$f$RYHLjT5z~9ET?6Xs2 z$4AbfiB@2?m3+44n}hXo71q*It#~72)BW%fwVc03<{W!dxY%A~tiD6K7aPM)#cmQy2gl}^}%*4??s8>Un*rX3uqx+A7pKvFo56fk}sPB)=&T`tz+ zMy!=`g-S&a(5QB*oH@O*Byi>oJK4 z@DJP}n^JQIEjCO<3*H-K6CUW9&P6p9ifzih?frGTrq}@t+pR&VqLPx@%F2?kC@DC- zq`soGyreE%FG|W*lvdS2Zm0;0^4b!z6=h|$tLnt6(v=k@_|ror%NiRllyz2>)~~EU zmt-d(aoNhoI`*Xme=`Vw6sZA}m7=n)9`aR+hGk?$U0hIK6|RuVkQIaN`i8&q1HGV! zrApq0($y78870-VWhKchd|c`)bn;bR>n+LT5kdsrqd32GoI`mYpH0v)5jWh)!-m$*x6@HfOTL+}^H zr2K^V2d%2ZqNV}{1~SpuP+d_`2R&lIhL0E(R^85qw*7&AFx1R>dV6oWdia;d-4ykiE933u-f`nrT8OeQnnH)sc3-Fl|vKZ z(n|b=ud48cC!l024PmrbQUd!btF0+-K;LV~ty;<+Q9p`6w5$r>>;Wy*B2Bk?7XG?e zBmR8ag+l&yF_b}M%(;e@0{U(Q7ykV*0fMp;d?RLlV`=$38gL1?!)Tb1BO*ruZbfpb zX~3U^gFKk;5%y@i5;~KN@Qs=1h{80{B4=D!%$`|PQdBT^Ci;X%q8d^PsVw4dMs%4wPkQ&X`KNE-{xsU@?js<%aZ4Lw3xT<80yk02Q$iV})PE^R)Ko+iXYc~k2T;(MfsydiJ z71gvs=(!q%0t=~3%1W3FTT#2JV%YK(c@3uwtE;a)4@=gtvWC*SI{sybVQ}$?5S9&F zRT{3ZTEnVfB3Go`GBK>Np?+9()v{sB%gTnqsh5lyl~Y%igTM9HxF%oV0X>lA;1F zU```n-Ee+mX+3ijE7BsDw8$YXa!HG};T?7NFrNWKJ#QNdZ}n(r znzwnLW%8`&8ZWWecpvd*>XAhrvzg^_2Cq74_ZaRyWS0` zzeP5$YRjwnsC7_sIA|TVtU58G-!}8G^|s|ej?d0}IR@`mxk?8eRN^776IwCw2eZ5rS_4cpqKm&M|M(DPgzq0SMliMT7^FHzf zdVQ3ZJgE|;0`F9I8Ja?d$eU0>b6&C+X5S%NsX1hVR&GwXL9_2Nb|mLYflxA1TV{?_ z$dm|jx>jaRSIAT#-SIaEjP&kE$=#8f+kKlkatdlrnH~pjE(%o=B!~-XRKJ|7Ijj~! z)UJ<`B$Lg0?P`3I$h=x>?oiOIZDzq{Eruy;#4t45cZN3K{0_W9 zIzLJZtxI_pfVkDrmYA)o&H|^-0$E2}LUk$=TyOr&q!Itj+-ch0@&?fZMrHmEdH6Nt ze`h}L;su@wIET6CKkBudE^ZxxN{5ZOJ0e~J-T0Xq3sVMta|qX$O{o24^F1?Va|mB| zRiAqWqeW9OT<@7*v-oQ$>IB80g2+(%8Lq6N2gadCPi>yrv#;_y%9u<~rvg>Qyg~aE zpFc4_)jq@bs?5){2h^B44AH<1S}VTzVzz4QsN{9VRV2U|US&K0L@eE(@R1(#Puf~T zTWqdXbQnNMw^(4)PL6^z8p<`Gwt27iCyk@BM|)m#9K-Y4i(14nya>r6j^Vg=y-wom z_0M$}(C7MBy3v3Mdli}ke3j8+NVvtg&cO4}ZRT~xZH8BVt(^0EJ7qqDgUj|E86W|e z!iQb2->XA&_v%l}HlEfG5j=!8*yL5<1W>Y=Ctk#OyQ={CnD=^)`Me3@ncF68yU}aj z5Vtcf0dZ==W^dsexGkD+XecKC* zd)s?M99EIb%$;$6kQul9x8CDkc%tL7e5YH^PoK)e+|Tkdr~F~J{NYIXt1K^f%D21a z+wr|9RiEWaG2cAikw&-0=2By@&Xhx+Y<6u&$A7)iu_gcrsO9&oR2w+JQXoE0N!U)zHC9-W1)u#=2fs zy573o^2D_o=acewYde6W3fPWnZb`J>_0~Us{Mb1Z`T@R}kh%J##gE!={75m{fCy_X zd;2NLvLn>x-vLgOAW=NbziJzF_?8X&Ax_RiaEa0xXz3~ ziVJufFLS+lnX3qVAuFFLyJ`Mgf5_;L%^XaGhnWVNN)0K;)zN zH<=0eqk3)K@U=F>{U*MP%i3n1b}qc;xflGW^uh-B`Dn>7%n=jp?K|=FS3K2Iz&Z8Bw%S=o(9@!`wRnL1T*&;FL|PMz$IFzZ7TYs`n{7mT8@ zc6?#KzY0n7$u0Is@f&N&mS)|sE!FmS0(8aNO0>@pOTeP(M*RRJDEqkmBB zVGhx4`UWnQQPdKJ!iw)wffR8731u!?0KLoV(6&qKs8`}6s@CA7*<1=3(M8EfxfgOm zu{{A^iM|`L0v4gy^dq{q42_tFDKzhd%ulqhF{n_SlLwZoCh)iE(k`iNNg$OV@H(&= zpA0b;Dg!csDN-2cR?IYGX>P;7xeLD(Mwr)OX#kfU)S(+W5j)2isxp1@ldnBc=JOb}i$oP5RH7+^e@DA^y0= z(1^LU43g#}hx}XGJ-W48jKs|uNmW)Bsf#Jb!63(#xr1MJ1#dB~(AK#(Mdmu~I-u~# z$?co_{L`vnFQ zpB0AsH|f9B<*=B))bGMwiz99fS+kB|XjWvfZ%M{y&FY9De3|j?%i1k8e>kgKj5~Oz za))u3LUgY1Gac^hXNog!QzU2p03G4swtkA-#3x=>m@D7Vv=yr0kCB1`t;#HVQq!tL zTzq^&LL%O8mh($hj9z0T6#nQf$c^3`GZ3_Pi^as|Z%jgI|5(h)@`!ImcHYTKPlh+k zS0wC68@wZVa34g-kY&XRt6XKO1IX-19lRq&l_i*(<=E^qo@;o+M~jq zP_WO~O0Y`&f@W}ksR1!kD^nR^k91I`NI`2Uy|Bg9YcN{G51U1dr8e^>RL-D>4SUqIA7c| z{2mvR(yIq{UOZl}&liUu8lT{>BVR+oS2kj&-2cmy{OlE+ZKaXPwW7K`T)n0}yk@PO zp3w?xxii&{MYFcC17;)ks@hHA&Dstv&Yjbk(mS*VG>@Fp03OixX#f2g4QD7~lq~I& zoUmeaE-I3ZXhkO|ny-rHcP_HG;}I+DlU$v=h>zN5bb$Y3`y{6r``qB#n&50diG5z< z5cPAWto`r4%G^xc8^9sMOncFvrol!DDwCB4%8jPdAGcEXH7cm=|4UXn^)#9ooIjjC zOwT-RL3?xm(x5If3Uy2rJZ%QXg5Wz^Co;#_5Y@IMwMF& z=SfDcamJPusS?LFV@u|%62~-SO6={S5wuLQbn>y58Ksn#IknPR)+*17xXtrwBTkb{ zVvO>Mh^GO7{bDX+6&l7h(z!xE;3h0lyiq^ z0yW=3VGG&W|HI#VUP0Ht;=eTCMFKILe9Z7)+HNi01-WUs8{F|HEmS!l14P3v^O7ss zH*>pokLEpJTr4Dqd$jwtI0qtw`w?*@NO+WbIDwGk1uaox`hFLPJg=b21)mz(X}Et) zn`^I;-ILSN6p83m;#_po?UP7DpVEjymtth6G6f%^(%S7*_Rh^lRSK;=zfRh!u6ZHLbP|7PEMmfW zTSP}lVuqwb*W0&wc!7FpfsyMfM?}T&p|Xl2WhbQnuaw!xOLIqb|LO7%%PYPqKY-KO z0X#tA3F^BF%ekh9%EEBY+YxUn3#VQnoOZR_r((!>THF94j*m=;@HW z>@5+{QRwC0XZ_uneGe^FW&Gu4mzU6&@|2mShD$HXMM=Uv_zBLJJjB-nFP86pCGZq* zDbG{_5riX7n5~fFpK}oNFOEknUT-+NGI9BbK=e7AG4U*L@$b)6`hkpOEHKR(m8n0b z`u>Ehlj@(-A5&je)fPXTZ<+XWs?RBxi669hz2Wma@$KcV{G1?~c(ujr4X1-9zC}(h zs4wS^CVmY5i?#nNC2+t+_1^1`@t61#|BnYqne>xtvvBUu}V zZ0fqjtIV7^n9(oA^kwGaL=iumfY+cMQtn+r_=ovnv`+&4GR1#O)wUb@wHOaP_GT1+ zTyd^R$CW&Dit`&gee-E16!)ibmMOw(kT0h~lW`*f$8FJiy`z5o`K><&UJXARwO-B( zrY^Pqmm2=P%nF}ZKU_{rj%Nu{kJ@>Q&BQ8~=JpEpF%yG?4x}agWN{adZY$BA(r*5g zZnr4zFEI0>#iJfvT-JRn*1xh8l=LH4K4!gSI#$VdzFw+1&ihLE$(6vLDS^KLT-wn_#cWAkZm9Db($f02t1!dFh1i=vOUbj?}^@Vt6O z^%Dt)%PqZxbLcN=w^#SNbPBbR@T1jtSakar@M`GS>s0XEC$qjvJEv+KqL0&NmbiAB zc7s`f^OETMv#sB*_zw-v&+90kga+V}FOO4)qME9Iw$33BAwLQ7*^(U(S}*qd1FFxp z>)M>U=}^3Dr30w{q~e^&nlqqh6z3OmT5U6?xWB;mv%qb6l>Wi-Ic4#Bb5#46{WYzA z{1pd}E6!{9TU!2;7S}{XoGO8T0$j$Ge}1?KJQ1;HRF>>n!9+w>pun}V;*=XU;hviX zNB{auaV`dK+hMz^YD3GqX~mA~I_oEN8%9>CpIz#Q{Ur-UOTPX@`eaE7>0EeLF(xqi zUCIO%0UoDtu?9%HiqiWm%g<5n934+th;gQ#BmG&DUV&z)G0OoY%c~TFE7<(lwsd!A zD_EOgM+a>kLHmw$cU#c08ARPJT|F)7uAryAy-$dx>6Wgxz|ufdTe>=1T7z}Tb;%7; znqd%t%Lc-pWuFT5IaFNV1Qztp?f{(2+#pzKxv`zyt)MODb_%mx$a5gThuY#c#^o%~ zU48y)NS$=Gwlv8Iq1v{3pmgWZf8)J{Qj$ zPRAPt``ETG9Dy9KjQ%L2J9JQ>+`2Cx=8$;hNR#ga+{X?4!XW;oM@Rt{LU63qj=3Cz zf&k+(Xxp|qK%>ZjEjnC5+MX1a^hoAu&~oDrgjZ{1Az$~^iDT)_I;s%b>tw&p5Kl4DT608S?OqP10uPT=$QUnZrIeYK~Qmk80I zw?iS+iP-ho**xtpsXgUt)vQ$Mm%k4FjZN+R^1rF}Y=1ttQ1*HTEI*6! zu4m8p3Y2x){?zx}nU6gCp$$%xGV2+z|LN~jM54$41$j}0K$)G_+-B9hd*E|Dzu-` z_ct@m01;L@5pHQuc>{Z*_I&?yP3@;cOBbOX}sx<$#1XR-AKYpdG5-69by+aGzzO;)csHnTVq`9kv{kWcxPUv{1 zXWzcDggusyvf)=5T#?}0H +#include +#include -#include -#include -#include "rowcol.h" -#include - -/* 参考的列求和函数实现 */ -/* 计算矩阵中的每一列的和。请注意对于行和列求和来说,调用参数是 - 一样的,只是第2个参数不会用到而已 +/* ????????????????? */ +/* ??????????????????????????????????????????????? + ??????????2????????????????? */ -void c_sum(matrix_t M, vector_t rowsum, vector_t colsum) -{ - int i,j; +void c_sum(matrix_t M, vector_t rowsum, vector_t colsum) { + int i, j; + for (j = 0; j < N; j++) { + colsum[j] = 0; + for (i = 0; i < N; i++) + colsum[j] += M[i][j]; + } +} + +/* ???????????????????? */ +/* ??????????????????????? */ + +void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) { + int i, j; + for (i = 0; i < N; i++) { + rowsum[i] = colsum[i] = 0; for (j = 0; j < N; j++) { - colsum[j] = 0; - for (i = 0; i < N; i++) - colsum[j] += M[i][j]; + rowsum[i] += M[i][j]; + colsum[i] += M[j][i]; } + } } - -/* 参考的列和行求和函数实现 */ -/* 计算矩阵中的每一行、每一列的和。 */ - -void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) -{ - int i,j; - for (i = 0; i < N; i++) { - rowsum[i] = colsum[i] = 0; - for (j = 0; j < N; j++) { - rowsum[i] += M[i][j]; - colsum[i] += M[j][i]; - } - } -} - - - -/* - 这个表格包含多个数组元素,每一组元素(函数名字, COL/ROWCOL, "描述字符串") - COL表示该函数仅仅计算每一列的和 - ROWCOL表示该函数计算每一行、每一列的和 - 将你认为最好的两个实现,放在最前面。 - 比如: - {my_c_sum1, "超级垃圾列求和实现"}, - {my_rc_sum2, "好一点的行列求和实现"}, +/* + ????????????????????????????????????????, COL/ROWCOL, "?????????"?? + COL?????????????????????? + ROWCOL??????????????????????? + ????????????????????????????? + ???? + {my_c_sum1, "?????????????????"}, + {my_rc_sum2, "??????????????????"}, */ -rc_fun_rec rc_fun_tab[] = -{ +rc_fun_rec rc_fun_tab[] = { - /* 第一项,应当是你写的最好列求和的函数实现 */ + /* ???????????????????????????????? */ {c_sum, COL, "Best column sum"}, - /* 第二项,应当是你写的最好行列求和的函数实现 */ + /* ?????????????????????????????????? */ {rc_sum, ROWCOL, "Best row and column sum"}, {c_sum, COL, "Column sum, reference implementation"}, {rc_sum, ROWCOL, "Row and column sum, reference implementation"}, - /* 下面的代码不能修改或者删除!!表明数组列表结束 */ - {NULL,ROWCOL,NULL} -}; + /* ??????????????????????????????????????? */ + {NULL, ROWCOL, NULL}}; \ No newline at end of file diff --git a/perflab/matrix/rowcol.c~ b/perflab/matrix/rowcol.c~ new file mode 100644 index 0000000..990ce83 --- /dev/null +++ b/perflab/matrix/rowcol.c~ @@ -0,0 +1,162 @@ +/************************************************************************** + 行/列求和函数。按下面的要求编辑此文件: + 1. 将你的学号、姓名,以注释的方式写到下面; + 2. 实现不同版本的行列求和函数; + 3. 编辑rc_fun_rec rc_fun_tab数组,将你的最好的答案 + (最好的行和列求和、最好的列求和)作为数组的前两项 +***************************************************************************/ + +/* + 学号:202302723005 + 姓名:程景愉 +*/ + + +#include +#include +#include "rowcol.h" +#include +#include + +/* 参考的列求和函数实现 */ +/* 计算矩阵中的每一列的和。请注意对于行和列求和来说,调用参数是 + 一样的,只是第2个参数不会用到而已 +*/ + +void c_sum(matrix_t M, vector_t rowsum, vector_t colsum) +{ + int i,j; + for (j = 0; j < N; j++) { + colsum[j] = 0; + for (i = 0; i < N; i++) + colsum[j] += M[i][j]; + } +} + + +/* 参考的列和行求和函数实现 */ +/* 计算矩阵中的每一行、每一列的和。 */ + +void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) +{ + int i,j; + for (i = 0; i < N; i++) { + rowsum[i] = colsum[i] = 0; + for (j = 0; j < N; j++) { + rowsum[i] += M[i][j]; + colsum[i] += M[j][i]; + } + } +} + +/* CUDA优化的列求和函数 */ +void cuda_c_sum(matrix_t M, vector_t rowsum, vector_t colsum) +{ + // 分配设备内存 + int *d_M, *d_colsum; + cudaMalloc(&d_M, N * N * sizeof(int)); + cudaMalloc(&d_colsum, N * sizeof(int)); + + // 将数据从主机复制到设备 + cudaMemcpy(d_M, M, N * N * sizeof(int), cudaMemcpyHostToDevice); + + // 定义CUDA核函数 + dim3 blockDim(256); + dim3 gridDim((N + blockDim.x - 1) / blockDim.x); + + // 启动核函数 + cudaColumnSum<<>>(d_M, d_colsum); + + // 将结果从设备复制回主机 + cudaMemcpy(colsum, d_colsum, N * sizeof(int), cudaMemcpyDeviceToHost); + + // 释放设备内存 + cudaFree(d_M); + cudaFree(d_colsum); +} + +/* CUDA优化的行列求和函数 */ +void cuda_rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) +{ + // 分配设备内存 + int *d_M, *d_rowsum, *d_colsum; + cudaMalloc(&d_M, N * N * sizeof(int)); + cudaMalloc(&d_rowsum, N * sizeof(int)); + cudaMalloc(&d_colsum, N * sizeof(int)); + + // 将数据从主机复制到设备 + cudaMemcpy(d_M, M, N * N * sizeof(int), cudaMemcpyHostToDevice); + + // 定义CUDA核函数 + dim3 blockDim(256); + dim3 gridDim((N + blockDim.x - 1) / blockDim.x); + + // 启动核函数 + cudaRowColSum<<>>(d_M, d_rowsum, d_colsum); + + // 将结果从设备复制回主机 + cudaMemcpy(rowsum, d_rowsum, N * sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(colsum, d_colsum, N * sizeof(int), cudaMemcpyDeviceToHost); + + // 释放设备内存 + cudaFree(d_M); + cudaFree(d_rowsum); + cudaFree(d_colsum); +} + +/* CUDA核函数 - 列求和 */ +__global__ void cudaColumnSum(int *M, int *colsum) +{ + int col = blockIdx.x * blockDim.x + threadIdx.x; + if (col < N) { + colsum[col] = 0; + for (int row = 0; row < N; row++) { + colsum[col] += M[row * N + col]; + } + } +} + +/* CUDA核函数 - 行列求和 */ +__global__ void cudaRowColSum(int *M, int *rowsum, int *colsum) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < N) { + // 计算行和 + rowsum[idx] = 0; + for (int j = 0; j < N; j++) { + rowsum[idx] += M[idx * N + j]; + } + + // 计算列和 + colsum[idx] = 0; + for (int i = 0; i < N; i++) { + colsum[idx] += M[i * N + idx]; + } + } +} + +/* + 这个表格包含多个数组元素,每一组元素(函数名字, COL/ROWCOL, "描述字符串") + COL表示该函数仅仅计算每一列的和 + ROWCOL表示该函数计算每一行、每一列的和 + 将你认为最好的两个实现,放在最前面。 + 比如: + {my_c_sum1, "超级垃圾列求和实现"}, + {my_rc_sum2, "好一点的行列求和实现"}, +*/ + +rc_fun_rec rc_fun_tab[] = +{ + + /* 第一项,应当是你写的最好列求和的函数实现 */ + {cuda_c_sum, COL, "CUDA optimized column sum"}, + /* 第二项,应当是你写的最好行列求和的函数实现 */ + {cuda_rc_sum, ROWCOL, "CUDA optimized row and column sum"}, + + {c_sum, COL, "Column sum, reference implementation"}, + + {rc_sum, ROWCOL, "Row and column sum, reference implementation"}, + + /* 下面的代码不能修改或者删除!!表明数组列表结束 */ + {NULL,ROWCOL,NULL} +}; diff --git a/perflab/matrix/rowcol.o b/perflab/matrix/rowcol.o new file mode 100644 index 0000000000000000000000000000000000000000..abada5fb685be27aa907a09aacab91723385c4db GIT binary patch literal 6096 zcmbuCdt6k-9>8aL2+C_EkQ9UkO`dLmh&(cum+Z=dq-FR>SYVOY0>W+=$`YZ{)euZk z>SB6PBTZDG6j2f(AE5G32`e+MkH`lK89qSm&hP9D9FOb$iat}QD|oO5#v<6){r+wtd9(Fqi>(8UtDlfPh4<9PwIO@b*gEeSUn_BpOdKj zCF&t*;VOtJUfBdOrRpC8PwGGnsk%AvB-2GnVIG8+s()r>U{%fRL};xa&v&g4?{#rO zqn;+O)VD!(LxNRF!7)7vL`O(c(4Z&ry{P(pQB#mSTPcXoNK+}&1=%VEV)U$xe+Xph z32e>`2ch6$OE*E5JW-w{Pmh-ik`uyEVr??cJ1x$^_up;F;nU>ZK!woy|b-fjnk3pwex3ei@WQ3Xp2sSmD`=orA`)3 zd?)=Y+1+jXC)W8pM=Ug7So2MIQuxkeK0DLY;h!Y0A8g&g|8ixkD}T4Yv(0^-i~aYv zJ>}iL9BX#f;86RnqMySh)9t)=mKQv+n7RGePLtFO&s#J4jFdl}Ke7Ez(Kwf%qPGYu z##<+?uI^s^ZcFfon-!wECD!5u-F06&mDc1WZd>9y|KPq)tL~3`|IGH>N1xUCUj6d5 z!|vA$iw2&gwks{OjHdi+xoGqy&*VKiIhOr9?;N>M+1B6u$yauYq`~uR5|8fwDW>b1 z9rAWCXQrRdUmU4WJ*P~uE-_)%T;lcT*4uG@AN)EOQ(?hW$Hw6%J$<;Wj3Q(SNEdoW?jOyOgzdauh zc@td{TKfdN6^+(YT`VlmT)Q2R6SUd%SAS*ZtxJzznqjr-&m~JfH62j&9PXLeS|20u zv)@{G@=#E7irL-0gO77Mj)a*kDZdbKHNPY@bk&aFx$}w)-D{)js}jEp{CoYT;*NwJ zxdl`6=bval5?a{(y{_JaEvvW2$jmQSBH=Oz*|`ug^=6t$utyxwv%pRb^(l(Hh&u%T|;=Y&tD5 zKi>JvK&Lp{ul`|MY)WL!_DwAds*hBiDOy;U`hKIuOEn+{{^Fx!qzG43t|GilP zUD3F}Af<8M#k`KBcC(PdTA$w@O+GrtVWV`DgHHF%@vA;A9t^hJi*GlDF5YoJF1`CruVn|;o}Lum5z@;)nzGup>%bq*zl!W@ zOqR|+|J(Z)9vIbcN%?BiJMV?03>kh`_{LeYfrr8RADACJ6qIN7ea+ZoWK47%&d-k{ z^0aBbJO6gYZL^Iiu&=SXz-p3O-7jvLYb&waQS|kSjD;Id$vqF(_)iP^W^72_0^cho zTf&YH`TTXM-2UM2%Tk0t#I9W^tTamNTN9gFKe>Ne;~mTI3~wH2Qja&6t*zeREv)X` zKi%eSRZP%OWZ&I^$VUgg2gm=}+T~wXJ*L{Bd4utvUI9CD7xGG4U6&LsUN&t~RI_p9 zz^&+W)yPjrQQhtkZ5&h+-CS?=^qb$K!(LBFNVqlp9sQ6qzMQi2ZKv8II_I>xJ9&4# z>eSLUJ3`noM;Phmg>}J>AAKDH|Lf;u}&?o&S`Dy2v2Nx47c)|RKP>` z+T6yVu;5@nfm2xITh0OxFQKQr8}uJ+jXERzd%PP-}Kp)ODr@3^6H`fzn_6b z(H-v9v#q+=LtD{hM)iD)E^N_SiY^+Xg(9Sjj9$P^U%+8Do-ewHb?70@5ZRnHE;@J6 z6W~;5<_|N7Fcyu%cTLHm6?~+?rwm~{%8HMHa|RZPs`MD8ERI?*66TjZ8roCfx-3h0 zc2IyDGd0c4iPWh*^{3C78(+k-XFnw|i?icLvpCq(eDsOIyo`(5G5eW<~S|*SR^q(A67~(=W*wQ`#n#tmjcOB7}a=1OwGdY~D92JMt@#b;3 zC(*y>@Nv*#Tv{}O858u!n&_7~JeBBIIlO@Ak2u_x#5W)k<^kfrL+ngByn=8`4zDBJ zhQo6Sr~49(Pv^^pW6vkL2ZwJXaeO$OU=+aN0i;1jI0(9ypeco8Kbq*vSoW|^BET3I z_y$6O{u|(nuD^{OPS^V9EDrN|0$)(JvpB@@U?@Tr98TBs9u|i<^NIZd7Kb99~BFB@X|L@GC41{q!aL2H_ys@0}h|=zR7PdqWQIA>4$; z+4E_|;xG@RNIq*0*CAZM;_Uf!VR4A_GO-tOxCP<<9PUnd0EbT@T*~3ighz4sF~aFL z5atK=Zd&N~7|e(()8s%5z#cAdqQh|#3gz({b$H!To<=zJqnv)fM5LXYS3o%Qh20-7 z4yW%C5@zQrVAB$2E3-5*iPn_z93>Ja$doc9jLXhO!mJEXz|uqL@vQ2DRe73>v4)y3 zEhAo*Mxi`TwKOI|^rh)4VP;lFraViTOLEDRViL1t3OSjc|69O5 z0MCT37aCARD%{@&0W~1f9!v0NpADdCEI=)Viyg5I?3@3qKdeuPN%ucoWeUwt_e;uY zek%QOgEoH=F?fqQ5=r-X>OY70bNgRP42I8N#9u%p6|->#&M_hih%AMV3oZ@J9U&_$ z;Th5V^!$~88Iy`O5PLp(nfz%-3Fnxh7|gZhkA@c*7e4uTKz{J0{xm=A!`kvkkqNnu z+u4QsQwIMv+WdW>Q@GIoIE^-vpXwWMg)>B^Nk;nZP7R6V+QPX^+xU~rG!E1s#t(Zl z?LQrV1!mg%UrO>H(}WqmQ-2D|aYbAHQj(vpAIJ}9Lh4WR*I)+u-Dv}^Ly09Xbel#U zzEd+g4{%=8=I;)h7%uc2r_o08Q~y@n!1z-GA`6K>{pX|2)Q`$8;!oSD+(rDUP9^oD zdJnFEDSPjLw*ADPK0jbk==e1nLhvIAF}eQT#9vGeiRAhZVy^A^@%b7DnjfB@CBCRX zeSX?9}puqp4Hh&T6KRwqn#`wma|Nj75 C!+sM0 literal 0 HcmV?d00001 diff --git a/perflab/matrix/rowcol.y~ b/perflab/matrix/rowcol.y~ new file mode 100644 index 0000000..5d3310a --- /dev/null +++ b/perflab/matrix/rowcol.y~ @@ -0,0 +1,240 @@ +/************************************************************************** + 靠/靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 + 1. 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 + 2. 靠靠靠靠靠靠靠靠靠靠靠 + 3. 靠rc_fun_rec rc_fun_tab靠靠靠靠靠靠旷靠靠 + 靠靠旷靠靠靠靠靠靠靠蹩靠靠靠靠靠靠靠靠靠靠 +***************************************************************************/ + +/* + 靠靠201209054233 + 靠靠靠靠靠靠靠 +*/ + + +#include +#include +#include "rowcol.h" +#include + +/* 靠靠靠靠靠靠靠靠 */ +/* 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠蚩靠靠 + 靠靠靠靠靠2靠靠靠靠靠旷靠靠 +*/ + +void c_sum(matrix_t M, vector_t rowsum, vector_t colsum) +{ + int i,j; + for (j = 0; j < N; j++) { + colsum[j] = 0; + for (i = 0; i < N; i++) + colsum[j] += M[i][j]; + } +} + + +/* 靠靠靠靠靠靠靠靠靠靠 */ +/* 靠靠靠靠靠靠靠靠靠靠 */ + +void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) +{ + int i,j; + for (i = 0; i < N; i++) { + rowsum[i] = colsum[i] = 0; + for (j = 0; j < N; j++) { + rowsum[i] += M[i][j]; + colsum[i] += M[j][i]; + } + } +} + + + +/* + 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠, COL/ROWCOL, "靠靠靠靠"靠 + COL靠靠靠靠靠靠靠靠靠 + ROWCOL靠靠靠靠靠靠靠靠靠 + 靠靠靠靠靠蹩靠靠靠靠靠靠靠靠 + 靠靠 + {my_c_sum1, "靠靠靠靠靠靠靠靠"}, + {my_rc_sum2, "靠靠靠靠靠靠靠靠靠"}, +*/ + +rc_fun_rec rc_fun_tab[] = +{ + + /* 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 */ + {c_sum, COL, "Best column sum"}, + /* 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 */ + {rc_sum, ROWCOL, "Best row and column sum"}, + + {c_sum, COL, "Column sum, reference implementation"}, + + {rc_sum, ROWCOL, "Row and column sum, reference implementation"}, + + /* 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 */ + {NULL,ROWCOL,NULL} +}; + +// /************************************************************************** +// 行/列求和函数。按下面的要求编辑此文件: +// 1. 将你的学号、姓名,以注释的方式写到下面; +// 2. 实现不同版本的行列求和函数; +// 3. 编辑rc_fun_rec rc_fun_tab数组,将你的最好的答案 +// (最好的行和列求和、最好的列求和)作为数组的前两项 +// ***************************************************************************/ +// +// /* +// 学号:202302723005 +// 姓名:程景愉 +// */ +// +// +// #include +// #include +// #include "rowcol.h" +// #include +// #include +// +// /* 参考的列求和函数实现 */ +// /* 计算矩阵中的每一列的和。请注意对于行和列求和来说,调用参数是 +// 一样的,只是第2个参数不会用到而已 +// */ +// +// void c_sum(matrix_t M, vector_t rowsum, vector_t colsum) +// { +// int i,j; +// for (j = 0; j < N; j++) { +// colsum[j] = 0; +// for (i = 0; i < N; i++) +// colsum[j] += M[i][j]; +// } +// } +// +// +// /* 参考的列和行求和函数实现 */ +// /* 计算矩阵中的每一行、每一列的和。 */ +// +// void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) +// { +// int i,j; +// for (i = 0; i < N; i++) { +// rowsum[i] = colsum[i] = 0; +// for (j = 0; j < N; j++) { +// rowsum[i] += M[i][j]; +// colsum[i] += M[j][i]; +// } +// } +// } +// +// /* CUDA优化的列求和函数 */ +// void cuda_c_sum(matrix_t M, vector_t rowsum, vector_t colsum) +// { +// // 分配设备内存 +// int *d_M, *d_colsum; +// cudaMalloc(&d_M, N * N * sizeof(int)); +// cudaMalloc(&d_colsum, N * sizeof(int)); +// +// // 将数据从主机复制到设备 +// cudaMemcpy(d_M, M, N * N * sizeof(int), cudaMemcpyHostToDevice); +// +// // 定义CUDA核函数 +// dim3 blockDim(256); +// dim3 gridDim((N + blockDim.x - 1) / blockDim.x); +// +// // 启动核函数 +// cudaColumnSum<<>>(d_M, d_colsum); +// +// // 将结果从设备复制回主机 +// cudaMemcpy(colsum, d_colsum, N * sizeof(int), cudaMemcpyDeviceToHost); +// +// // 释放设备内存 +// cudaFree(d_M); +// cudaFree(d_colsum); +// } +// +// /* CUDA优化的行列求和函数 */ +// void cuda_rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) +// { +// // 分配设备内存 +// int *d_M, *d_rowsum, *d_colsum; +// cudaMalloc(&d_M, N * N * sizeof(int)); +// cudaMalloc(&d_rowsum, N * sizeof(int)); +// cudaMalloc(&d_colsum, N * sizeof(int)); +// +// // 将数据从主机复制到设备 +// cudaMemcpy(d_M, M, N * N * sizeof(int), cudaMemcpyHostToDevice); +// +// // 定义CUDA核函数 +// dim3 blockDim(256); +// dim3 gridDim((N + blockDim.x - 1) / blockDim.x); +// +// // 启动核函数 +// cudaRowColSum<<>>(d_M, d_rowsum, d_colsum); +// +// // 将结果从设备复制回主机 +// cudaMemcpy(rowsum, d_rowsum, N * sizeof(int), cudaMemcpyDeviceToHost); +// cudaMemcpy(colsum, d_colsum, N * sizeof(int), cudaMemcpyDeviceToHost); +// +// // 释放设备内存 +// cudaFree(d_M); +// cudaFree(d_rowsum); +// cudaFree(d_colsum); +// } +// +// /* CUDA核函数 - 列求和 */ +// __global__ void cudaColumnSum(int *M, int *colsum) +// { +// int col = blockIdx.x * blockDim.x + threadIdx.x; +// if (col < N) { +// colsum[col] = 0; +// for (int row = 0; row < N; row++) { +// colsum[col] += M[row * N + col]; +// } +// } +// } +// +// /* CUDA核函数 - 行列求和 */ +// __global__ void cudaRowColSum(int *M, int *rowsum, int *colsum) +// { +// int idx = blockIdx.x * blockDim.x + threadIdx.x; +// if (idx < N) { +// // 计算行和 +// rowsum[idx] = 0; +// for (int j = 0; j < N; j++) { +// rowsum[idx] += M[idx * N + j]; +// } +// +// // 计算列和 +// colsum[idx] = 0; +// for (int i = 0; i < N; i++) { +// colsum[idx] += M[i * N + idx]; +// } +// } +// } +// +// /* +// 这个表格包含多个数组元素,每一组元素(函数名字, COL/ROWCOL, "描述字符串") +// COL表示该函数仅仅计算每一列的和 +// ROWCOL表示该函数计算每一行、每一列的和 +// 将你认为最好的两个实现,放在最前面。 +// 比如: +// {my_c_sum1, "超级垃圾列求和实现"}, +// {my_rc_sum2, "好一点的行列求和实现"}, +// */ +// +// rc_fun_rec rc_fun_tab[] = +// { +// +// /* 第一项,应当是你写的最好列求和的函数实现 */ +// {cuda_c_sum, COL, "CUDA optimized column sum"}, +// /* 第二项,应当是你写的最好行列求和的函数实现 */ +// {cuda_rc_sum, ROWCOL, "CUDA optimized row and column sum"}, +// +// {c_sum, COL, "Column sum, reference implementation"}, +// +// {rc_sum, ROWCOL, "Row and column sum, reference implementation"}, +// +// /* 下面的代码不能修改或者删除!!表明数组列表结束 */ +// {NULL,ROWCOL,NULL} +// }; diff --git a/perflab/matrix/rowcol.z~ b/perflab/matrix/rowcol.z~ new file mode 100644 index 0000000..5d3310a --- /dev/null +++ b/perflab/matrix/rowcol.z~ @@ -0,0 +1,240 @@ +/************************************************************************** + 靠/靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 + 1. 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 + 2. 靠靠靠靠靠靠靠靠靠靠靠 + 3. 靠rc_fun_rec rc_fun_tab靠靠靠靠靠靠旷靠靠 + 靠靠旷靠靠靠靠靠靠靠蹩靠靠靠靠靠靠靠靠靠靠 +***************************************************************************/ + +/* + 靠靠201209054233 + 靠靠靠靠靠靠靠 +*/ + + +#include +#include +#include "rowcol.h" +#include + +/* 靠靠靠靠靠靠靠靠 */ +/* 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠蚩靠靠 + 靠靠靠靠靠2靠靠靠靠靠旷靠靠 +*/ + +void c_sum(matrix_t M, vector_t rowsum, vector_t colsum) +{ + int i,j; + for (j = 0; j < N; j++) { + colsum[j] = 0; + for (i = 0; i < N; i++) + colsum[j] += M[i][j]; + } +} + + +/* 靠靠靠靠靠靠靠靠靠靠 */ +/* 靠靠靠靠靠靠靠靠靠靠 */ + +void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) +{ + int i,j; + for (i = 0; i < N; i++) { + rowsum[i] = colsum[i] = 0; + for (j = 0; j < N; j++) { + rowsum[i] += M[i][j]; + colsum[i] += M[j][i]; + } + } +} + + + +/* + 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠, COL/ROWCOL, "靠靠靠靠"靠 + COL靠靠靠靠靠靠靠靠靠 + ROWCOL靠靠靠靠靠靠靠靠靠 + 靠靠靠靠靠蹩靠靠靠靠靠靠靠靠 + 靠靠 + {my_c_sum1, "靠靠靠靠靠靠靠靠"}, + {my_rc_sum2, "靠靠靠靠靠靠靠靠靠"}, +*/ + +rc_fun_rec rc_fun_tab[] = +{ + + /* 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 */ + {c_sum, COL, "Best column sum"}, + /* 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 */ + {rc_sum, ROWCOL, "Best row and column sum"}, + + {c_sum, COL, "Column sum, reference implementation"}, + + {rc_sum, ROWCOL, "Row and column sum, reference implementation"}, + + /* 靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠靠 */ + {NULL,ROWCOL,NULL} +}; + +// /************************************************************************** +// 行/列求和函数。按下面的要求编辑此文件: +// 1. 将你的学号、姓名,以注释的方式写到下面; +// 2. 实现不同版本的行列求和函数; +// 3. 编辑rc_fun_rec rc_fun_tab数组,将你的最好的答案 +// (最好的行和列求和、最好的列求和)作为数组的前两项 +// ***************************************************************************/ +// +// /* +// 学号:202302723005 +// 姓名:程景愉 +// */ +// +// +// #include +// #include +// #include "rowcol.h" +// #include +// #include +// +// /* 参考的列求和函数实现 */ +// /* 计算矩阵中的每一列的和。请注意对于行和列求和来说,调用参数是 +// 一样的,只是第2个参数不会用到而已 +// */ +// +// void c_sum(matrix_t M, vector_t rowsum, vector_t colsum) +// { +// int i,j; +// for (j = 0; j < N; j++) { +// colsum[j] = 0; +// for (i = 0; i < N; i++) +// colsum[j] += M[i][j]; +// } +// } +// +// +// /* 参考的列和行求和函数实现 */ +// /* 计算矩阵中的每一行、每一列的和。 */ +// +// void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) +// { +// int i,j; +// for (i = 0; i < N; i++) { +// rowsum[i] = colsum[i] = 0; +// for (j = 0; j < N; j++) { +// rowsum[i] += M[i][j]; +// colsum[i] += M[j][i]; +// } +// } +// } +// +// /* CUDA优化的列求和函数 */ +// void cuda_c_sum(matrix_t M, vector_t rowsum, vector_t colsum) +// { +// // 分配设备内存 +// int *d_M, *d_colsum; +// cudaMalloc(&d_M, N * N * sizeof(int)); +// cudaMalloc(&d_colsum, N * sizeof(int)); +// +// // 将数据从主机复制到设备 +// cudaMemcpy(d_M, M, N * N * sizeof(int), cudaMemcpyHostToDevice); +// +// // 定义CUDA核函数 +// dim3 blockDim(256); +// dim3 gridDim((N + blockDim.x - 1) / blockDim.x); +// +// // 启动核函数 +// cudaColumnSum<<>>(d_M, d_colsum); +// +// // 将结果从设备复制回主机 +// cudaMemcpy(colsum, d_colsum, N * sizeof(int), cudaMemcpyDeviceToHost); +// +// // 释放设备内存 +// cudaFree(d_M); +// cudaFree(d_colsum); +// } +// +// /* CUDA优化的行列求和函数 */ +// void cuda_rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) +// { +// // 分配设备内存 +// int *d_M, *d_rowsum, *d_colsum; +// cudaMalloc(&d_M, N * N * sizeof(int)); +// cudaMalloc(&d_rowsum, N * sizeof(int)); +// cudaMalloc(&d_colsum, N * sizeof(int)); +// +// // 将数据从主机复制到设备 +// cudaMemcpy(d_M, M, N * N * sizeof(int), cudaMemcpyHostToDevice); +// +// // 定义CUDA核函数 +// dim3 blockDim(256); +// dim3 gridDim((N + blockDim.x - 1) / blockDim.x); +// +// // 启动核函数 +// cudaRowColSum<<>>(d_M, d_rowsum, d_colsum); +// +// // 将结果从设备复制回主机 +// cudaMemcpy(rowsum, d_rowsum, N * sizeof(int), cudaMemcpyDeviceToHost); +// cudaMemcpy(colsum, d_colsum, N * sizeof(int), cudaMemcpyDeviceToHost); +// +// // 释放设备内存 +// cudaFree(d_M); +// cudaFree(d_rowsum); +// cudaFree(d_colsum); +// } +// +// /* CUDA核函数 - 列求和 */ +// __global__ void cudaColumnSum(int *M, int *colsum) +// { +// int col = blockIdx.x * blockDim.x + threadIdx.x; +// if (col < N) { +// colsum[col] = 0; +// for (int row = 0; row < N; row++) { +// colsum[col] += M[row * N + col]; +// } +// } +// } +// +// /* CUDA核函数 - 行列求和 */ +// __global__ void cudaRowColSum(int *M, int *rowsum, int *colsum) +// { +// int idx = blockIdx.x * blockDim.x + threadIdx.x; +// if (idx < N) { +// // 计算行和 +// rowsum[idx] = 0; +// for (int j = 0; j < N; j++) { +// rowsum[idx] += M[idx * N + j]; +// } +// +// // 计算列和 +// colsum[idx] = 0; +// for (int i = 0; i < N; i++) { +// colsum[idx] += M[i * N + idx]; +// } +// } +// } +// +// /* +// 这个表格包含多个数组元素,每一组元素(函数名字, COL/ROWCOL, "描述字符串") +// COL表示该函数仅仅计算每一列的和 +// ROWCOL表示该函数计算每一行、每一列的和 +// 将你认为最好的两个实现,放在最前面。 +// 比如: +// {my_c_sum1, "超级垃圾列求和实现"}, +// {my_rc_sum2, "好一点的行列求和实现"}, +// */ +// +// rc_fun_rec rc_fun_tab[] = +// { +// +// /* 第一项,应当是你写的最好列求和的函数实现 */ +// {cuda_c_sum, COL, "CUDA optimized column sum"}, +// /* 第二项,应当是你写的最好行列求和的函数实现 */ +// {cuda_rc_sum, ROWCOL, "CUDA optimized row and column sum"}, +// +// {c_sum, COL, "Column sum, reference implementation"}, +// +// {rc_sum, ROWCOL, "Row and column sum, reference implementation"}, +// +// /* 下面的代码不能修改或者删除!!表明数组列表结束 */ +// {NULL,ROWCOL,NULL} +// }; diff --git a/perflab/matrix/rowcol_202302723005.c b/perflab/matrix/rowcol_202302723005.c new file mode 100644 index 0000000..b504582 --- /dev/null +++ b/perflab/matrix/rowcol_202302723005.c @@ -0,0 +1,69 @@ +/************************************************************************** + ??/??????????????????????????????? + 1. ??????????????????????????????? + 2. ?????????????????????? + 3. ??rc_fun_rec rc_fun_tab?????????????????? + ??????????????????????????????????????????? +***************************************************************************/ + +/* + ????201209054233 + ?????????????? +*/ + +#include "rowcol.h" +#include +#include +#include + +/* ????????????????? */ +/* ??????????????????????????????????????????????? + ??????????2????????????????? +*/ + +void c_sum(matrix_t M, vector_t rowsum, vector_t colsum) { + int i, j; + for (j = 0; j < N; j++) { + colsum[j] = 0; + for (i = 0; i < N; i++) + colsum[j] += M[i][j]; + } +} + +/* ???????????????????? */ +/* ??????????????????????? */ + +void rc_sum(matrix_t M, vector_t rowsum, vector_t colsum) { + int i, j; + for (i = 0; i < N; i++) { + rowsum[i] = colsum[i] = 0; + for (j = 0; j < N; j++) { + rowsum[i] += M[i][j]; + colsum[i] += M[j][i]; + } + } +} + +/* + ????????????????????????????????????????, COL/ROWCOL, "?????????"?? + COL?????????????????????? + ROWCOL??????????????????????? + ????????????????????????????? + ???? + {my_c_sum1, "?????????????????"}, + {my_rc_sum2, "??????????????????"}, +*/ + +rc_fun_rec rc_fun_tab[] = { + + /* ???????????????????????????????? */ + {c_sum, COL, "Best column sum"}, + /* ?????????????????????????????????? */ + {rc_sum, ROWCOL, "Best row and column sum"}, + + {c_sum, COL, "Column sum, reference implementation"}, + + {rc_sum, ROWCOL, "Row and column sum, reference implementation"}, + + /* ??????????????????????????????????????? */ + {NULL, ROWCOL, NULL}}; \ No newline at end of file diff --git a/perflab/matrix/rowcol_202302723005.o b/perflab/matrix/rowcol_202302723005.o new file mode 100644 index 0000000000000000000000000000000000000000..b28983c754c3b55a497b9dbd3581ab131eaa2dc1 GIT binary patch literal 6120 zcmbuCd0bT09>CAA2+B4RND9J$hHTDfSOw7-HkpZ$NYijh7+{cX0Aa=lW{FVhs060C z^dNmGktQmTiYSVZC@zSpX{Bb0;f6wm3odnjch14gZc!(ArdK?)A*N(;3&fOZehfaU2Qg%tw&0^o7o|n{5MHMFnU#T6wQvxl72|mUEBtwHOA4C} zw0UI#P3l`xtV#}|e>cylEdSbX6(I$Se++T!Hm&}?bD_~2vhOPP*3|YI zdMFp#+1k2ycX#`EKOB=K^Ynhwz3|Ud?OzYmlNogI$8Czg@4l~GZ?cWYy>;tdtX%jm zhSzegp58UGA<%X5Ov{;d-$o`!Rvz}ROxHv%Nm!zE?cV?WdS>VbyTk z+-PkJK|OwTAg$9OWyJQk$U?t zeT+YCsC{;*`{51y^Mwt@M@+jzyt3?V9+n@tS?BD2d*{Ou&J%@CZ5kzBp7k+iV~(_a zwsp+9=;x&yt%T1ujQgN8y<+REUw63AkahXqeAA;z9Z?%$aU;xp+n4D(A5X}+Zfq-cF%YDQ*4xJaD zZ_oV2uCMyBUEHZWlShM=uXN~Q?WZTNjPv6X&&ZZ&tIzJcUMLw+c*1t7$7r?it*Im3 z65D$OyhY8nW8JK*Pj=o7$_-g(emPK;b?4I4H^$j4{bSCYmF9iQuKisj+Z$sA;~h6P z9NibvmTGZ-NB`5@GY2Bf=2V;ux?WHk9=>!-=#;6&#-jS@#+szv!GEt>TXH6GOJ3pF zg6XXt2f~Z49@IB@SiWpytlaWSO+irAnA*Ya+vnYW-}F@4-K-?_?PKP?A(7Ya|LJ_L zdXoJ+4-7J5I}qYmhgLqRnD;LI;wQvjCy5$rxn$hHAkC*LXfhB&Q}t##77I&t{Y&r143t+%_l^$F{f>%UlKez;WB zF=Lj5NmiokachP&NEczaW8h&iKcV9@p#lgCvhsl`eI9y*HN7M=P z0uldi)m@7f9W|JK`fw2d+G@ePVlO(k)v@!!2Pv z&js^)4n3L@d)cI1?Bk0fLPN(3Tq2^Txe7degkC~VftSe3SL7uc2dNG4CBT;{+(bKv^a2utWE6yT;zO?z`8b!t!j>AG{{i&^%t&uEd53O9ZX zi-SGQM^_M5?8R&#WJc_P560IiqHDjZh*@E18%y{wB7wn0%Lr0|{*WUKL)-`lTiORe z6ImSceopkc9PUW;EDonTNzLJOy!jmNMf8IlJ`6gH%W#svMbCHfT(Pb2zu4lgA7 zV-B|`@r{Utd4TvI5<7DauOi%<Gz7pr}O2;vF8)rlfyTWIQ|??Fbd-E zAkrWw90c7<(3HxtA58RlEPL1|lff7lcqc-E{u|+o?!Pr0PM@U>EDrP8iZ3XeSsdbc zG8CaI4yXHhJBve{>BN2yi$k0+!W%eTOt|*X=tci$5q^SWKb7!{99~BFB@X|B@M|m% z{R|-d7U3Y+&z%8L=zMk)dt(moBHWC{+4E_`;xG?`NIqK**CSlO;_Uf!V{wS{Cb1WC zxE0}n94;a}h{K%;mvML-;n5s^m~i?Ig!zGQIW6>k3}(cYX>uY4U=LR)(czd0g>w4b z(S1kx60I5XBa(8u?qb5Zc?E<+U)cTe;c)sKAz@CQ624o)996bfCexZqk*h+&M7c_i zgz-5!NSK`o3RrpyJzrM+v8qUyGuBWOre`L|(|t zo|2=23``8INwz#=uHt2uo|2)UPPiAe3OJV06EcJR%9V$$y)t}=z@r{7D;X?`jLaf2>@F)^4%9f_phcu?iI_2|Xalk5lZPpgc9d|A8H&YR zSN<$`fN|lImj~nrU+Pcu!}~y2{%A5GH*h<MHody${&t8s-h zM5akj`Yob{L~?E6T&8RM`4(CS>JQ_GZ!_&b9e)*Oy86GKt~HG>QC2CS8nj6>lsZ34EFF}#`TBS1oz#85y~g; Z4_8oFg*$)n2dc|IhV-AFYiS6s|38w&iWL9= literal 0 HcmV?d00001 diff --git a/perflab/matrix/rowcol_test.c b/perflab/matrix/rowcol_test.c index 6b67926..e6a046e 100644 --- a/perflab/matrix/rowcol_test.c +++ b/perflab/matrix/rowcol_test.c @@ -1,9 +1,9 @@ #include #include -//#include -#include "rowcol.h" -#include "fcyc.h" +// #include #include "clock.h" +#include "fcyc.h" +#include "rowcol.h" #define MAX_ITER_COUNT 100 @@ -11,9 +11,9 @@ static struct { double cref; /* Cycles taken by reference solution */ double cbest; /* Cycles taken by our best implementation */ -} cstandard[2] = -{{7.7, 6.40}, /* Column Sum */ - {9.75, 6.60} /* Row & Column Sum */ +} cstandard[2] = { + {7.7, 6.40}, /* Column Sum */ + {9.75, 6.60} /* Row & Column Sum */ }; /* Put in code to align matrix so that it starts on a cache block boundary. @@ -26,7 +26,7 @@ static struct { #define WPB 16 int verbose = 1; -int data[N*N+WPB]; +int data[N * N + WPB]; int *mstart; typedef vector_t *row_t; @@ -37,137 +37,122 @@ vector_t rsref, csref, rcomp, ccomp; static void init_tests(void); extern void make_CPU_busy(void); -static void init_tests(void) -{ - int i, j; - size_t bytes_per_block = sizeof(int) * WPB; - /* round mstart up to nearest block boundary */ - mstart = (int *) - (((size_t) data + bytes_per_block-1) / bytes_per_block * bytes_per_block); - for (i = 0; i < N; i++) { - rsref[i] = csref[i] = 0; - } - for (i = 0; i < N; i++) { - for (j = 0; j < N; j++) { - int val = rand(); - mstart[i*N+j] = val; - rsref[i] += val; - csref[j] += val; - } +static void init_tests(void) { + int i, j; + size_t bytes_per_block = sizeof(int) * WPB; + /* round mstart up to nearest block boundary */ + mstart = (int *)(((size_t)data + bytes_per_block - 1) / bytes_per_block * + bytes_per_block); + for (i = 0; i < N; i++) { + rsref[i] = csref[i] = 0; + } + for (i = 0; i < N; i++) { + for (j = 0; j < N; j++) { + int val = rand(); + mstart[i * N + j] = val; + rsref[i] += val; + csref[j] += val; } + } } - /* Test function on all values */ int test_rc(rc_fun f, FILE *rpt, rc_comp_t rc_type) { - int i; - int ok = 1; + int i; + int ok = 1; - for (i = 0; i < N; i++) - rcomp[i] = ccomp[i] = 0xDEADBEEF; - f((row_t)mstart, rcomp, ccomp); - - for (i = 0; ok && i < N; i++) { - if (rc_type == ROWCOL - && rsref[i] != rcomp[i]) { - ok = 0; - if (rpt) - fprintf(rpt, - "对第%d行的计算出错!正确结果是%d,但是计算得到%d\n", - i, rsref[i], rcomp[i]); - } - if ((rc_type == ROWCOL || rc_type == COL) - && csref[i] != ccomp[i]) { - ok = 0; - if (rpt) - fprintf(rpt, - "对第%d列的计算出错!正确结果是%d,但是计算得到%d\n", - i, csref[i], ccomp[i]); - } + for (i = 0; i < N; i++) + rcomp[i] = ccomp[i] = 0xDEADBEEF; + f((row_t)mstart, rcomp, ccomp); + for (i = 0; ok && i < N; i++) { + if (rc_type == ROWCOL && rsref[i] != rcomp[i]) { + ok = 0; + if (rpt) + fprintf(rpt, "瀵圭%d琛岀殑璁$畻鍑洪敊锛佹纭粨鏋滄槸%d锛屼絾鏄绠楀緱鍒%d\n", i, + rsref[i], rcomp[i]); } - return ok; + if ((rc_type == ROWCOL || rc_type == COL) && csref[i] != ccomp[i]) { + ok = 0; + if (rpt) + fprintf(rpt, "瀵圭%d鍒楃殑璁$畻鍑洪敊锛佹纭粨鏋滄槸%d锛屼絾鏄绠楀緱鍒%d\n", i, + csref[i], ccomp[i]); + } + } + return ok; } /* Kludgy way to interface to cycle measuring code */ -void do_test(int *intf) -{ - rc_fun f = (rc_fun) intf; +void do_test(int *intf) { + rc_fun f = (rc_fun)intf; f((row_t)mstart, rcomp, ccomp); } -void time_rc(rc_fun f, rc_comp_t rc_type, char *descr, double *cycp) -{ - int i; - int *intf = (int *) f; +void time_rc(rc_fun f, rc_comp_t rc_type, char *descr, double *cycp) { + int i; + int *intf = (int *)f; double t, cme; t = 0; - if (verbose) printf("函数:%s\n", descr); + if (verbose) + printf("鍑芥暟锛%s\n", descr); if (test_rc(f, stdout, rc_type)) { - make_CPU_busy(); - for (i=0;i 1.1*(sbest-1)+1) + if (smeas > 1.1 * (sbest - 1) + 1) return 120; - return 100*((smeas-1.0)/(sbest-1.0) + 0.1); + return 100 * ((smeas - 1.0) / (sbest - 1.0) + 0.1); } -int main(int argc, char *argv[]) -{ +int main(int argc, char *argv[]) { int i; double cme; - double cme_c,cme_rc; - int EnableScore=0; - - if (argc == 3) - { - EnableScore = 1; - verbose = 0; + double cme_c, cme_rc; + int EnableScore = 0; + + if (argc == 3) { + EnableScore = 1; + verbose = 0; } init_tests(); - set_fcyc_clear_cache(1); /* Set so that clears cache between runs */ + set_fcyc_clear_cache(1); /* Set so that clears cache between runs */ for (i = 0; rc_fun_tab[i].f != NULL; i++) { - cme = 100.0; - time_rc(rc_fun_tab[i].f, - rc_fun_tab[i].rc_type, rc_fun_tab[i].descr, &cme); - if (i == 0) - { - cme_c = cme; - if (EnableScore==0) - { - printf(" 最高\"列求和\"得分 ======================== %.0f\n", - compute_score(cme, cstandard[0].cref, cstandard[0].cbest)); - } - } - if (i == 1) - { - cme_rc = cme; - if (EnableScore==0) - { - printf(" 最高\"行和列求和\"得分 ====================== %.0f\n", - compute_score(cme, cstandard[1].cref, cstandard[1].cbest)); - } - } + cme = 100.0; + time_rc(rc_fun_tab[i].f, rc_fun_tab[i].rc_type, rc_fun_tab[i].descr, &cme); + if (i == 0) { + cme_c = cme; + if (EnableScore == 0) { + printf(" 鏈楂榎"鍒楁眰鍜孿"寰楀垎 ======================== %.0f\n", + compute_score(cme, cstandard[0].cref, cstandard[0].cbest)); + } + } + if (i == 1) { + cme_rc = cme; + if (EnableScore == 0) { + printf(" 鏈楂榎"琛屽拰鍒楁眰鍜孿"寰楀垎 ====================== %.0f\n", + compute_score(cme, cstandard[1].cref, cstandard[1].cbest)); + } + } } - + if (EnableScore) - printf("%.2f\t %.0f\t %.2f\t %.0f\t 0\t 0\n",cme_c,compute_score(cme_c, cstandard[0].cref, cstandard[0].cbest), - cme_rc,compute_score(cme_rc, cstandard[1].cref, cstandard[1].cbest)); + printf("%.2f\t %.0f\t %.2f\t %.0f\t 0\t 0\n", cme_c, + compute_score(cme_c, cstandard[0].cref, cstandard[0].cbest), cme_rc, + compute_score(cme_rc, cstandard[1].cref, cstandard[1].cbest)); return 0; } diff --git a/perflab/matrix/rowcol_test.o b/perflab/matrix/rowcol_test.o new file mode 100644 index 0000000000000000000000000000000000000000..d214ba422f5888a0214a48ba51a5d31fc8e6681a GIT binary patch literal 16840 zcmbuGc|4WN|G*!+Buf-Yb)-egmbF3@bre}jn>ad_Hd`D;8xaw@?G%a@TBJo3ZA3R( zMAR)RZMG;pU}`_k2E|`ONanvk_ryKSNYRguzLKv1EQr zm@U~$fLU)9H@Zx^)K^~XG5D2q$$ON7 z-|a4Wg<-I|OMb~6z^l9D7qP}&@(XKx<@Ku8>h(cytQ`*Yp*`@kP(oaWAyl$S5IegOOErbGKe%?QZR1%`4?b zmx@H+6xsZv7X%jZDyCTQJpNhyl2NkO(ceYnxA^q-_O7jOOZ`1A-1r~yAMN7rMYCOC zLOCF+1mWbNsA7skHGa2J%8h@7y7W7ew%cq#m%U-nZ`q6j;|51%G|ag1@9pE8*?KTx zpsq_^5geo2L_*(yS!Cx5t_(2y>}v4k%5SR2(a6C8h_Z`!XIjHS=s@odZd?@jX1%u6 z|6``jytxVDqqW$UZANJs6ytcJd&T6pT!lP|ZIm1RqL>@c7W-HeU_Ag;uqAae#%YKf%W!DSK(rDNXmtoef&o@GA1>u@_T;3f$BA8#{?!%eaWhlIJ(35IOr zG4Dbz05{&O8}Cu7teen6fY7O_z@9i@BFb7OeDCeWv+*nXwxm9EZ?-Uf>9%^n$YG$N zqqqrRvb?$Dg;N*0K{zGRWoE?Urac4MW|Tt9(Qm!I!bR1mf@(`NPyi8^gX$%4{VB{F zrNG*%;_$`6u(17a6@ZY~5+JZsjb{p*b8<#|w`3+ZAK228o7tS3+_<%0nw@%9+vL9aj?HT#W@qq)`$KpQ{@U8Dl+k=4s#{crtw+rN;t_4{%3QDcmU z6&p0uIrYGPm(FI1C930@=r4U zU>%>Ixz}bFe{Mq$pMThHW2fEe2>GPyvM;al3_hIDI}q~zZo6!P<)<-s(>kV1{rohf z^64>6$rJ?>rnc%xWN6#WBU-5@(b5)g-re44zbMDxi|bj3VR!Z%80@!d)^3md*h$?F z)a_FhSJeh@s!Y~fusdg8!uC|VT+P;HNA5k>6tOFQ?C%gFCp$dxntbu2jA-jw(OXsD z7(Fh{m=nEqh|$Wb3eRjwciG8CkMCr-MQ@Z{XB@Vt#CpIwRrTr@=N#^HUtE&>V}ZiA zE0T^uj||;Z%_}$;PhNN*xpAK8ndP1H+KnO?7@IBJBwoB|K+o=5Oj^wgg>Pwpd2%M{ zY`$o$x}C>#9+cGFtGs4WwAWYDQD;wy>o5!=KDsf&GW9;dfwPH zQ7Uu7Gvc;8f7UuR%0VXM@7Y%McY7X)E}E<6_ak#z`sSXv z+S-q@L+X+T?#Z8G{{Hdmfr{h12S|Li79DhbR`FpeW%-&5%Y2T#j;PkP;Y!rj==Mq* zEu5iuA}>Blw4nRbFs5L4bkL9U`QEvWOHR5wyT}}GnHzcYkA`F4A`k1FpL9xlorlP0 z`x_^9SGhbty4cvRZo1=1*SHTyi*~Mh7%9qsv&L2=si*4EzPp!Y%Cl_l{#;zR)MkIp zE63|`hG!KoXGQY%xLIy?IO?F)wXN$f{|6OE_Q>k#+B{_rlw0;I%^#n#c3#!?iaP0G zXSl(YhjnYbn78sSiq0KpRo9$$476!78ELw;VpvY=E}uQESFiYw=_(XiSi8u`@bzQC z_xiHupX!4~WM8!KupXcx2MMCu99W9brNpd z_t;cf&2>H6z0WYFrY9MT$4?ZPN%#3~k@n#?WO(0x%9o6ltWpx$qN;q_c3rV#?0!p| ztrN62osXQIqnb3qop;6eqO`$aw^io?H_kp);1;(!YJqOe?X@Zv5r&bD-E(dhN=yB$ z>3>B{`fgobx5o=R57#!)u{pg1p00Z4;Mrxq?Ag0Tfw_+3RDGX1HWrpYm}qxXF5kIc z*SVg(3jjqB!CSRK?JR{umHMthxXc*~TCjs$0pMSSLj zhPV>{*n+{cj?bTLpYk?K`gw4czIu;|+1?{2Jm%m%^%>I&CwF^Qep0Eo`n;oK+JcAUOkapTxgDM{SvAQ0tU>p=l_D*UDj&{f zpZc7@T@-9Aw`|(j^s>A;XLJ1zX5?&IBcS8nrs z^Qrx=`sDsSJn5@unUzhRO6lVSF?Fxg%r{@v@J$|@Gik}~-dWXpTF+iKGM0_Prf!VR-*i7o+zx|6LBjHC75S*Z;QJp5ZmAa>v5y1J$hu?w=|ZouR+x=DhjU zCPQDy&Rfl#jFi|K_1f8RS4ABE*rWB&&aC&iSo^7~W6~$dl=gJp(9Haer#xeq?|d}4 zAh$8~`&qumB-?Hm2gx7N$C(19?_(Xz7jwlv|9L(4&!FsWCHZZQK^Jcv?HzNHJFKT& zpuKpFlR-n-{+r(8rAnjfJha+9UT1G{?i$f}b;fz23U0P;dX#pr1_70VfmT^!%DHNlALXlIA%qSGH3<{Z~WV+wIc3!tBil+!?-Q zwed`OnZmHNio09f^|xNiT@VwQ9ivz=ag5v;CDA8u&JL?>iM3MgypR%8SScCxA>-mC z?v@HWzLm1%`3(;(rmi+$xb=O{^Uu#uojG+)_rBWe)T&Q%1q%zb&ibY9b99P(eBqXi zdi_-qsg&Kd8&VHA9{*}$JYtBTqW4Kw_oi)Oe#v6?LB@}(|C*z(qCHvVrxO3hheD+d zGDlr>y5{Pu%xdi5|KY$NwKsp{AMd8U7}LKbIMQv&nos#7n{V9G2>7u2l4-`d%(n`u zK@S@~h2C`F`Tv}7e!p_o&Gjnz#fw`8etS7WY;twu%i8x95kD4GzTDf;`D04c19`=v zKc`HsS|Ov@@YQGN)2|kCw+mL@e`@?dH^QdX_1govgW_}JzC3tzO(eDQhR>7^?evTH z-|cTbV}7XKVT#$a=E4&PH=eE5$*hm+Jh`dsdE}f#gC*yU6YSUL&22Wg`7wF*?qrpv zhst|ACkHTDGge0*F)*tYxz#!T7QfT4X#LOctBsb7n04lmgZxbOt5HL*Ok8ujpVx9f z#ccN$LFu#MH6{Ho8r-ZmbibXk(zlaIc_nst)7}(YTctr04%(aLg$2!i8ztIyUN7av zHFfUOcB$Z_Uv^%Ii?_5s7`j3_*sm<=ii~f(`m!hdr(I(QEqdg&a{dV&iN&vuKl!=t z&qc{up1bZXG(5Qc&veP%=`-HQE;yck_J+29SV&!>O-`4D|CiL%>n{R^ntH6=Teo(j z@p>6U?Um&xu1~%wF7F+4_1vQuuDR!KUo2B@=(5m*6h|TpYA+-8Q1aEwubk( zXMUY(se3`(lKMx98O1w8$~%{6Ut6m%HtL#<`K35{tIZ>Vq`%*t?q?TRrL!+WP9%)-LWF-I12NT>Fi$zd8Ek z28UG@tuOzsEgoQe^?26M;S-;Bz70q!`aGl5^!YNCecrA+;&moAMX1!gEQOWWvT;Hd1Zuqm(OTHxpT?=QX92vZD+`cq57iR)J-IsKf| zt}vD8n3%^yUh`#>D(qHi7_8w>w`-4?`a<66`v|84s)ir`R#199_~X4R`#aWu^eu1> zk<$(NBRynfSKbpRg`~7~?w2Aq#0z#;fA26nw?!?&_vZ#lBpidrd_>NAqHS_Y@+RjGTv@i!2AQZn+# zk5glk{bN^OaCcEE8JXJRm$7J{)0(YOzVUX+wdTBewkwa{o?@;WF7LJT?UQLm+oz~~ zogshc?Vd}%AO6;8dibQ&Cr0dOd)xTBjkQl^R`PaUQe0u1qZ{kPcU`c=W%lwJW!;TIhaKd}ZOeewgNn9p}s93eJ9*w?AETS1{Y5ggbi%x=?P^)= zN31vJXfzh(Zi>BTF>_LE^9t#u>f_vA$eA}BdHI>w|H{GVS)Tb@nwwwhh7?qVAO9wH zr#E+^>Xj#q#uL$$LUVQq}i}Q(hkIgzBmzmq-Zd`MuW|rr}d0Rz8 z1BPf0)pyG=ckel}^5q$hEVH}qUW!Ha&^#}0aLCBbN?!+fEnmn*s|>HBxBt5C`ncQ+ zsMnWmI`T!r#mIW!`v-HYW0`+=u9XQvEx48Ep>9&v)LD9oOHb{iDL$Z{znQ z6}}sy|93&$?3|#Db#W4Maj*1mcvdEU3q13XQQS*6)Sm69hHsW-`V6kHsA(Eakd@{*{Y&8zMNm&d=8j6FI- zIZ;p(EFIJD`0X6yQ?2*2>k@)*?3AciyP16E%)L!Ns&X!_O}!kNSJ<$jsNv~Uqf^!) zFK%rYOUU*$R?Vz=edhkY4>%k~K}6$8{Kl=%AC z8$W!h_9%_-!_AMFws8KrpW}DcUg(r^Qsncf^)eAH9K3$Q()Mn5mWkF^unEyv>zeaU zVzJgIIlf8tQ5&UQ10L3^oSa&~`=RC=CNG(E$Y^_({W96=sm2B_Z>Jx>)3e3;R!R53 z?1M+#Z9eIB<$9&7a$iiNxXv`8^zxSt|O?gto5 z%EF8hS?9utgeiy&m6ej-0hA$EM|GMow~CK$OFC5ucChu%T&k3V(=B zlT+O;HcgI`BtA_}D^bE)PCr)CTFxw5%0|vf^r?)T86ehjT0jG60*h&K($@VMGhn_4 z`K^RGs7Zpp62+|LII-fuB3i;m&T6HMoE8Aqgeev9c7S zPx1MP!*xrhC9EL9l#>vY`K&C(SW$dA;#`Ucu(AZhzEURY1Z-K7VPE?ad>tqcB-SBb zcTs>Vf|dI#NefGHm?IfaD10H!>T`s8xOy)Tq5oY%%&_M>D5rxTtFM46kiikm@x&v3 z0&uK~EZFQEI70uq@I(WN-2*^g2!4uSCE|p8QUX9k`;=g5tRFi=k0%RxRLawF9Z7D7iWS4*Lnr|@dQ300$nf0E6_hE zRN%u4@d^y~VRY@MPlN|UcrbzoV|Xxt2NQTOg$FZum;?_7`s|UdH()IdSW^Sm)<9^? z8XA}~-hphRj2CQ^31*M}A%dmB0tN=d3kK1I1pD|h!R*mDD0oGHz?TuM@b`grYkY$F zfgwIjh`>8ARKWNzUFE}@?&QkjhlYeRzFy&80IdjMyq5}=GQmp&yqORm0S|JXm!Hql zV4l}fujM|VS)Olb08g-#&+vGlaj#W8ujQ+FzDrm5fq?`G?}Tu=hJzpYzQHEEvq3(B z;CNX=&LKFyo&<$jN>^BRHx5hTx=r8^JlCjN9!c_(+0Fg7pa#9nV05!@URl!TSW%C)+h6 z^x+&~eJjM_c|MBZd_o^ym9T!`f9T&J^v3{w@X3iyl>~>^ChX@C;YWku9fZCT!M_sv zS_EeY2FNh}aReU(2LL3P!D}dPmxDOe*CzNRLZ4hGwgiXwYS@nx!O8ym5gdLp#QJLx zhwYN_=Mwtx8ZErOf_<3a9Kz2{f{!HlU4oPA;T^$A{T_mo`U+_4hjHo<{?!nNW*mag zAoO(!?m+145!{{PupO9I5*&`_7qotI2~N&SDdNz6H2C3hensdHCOCd>h4HHpyob<- zdldE~jsk-jIbU*!Lq9_Z{lNrRBlsAC4<)!h;xHrqFDLX#|3L&N{l^oW?C(y3ll?6u zIDGEF{VhctW@J3~2(C})OM@~@@H#-=J>vT$$l=cdjW52lg$&*;!?gtyj2R}(g%Sh) zqDGkDX^)mF3$zi3`uN!jA_IcMx;iAAP+SwqHWYV7`^#*K>mWXl;^rtQH;P9feLlrE zB6$VH=ORD&`UPW$zbvG;fzrog9Y^uo$WJQ8QxQK#@!^P{r}!BpU#B=c_h7m!%-9%V zocoddgyOZxe-p)v(R%nuaX&O};1-ZgcpQ3=Jb>cn$X<=&1&EKO_(3EaQG6JR$C~1= zQ2cW!J{EB|ioFZGX zpOHL;;_Hw+lj68NM~W*V{~;8|&qo_6?vCUvir+$hE>JuWjoW34+aX>|@wdoN6UApC zSp|(N9-m^QKb+zZk*rPeQsid^#f?xrYbb7l^tVu47Rjj;Z$R={isSpjs}#SF;lLH6EXJXnfSrbq$K}z6&Q!#IZhp6xJj3IanFwW|aOeI9}h|&&+AG;ej-69HsvQRqP@3;qwbl>6HF5cpjsX{$zr~ zdl-0650fRu;j<`A(+LiJNg+QD1c!d6LS>MgDQ+snjPMUVh4DjuKg9WzJ|FS51c&W< zBEFg6(9b@^V_@m9tSZOaZ}R zhZbHl(ERoi9Ioe)Xuiat021^Ki|U9=6CBpz>)Jqq!y4={vI4o{Myny17h?i2l5b;|SKZp2Tg2Q$j(Re;0 zI4pJ|{W^-{`>JM&k4EQX8^u)+@1gj1#D7ve5pgl}(2nQ54#lZK@gInr5FBmEpcmkq zE5YGX5dNP6!_1@jD6~$3DUP2Dq6rQ|+yR|~bO3QUmhjgNAxMV^edL|6+o} zAxT5yP)6~6h*wiQAMuA2KZAH3;&?pqJzN{X$@{=B1jo9t17fg~AmRST3q?j2aqJ)8 zSBxk0VZSO-JbDx_N8FO)9f(h(cr)TN2@Z?+ymlryY*z&>3RjBDBhII|BjWh)`e8Nn zV~V(d(yvB5jN+w;Z$uoAPZ#h7({_TBt{Fx!mcqB*|aVvhTU zpI;##N%(n4_#rud|A6(eA9UNo_7#_*JN&H=>Z2~Ptze&U9AFpmd-ySIt#HgBUyfvL zil-nMz5|B|`#C8TnTaUJ{05Todo0ZFBW_LUYhpcobJf`rZ`RKzs$o#~~hqI26h8=O7Ll$;~K^*OooS@xFxT0sF!C6}6N; zeoka`L&E*xXHMM^L9j4ij!VHleoJ))K4Ahzm;GF-%MS@*bb|w-0M!k2!6#l)1Rr>j z=oKP>buUL0rqfi$#V|6w1Nka@cp__Vi*oCpd5Z9f#z^D*ei+yO>=(E!A1FZ}F_tkzCY9HIf_F$Lj z_D=xTSO3dV`}p}0sIdv#WBDj3^wmE52ZNw45(Ny~hu?&-J?=kzj|5fpaRn&D#DO34 zK^jMH?m)(BcpQoW>&xD05U2p@nlSryr0uIf4Qu~PLs*bQ9~$v@8eEOr#X0 #include #include -#include -//#include -#include -#include +#include +// #include #include "clock.h" +#include +#include /* Use x86 cycle counter */ @@ -27,203 +27,195 @@ static unsigned cyc_lo = 0; /* Set *hi and *lo to the high and low order bits of the cycle counter. Implementation requires assembly code to use the rdtsc instruction. */ -void access_counter(unsigned *hi, unsigned *lo) -{ +void access_counter(unsigned *hi, unsigned *lo) { - long long counter; + long long counter; - counter = __rdtsc(); - (*hi) = (unsigned int)(counter >> 32); - (*lo) = (unsigned int)counter; -/* + counter = __rdtsc(); + (*hi) = (unsigned int)(counter >> 32); + (*lo) = (unsigned int)counter; + /* - LARGE_INTEGER lPerformanceCount; + LARGE_INTEGER lPerformanceCount; - QueryPerformanceCounter(&lPerformanceCount); - (*hi) = (unsigned int)lPerformanceCount.HighPart; - (*lo) = (unsigned int)lPerformanceCount.LowPart; -// printf("%08X %08X\n",(*hi),(*lo)); -*/ + QueryPerformanceCounter(&lPerformanceCount); + (*hi) = (unsigned int)lPerformanceCount.HighPart; + (*lo) = (unsigned int)lPerformanceCount.LowPart; + // printf("%08X %08X\n",(*hi),(*lo)); + */ } - /* Record the current value of the cycle counter. */ -void start_counter() -{ - access_counter(&cyc_hi, &cyc_lo); -} +void start_counter() { access_counter(&cyc_hi, &cyc_lo); } /* Return the number of cycles since the last call to start_counter. */ -double get_counter() -{ - unsigned ncyc_hi, ncyc_lo; - unsigned hi, lo, borrow; - double result; +double get_counter() { + unsigned ncyc_hi, ncyc_lo; + unsigned hi, lo, borrow; + double result; - /* Get cycle counter */ - access_counter(&ncyc_hi, &ncyc_lo); + /* Get cycle counter */ + access_counter(&ncyc_hi, &ncyc_lo); - /* Do double precision subtraction */ - lo = ncyc_lo - cyc_lo; - borrow = cyc_lo > ncyc_lo; - hi = ncyc_hi - cyc_hi - borrow; - result = (double) hi * (1 << 30) * 4 + lo; - return result; + /* Do double precision subtraction */ + lo = ncyc_lo - cyc_lo; + borrow = cyc_lo > ncyc_lo; + hi = ncyc_hi - cyc_hi - borrow; + result = (double)hi * (1 << 30) * 4 + lo; + return result; } -void make_CPU_busy(void) -{ - volatile double old_tick,new_tick; - start_counter(); - old_tick = get_counter(); - new_tick = get_counter(); - while (new_tick - old_tick < 1000000000) - new_tick = get_counter(); +void make_CPU_busy(void) { + volatile double old_tick, new_tick; + start_counter(); + old_tick = get_counter(); + new_tick = get_counter(); + while (new_tick - old_tick < 1000000000) + new_tick = get_counter(); } -//CPU的频率 -double mhz(int verbose) -{ - LARGE_INTEGER lFrequency; - LARGE_INTEGER lPerformanceCount_Start; - LARGE_INTEGER lPerformanceCount_End; - double mhz; - double fTime; - __int64 _i64StartCpuCounter; - __int64 _i64EndCpuCounter; - //On a multiprocessor machine, it should not matter which processor is called. - //However, you can get different results on different processors due to bugs in - //the BIOS or the HAL. To specify processor affinity for a thread, use the SetThreadAffinityMask function. - HANDLE hThread=GetCurrentThread(); - SetThreadAffinityMask(hThread,0x1); +// CPU锟斤拷频锟斤拷 +double mhz(int verbose) { + LARGE_INTEGER lFrequency; + LARGE_INTEGER lPerformanceCount_Start; + LARGE_INTEGER lPerformanceCount_End; + double mhz; + double fTime; + __int64 _i64StartCpuCounter; + __int64 _i64EndCpuCounter; + // On a multiprocessor machine, it should not matter which processor is + // called. However, you can get different results on different processors due + // to bugs in the BIOS or the HAL. To specify processor affinity for a thread, + // use the SetThreadAffinityMask function. + HANDLE hThread = GetCurrentThread(); + SetThreadAffinityMask(hThread, 0x1); - //主板上高精度定时器的晶振频率 - //这个定时器应该就是一片8253或者8254 - //在intel ich7中集成了8254 - QueryPerformanceFrequency(&lFrequency); -// if (verbose>0) -// printf("高精度定时器的晶振频率:%1.0fHz.\n",(double)lFrequency.QuadPart); + // 锟斤拷锟斤拷锟较高撅拷锟饺讹拷时锟斤拷锟侥撅拷锟斤拷频锟斤拷 + // 锟斤拷锟斤拷锟绞憋拷锟接︼拷镁锟斤拷锟揭黄1锟78253锟斤拷锟斤拷8254 + // 锟斤拷intel ich7锟叫硷拷锟斤拷锟斤拷8254 + QueryPerformanceFrequency(&lFrequency); + // if (verbose>0) + // printf("锟竭撅拷锟饺讹拷时锟斤拷锟侥撅拷锟斤拷频锟绞o拷%1.0fHz.\n",(double)lFrequency.QuadPart); - //这个定时器每经过一个时钟周期,其计数器会+1 - QueryPerformanceCounter(&lPerformanceCount_Start); + // 锟斤拷锟斤拷锟绞憋拷锟矫匡拷锟斤拷锟揭伙拷锟绞憋拷锟斤拷锟斤拷冢锟斤拷锟斤拷锟斤拷锟斤拷锟斤拷+1 + QueryPerformanceCounter(&lPerformanceCount_Start); - //RDTSC指令:获取CPU经历的时钟周期数 - _i64StartCpuCounter=__rdtsc(); + // RDTSC指锟斤拷:锟斤拷取CPU锟斤拷锟斤拷锟斤拷时锟斤拷锟斤拷锟斤拷锟斤拷 + _i64StartCpuCounter = __rdtsc(); - //延时长一点,误差会小一点 - //int nTemp=100000; - //while (--nTemp); - Sleep(200); + // 锟斤拷时锟斤拷一锟斤拷,锟斤拷锟斤拷小一锟斤拷 + // int nTemp=100000; + // while (--nTemp); + Sleep(200); - QueryPerformanceCounter(&lPerformanceCount_End); + QueryPerformanceCounter(&lPerformanceCount_End); - _i64EndCpuCounter=__rdtsc(); + _i64EndCpuCounter = __rdtsc(); - //f=1/T => f=计数次数/(计数次数*T) - //这里的“计数次数*T”就是时间差 - fTime=((double)lPerformanceCount_End.QuadPart-(double)lPerformanceCount_Start.QuadPart) - /(double)lFrequency.QuadPart; + // f=1/T => f=锟斤拷锟斤拷锟斤拷锟斤拷/(锟斤拷锟斤拷锟斤拷锟斤拷*T) + // 锟斤拷锟斤拷摹锟斤拷锟斤拷锟斤拷锟斤拷锟1锟7*T锟斤拷锟斤拷锟斤拷时锟斤拷锟1锟7 + fTime = ((double)lPerformanceCount_End.QuadPart - + (double)lPerformanceCount_Start.QuadPart) / + (double)lFrequency.QuadPart; - mhz = (_i64EndCpuCounter-_i64StartCpuCounter)/(fTime*1000000.0); - if (verbose>0) - printf("CPU频率为:%1.6fMHz.\n",mhz); - return mhz; + mhz = (_i64EndCpuCounter - _i64StartCpuCounter) / (fTime * 1000000.0); + if (verbose > 0) + printf("CPU频锟斤拷为:%1.6fMHz.\n", mhz); + return mhz; } -double CPU_Factor1(void) -{ - double result; - int i,j,k,ii,jj,kk; - LARGE_INTEGER lStart,lEnd; +double CPU_Factor1(void) { + double result; + int i, j, k, ii, jj, kk; + LARGE_INTEGER lStart, lEnd; LARGE_INTEGER lFrequency; HANDLE hThread; double fTime; QueryPerformanceFrequency(&lFrequency); - ii = 43273; - kk = 1238; - result = 1; - jj = 1244; + ii = 43273; + kk = 1238; + result = 1; + jj = 1244; - hThread=GetCurrentThread(); - SetThreadAffinityMask(hThread,0x1); + hThread = GetCurrentThread(); + SetThreadAffinityMask(hThread, 0x1); QueryPerformanceCounter(&lStart); //_asm("cpuid"); - start_counter(); - for (i=0;i<100;i++) - for (j=0;j<1000;j++) - for (k=0;k<1000;k++) - kk += kk*ii+jj; + start_counter(); + for (i = 0; i < 100; i++) + for (j = 0; j < 1000; j++) + for (k = 0; k < 1000; k++) + kk += kk * ii + jj; - result = get_counter(); - QueryPerformanceCounter(&lEnd); - fTime=((double)lEnd.QuadPart-(double)lStart.QuadPart); - printf("CPU运行时间为%f",result); - printf("\t %f\n",fTime); - return result; + result = get_counter(); + QueryPerformanceCounter(&lEnd); + fTime = ((double)lEnd.QuadPart - (double)lStart.QuadPart); + printf("CPU锟斤拷锟斤拷时锟斤拷为%f", result); + printf("\t %f\n", fTime); + return result; } -double CPU_Factor(void) -{ - double frequency; - double multiplier = 1000 * 1000 * 1000;//nano - LARGE_INTEGER lFrequency; - LARGE_INTEGER start,stop; - HANDLE hThread; - int i; - const int gigahertz= 1000*1000*1000; - const int known_instructions_per_loop = 27317; +double CPU_Factor(void) { + double frequency; + double multiplier = 1000 * 1000 * 1000; // nano + LARGE_INTEGER lFrequency; + LARGE_INTEGER start, stop; + HANDLE hThread; + int i; + const int gigahertz = 1000 * 1000 * 1000; + const int known_instructions_per_loop = 27317; - int iterations = 100000000; - int g = 0; - double normal_ticks_per_second; -double ticks; -double time; -double loops_per_sec; -double instructions_per_loop; -double ratio; -double actual_freq; + int iterations = 100000000; + int g = 0; + double normal_ticks_per_second; + double ticks; + double time; + double loops_per_sec; + double instructions_per_loop; + double ratio; + double actual_freq; - QueryPerformanceFrequency(&lFrequency); - frequency = (double)lFrequency.QuadPart; + QueryPerformanceFrequency(&lFrequency); + frequency = (double)lFrequency.QuadPart; - hThread=GetCurrentThread(); - SetThreadAffinityMask(hThread,0x1); - QueryPerformanceCounter(&start); - for( i = 0; i < iterations; i++) - { - g++; - g++; - g++; - g++; - } - QueryPerformanceCounter(&stop); + hThread = GetCurrentThread(); + SetThreadAffinityMask(hThread, 0x1); + QueryPerformanceCounter(&start); + for (i = 0; i < iterations; i++) { + g++; + g++; + g++; + g++; + } + QueryPerformanceCounter(&stop); - //normal ticks differs from the WMI data, i.e 3125, when WMI 3201, and CPUZ 3199 - normal_ticks_per_second = frequency * 1000; - ticks = (double)((double)stop.QuadPart - (double)start.QuadPart); - time = (ticks * multiplier) /frequency; - loops_per_sec = iterations / (time/multiplier); - instructions_per_loop = normal_ticks_per_second / loops_per_sec; + // normal ticks differs from the WMI data, i.e 3125, when WMI 3201, and CPUZ + // 3199 + normal_ticks_per_second = frequency * 1000; + ticks = (double)((double)stop.QuadPart - (double)start.QuadPart); + time = (ticks * multiplier) / frequency; + loops_per_sec = iterations / (time / multiplier); + instructions_per_loop = normal_ticks_per_second / loops_per_sec; - ratio = (instructions_per_loop / known_instructions_per_loop); - actual_freq = normal_ticks_per_second / ratio; -/* - actual_freq = normal_ticks_per_second / ratio; - actual_freq = known_instructions_per_loop*iterations*multiplier/time; + ratio = (instructions_per_loop / known_instructions_per_loop); + actual_freq = normal_ticks_per_second / ratio; + /* + actual_freq = normal_ticks_per_second / ratio; + actual_freq = known_instructions_per_loop*iterations*multiplier/time; - 2293 = x/time; - - 2292.599713*1191533038.809362=known_instructions_per_loop*100000000*1000 - loops_per_sec = iterations*frequency / ticks - - instructions_per_loop = / loops_per_sec; -*/ - printf("Perf counter freq: %f\n", normal_ticks_per_second); - printf("Loops per sec: %f\n", loops_per_sec); - printf("Perf counter freq div loops per sec: %f\n", instructions_per_loop); - printf("Presumed freq: %f\n", actual_freq); - printf("ratio: %f\n", ratio); - printf("time=%f\n",time); - return ratio; + 2293 = x/time; + + 2292.599713*1191533038.809362=known_instructions_per_loop*100000000*1000 + loops_per_sec = iterations*frequency / ticks + + instructions_per_loop = / loops_per_sec; + */ + printf("Perf counter freq: %f\n", normal_ticks_per_second); + printf("Loops per sec: %f\n", loops_per_sec); + printf("Perf counter freq div loops per sec: %f\n", instructions_per_loop); + printf("Presumed freq: %f\n", actual_freq); + printf("ratio: %f\n", ratio); + printf("time=%f\n", time); + return ratio; } diff --git a/perflab/poly/poly.cu b/perflab/poly/poly.cu new file mode 100644 index 0000000..73347fe --- /dev/null +++ b/perflab/poly/poly.cu @@ -0,0 +1,325 @@ +/************************************************************************** + 澶氶」寮忚绠楀嚱鏁般傛寜涓嬮潰鐨勮姹傜紪杈戞鏂囦欢锛 + 1. 灏嗕綘鐨勫鍙枫佸鍚嶏紝浠ユ敞閲婄殑鏂瑰紡鍐欏埌涓嬮潰锛 + 2. 瀹炵幇涓嶅悓鐗堟湰鐨勫椤瑰紡璁$畻鍑芥暟锛 + 3. 缂栬緫peval_fun_rec peval_fun_tab鏁扮粍锛屽皢浣犵殑鏈濂界殑绛旀 + 锛堟渶灏廋PE銆佹渶灏廋10锛変綔涓烘暟缁勭殑鍓嶄袱椤 +***************************************************************************/ + +/* + 瀛﹀彿锛201209054233 + 濮撳悕锛氬鍗婂姞鐝媯 +*/ + + + +#include +#include +#include +typedef int (*peval_fun)(int*, int, int); + +typedef struct { + peval_fun f; + char *descr; +} peval_fun_rec, *peval_fun_ptr; + + +/************************************************************************** + Edit this comment to indicate your name and Andrew ID +#ifdef ASSIGN + Submission by Harry Q. Bovik, bovik@andrew.cmu.edu +#else + Instructor's version. + Created by Randal E. Bryant, Randy.Bryant@cs.cmu.edu, 10/07/02 +#endif +***************************************************************************/ + +/* + 瀹炵幇涓涓寚瀹氱殑甯哥郴鏁板椤瑰紡璁$畻 + 绗竴娆★紝璇风洿鎺ヨ繍琛岀▼搴忥紝浠ヤ究鑾风煡浣犻渶瑕佸疄鐜扮殑甯哥郴鏁版槸鍟 +*/ +int const_poly_eval(int *not_use, int not_use2, int x) +{ + int result = 0; +/* int i; + int xpwr = 1; // x鐨勫箓娆 + int a[4] = {21,90,42,88}; + for (i = 0; i <= 3; i++) { + result += a[i]*xpwr; + xpwr *= x; + } +*/ +// 90 = 64 + 32 - 4 - 2 +// 42 = 32 + 8 + 2 +// 88 = 64 + 16 + 8 + int x64,x32,x16,x8,x4,x2; + + x64 = x << 6; + x32 = x << 5; + x16 = x << 4; + x8 = x << 3; + x4 = x << 2; + x2 = x << 1; + result = 21 + x64+x32-x4-x2 + ((x32+x8+x2) + (x64+x16+x8)*x)*x; + return result; +} + + + +/* 澶氶」寮忚绠楀嚱鏁般傛敞鎰忥細杩欎釜鍙槸涓涓弬鑰冨疄鐜帮紝浣犻渶瑕佸疄鐜拌嚜宸辩殑鐗堟湰 */ + +/* + 鍙嬫儏鎻愮ず锛歭cc鏀寔ATT鏍煎紡鐨勫祵鍏ュ紡姹囩紪锛屼緥濡 + + _asm("movl %eax,%ebx"); + _asm("pushl %edx"); + + 鍙互鍦╨cc涓璸roject->configuration->Compiler->Code Generation->Generate .asm锛 + 灏嗗叾閫変腑鍚庯紝鍙互鍦╨cc鐩綍涓嬮潰鐢熸垚瀵瑰簲绋嬪簭鐨勬眹缂栦唬鐮佸疄鐜般傞氳繃鏌ョ湅姹囩紪鏂囦欢锛 + 浣犲彲浠ヤ簡瑙g紪璇戝櫒鏄浣曞疄鐜颁綘鐨勪唬鐮佺殑銆傛湁浜涘疄鐜板彲鑳介潪甯镐綆鏁堛 + 浣犲彲浠ュ湪閫傚綋鐨勫湴鏂瑰姞鍏ュ祵鍏ュ紡姹囩紪锛屾潵澶у箙搴︽彁楂樿绠楁ц兘銆 +*/ + +int poly_eval(int *a, int degree, int x) +{ + int result = 0; + int i; + int xpwr = 1; /* x鐨勫箓娆 */ +// printf("闃=%d\n",degree); + for (i = 0; i <= degree; i++) { + result += a[i]*xpwr; + xpwr *= x; + } + return result; +} + +/* CUDA浼樺寲鐨勫椤瑰紡璁$畻鍑芥暟 - 浣嶤PE鐗堟湰 */ +int cuda_poly_eval_low_cpe(int *a, int degree, int x) +{ + // 瀵逛簬浣嶤PE鐗堟湰锛屾垜浠娇鐢–UDA骞惰璁$畻澶氶」寮忕殑鍚勪釜椤 + // 鐒跺悗灏嗙粨鏋滀紶鍥炰富鏈鸿繘琛屾眰鍜 + + // 鍒嗛厤璁惧鍐呭瓨 + int *d_a, *d_results; + cudaError_t err; + + // 鍒嗛厤鍐呭瓨 + err = cudaMalloc(&d_a, (degree + 1) * sizeof(int)); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + return 0; + } + + err = cudaMalloc(&d_results, (degree + 1) * sizeof(int)); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + cudaFree(d_a); + return 0; + } + + // 灏嗙郴鏁颁粠涓绘満澶嶅埗鍒拌澶 + err = cudaMemcpy(d_a, a, (degree + 1) * sizeof(int), cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + cudaFree(d_a); + cudaFree(d_results); + return 0; + } + + // 瀹氫箟CUDA鏍稿嚱鏁 + dim3 blockDim(256); + dim3 gridDim((degree + 1 + blockDim.x - 1) / blockDim.x); + + // 鍚姩鏍稿嚱鏁 + cudaPolyEvalLowCPE<<>>(d_a, degree, x, d_results); + + // 妫鏌ユ牳鍑芥暟鎵ц閿欒 + err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + cudaFree(d_a); + cudaFree(d_results); + return 0; + } + + // 鍒嗛厤涓绘満鍐呭瓨鐢ㄤ簬缁撴灉 + int *h_results = (int *)malloc((degree + 1) * sizeof(int)); + if (h_results == NULL) { + printf("Memory allocation error\n"); + cudaFree(d_a); + cudaFree(d_results); + return 0; + } + + // 灏嗙粨鏋滀粠璁惧澶嶅埗鍥炰富鏈 + err = cudaMemcpy(h_results, d_results, (degree + 1) * sizeof(int), cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + free(h_results); + cudaFree(d_a); + cudaFree(d_results); + return 0; + } + + // 鍦ㄤ富鏈轰笂姹傚拰 + int result = 0; + for (int i = 0; i <= degree; i++) { + result += h_results[i]; + } + + // 閲婃斁鍐呭瓨 + free(h_results); + cudaFree(d_a); + cudaFree(d_results); + + return result; +} + +/* CUDA浼樺寲鐨勫椤瑰紡璁$畻鍑芥暟 - 10闃朵紭鍖栫増鏈 */ +int cuda_poly_eval_degree10(int *a, int degree, int x) +{ + // 瀵逛簬10闃跺椤瑰紡锛屾垜浠彲浠ヤ娇鐢ㄦ洿浼樺寲鐨勬柟娉 + // 浣跨敤CUDA骞惰璁$畻锛屼絾閽堝10闃跺椤瑰紡杩涜鐗规畩浼樺寲 + + // 鍒嗛厤璁惧鍐呭瓨 + int *d_a, *d_result; + cudaError_t err; + + // 鍒嗛厤鍐呭瓨 + err = cudaMalloc(&d_a, (degree + 1) * sizeof(int)); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + return 0; + } + + err = cudaMalloc(&d_result, sizeof(int)); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + cudaFree(d_a); + return 0; + } + + // 灏嗙郴鏁颁粠涓绘満澶嶅埗鍒拌澶 + err = cudaMemcpy(d_a, a, (degree + 1) * sizeof(int), cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + cudaFree(d_a); + cudaFree(d_result); + return 0; + } + + // 瀹氫箟CUDA鏍稿嚱鏁 + dim3 blockDim(256); + dim3 gridDim(1); // 鍙渶瑕佷竴涓潡锛屽洜涓烘垜浠彧闇瑕佷竴涓粨鏋 + + // 鍚姩鏍稿嚱鏁 + cudaPolyEvalDegree10<<>>(d_a, degree, x, d_result); + + // 妫鏌ユ牳鍑芥暟鎵ц閿欒 + err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + cudaFree(d_a); + cudaFree(d_result); + return 0; + } + + // 鑾峰彇缁撴灉 + int result; + err = cudaMemcpy(&result, d_result, sizeof(int), cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + printf("CUDA Error: %s\n", cudaGetErrorString(err)); + cudaFree(d_a); + cudaFree(d_result); + return 0; + } + + // 閲婃斁鍐呭瓨 + cudaFree(d_a); + cudaFree(d_result); + + return result; +} + +/* CUDA鏍稿嚱鏁 - 浣嶤PE鐗堟湰 */ +__global__ void cudaPolyEvalLowCPE(int *a, int degree, int x, int *results) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx <= degree) { + // 璁$畻x鐨勫箓 + int xpwr = 1; + for (int i = 0; i < idx; i++) { + xpwr *= x; + } + + // 璁$畻杩欎竴椤圭殑缁撴灉 + results[idx] = a[idx] * xpwr; + } +} + +/* CUDA鏍稿嚱鏁 - 10闃朵紭鍖栫増鏈 */ +__global__ void cudaPolyEvalDegree10(int *a, int degree, int x, int *result) +{ + // 浣跨敤鍏变韩鍐呭瓨鏉ュ瓨鍌ㄤ腑闂寸粨鏋 + __shared__ int shared_result; + + // 鍙湁绗竴涓嚎绋嬪垵濮嬪寲鍏变韩缁撴灉 + if (threadIdx.x == 0) { + shared_result = 0; + } + __syncthreads(); + + // 姣忎釜绾跨▼璁$畻涓閮ㄥ垎椤 + int local_result = 0; + int xpwr = 1; + + // 璁$畻x鐨勫箓 + for (int i = 0; i < threadIdx.x; i++) { + xpwr *= x; + } + + // 璁$畻杩欎竴椤圭殑缁撴灉 + if (threadIdx.x <= degree) { + local_result = a[threadIdx.x] * xpwr; + } + + // 浣跨敤鍘熷瓙鎿嶄綔绱姞缁撴灉 + atomicAdd(&shared_result, local_result); + + // 鍚屾鎵鏈夌嚎绋 + __syncthreads(); + + // 鍙湁绗竴涓嚎绋嬪皢缁撴灉鍐欏洖鍏ㄥ眬鍐呭瓨 + if (threadIdx.x == 0) { + *result = shared_result; + } +} + +/* + 杩欎釜琛ㄦ牸鍖呭惈澶氫釜鏁扮粍鍏冪礌锛屾瘡涓缁勫厓绱狅紙鍑芥暟鍚嶅瓧, "鎻忚堪瀛楃涓"锛 + 灏嗕綘璁や负鏈濂界殑涓や釜瀹炵幇锛屾斁鍦ㄦ渶鍓嶉潰銆 + 姣斿锛 + {my_poly_eval1, "瓒呯骇鍨冨溇瀹炵幇"}, + {my_poly_eval2, "濂戒竴鐐圭殑瀹炵幇"}, +*/ + +peval_fun_rec peval_fun_tab[] = +{ + + /* 绗竴椤癸紝搴斿綋鏄綘鍐欑殑鏈濂紺PE鐨勫嚱鏁板疄鐜 */ + {cuda_poly_eval_low_cpe, "CUDA optimized low CPE implementation"}, + /* 绗簩椤癸紝搴斿綋鏄綘鍐欑殑鍦10闃舵椂鍏锋湁鏈濂芥ц兘鐨勫疄鐜 */ + {cuda_poly_eval_degree10, "CUDA optimized degree 10 implementation"}, + + {poly_eval, "poly_eval: 鍙傝冨疄鐜"}, + + /* 涓嬮潰鐨勪唬鐮佷笉鑳戒慨鏀规垨鑰呭垹闄わ紒锛佽〃鏄庢暟缁勫垪琛ㄧ粨鏉 */ + {NULL, ""} +}; + + + + + + + diff --git a/perflab/poly/poly.o b/perflab/poly/poly.o new file mode 100644 index 0000000000000000000000000000000000000000..1b650e094e30917945ca4d03ef8e9455ec3709fb GIT binary patch literal 5872 zcmbuD2~?BE7RSFt5d>wm3QC_5tRlq_P(i6;ScQNKSeGYC2#^F65)45RZ9xHBN~wC( z3MxH-qM)?osVHtNrM7_30515fwYWUNXRRWNfL7n!Fr%L%o}TkM=X~7x|K{GgvwcfQ z_6hXmaySer9LAk_tq5fpjp^&UP*)W)R*WT+-kmtpFQU5~Vw~y%v%F6XmdW@gW7Bj# zvaOk}eyKkVHYt5*Qu;loGjPUF3HLKa z&Gm0F+@OdcZ=1CD%%|yBad&LqG$B}6P!Q*z88w|5m3PD8vGvA)yxg*8e$LItpMS|& zr&QSb1&!t%jjO6md~#h>>MvTZim+dK_?yGQd;0uOZP?%$U3^o&v^wa%{YIM;Hl1TM z?)vU2-eK1{|2mho&Hkukm`0eTukDVo`{npiy}Vy5a(gUlV{e%Vc3)ZNQrPMK^7z3Y zfAvq-B-q=;E?2i#RN6JIy;zuVA-$*c>7N9 zx_;ZnV|)H>8+&(za82Iv=h@2&Z58MCUCdLbt`EI?ZuaOZqp4q~J??c=PBvi2X>`X6tITwgGzWapYdY5V?T+gAn$%$>M0 zm8Y&zEB#a-EvQJIr1?Trq%7j@GuJONtiNvLP;i4&Q=hZtsc(1T)m!32CI3hgt?)h+ z@p0QH>dRfuYfE~srRsgX?NP2{OZT@IHec-={-h}XMPt;h*)!Hyh$lX(34LUd(|f;t zn%8Md@#9nZp*{zm{pZYWU(p-~?dQ_SxJwfqmt~H=x$0mKN2Xo%(gCew|JoV6aSLX? zq23bY+;rg8xn|XW}$+tO_bjGT+<Tb-EL z@qWni$6wi|UG6Ra+MTiN_-=i|+>(NdsU?*@EIA3z~n=Cr-AK2npt1XUX%#!Mc^^L!m63nzTjsKCENsZBRH#!3-zSlrv_SzQI=H46iq9 zT?@nbU<*uu>mOnJE(7@nGo3F7Y9I7!G;g*J(vAL225Jza|2EX|q3_^^@8HAV!A;)5 zSuc6)VT`OfYy4zw;AREB0Q-^ku|(QTw;TNpLfo!<@$w^>2&E!kEq?VDX5!#oES4oG z#A-=6@kJKalRYc)Z~#7tciYS9sU%%LClDRE+P$h#D4GJZ0iT+C-v{$yW$ zDZUcxxfHjBfuZtoztQa#9dLzYu!n%gyhp=v@ zxE>zQ?-WnRx))n;8;0v(xHyi?YddZmjyV|iemSuGhoClc4g?e@Ii|5Vtfw{BvsoPa zA?L@7;^f?Z%;L}wng0qFhkm@#1w}$}_kPSMC?1NrisFkgU(4cD=;tivbre5| z`DKbXVSbh34VeGN;_UtEU~$O%0Q>(=@f(;wqj)dYFDd>MYh!#^;XFhCT&&G0P7d`r ziVw&71BwsDdJ4tuv38~Sc&vRWF2dTM;=Wifp?E0PArxPZHOvtT%on~BNFnbBSQhN6 zYqG%(;1ADjXRIA4z5;9Za~S+1F(>awx_=&Shn(!ZQz=f)Im3@%7YnaSe!N=MZ=*;} zElpH2{78vf!tlf6;~Bn62?9Kw_)bVlV~kG|L@$Ezg~ ztFV?U68Lc{Wt>!{UWao@qr@_mBvy*o~9co zIS+pZDB#O36On-X`~QuHa|eBrbBLStPpXi)A?;1UHVT}G$)L#}Mqf12^F!#gx?Xbo8RXovf9}JX#I{q{+hoq=L ze)v8jX-IwvVgu!m!#7_&YG)S`PZ<2{A1Gdk7u4e~#-B#wNr%{Ogh`jwX+L}Ck(!K2FlOFKPkenBjkr~VG>XB z??(*s3rGX1L-`! double CPU_Mhz; @@ -17,7 +18,7 @@ static int coeff[MAXDEGREE+1]; #define MAX_ITER_COUNT 100 -#define REF_CPU_MHZ 2292.6 // 这是我的处理器主频 +#define REF_CPU_MHZ 2292.6 // 锟斤拷锟斤拷锟揭的达拷锟斤拷锟斤拷锟斤拷频 /* Define performance standards */ static struct { @@ -26,7 +27,7 @@ static struct { } cstandard[3] = {{4.00, 1.75}, /* CPE */ {50, 43}, /* C(10) */ - {57,31} /* 常系数多项式计算 */ + {57,31} /* 锟斤拷系锟斤拷锟斤拷锟斤拷式锟斤拷锟斤拷 */ }; int coeff_const[4]; @@ -82,7 +83,7 @@ static void init_const_poly(void) coeff_const[i] = rand_div+10; } - printf("你需要修改poly.c的const_poly_eval函数,实现下面的常数多项式计算!\n"); + printf("锟斤拷锟斤拷要锟睫革拷poly.c锟斤拷const_poly_eval锟斤拷锟斤拷锟斤拷实锟斤拷锟斤拷锟斤拷某锟斤拷锟斤拷锟斤拷锟绞斤拷锟斤拷悖n"); printf("\tresult=%d+%d*x+%d*x^2+%d*x^3\n",coeff_const[0],coeff_const[1],coeff_const[2],coeff_const[3]); fixval_const = ref_poly_eval(coeff_const, 3, xval); @@ -97,15 +98,15 @@ void test_const_poly(void) int my_cal = const_poly_eval(coeff_const, 3, xval); if (fixval_const != my_cal) { - printf("常系数多项式计算const_poly_eval实现错误(x=%d),预期结果是%d,但是计算得到的是%d\n",xval,fixval_const,my_cal); + printf("锟斤拷系锟斤拷锟斤拷锟斤拷式锟斤拷锟斤拷const_poly_eval实锟街达拷锟斤拷x=%d锟斤拷锟斤拷预锟节斤拷锟斤拷锟%d锟斤拷锟斤拷锟角硷拷锟斤拷玫锟斤拷锟斤拷锟%d\n",xval,fixval_const,my_cal); exit(0); } fix_time = 0; for (i=0;i*FELI6 z-!CU|8O9Y(X$~hOsl!xMNiz;-NVOEiUy@Wh)Tnf*MC^l;)upI+!tx8h4H{SAMm?1Jc3w|2hd9 zY+LI_rb^pd6-D+DB)*I6Dl8V>6{PVs5eOTg4%i5`?}&OCxy;$BIjT@()lo7*DoSjEihtack;OYpzZb z2*0DYC<}!A&;|eSwge)l34{;%WMYw0!UbIBAEN-g^LzZA!`?x)2G6qNfn*L)(tX4a zBh=YC$SN1$sI=q-@;_OM@fjLYC>1H` zEHu-`j*~)z&lz=IsWR~leeNh}+HitE-qmI47v~5h_^3cwB7n1}__dZm$d6G$Vr!&= zajl@a836IGs~}C3OEhdIiOv-?T8Ite0q76DHK-r|-d=hrw|oAGIvV#43JvGaqJLG# zM0^9>D&i1Z2z4T|;hkM<6##2IcDv>lEarsLO^K#{s!w7L5`J4*d2K;Tj# z+KB#LV9VF>!{ZeNEiIAa|E^)>VUbd6n3j&AaR8J5;jz-A==lF_=E4ai(;D_iKn9J# zpr{PA{kU_-@wNoztEWfKAY8Yz?HKY5sahKoMQ%c?|Kk~SxctQLqqr%uCPv2Qbbh|b zGF%ql7q52i7ahHR0PUad+`)T~<0?;~B;O{{VbNA6LaSyEik6*58;fgaw5I=&^!QKO z(9lriP})lEJ~}0eSDq8?s|)a0G4!Psd_@tc#kFqx*KU`T!uWVfn%0)({&^HFJB})4 z2+f3XjPR2`)ufL9TYcK>qR6ddrGM>Kk;d5Qf03Yi@}<4S&Zlki^xu;R(LQ0$C^~~E zPTWNw;Ri7dK>FLA(Dkz6{t;0@k!bMg>-Y_b;WwTpqMa$~N^)L3#ADEY{7AI@+sJS4 zG+^@U8XM{T-xlCB5>=Vr>O{yPgIcH>{K;z)AZxFyQ0nLUSQx|E=@b_2aFCz5@L0}PoN08ctZaL&iUR;mv27Gz( z-^G_qNVEUxm=>h1?3gyAR4sr^_ynnoIj|*&FB9FE1cCfLgtxdsAXfcfg957uQDx^% zaKqAcaBKK_IHwh8J80s@J{2{FFI-qgO+eB`6^_3kDYXCLWc4ws0h;P0x-F$U2_G3( zoaU!3N}3L6W@IFYFGBOnCZ$6+@cgnd>YYH?f6#^Siw6Y z{-UHn?5jXnE)Z5ZHa`*+H)sipUrP$+mP``}D+PKbX?AkLQ9lG$pQEn)Ia?73`2yfZ z%sW6dgEB4>O<`dAB&rA9^`gW7Z5h?bRCxh%Cf~Y?VwJdG-k|{_Y9L>f$l=hW#8#yQ z3*=mwJ2~O;;FRX4E#oA7b@# zaO~#m5t}yRa0<9G_-J>I*HzOQ)pxej zO5=VVgtD0%BHmwl`@mybkEcJ4BhGz1*Szgivs2*knRYE=*_$-V)MmE)Oj6|jR81{mjMW^c1SAbGUAnZPKy z*iQC^iTi-!p&j(K-HmcaN=zABJveArhoOUt`&>IccSwAhg50-Ay}3;%0*g{R56!qG zHFLA%*r)PhZ?m$kjz|`b?L5$T^Lm}x!l3H7_|8MS{44b+UoyCF#q|lY+s3BP+b;Q! z)tUL)vz4^Pwyd3bwegnOqveuwt!}K=u5UblP;7g-f$4!#y=eyPoC@S$1sl|GskbNG z9acFzp2ElRv~uX6v53kPY3O{m(IzZ z`j14ENq7ArKAGV4ViiDbm#)zB#RsiHpIo$? z9_jkeWR;ddug4AV(@9%XQP=O$=uamVXVh%%dR@6+V8*y9y`H+c>=_WA3SEywl8 z9=+wce(n;JRIANyM?9bACYcWIwlix_o#dj>NgLN}_T1RG!?yFcA=x^|!ZH(PGzIkv zS}~(8ZpxEd2~}HFDWAg6(*Bx;Q0p=$U%~A&*x-O@5^I{`TX-kqsf5M#B@szr0@6XTju@xH%*STkgXf1iNdP>8o{PmZcXC+N@u`ipSQLwL3cZzt}p86G1YUlOG z{QIQVeC!(!9fzYEWS>pSe4noW>%@y@iHo5tw90F(<)q82oS(eU8`Zj0ZlL8G&Bd+b z#a;5MWP?(Ein*PdknuvZa>4_R71G?(gg!>sbc=>-&KtHkJO8tbYh$8#oYb5(!jJ;> z=+RuhUb?f`?GNJ@uL={MUpbI-y4}6P(`r&L+0#?c=Uf{xKV$vZC9(?M8TxfryGF15 z=-J(?Yp9m8 zj(uXiM#wLC=6`41yfwd`dS{>P@>6z>)9t+TYfei$PRVl5bnNp~ukg|>hb;$_j|Ogk znd|aT?XbAKwMp|IehmIJ>-MPy6=weFI-l-}Epd4_VweZ2blK3(v( z>XB2l@KDf+lZmryEc9>KI^P;LJ#cK@r_GKxB24<^44!bNThqLy3r~O6=)IuJ;xQjo zPWvyJAsZZ-cWILQ)pt^%bF7=kIUUS!mY!}|Q_;)iv18!X=Nq0~OEY}-BwBlH-q*)R z99Q*U^1(uT?$8qvw+*+I+>g7ox&LDGTKhA4C*RdHjEYM%O#bGua(C&2PSLAI)ST(t zL3`UOPUlUZPpcm*EZ&*p@qE+qv@|PA#m)78PgY4zefCo0@E~{1(aDSUchC$w9uU1_ zgu@NP$T2Uho~CxZEi~-$*2LsS=sx3Hj^nbg1#OKSHeu`_m!&Irf4rz}9Qrl%;;a5% z9;;jj)Q`7aFW%Yuf=$J;{3HH4$&)8PzxZZ^x^$47ch=m^GwwbcbMW)Hr~^uQard7O ztQ^qAUBhj-=CGI>mG{?%xck>Syy>rFOmdSs}XpwXLo0k!0_O{*&*8T(?mk z*5#=1mD#x3UB{fKXBq@wFxN@1-7V`ZJKpD!te)H=iO!mFl7hlU$Hv&%H*a-xIbL<; ztyM(8%{AGoKU3nSb@2=my4b&J9(Zu1&Gq6q8~0`F!XH~q@OfoD?%bAq z^W`tyCVn2?@$=|!)^3*@vvyP!`sfw6???U9W5@vx~_VI$M|JJ`1__+@qvoHvoNxb5Ip(goY2(0q!J2AY@9J&QxOXYS*|TWIw53zd zj&c5#6ZbLv`?3!fj&7L?YE*8loo;E&?atJC+%0-S*Dx!a<9T;Z`aL`9v(R9}s^&_m zrIV7*7yNQB$e$H*>t30sRcxomyN34tCd64j*`(h6W#4nE+n1d%oS(DM;&fguC)94u zaW}ke{oPj>*IfI^>{pLm3o*?xT+xWii_5D293M+$|4hehNu2Uvu8Nz=i1TOqXWemr z71DRpA))rT&KqQ+E*aF?J$auRsQP1r$E=&#JMxQblO+eZ+ovBpJg943^1;w|@$tG} z%DznzuRUQmR=%t**=}*YX3a~v@LAF4`hPliWy`~#TMsR~w#C{q)xkbDbB5YbVV>?j z!>%2ccg}Vi^~$^4d#B#Fx%cnetv?a6;@7lwOJo!^r#-$DHp6q?qi4oJ+r1YmY<5WM zz0c6it@=gR(WA_@y)*2sTnlYiKc3zHrN{871*P4FH#&G_o$aN!H}LcfkL6{1s){m) zUhfzkG4YOsVUx!Dy-UV4*9~(Of9*2d=G3N_-XFhOi@#YkI%d-sn}}i61=6!T;uMrp z=B{3mWw_e#+)?LYwLPWk9M_su@pvv(I2-{)_Oq~v5Pjx~NIFle4=SU09e&YJl7-t(Rt?GJJrSf1bQN>ewz&AYRb1Pn&7kVnDNSO_k*mN+*TupAwg0WQvNlS)y%&=5VuQvDC zW-;@_hXMOz4ApCPCiGf$VN74m)4DNMt9qIDJk|2fwBrEn4-MP9Z$7`YIrL|%_-u=ij^d4O3)EH>zc3y) zyk(7wZ!e40Rq91~#gFgm&)n+uIJB4G$f@H>kueYZ-}$$H#=o=2+M>OGv*G;c+|{F7q{5GU)xTKd z{UJ1_qx+8@tv1JF)|vHgxGdC(jD4l|?0Z#<{qXBuju|wcyJ&f8;jZi>vwCXu?|0pH z#=iciCa?F%cpUPp@u}B(%ZGImAzHIcYx0ZlCVL;cKm1<(q1dw})9>9bZ+iK-@rTrm zXYz|Xt(_*6=@4X5IL!KGWXFQrCkGv>{$A4eSh!=b#e2EPmV+*H`oB;f88x<>|JwfR zYMwv$UsQS7CC=rCweHp2mMW#di7wX547U$X?l`=8WBs9kR?p7OTV+$PuJ&OwmYI!egNBj1E1`hNG`W4ARB$Y6*F6dh(xqXD$^L z6;&1$bK?7FW-eQ{EOA-Z$Vl0~m*p-RtQ>exZNBM|`!g4nrVP|9t-Ux{Y-B>#$X`Y) zI0^ZzJ|^2YCwfV)t@RwS^9EYv-K%Q&rN27c4*y<-ap>q z`{NY(jghC7{WjI@GIg}>yL^M*$?K8zCk3-jjZ#wlm2IncEG?Qm!|M22+xr`S+6*{2 zBSk^ZP%^PNZOcNhy9EkM{N_FUIr!y|YhSmnzH&#)W@h{`Et!=1Lm69^2wdL#^idf1 zQ^9$|qR6Ik&n0%sZ=bQsD>*t_>s`XkVw5ZA~@Ezr9db?(4noDs%JY%eP2e?xmsRRGzp@ykhNCnb7;oGP>DxmY;9^Q&PFC zN4>Mk%W0m4@5eVpzs$SMBl4&D*crVdZLvFS}3YuPjvVaU#Z61`>}ZuXxxD&3-Ljd$Iwk;dWmi$+WJ zNmC0HREDb7mIKySDQOv9mj@T;nV4w=mJe@STewFh)a9Do>d(cm z6MPp)#%O#pGx;)5>rHKDl4-z=A1>vQ-{+i25Xc0&)Nf3v)?WQsF1+)FB`M>B`d&;q ze^Go?;*Y(_LWfy?g5>ml4@QM3>Afmh^!|QF|699?1qN1@X1PQ1>b7(>yuIRu=8i6R zoX_1dGCZuhF(Ybaf0up1er3-GPM1C)-@N?5)7Os9%if>)?3(L2Z=Yjg?k;ZKh+l`3 z7D;t~^?tyCQPV&AHT)3dDu(TrdotHx<=N-%o*m8}=-OlM?lI+y_T7opG7Xtvs^E_*b9G)YCoot}Ph$ZO)S_3-2eo_Mr+!y&t9= zuqX;R{p`g8?fXI7w9O4-FXeiCZJ6BS+~|t!k`F`O3f9aw|8yg1taiqvTGjANs$pY; z4lSF!a%e-;kHVW)8c`XixOEr&1mC^Yqk0c1d+CxEWSJ9NbG6dEe!s#Hoj>#~ zYjWK5WPf*cw_2NR=X=>z1@$@oSpDR;5xrV01o#B+@a(-P|JT!pjr zvj(533CJv;m$b{vtIX7B^5TtSeJ)*k*=tXj;7M^nlSc43*GJ1bAMW%mIP>!LNt;Lc zDDOM+*0(Uu?@vWx#USU;VU3ShcJ8Yoos#jU09=kr~Rw8u5O*` zf4%3|?BjED#{BGgY*|;+`H2L?Yget9OvKE zSX0;k)af2lW|8r^PTh9j(XbBPyhn9R{?P>yX&!GQry1NiYOOV6`1ujPS~^s29<{UH ze&*fV?E4YO9mgA5MC{Bw)^1zwdq}M+ez)VEa*2t-lJ*lM03xV`*%&Lcvn2dxT3q~ zkE3fmP>Nm{?{ci~IS{9g-> zo;jrvo$TFQD7~d@WyZg=E;bg2Xsq_`8r#!p^Pbv-{JU4`F3h^o^ijQZ@^qE_A#zK_ zR3lEJZy0{x-=yKj_WS;pmR2lRVx!>j(Z1gdU(j@FM^Gi`zo#qUm?FJ*K&%z(x;^>_o>PqT^;po zYwvDOO9%Ol4R~;Dal;&61x2YT3Q~1rKHu~TZI+h{x=>PjcE|pju08DS#J0zN&nc?7 zq5Mwyg=wMKUFYde+-Q6IAzHel$GPfh8JiiJ7#eGt7@3$GnHX7e96pV-&(b|!BV6`3aCh;5=OBuSyF#CH%Vj+b~PA*X0$uc%8~wUx&{3<4VqP+a5- z7wij1r$-s#oCndNJK>yrHaOjk z7r_Dk{)OV>+OZSb4)4sU3Oqlpwh-jt!@{5>4iv9K9HDlhKF>lX=KWf73O?bTtuQA~@CL#@=*Mu+ClrPnkFV&bQCOk1Q z=E9>wyl}fB0z>AC+KgYX^a}G25AedRCjw|O?}@hfAFpKcCg2dxDT?y1Y;wW=-VssZ z{Lp{NBqiFq#YY4h-R<{lk4iBm6o1ZWb4TwOm+KWCRxx z86FuL#D($gL>2Oi^!DMrydomKedl`l&YtTP;2nr;p#%I7`0WY%@vdIZvti(nae_~0 zd@73W101IW{G}UlWL(eZXe?Fk}c9i}(r5{0Yaah?v7WT+BgJXGByK1Q*bk%8hv-!(Zb9*$ z6sP?*km9%~2lTteb6ydZV1I`|HM*uX#eEE_{WE7PG(5BU41G0_?mzN z=Gy2($`w5~int@<#Lu=Ujsw=~pbycH#SjHtE`IhSd?JP@V2(9}PsI=gY^NXk5blW~ z3RsV?&4dSFhyu&wcq-z~C{FbB{fO@O9iYeM(*0iDj-6YSo*r*68BWS=qBw0|0{R7) zi^l`GF3C{+<$yS`qek&@6xU$vkn5OnJ9-C3KLgpqVJyS*@x=*+M26RkFn5CCdm9j&hFF6#)YdWOdy_6li zhC%o#hLieNx5IBx_H8LU^nFK<;;$$>+7$mu@c|T(n;oc6;_iW^aO?g3~0&>02{?k{}rAocGKoYiYldc0;v^g5KD9?$gs z)RfX2x1%TPN%%0nRuTK;e3e5?5q(JDf8u}=n0H1W1F(zZIAD&~rt#Rq0ngQOAY7E< zIC4M5Tp4{X!-0OV$DCg6#bW~p%yC)TaDBvc2^w zd(9brR~Vmm3@?WIljlcbry10D3Y{Ae_wCGZ{TuZ{N;v z{+HdzE?)1$f!GOzcAa84xi?>AIGHzGXZTP^KVo>Ms5tH&!#@N6#_+{ZA8GI(spm$} zsW3bm(!Ck33u!}!i-Udgyh-ej06z;D{ccE)XE?bZM>3quBbP8d70TVu@L`ZX&2S%R zFFC(SJuiXoDx>!Wf7UX5H}M;^kJ#x9={^j11icl*zW^V}a5v!I3||iEVAx|Bxd!5@ zCNugxNM})c94*3P6ml881GMun!!LlJs~9d1{58X+MdrCLz;Qjv*vE%j8T|rC6MI;X zpVvqr&#lDIjc|b6f3Y5$$Omo(^&=zpe25y7~n27)XgCq<{I&N63cZn3x)z6@7D$8gZ4IG&F!fp(cv9NW>xViYVH-b;kJp}qByog)=kua z<9;M#1ZNq<=*e};isHDvWW8n>!^t~8b`;0ulHct(Q5@TGhjBQb;rRPu96T6K*6Y0) zo&x%4isN#tcq<&20-Urr0(fRS`~YPK*YhRVKg{swz>hO_62SiTcK8Fzjxy4^fc`DT zaktlpdFx3FTH&9NX^-_o-_P z?*jZ5!^v~dJ%;Op{yD|za^F*&?-$TFF?=lW?+l*U=L1(~cn0v^3||d= z0K<0xhd-m#R_=D-mW=)gaBGH>dAtL|uYz8{@JqlaFkBYqovsWg^X%!si9apCpJ5co zw;J;P-U5o_`v|#yrcxa1$$Nd9DUS7b!2YgwcoA^C!{b2*>R-z6zQC_C+#Glf!%czT zWw=Qz8w5kM{yh_??FBRj>j9G=i~1%aCpmbee6IK#Ndk%q9^MnqCb-K zdp$uC%I(SM$vRj+hLiOXV}_IWfrc_X2>dT#IC+0%I>X8OVF1HPKh9$~x!$HToILOE zVE7xT&u-w@UDyTb*l8%7W%T6yxt=>wzCBJGg(z z_h&7X9=eLRqy!5fq<=|0yHgz3(;4cc&hXK|`v50?z~4dQFo@#VWkndGU`yG-^)v^6 zj-m9po@D(hkkaG&ghIK&3=agph~aC1$1!{r@HB>>0iMBdvOcnn;bc7`hvDzQPCmoC zT+CGv8C3zizsva*!=PPx40nNY3mHzHQ+laj-#ZW z`zVgx;bdK=1UT^<`Q61sh7*5&qBt%x8+~w)hA;Jr9r8Vw z8pW~x5f-DMO>sQb$oRCPIDMTLP#o(Ep?^In4lcmvxmgs)`U{|6%5d`gj!nSHc%A`% z+s1IR-n56|i$VVn!()LLFr2LWoMiY~&=)hj2zVL8PXlkDIPRk1*a;|f#5WrhaJy)( zN^v}{9)q1841Wl`FTw#^n;OPw|nU#XBpCWBmy@M&3gp zcF1~l0HY^7g5tD&@;*GaL-TA(kIU@??KwtqJQRx12M6*zgk_kQVu->$Mn99M97n$I zC3?aoVSEwp2YOYCRXQyfPnwL}@lQ=Hb%ZHLEE97l^pW;ph} z1>%Q2l%BSKf#KQU=elm%Ohtkm9&1 z=^`07j^Tj0Fou)o&9&|D!glx4r4gE&&DyFth;70oZP>5GMrq8Pcgg#R~UsWqFr14 zA<2>7%U~JppFSYK^FuskvxIa%hWkKz0K-EeZOHItkS5nmVt*s>VT^tkr0p1f7}BE| zPToIpVt5IpCosGk(qtW-)Q9+GI-`FC>6r|F32A?ZH$Zw0!^!-T_=%MJ19Y*lC*uv0 z+%Y)M*#6Q2fjHXZ4++H2gd4#D;vd4(f&0Op@FGZ)$eaaw+fMNXp9WDoMCvAUU0$*n^hhc+aqEN|{OE?T4lH}K;uuV39HRPAn zYLoI2CdvOM2HQ0GQ^t|}5llTu{YgD+P`~H0VbwvkI7=SM_R3P~Y!uQ?3 z<A3LW@c{bOc|a_B+^Tz~9RlAqK+53#?MABImX6rdYL3A82o3D}7u ze=9$Gwn!lWAueADeMo*%ej#FiD?bm)$L*s7$xqk`6!}~E^-#VRZ(Q^@wfG&E(9ZY) zv;2Qh^tb$KFmC@5nQ7aSVo3fLbcD~}rVvFgP~)CHMDn%mQG~>4-9yMvGaP5N`0oed z>jKkE?h)iC^9W*x)SsmFpnNvJvbIQ(&J@Vz$G@xlTmPM$E6RTx3c|yo2l|lwr2nva zwrrdwTL*8^x)WUg)KT0Fec1f?GAn^TY?uuX2t66&Omq0(4E`;DdXUH>{<~6iV3OSb E09iaY&Hw-a literal 0 HcmV?d00001