From: W. Trevor King Date: Thu, 16 Sep 2010 01:45:19 +0000 (-0400) Subject: Cleaned up src/average/. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=c0f211a729467529ae6b4396407464b8f49cd71b;p=parallel_computing.git Cleaned up src/average/. --- diff --git a/content/point_to_point/.html_toc b/content/point_to_point/.html_toc deleted file mode 100644 index ab7c2e3..0000000 --- a/content/point_to_point/.html_toc +++ /dev/null @@ -1 +0,0 @@ -average_example diff --git a/content/point_to_point/average_example/index.shtml b/content/point_to_point/average_example/index.shtml deleted file mode 100644 index b95393c..0000000 --- a/content/point_to_point/average_example/index.shtml +++ /dev/null @@ -1,48 +0,0 @@ - - -

Average Example

- -

Computing the average and standard deviation of a set of numbers is -a simple task. Yet doing it in parallel provides a good introduction -to message passing programming using MPI.

- -

The data set data is some noisy data set. -The file contains "x" and a "y" coordinates of points that you can -display with plot_data. We seek the average and standard deviation of -the "y" column.

- -

The code average.c solves this problem -using a serial algorithm. Note the use of malloc -and free to reserve and release memory space. This allows -writing a flexible and general code.

- -

A parallel implementation of the solution could use an algorithm in -which the data is divided among all the processes. This illustrates -one of the advantage of using a parallel architecture in that the -memory requirement per node is much less than in a serial -implementation of the code. Or that the agregate memory of the -parallel computer is tipically much larger than the memory of a single -computer, allowing larger problems to be solved.

- -

The algorithm of a parallel implementation is illustrated in a -simplified flowchart. Note the -following:

- - -

A parallel code should be written by the student. It requires a -careful buildup and testing of the statements.

- - diff --git a/content/point_to_point/index.shtml b/content/point_to_point/index.shtml index 4112f2e..018244a 100644 --- a/content/point_to_point/index.shtml +++ b/content/point_to_point/index.shtml @@ -309,9 +309,9 @@ The following message passing examples are bundled in process.
hop_again_again.c
Each node sends a message to left and right neighbors in a ring - fashion again-and-again
+ fashion again-and-again.
ring.c
-
Non-blocking communication example
+
Non-blocking communication example.

Communication time

@@ -322,4 +322,53 @@ The following message passing examples are bundled in time. +

Average Example

+ +

Computing the average and standard deviation of a set of numbers is +a simple task. Yet doing it in parallel provides a good introduction +to message passing programming using MPI.

+ +The following averaging example is bundled in +average.tar.gz. + +

Consider a noisy data set data generated +by generate_random.c. +The file contains x and y coordinates of points that +you can display with gnuplot. We seek the average and +standard deviation of the y column.

+ +

The code average.c solves this +problem using a serial algorithm. Note the use of malloc +and free to reserve and release memory space. This allows +for flexible and general code.

+ +

A parallel implementation of the solution could use an algorithm in +which the data is divided among all the processes. This illustrates +one of the advantages of using a parallel architecture in that the +memory requirement per node is much less than in a serial +implementation of the code. Or that the aggregate memory of the +parallel computer is typically much larger than the memory of a single +computer, allowing larger problems to be solved.

+ +

The algorithm of a parallel implementation is illustrated in a +simplified flowchart. Note the +following:

+ + +

The reader is encouraged to write their own parallel +implementation. Be careful and think things through!

+ diff --git a/src/MPI2_message_passing/README b/src/MPI2_message_passing/README index a723280..34db2b5 100644 --- a/src/MPI2_message_passing/README +++ b/src/MPI2_message_passing/README @@ -50,35 +50,6 @@ Just run Usage ----- -MPI configuration -~~~~~~~~~~~~~~~~~ - -You should only have to do this once for each host, but since this is -your first MPI program, you probably haven't done it yet. From - - $ man mpd - -we have: - - "A file named .mpd.conf file must be present in the user's home - directory with read and write access only for the user, and must - contain at least a line with MPD_SECRETWORD=" - -so create the file with a random secret word: - - $ touch ~/.mpd.conf - $ chmod 600 ~/.mpd.conf - $ echo "MPD_SECRETWORD=$(cat /dev/urandom | tr -d -c A-Z-a-z-0-9 | head -c 60)" ~/.mpd.conf - -Running MPI programs -~~~~~~~~~~~~~~~~~~~~ - -Create a list of possible hosts `mpd.hosts`, for example - - xphy1.physics.xterm.net - xphy2.physics.xterm.net - ... - Start the Message Passing Daemons with $ mpdboot -f mpd.hosts diff --git a/src/average/Makefile b/src/average/Makefile new file mode 100644 index 0000000..8e41ced --- /dev/null +++ b/src/average/Makefile @@ -0,0 +1,51 @@ +# General Environment + +RM = /bin/rm +DATA_SIZE = 131071 # the 6th Mersenne prime +DATA = data + +# Non-MPI Environment + +CC = /usr/bin/gcc +CFLAGS = +LD = $(CC) +LDFLAGS = -lm +EXECS = average generate_random + +# MPI Environment + +MPI_CC = /usr/bin/mpicc +MPI_CFLAGS = +MPI_LD = $(MPI_CC) +MPI_LDFLAGS = -lm +MPI_EXECS = average_parallel + +# Top level targets + +all: $(EXECS) $(MPI_EXECS) $(DATA) + +clean: + $(RM) -f *.o $(EXECS) $(DATA) + +# Non-MPI rules + +data : generate_random + ./$< $(DATA_SIZE) > $@ + +$(EXECS:%=%.o) : %.o : %.c + $(CC) -c $(CFLAGS) -o $@ $^ + +$(EXECS) : % : %.o + $(LD) $(LDFLAGS) -o $@ $^ + +# MPI rules + +$(MPI_EXECS:%=%.o) : %.o : %.c + $(MPI_CC) -c $(CFLAGS) -o $@ $^ + +$(MPI_EXECS) : % : %.o + $(MPI_LD) $(LDFLAGS) -o $@ $^ + +# Interesting Makefile sections +# 
4.12.1 Syntax of Static Pattern Rules +# 6.3.1 Substitution References diff --git a/src/average/README b/src/average/README new file mode 100644 index 0000000..22d61cc --- /dev/null +++ b/src/average/README @@ -0,0 +1,40 @@ +average +======= + +Compute the average and standard deviation of a set of numbers using +both serial and parallel algorithms. + +Manifest +-------- + +================== ============================================== +README This file. +Makefile Automate building and cleanup. +generate_random.c Generate random data for the 'data' file. +read_file.c/.h Shared code defining read_data(). +average.c Serial average / std. dev. calculation. +average_parallel.c Serial average / std. dev. calculation. +flowchart.pdf Diagram of control flow in average_parallel.c. +================== ============================================== + +Build +----- + +Just run + + $ make + +which also builds a random data file 'data'. + +Usage +----- + +Serial code + + $ ./average data + +Parallel code + + $ mpdboot -f mpd.hosts + $ mpiexec -n 4 ./average_parallel data + $ mpdallexit diff --git a/src/average/average.c b/src/average/average.c index d1d5811..5140889 100644 --- a/src/average/average.c +++ b/src/average/average.c @@ -1,53 +1,93 @@ -// -// average and standard deviation of a set of numbers -// -// Michel Vallieres +/* Serial average and standard deviation of a set of numbers + * + * usage: average data_file + * + * Arguments: + * data_file path to a data file as output by 'generate_random' + */ -#include -#include -#include +/* Michel Vallieres */ -int main( int *argc, char *argv[] ) +#include +#include +#include + + +int read_data(const char *file_name, int *pN, double **pData); + + +int main(int argc, char *argv[]) { + int N, i; + double *data, average, diff, std_dev; + char *file_name = "data"; + + // parse arguments + if (argc > 1) + file_name = argv[1]; + + // setup + if (read_data(file_name, &N, &data) != EXIT_SUCCESS) + return EXIT_FAILURE; + + 
printf("computing\n"); + + // average + average = 0.0; + for (i=0; i < N; i++) + average += data[i]; + printf("sum: %f\n", average); + average /= N; + printf("average: %f\n", average); + + // standard deviation + std_dev = 0.0; + for (i=0; i < N; i++) + { + diff = data[i] - average; + std_dev += diff * diff; + } + std_dev = sqrt(std_dev / (N-1)); + printf("standard deviation: %f\n", std_dev); + + // cleanup and exit + free(data); + return EXIT_SUCCESS; +} + +/* Read in data from a file, discarding the x data. */ +int read_data(const char *file_name, int *pN, double **pData) { FILE *fp; - int N, k; - double *x, average, std_dev, dumb, sum; + int i; + double x, y; + + // open the file + if ((fp = fopen(file_name, "r")) == NULL) + { + fprintf(stderr, "error in opening data file %s\n", file_name); + return EXIT_FAILURE; + } - // title - printf( "\n Average code \n\n" ); + // read the size of the data file + fscanf(fp, "# %d", pN); - // open file - if ( ( fp = fopen( "data", "r" ) ) == NULL ) + // allocate memory for the data + *pData = (double *)malloc(sizeof(double)* *pN); + if (*pData == NULL) { - printf( "error in opening data file\n" ); - exit(1); + fprintf(stderr, "could not allocate %d bytes\n", sizeof(double)* *pN); + return EXIT_FAILURE; } - // value of N - fscanf( fp, " %d ", &N ); - printf( " Number pf points: %d \n", N ); - // make space in memory for x - x = (double *)malloc( sizeof(double)*N ); - // read in x - for ( k=0 ; k -#include -#include +/* Michel Vallieres + * Spring 2002 */ -int main( int argc, char *argv[] ) +#include +#include +#include +#include + +#define TAG_NUMBER 100 +#define TAG_DATA 101 +#define TAG_SUM 102 +#define TAG_AVERAGE 103 +#define TAG_STD_DEV 104 + + +int master(int argc, char *argv[], int rank, int size); +int slave(int rank); + +int main(int argc, char *argv[]) { - FILE *fp; - int N, k; - double *x, average, std_dev, dumb, sum, local_sum; - int process, my_rank, size; - int N_local; - MPI_Status recv_status; + int rank, size; 
MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); - // NODE 0 - - if ( my_rank == 0 ) + if (size == 1) { - // title - printf( "\n Average code \n\n" ); - // open file - if ( ( fp = fopen( "data", "r" ) ) == NULL ) - { - printf( "error in opening data file\n" ); - exit(1); - } - // value of N - fscanf( fp, " %d ", &N ); - printf( " Number pf points: %d \n", N ); - // size of data chunks on each node - N_local = N / size; - // make space in memory for x (node zero) - x = (double *)malloc( sizeof(double)*(N_local+size) ); - - for ( process=1 ; process 1) + file_name = argv[1]; + + // setup + + // open the file + if ((fp = fopen(file_name, "r")) == NULL) { - // Number of numbers to analize locally - MPI_Recv( &N_local, 1, MPI_INT, 0, 121, - MPI_COMM_WORLD, &recv_status ); - - printf( " Node %d - N %d \n", my_rank, N_local ); - - // make space in memory for x (node zero) - x = (double *)malloc( sizeof(double)*(N_local+size) ); - // local x array - MPI_Recv( x, N_local, MPI_DOUBLE, 0, 122, - MPI_COMM_WORLD, &recv_status); - // average (local sum) - local_sum = 0.0; - for ( k=0 ; k -#include -#include -#include - -// random numbers in { 0 - 1 } -double random_num() -{ - return (double)rand()/(double)RAND_MAX; -} - - -// -// Gaussian distributed random numbers -// -double gaussian ( double *gauss1, double *gauss2 ) -{ - double twou, radius, theta; - - twou = -2.0*log(1.0-random_num()); - radius = sqrt(twou); - theta = 2*M_PI*random_num(); - *gauss1 = radius*cos(theta); - *gauss2 = radius*sin(theta); -} - -int main( int argc, char *argv[] ) -{ - int N, i; - double xmin, xmax, x, y; - double gauss1, gauss2; - - N = atoi( argv[1] ); - printf( " %d \n", N ); - - xmin = 0.2; - xmax = 1.2; - - for ( i=0; i< N ; i++ ) - { - x = xmin + (xmax-xmin)*random_num(); - y = -1.5 + 0.8*x - 0.7*x*x; - gaussian( &gauss1, &gauss2 ); - y = y * ( 1 + 0.30*gauss1*0.7/x ); - printf( " %f %f \n", x, y 
); - } -} - diff --git a/src/average/generate_random.c b/src/average/generate_random.c new file mode 100644 index 0000000..a426e5b --- /dev/null +++ b/src/average/generate_random.c @@ -0,0 +1,56 @@ +/* generate random data + * + * usage: generate_random N + * + * Arguments: + * N the number of pairs to generate. + * Output: + * An ASCII file whose first line is '# N', and subsequent lines are + * tab (\t) separated (x,y) pairs. For example: + * # 25729 + * 1.040188 -1.484743 + * 0.998440 -1.608843 + * The random numbers will be uniformly distributed on [XMIN, XMAX] + * with y values given by: y(x) = YA + YB x + YC x^2 + YR*random_num(). + */ + +#include +#include +#include +#include + +/* NOTE: Strictly ISO or POSIX compiant compilers will not define M_PI. + * See http://www.sbin.org/doc/glibc/libc_19.html#SEC389 + * You can define it yourself by uncommenting the following line. */ +//# define M_PI 3.14159265358979323846 /* pi */ + +#define XMIN 0.2 +#define XMAX 1.2 +#define YA -1.5 +#define YB 0.8 +#define YC -0.7 +#define YR 0.2 + +/* Uniform random number from [0,1] */ +double random_num() +{ + return (double)rand() / (double)RAND_MAX; +} + +int main( int argc, char *argv[] ) +{ + int N, i; + double x, y; + + N = atoi(argv[1]); + printf("# %d\n", N); + + for (i=0; i < N ; i++) + { + x = XMIN + (XMAX-XMIN)*random_num(); + y = YA + YB*x + YC*x*x + YR*random_num(); + printf("%f\t%f\n", x, y); + } + + return EXIT_SUCCESS; +} diff --git a/src/average/read_data.c b/src/average/read_data.c new file mode 100644 index 0000000..c3fb2ca --- /dev/null +++ b/src/average/read_data.c @@ -0,0 +1,7 @@ +#include +#include + +#include "read_data.h" + + + diff --git a/src/average/read_data.h b/src/average/read_data.h new file mode 100644 index 0000000..8d0e511 --- /dev/null +++ b/src/average/read_data.h @@ -0,0 +1,5 @@ +#ifndef READ_DATA_H /* protect against multiple inclusion */ +#define READ_DATA_H + + +#endif /* READ_DATA_H */