// // Computes the triangular factorization of a matrix: A = LU // The matrix A is overwritten by L and U // // Warning: Return values of calls are not checked for error to keep // the code simple. // // Compilation command on EOS: // module load intel/compilers // icc -o matrix_serial.exe matrix_serial.c -lpthread -lc // // Sample execution and output ($ sign is the shell prompt): // // $ matrix_serial.exe 1024 1 // Threads = 1, matrix_size = 1024, LU time (sec) = 1.8609 // Threads = 1, matrix_size = 1024, Solve L time (sec) = 0.0070 // Threads = 1, matrix_size = 1024, Solve U time (sec) = 0.0069 // Threads = 1, matrix_size = 1024, err norm = 6.728074e-13 // #include #include #include #include #include #define MAX_THREADS 1024 #define MAX_N 16384 // Dense matrix structure typedef struct _matrix { double ** val; int n; } MATRIX_t; // Vector structure typedef struct _vector { double * val; int n; } VECTOR_t; // Variable Declarations MATRIX_t * A; // Matrix MATRIX_t * L; // Lower triangular matrix MATRIX_t * U; // Upper triangular matrix VECTOR_t * xs; // Exact solution for A*x = b VECTOR_t * b; // Right hand side (b = A*xs) VECTOR_t * x; // Computed by solving A*x = b VECTOR_t * y; // int num_threads; // ----------------------------------------------------------------------------- // Vector Routines // // Create and initialize vector VECTOR_t * init_vector ( int n, double init_value ) { int i; VECTOR_t * x = calloc(1, sizeof(VECTOR_t)); x->n = n; x->val = calloc(x->n, sizeof(double)); for (i = 0; i < x->n; i++) { x->val[i] = init_value; } return x; } // ----------------------------------------------------------------------------- // Matrix Routines // // Create and initialize matrix with zero value entries MATRIX_t * init_zero_matrix ( int n ) { int i, j; MATRIX_t * A = calloc(1, sizeof(MATRIX_t)); A->n = n; // Allocate storage for matrix elements A->val = calloc(A->n, sizeof(double *)); for (i = 0; i < A->n; i++) { A->val[i] = calloc(A->n, sizeof(double)); } // Initialize matrix elements for (i = 0; i < A->n; i++) { for (j = 0; j < A->n; j++) { A->val[i][j] = 0.0; } } return A; } // Create copy of matrix MATRIX_t * init_copy_matrix ( MATRIX_t * B ) { int i, j; MATRIX_t * A = calloc(1, sizeof(MATRIX_t)); A->n = B->n; // Allocate storage for matrix elements A->val = calloc(A->n, sizeof(double *)); for (i = 0; i < A->n; i++) { A->val[i] = calloc(A->n, sizeof(double)); } // Initialize matrix elements for (i = 0; i < A->n; i++) { for (j = 0; j < A->n; j++) { A->val[i][j] = B->val[i][j]; } } return A; } // Create and initialize matrix: A[i][j] = 1.0/(1.0+abs(i-j)); MATRIX_t * init_matrix ( int n ) { int i, j; MATRIX_t * A = calloc(1, sizeof(MATRIX_t)); A->n = n; // Allocate storage for matrix elements A->val = calloc(A->n, sizeof(double *)); for (i = 0; i < A->n; i++) { A->val[i] = calloc(A->n, sizeof(double)); } // Initialize matrix elements for (i = 0; i < A->n; i++) { for (j = 0; j < A->n; j++) { A->val[i][j] = 1.0/(1.0+fabs(i-j)); } } return A; } // Matrix vector product void matrix_vector_product ( MATRIX_t * A, VECTOR_t * x, VECTOR_t * y ) { int i, j; for (i = 0; i < A->n; i++) { y->val[i] = 0.0; for (j = 0; j < A->n; j++) { y->val[i] += A->val[i][j] * x->val[j]; } } } // Serial code to compute LU factorization A = L*U (no pivoting) // Assume U is initialized with A void compute_LU_factors ( MATRIX_t * L, MATRIX_t * U) { int i, j, k; double pivot; for (k = 0; k < U->n; k++) { pivot = U->val[k][k]; L->val[k][k] = 1.0; for (i = k+1; i < U->n; i++) { L->val[i][k] = U->val[i][k]/pivot; U->val[i][k] = 0.0; } for (i = k+1; i < U->n; i++) for (j = k+1; j < U->n; j++) U->val[i][j] -= L->val[i][k]*U->val[k][j]; } } // Serial code to solve lower triangular system void solve_lower_triangular_system ( MATRIX_t * L, VECTOR_t * b, VECTOR_t * x ) { int i, k; for (i = 0; i < L->n; i++) x->val[i] = b->val[i]; for (k = 0; k < L->n; k++) { x->val[k] = x->val[k]/L->val[k][k]; for (i = k+1; i < L->n; i++) { x->val[i] -= L->val[i][k]*x->val[k]; } } } // Serial code to solve upper triangular system void solve_upper_triangular_system ( MATRIX_t * U, VECTOR_t * b, VECTOR_t * x ) { int i, k; for (i = 0; i < U->n; i++) x->val[i] = b->val[i]; for (k = U->n-1; k >= 0; k--) { x->val[k] = x->val[k]/U->val[k][k]; for (i = k-1; i >= 0; i--) { x->val[i] -= U->val[i][k]*x->val[k]; } } } // Main program // - Initialize a dense matrix A // - Compute triangular factors L and U such that A = L*U // - Compute the solution of the system Ax = b as shown below: // Solve Ly = b for y // Solve Ux = y for x // - Check solution // int main(int argc, char *argv[]) { struct timeval start, stop; double total_time; int i, j, matrix_size; float err, sum; if (argc != 3) { printf("Need two integers as input \n"); printf("Use: \n"); exit(0); } if ((matrix_size = atoi(argv[argc-2])) > MAX_N) { printf("Maximum matrix size allowed: %d.\n", MAX_N); exit(0); }; if ((num_threads = atoi(argv[argc-1])) > MAX_THREADS) { printf("Maximum number of threads allowed: %d.\n", MAX_THREADS); exit(0); }; if (num_threads > matrix_size) { printf("Number of threads (%d) > matrix_size (%d) not allowed.\n", num_threads, matrix_size); exit(0); }; // Initialize matrix A A = init_matrix(matrix_size); // Initialize solution xs = 1.0 xs = init_vector(matrix_size, 1.0); // Initialize right hand side vector b = A * xs b = init_vector(matrix_size, 0.0); matrix_vector_product(A, xs, b); // Compute LU factorization of A s.t. A = L * U L = init_zero_matrix(A->n); U = init_copy_matrix(A); gettimeofday(&start, NULL); compute_LU_factors(L, U); gettimeofday(&stop, NULL); total_time = (stop.tv_sec-start.tv_sec) +0.000001*(stop.tv_usec-start.tv_usec); printf("Threads = %d, matrix_size = %d, LU time (sec) = %8.4f\n", num_threads, matrix_size, total_time); // Solve Ly = b for y y = init_vector(matrix_size, 0.0); gettimeofday(&start, NULL); solve_lower_triangular_system(L, b, y); gettimeofday(&stop, NULL); total_time = (stop.tv_sec-start.tv_sec) +0.000001*(stop.tv_usec-start.tv_usec); printf("Threads = %d, matrix_size = %d, Solve L time (sec) = %8.4f\n", num_threads, matrix_size, total_time); // Solve Ux = y for x x = init_vector(matrix_size, 0.0); gettimeofday(&start, NULL); solve_upper_triangular_system(U, y, x); gettimeofday(&stop, NULL); total_time = (stop.tv_sec-start.tv_sec) +0.000001*(stop.tv_usec-start.tv_usec); printf("Threads = %d, matrix_size = %d, Solve U time (sec) = %8.4f\n", num_threads, matrix_size, total_time); // Compute error norm: norm of (xs - x) err = 0.0; for (j = 0; j < x->n; j++) { err += (xs->val[j]-x->val[j])*(xs->val[j]-x->val[j]); } err = sqrtf(err); printf("Threads = %d, matrix_size = %d, err norm = %e\n", num_threads, matrix_size, err); }