//@HEADER // *************************************************** // // HPCG: High Performance Conjugate Gradient Benchmark // // Contact: // Michael A. Heroux ( maherou@sandia.gov) // Jack Dongarra (dongarra@eecs.utk.edu) // Piotr Luszczek (luszczek@eecs.utk.edu) // // *************************************************** //@HEADER /*! @file ComputeDotProduct_ref.cpp HPCG routine */ #ifndef HPCG_NO_MPI #include "mytimer.hpp" #include #endif #ifndef HPCG_NO_OPENMP #include #endif #include "ComputeDotProduct_ref.hpp" #include /*! Routine to compute the dot product of two vectors where: This is the reference dot-product implementation. It _CANNOT_ be modified for the purposes of this benchmark. @param[in] n the number of vector elements (on this processor) @param[in] x, y the input vectors @param[in] result a pointer to scalar value, on exit will contain result. @param[out] time_allreduce the time it took to perform the communication between processes @return returns 0 upon success and non-zero otherwise @see ComputeDotProduct */ int ComputeDotProduct_ref(const local_int_t n, const Vector& x, const Vector& y, double& result, double& time_allreduce) { assert(x.localLength >= n); // Test vector lengths assert(y.localLength >= n); double local_result = 0.0; double* xv = x.values; double* yv = y.values; if (yv == xv) { #ifndef HPCG_NO_OPENMP #pragma omp parallel for reduction(+ : local_result) #endif for (local_int_t i = 0; i < n; i++) local_result += xv[i] * xv[i]; } else { #ifndef HPCG_NO_OPENMP #pragma omp parallel for reduction(+ : local_result) #endif for (local_int_t i = 0; i < n; i++) local_result += xv[i] * yv[i]; } #ifndef HPCG_NO_MPI // Use MPI's reduce function to collect all partial sums double t0 = mytimer(); double global_result = 0.0; MPI_Allreduce(&local_result, &global_result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); result = global_result; time_allreduce += mytimer() - t0; #else time_allreduce += 0.0; result = local_result; #endif return 0; }