first commit

2026-01-18 20:37:50 +08:00
commit fff9f18287
123 changed files with 1385491 additions and 0 deletions
--- a/src/ComputeDotProduct_ref.cpp
+++ b/src/ComputeDotProduct_ref.cpp
@@ -0,0 +1,84 @@
+
+//@HEADER
+// ***************************************************
+//
+// HPCG: High Performance Conjugate Gradient Benchmark
+//
+// Contact:
+// Michael A. Heroux ( maherou@sandia.gov)
+// Jack Dongarra     (dongarra@eecs.utk.edu)
+// Piotr Luszczek    (luszczek@eecs.utk.edu)
+//
+// ***************************************************
+//@HEADER
+
+/*!
+ @file ComputeDotProduct_ref.cpp
+
+ HPCG routine
+ */
+
+#ifndef HPCG_NO_MPI
+#include "mytimer.hpp"
+#include <mpi.h>
+#endif
+#ifndef HPCG_NO_OPENMP
+#include <omp.h>
+#endif
+#include "ComputeDotProduct_ref.hpp"
+#include <cassert>
+
+/*!
+  Routine to compute the dot product of two vectors.
+
+  This is the reference dot-product implementation.  It _CANNOT_ be modified for the
+  purposes of this benchmark.
+
+  @param[in] n the number of vector elements (on this processor)
+  @param[in] x, y the input vectors
+  @param[out] result a reference to a scalar value; on exit it will contain the result.
+  @param[in,out] time_allreduce accumulator to which the time spent on the communication between processes is added
+
+  @return returns 0 upon success and non-zero otherwise (this reference version always returns 0)
+
+  @see ComputeDotProduct
+*/
+int ComputeDotProduct_ref(const local_int_t n, const Vector& x, const Vector& y, double& result, double& time_allreduce)
+{
+    // Both operands must provide at least n local entries.
+    assert(x.localLength >= n);
+    assert(y.localLength >= n);
+
+    double* const lhs = x.values;
+    double* const rhs = y.values;
+    double partial_sum = 0.0;
+
+    if (lhs != rhs)
+    {
+        // Two distinct arrays: accumulate the element-wise products.
+#ifndef HPCG_NO_OPENMP
+#pragma omp parallel for reduction(+ : partial_sum)
+#endif
+        for (local_int_t k = 0; k < n; ++k)
+        {
+            partial_sum += lhs[k] * rhs[k];
+        }
+    }
+    else
+    {
+        // Both operands share the same storage: only one array is read.
+#ifndef HPCG_NO_OPENMP
+#pragma omp parallel for reduction(+ : partial_sum)
+#endif
+        for (local_int_t k = 0; k < n; ++k)
+        {
+            partial_sum += lhs[k] * lhs[k];
+        }
+    }
+
+#ifndef HPCG_NO_MPI
+    // Sum the per-process partial results over MPI_COMM_WORLD and add the
+    // elapsed time of the reduction to the caller's accumulator.
+    const double start = mytimer();
+    double reduced = 0.0;
+    MPI_Allreduce(&partial_sum, &reduced, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    result = reduced;
+    time_allreduce += mytimer() - start;
+#else
+    // Without MPI the local sum is the result and no communication time is added.
+    time_allreduce += 0.0;
+    result = partial_sum;
+#endif
+
+    return 0;
+}