Files
tssc-hpcg/src/ComputeResidual.cpp
2026-01-18 20:37:50 +08:00

96 lines
2.3 KiB
C++

//@HEADER
// ***************************************************
//
// HPCG: High Performance Conjugate Gradient Benchmark
//
// Contact:
// Michael A. Heroux ( maherou@sandia.gov)
// Jack Dongarra (dongarra@eecs.utk.edu)
// Piotr Luszczek (luszczek@eecs.utk.edu)
//
// ***************************************************
//@HEADER
/*!
@file ComputeResidual.cpp
HPCG routine
*/
#ifndef HPCG_NO_MPI
#include <mpi.h>
#endif
#ifndef HPCG_NO_OPENMP
#include <omp.h>
#endif
#include "Vector.hpp"
#ifdef HPCG_DETAILED_DEBUG
#include "hpcg.hpp"
#include <fstream>
#endif
#include "ComputeResidual.hpp"
#include <cmath> // needed for fabs
#ifdef HPCG_DETAILED_DEBUG
#include <iostream>
#endif
/*!
Routine to compute the inf-norm difference between two vectors where:
@param[in] n number of vector elements (local to this processor)
@param[in] v1, v2 input vectors
@param[out] residual pointer to scalar value; on exit, will contain result: inf-norm difference
@return Returns zero on success and a non-zero value otherwise.
*/
int ComputeResidual(const local_int_t n, const Vector& v1, const Vector& v2, double& residual)
{
double* v1v = v1.values;
double* v2v = v2.values;
double local_residual = 0.0;
#ifndef HPCG_NO_OPENMP
#pragma omp parallel shared(local_residual, v1v, v2v)
{
double threadlocal_residual = 0.0;
#pragma omp for
for (local_int_t i = 0; i < n; i++)
{
double diff = std::fabs(v1v[i] - v2v[i]);
if (diff > threadlocal_residual)
threadlocal_residual = diff;
}
#pragma omp critical
{
if (threadlocal_residual > local_residual)
local_residual = threadlocal_residual;
}
}
#else // No threading
for (local_int_t i = 0; i < n; i++)
{
double diff = std::fabs(v1v[i] - v2v[i]);
if (diff > local_residual)
local_residual = diff;
#ifdef HPCG_DETAILED_DEBUG
HPCG_fout << " Computed, exact, diff = " << v1v[i] << " " << v2v[i] << " " << diff << std::endl;
#endif
}
#endif
#ifndef HPCG_NO_MPI
// Use MPI's reduce function to collect all partial sums
double global_residual = 0;
MPI_Allreduce(&local_residual, &global_residual, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
residual = global_residual;
#else
residual = local_residual;
#endif
return 0;
}