193 lines
7.4 KiB
C++
193 lines
7.4 KiB
C++
|
|
//@HEADER
|
|
// ***************************************************
|
|
//
|
|
// HPCG: High Performance Conjugate Gradient Benchmark
|
|
//
|
|
// Contact:
|
|
// Michael A. Heroux ( maherou@sandia.gov)
|
|
// Jack Dongarra (dongarra@eecs.utk.edu)
|
|
// Piotr Luszczek (luszczek@eecs.utk.edu)
|
|
//
|
|
// ***************************************************
|
|
//@HEADER
|
|
|
|
/*
|
|
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
/*!
|
|
@file CheckProblem.cpp
|
|
|
|
HPCG routine
|
|
*/
|
|
|
|
#ifndef HPCG_NO_MPI
|
|
#include <mpi.h>
|
|
#endif
|
|
|
|
#ifndef HPCG_NO_OPENMP
|
|
#include <omp.h>
|
|
#endif
|
|
|
|
#if defined(HPCG_DEBUG) || defined(HPCG_DETAILED_DEBUG)
|
|
#include <fstream>
|
|
using std::endl;
|
|
#include "hpcg.hpp"
|
|
#endif
|
|
#include <cassert>
|
|
|
|
#include "CheckProblem.hpp"
|
|
|
|
/*!
|
|
Check the contents of the generated sparse matrix to see if values match expected contents.
|
|
|
|
@param[in] A The known system matrix
|
|
@param[inout] b The newly allocated and generated right hand side vector (if b!=0 on entry)
|
|
@param[inout] x The newly allocated solution vector with entries set to 0.0 (if x!=0 on entry)
|
|
@param[inout] xexact The newly allocated solution vector with entries set to the exact solution (if the xexact!=0
|
|
non-zero on entry)
|
|
|
|
@see GenerateGeometry
|
|
*/
|
|
|
|
void CheckProblem(SparseMatrix& A, Vector* b, Vector* x, Vector* xexact)
|
|
{
|
|
|
|
// Make local copies of geometry information. Use global_int_t since the RHS products in the calculations
|
|
// below may result in global range values.
|
|
global_int_t nx = A.geom->nx;
|
|
global_int_t ny = A.geom->ny;
|
|
global_int_t nz = A.geom->nz;
|
|
global_int_t gnx = A.geom->gnx;
|
|
global_int_t gny = A.geom->gny;
|
|
global_int_t gnz = A.geom->gnz;
|
|
global_int_t gix0 = A.geom->gix0;
|
|
global_int_t giy0 = A.geom->giy0;
|
|
global_int_t giz0 = A.geom->giz0;
|
|
|
|
local_int_t localNumberOfRows = nx * ny * nz; // This is the size of our subblock
|
|
global_int_t totalNumberOfRows = gnx * gny * gnz; // Total number of grid points in mesh
|
|
|
|
double* bv = 0;
|
|
double* xv = 0;
|
|
double* xexactv = 0;
|
|
if (b != 0)
|
|
bv = b->values; // Only compute exact solution if requested
|
|
if (x != 0)
|
|
xv = x->values; // Only compute exact solution if requested
|
|
if (xexact != 0)
|
|
xexactv = xexact->values; // Only compute exact solution if requested
|
|
|
|
local_int_t localNumberOfNonzeros = 0;
|
|
// TODO: This triply nested loop could be flattened or use nested parallelism
|
|
#ifndef HPCG_NO_OPENMP
|
|
#pragma omp parallel for
|
|
#endif
|
|
for (local_int_t iz = 0; iz < nz; iz++)
|
|
{
|
|
global_int_t giz = giz0 + iz;
|
|
for (local_int_t iy = 0; iy < ny; iy++)
|
|
{
|
|
global_int_t giy = giy0 + iy;
|
|
for (local_int_t ix = 0; ix < nx; ix++)
|
|
{
|
|
global_int_t gix = gix0 + ix;
|
|
local_int_t currentLocalRow = iz * nx * ny + iy * nx + ix;
|
|
global_int_t currentGlobalRow = giz * gnx * gny + giy * gnx + gix;
|
|
assert(A.localToGlobalMap[currentLocalRow] == currentGlobalRow);
|
|
#ifdef HPCG_DETAILED_DEBUG
|
|
HPCG_fout << " rank, globalRow, localRow = " << A.geom->rank << " " << currentGlobalRow << " "
|
|
<< A.globalToLocalMap.find(currentGlobalRow)->second << endl;
|
|
#endif
|
|
char numberOfNonzerosInRow = 0;
|
|
double* currentValuePointer
|
|
= A.matrixValues[currentLocalRow]; // Pointer to current value in current row
|
|
global_int_t* currentIndexPointerG
|
|
= A.mtxIndG[currentLocalRow]; // Pointer to current index in current row
|
|
for (int sz = -1; sz <= 1; sz++)
|
|
{
|
|
if (giz + sz > -1 && giz + sz < gnz)
|
|
{
|
|
for (int sy = -1; sy <= 1; sy++)
|
|
{
|
|
if (giy + sy > -1 && giy + sy < gny)
|
|
{
|
|
for (int sx = -1; sx <= 1; sx++)
|
|
{
|
|
if (gix + sx > -1 && gix + sx < gnx)
|
|
{
|
|
global_int_t curcol = currentGlobalRow + sz * gnx * gny + sy * gnx + sx;
|
|
if (curcol == currentGlobalRow)
|
|
{
|
|
assert(A.matrixDiagonal[currentLocalRow] == currentValuePointer);
|
|
assert(*currentValuePointer++ == 26.0);
|
|
}
|
|
else
|
|
{
|
|
assert(*currentValuePointer++ == -1.0);
|
|
}
|
|
assert(*currentIndexPointerG++ == curcol);
|
|
numberOfNonzerosInRow++;
|
|
} // end x bounds test
|
|
} // end sx loop
|
|
} // end y bounds test
|
|
} // end sy loop
|
|
} // end z bounds test
|
|
} // end sz loop
|
|
assert(A.nonzerosInRow[currentLocalRow] == numberOfNonzerosInRow);
|
|
#ifndef HPCG_NO_OPENMP
|
|
#pragma omp critical
|
|
#endif
|
|
localNumberOfNonzeros += numberOfNonzerosInRow; // Protect this with an atomic
|
|
if (b != 0)
|
|
assert(bv[currentLocalRow] == 26.0 - ((double) (numberOfNonzerosInRow - 1)));
|
|
if (x != 0)
|
|
assert(xv[currentLocalRow] == 0.0);
|
|
if (xexact != 0)
|
|
assert(xexactv[currentLocalRow] == 1.0);
|
|
} // end ix loop
|
|
} // end iy loop
|
|
} // end iz loop
|
|
#ifdef HPCG_DETAILED_DEBUG
|
|
HPCG_fout << "Process " << A.geom->rank << " of " << A.geom->size << " has " << localNumberOfRows << " rows."
|
|
<< endl
|
|
<< "Process " << A.geom->rank << " of " << A.geom->size << " has " << localNumberOfNonzeros
|
|
<< " nonzeros." << endl;
|
|
#endif
|
|
|
|
global_int_t totalNumberOfNonzeros = 0;
|
|
#ifndef HPCG_NO_MPI
|
|
// Use MPI's reduce function to sum all nonzeros
|
|
#ifdef HPCG_NO_LONG_LONG
|
|
MPI_Allreduce(&localNumberOfNonzeros, &totalNumberOfNonzeros, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
|
#else
|
|
long long lnnz = localNumberOfNonzeros, gnnz = 0; // convert to 64 bit for MPI call
|
|
MPI_Allreduce(&lnnz, &gnnz, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
|
|
totalNumberOfNonzeros = gnnz; // Copy back
|
|
#endif
|
|
#else
|
|
totalNumberOfNonzeros = localNumberOfNonzeros;
|
|
#endif
|
|
|
|
assert(A.totalNumberOfRows == totalNumberOfRows);
|
|
assert(A.totalNumberOfNonzeros == totalNumberOfNonzeros);
|
|
assert(A.localNumberOfRows == localNumberOfRows);
|
|
assert(A.localNumberOfNonzeros == localNumberOfNonzeros);
|
|
|
|
return;
|
|
}
|