tssc-hpcg/src/CheckProblem.cpp


//@HEADER
// ***************************************************
//
// HPCG: High Performance Conjugate Gradient Benchmark
//
// Contact:
// Michael A. Heroux ( maherou@sandia.gov)
// Jack Dongarra     (dongarra@eecs.utk.edu)
// Piotr Luszczek    (luszczek@eecs.utk.edu)
//
// ***************************************************
//@HEADER

/*
 * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*!
 @file CheckProblem.cpp

 HPCG routine
 */

#ifndef HPCG_NO_MPI
#include <mpi.h>
#endif

#ifndef HPCG_NO_OPENMP
#include <omp.h>
#endif

#if defined(HPCG_DEBUG) || defined(HPCG_DETAILED_DEBUG)
#include <fstream>
using std::endl;
#include "hpcg.hpp"
#endif
#include <cassert>

#include "CheckProblem.hpp"

/*!
  Check the contents of the generated sparse matrix to see if values match expected contents.

  @param[in]  A      The known system matrix
  @param[inout] b      The newly allocated and generated right hand side vector (if b!=0 on entry)
  @param[inout] x      The newly allocated solution vector with entries set to 0.0 (if x!=0 on entry)
  @param[inout] xexact The newly allocated solution vector with entries set to the exact solution (if the xexact!=0
  non-zero on entry)

  @see GenerateGeometry
*/

void CheckProblem(SparseMatrix& A, Vector* b, Vector* x, Vector* xexact)
{

    // Make local copies of geometry information.  Use global_int_t since the RHS products in the calculations
    // below may result in global range values.
    global_int_t nx = A.geom->nx;
    global_int_t ny = A.geom->ny;
    global_int_t nz = A.geom->nz;
    global_int_t gnx = A.geom->gnx;
    global_int_t gny = A.geom->gny;
    global_int_t gnz = A.geom->gnz;
    global_int_t gix0 = A.geom->gix0;
    global_int_t giy0 = A.geom->giy0;
    global_int_t giz0 = A.geom->giz0;

    local_int_t localNumberOfRows = nx * ny * nz;     // This is the size of our subblock
    global_int_t totalNumberOfRows = gnx * gny * gnz; // Total number of grid points in mesh

    double* bv = 0;
    double* xv = 0;
    double* xexactv = 0;
    if (b != 0)
        bv = b->values; // Only compute exact solution if requested
    if (x != 0)
        xv = x->values; // Only compute exact solution if requested
    if (xexact != 0)
        xexactv = xexact->values; // Only compute exact solution if requested

    local_int_t localNumberOfNonzeros = 0;
    // TODO:  This triply nested loop could be flattened or use nested parallelism
#ifndef HPCG_NO_OPENMP
#pragma omp parallel for
#endif
    for (local_int_t iz = 0; iz < nz; iz++)
    {
        global_int_t giz = giz0 + iz;
        for (local_int_t iy = 0; iy < ny; iy++)
        {
            global_int_t giy = giy0 + iy;
            for (local_int_t ix = 0; ix < nx; ix++)
            {
                global_int_t gix = gix0 + ix;
                local_int_t currentLocalRow = iz * nx * ny + iy * nx + ix;
                global_int_t currentGlobalRow = giz * gnx * gny + giy * gnx + gix;
                assert(A.localToGlobalMap[currentLocalRow] == currentGlobalRow);
#ifdef HPCG_DETAILED_DEBUG
                HPCG_fout << " rank, globalRow, localRow = " << A.geom->rank << " " << currentGlobalRow << " "
                          << A.globalToLocalMap.find(currentGlobalRow)->second << endl;
#endif
                char numberOfNonzerosInRow = 0;
                double* currentValuePointer
                    = A.matrixValues[currentLocalRow]; // Pointer to current value in current row
                global_int_t* currentIndexPointerG
                    = A.mtxIndG[currentLocalRow]; // Pointer to current index in current row
                for (int sz = -1; sz <= 1; sz++)
                {
                    if (giz + sz > -1 && giz + sz < gnz)
                    {
                        for (int sy = -1; sy <= 1; sy++)
                        {
                            if (giy + sy > -1 && giy + sy < gny)
                            {
                                for (int sx = -1; sx <= 1; sx++)
                                {
                                    if (gix + sx > -1 && gix + sx < gnx)
                                    {
                                        global_int_t curcol = currentGlobalRow + sz * gnx * gny + sy * gnx + sx;
                                        if (curcol == currentGlobalRow)
                                        {
                                            assert(A.matrixDiagonal[currentLocalRow] == currentValuePointer);
                                            assert(*currentValuePointer++ == 26.0);
                                        }
                                        else
                                        {
                                            assert(*currentValuePointer++ == -1.0);
                                        }
                                        assert(*currentIndexPointerG++ == curcol);
                                        numberOfNonzerosInRow++;
                                    } // end x bounds test
                                } // end sx loop
                            } // end y bounds test
                        } // end sy loop
                    } // end z bounds test
                } // end sz loop
                assert(A.nonzerosInRow[currentLocalRow] == numberOfNonzerosInRow);
#ifndef HPCG_NO_OPENMP
#pragma omp critical
#endif
                localNumberOfNonzeros += numberOfNonzerosInRow; // Protect this with an atomic
                if (b != 0)
                    assert(bv[currentLocalRow] == 26.0 - ((double) (numberOfNonzerosInRow - 1)));
                if (x != 0)
                    assert(xv[currentLocalRow] == 0.0);
                if (xexact != 0)
                    assert(xexactv[currentLocalRow] == 1.0);
            } // end ix loop
        } // end iy loop
    } // end iz loop
#ifdef HPCG_DETAILED_DEBUG
    HPCG_fout << "Process " << A.geom->rank << " of " << A.geom->size << " has " << localNumberOfRows << " rows."
              << endl
              << "Process " << A.geom->rank << " of " << A.geom->size << " has " << localNumberOfNonzeros
              << " nonzeros." << endl;
#endif

    global_int_t totalNumberOfNonzeros = 0;
#ifndef HPCG_NO_MPI
    // Use MPI's reduce function to sum all nonzeros
#ifdef HPCG_NO_LONG_LONG
    MPI_Allreduce(&localNumberOfNonzeros, &totalNumberOfNonzeros, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
    long long lnnz = localNumberOfNonzeros, gnnz = 0; // convert to 64 bit for MPI call
    MPI_Allreduce(&lnnz, &gnnz, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
    totalNumberOfNonzeros = gnnz; // Copy back
#endif
#else
    totalNumberOfNonzeros = localNumberOfNonzeros;
#endif

    assert(A.totalNumberOfRows == totalNumberOfRows);
    assert(A.totalNumberOfNonzeros == totalNumberOfNonzeros);
    assert(A.localNumberOfRows == localNumberOfRows);
    assert(A.localNumberOfNonzeros == localNumberOfNonzeros);

    return;
}