//@HEADER
// ***************************************************
//
// HPCG: High Performance Conjugate Gradient Benchmark
//
// Contact:
// Michael A. Heroux ( maherou@sandia.gov)
// Jack Dongarra (dongarra@eecs.utk.edu)
// Piotr Luszczek (luszczek@eecs.utk.edu)
//
// ***************************************************
//@HEADER

/*
 * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*!
 @file CheckProblem.cpp

 HPCG routine
 */

#ifndef HPCG_NO_MPI
#include <mpi.h>
#endif

#ifndef HPCG_NO_OPENMP
#include <omp.h>
#endif

#if defined(HPCG_DEBUG) || defined(HPCG_DETAILED_DEBUG)
#include <fstream>
using std::endl;
#include "hpcg.hpp"
#endif

#include <cassert>

#include "CheckProblem.hpp"

/*!
  Check the contents of the generated sparse matrix to see if its values match the expected contents.

  @param[in]    A      The known system matrix
  @param[inout] b      The newly allocated and generated right hand side vector (checked only if b!=0 on entry)
  @param[inout] x      The newly allocated solution vector with entries set to 0.0 (checked only if x!=0 on entry)
  @param[inout] xexact The newly allocated solution vector with entries set to the exact solution (checked only if xexact!=0 on entry)

  @see GenerateGeometry
*/
void CheckProblem(SparseMatrix& A, Vector* b, Vector* x, Vector* xexact) {
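
  // For reference, the values verified by the loop below (assuming the standard
  // 27-point stencil problem produced by GenerateProblem):
  //
  //   diagonal entry of each row : 26.0
  //   every off-diagonal entry   : -1.0
  //   b[i]                       : 26.0 - (nonzeros in row i - 1)
  //   x[i]                       : 0.0
  //   xexact[i]                  : 1.0
  //
  // For an interior row (27 nonzeros) this gives b[i] = 26.0 - 26.0 = 0.0, so the
  // generated system satisfies A * xexact == b exactly.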
  // Make local copies of geometry information. Use global_int_t since the RHS products in the calculations
  // below may result in global range values.
  global_int_t nx = A.geom->nx;
  global_int_t ny = A.geom->ny;
  global_int_t nz = A.geom->nz;
  global_int_t gnx = A.geom->gnx;
  global_int_t gny = A.geom->gny;
  global_int_t gnz = A.geom->gnz;
  global_int_t gix0 = A.geom->gix0;
  global_int_t giy0 = A.geom->giy0;
  global_int_t giz0 = A.geom->giz0;

  local_int_t localNumberOfRows = nx * ny * nz;     // This is the size of our subblock
  global_int_t totalNumberOfRows = gnx * gny * gnz; // Total number of grid points in mesh

  double* bv = 0;
  double* xv = 0;
  double* xexactv = 0;
  if (b != 0) bv = b->values;                // Only check the right hand side if it was provided
  if (x != 0) xv = x->values;                // Only check the initial guess if it was provided
  if (xexact != 0) xexactv = xexact->values; // Only check the exact solution if it was provided

  local_int_t localNumberOfNonzeros = 0;
  // TODO: This triply nested loop could be flattened or use nested parallelism
#ifndef HPCG_NO_OPENMP
#pragma omp parallel for
#endif
  for (local_int_t iz = 0; iz < nz; iz++) {
    global_int_t giz = giz0 + iz;
    for (local_int_t iy = 0; iy < ny; iy++) {
      global_int_t giy = giy0 + iy;
      for (local_int_t ix = 0; ix < nx; ix++) {
        global_int_t gix = gix0 + ix;
        local_int_t currentLocalRow = iz * nx * ny + iy * nx + ix;
        global_int_t currentGlobalRow = giz * gnx * gny + giy * gnx + gix;
        assert(A.localToGlobalMap[currentLocalRow] == currentGlobalRow);
#ifdef HPCG_DETAILED_DEBUG
        HPCG_fout << " rank, globalRow, localRow = " << A.geom->rank << " " << currentGlobalRow << " "
                  << A.globalToLocalMap.find(currentGlobalRow)->second << endl;
#endif
        char numberOfNonzerosInRow = 0;
        double* currentValuePointer = A.matrixValues[currentLocalRow];   // Pointer to current value in current row
        global_int_t* currentIndexPointerG = A.mtxIndG[currentLocalRow]; // Pointer to current index in current row
        for (int sz = -1; sz <= 1; sz++) {
          if (giz + sz > -1 && giz + sz < gnz) {
            for (int sy = -1; sy <= 1; sy++) {
              if (giy + sy > -1 && giy + sy < gny) {
                for (int sx = -1; sx <= 1; sx++) {
                  if (gix + sx > -1 && gix + sx < gnx) {
                    global_int_t curcol = currentGlobalRow + sz * gnx * gny + sy * gnx + sx;
                    if (curcol == currentGlobalRow) {
                      assert(A.matrixDiagonal[currentLocalRow] == currentValuePointer);
                      assert(*currentValuePointer++ == 26.0);
                    } else {
                      assert(*currentValuePointer++ == -1.0);
                    }
                    assert(*currentIndexPointerG++ == curcol);
                    numberOfNonzerosInRow++;
                  } // end x bounds test
                }   // end sx loop
              }     // end y bounds test
            }       // end sy loop
          }         // end z bounds test
        }           // end sz loop
        assert(A.nonzerosInRow[currentLocalRow] == numberOfNonzerosInRow);
#ifndef HPCG_NO_OPENMP
#pragma omp critical
#endif
        localNumberOfNonzeros += numberOfNonzerosInRow; // Protect this with an atomic
        if (b != 0) assert(bv[currentLocalRow] == 26.0 - ((double) (numberOfNonzerosInRow - 1)));
        if (x != 0) assert(xv[currentLocalRow] == 0.0);
        if (xexact != 0) assert(xexactv[currentLocalRow] == 1.0);
      } // end ix loop
    }   // end iy loop
  }     // end iz loop
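
  // The accumulation of localNumberOfNonzeros above is guarded by an OpenMP
  // critical section; as the inline note ("Protect this with an atomic")
  // suggests, a single scalar update could instead use an atomic. A minimal
  // sketch of that alternative (not applied here, so the reference behavior
  // stays unchanged):
  //
  //   #ifndef HPCG_NO_OPENMP
  //   #pragma omp atomic
  //   #endif
  //   localNumberOfNonzeros += numberOfNonzerosInRow;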

#ifdef HPCG_DETAILED_DEBUG
  HPCG_fout << "Process " << A.geom->rank << " of " << A.geom->size << " has " << localNumberOfRows << " rows." << endl
            << "Process " << A.geom->rank << " of " << A.geom->size << " has " << localNumberOfNonzeros << " nonzeros."
            << endl;
#endif

  global_int_t totalNumberOfNonzeros = 0;
#ifndef HPCG_NO_MPI
  // Use MPI's reduce function to sum all nonzeros
#ifdef HPCG_NO_LONG_LONG
  MPI_Allreduce(&localNumberOfNonzeros, &totalNumberOfNonzeros, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
  long long lnnz = localNumberOfNonzeros, gnnz = 0; // convert to 64 bit for MPI call
  MPI_Allreduce(&lnnz, &gnnz, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
  totalNumberOfNonzeros = gnnz; // Copy back
#endif
#else
  totalNumberOfNonzeros = localNumberOfNonzeros;
#endif

  assert(A.totalNumberOfRows == totalNumberOfRows);
  assert(A.totalNumberOfNonzeros == totalNumberOfNonzeros);
  assert(A.localNumberOfRows == localNumberOfRows);
  assert(A.localNumberOfNonzeros == localNumberOfNonzeros);

  return;
}
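
// Typical usage (a sketch of how the HPCG driver exercises this routine; the
// geometry and problem-generation calls are abbreviated and shown only for
// context):
//
//   SparseMatrix A;
//   Vector b, x, xexact;
//   // ... set up A.geom and call GenerateProblem(A, &b, &x, &xexact) ...
//   CheckProblem(A, &b, &x, &xexact); // asserts fire if the generated data is inconsistent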