//@HEADER
// ***************************************************
//
// HPCG: High Performance Conjugate Gradient Benchmark
//
// Contact:
// Michael A. Heroux ( maherou@sandia.gov)
// Jack Dongarra (dongarra@eecs.utk.edu)
// Piotr Luszczek (luszczek@eecs.utk.edu)
//
// ***************************************************
//@HEADER

/*
 * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*!
 @file TestCG.cpp

 HPCG routine
 */

// Changelog
//
// Version 0.4
// - Added timing of setup time for sparse MV
// - Corrected percentages reported for sparse MV with overhead
//
/////////////////////////////////////////////////////////////////////////

// C++ standard library (header names were lost in extraction; restored here)
#include <fstream>
#include <iostream>
#include <vector>
using std::endl;

// Project headers
#include "hpcg.hpp"
#include "CG.hpp"
#include "CG_ref.hpp"
#include "TestCG.hpp"
#include "CpuKernels.hpp"
#include "CudaKernels.hpp"

extern int use_output_file;

/*!
  Test the correctness of the Preconditioned CG implementation by using a system
  matrix with a dominant diagonal.

  @param[in] geom The description of the problem's geometry.
@param[in] A The known system matrix @param[in] data the data structure with all necessary CG vectors preallocated @param[in] b The known right hand side vector @param[inout] x On entry: the initial guess; on exit: the new approximate solution @param[out] testcg_data the data structure with the results of the test including pass/fail information @return Returns zero on success and a non-zero value otherwise. @see CG() */ int TestCG(SparseMatrix& A, CGData& data, Vector& b, Vector& x, TestCGData& testcg_data) { // Use this array for collecting timing information std::vector times(8, 0.0); // Temporary storage for holding original diagonal and RHS Vector origDiagA, exaggeratedDiagA, origB; InitializeVector(origDiagA, A.localNumberOfRows, A.rankType); InitializeVector(exaggeratedDiagA, A.localNumberOfRows, A.rankType); InitializeVector(origB, A.localNumberOfRows, A.rankType); CopyMatrixDiagonal(A, origDiagA); if (A.rankType == GPU) { #ifdef USE_CUDA CopyMatrixDiagonalCuda(A, origDiagA); #endif } CopyVector(origDiagA, exaggeratedDiagA); CopyVector(b, origB); // Modify the matrix diagonal to greatly exaggerate diagonal values. 
// CG should converge in about 10 iterations for this problem, regardless of problem size for (local_int_t i = 0; i < A.localNumberOfRows; ++i) { global_int_t globalRowID = A.localToGlobalMap[i]; if (globalRowID < 9) { double scale = (globalRowID + 2) * 1.0e6; ScaleVectorValue(exaggeratedDiagA, i, scale); ScaleVectorValue(b, i, scale); } else { ScaleVectorValue(exaggeratedDiagA, i, 1.0e6); ScaleVectorValue(b, i, 1.0e6); } } // Reference Matrix ReplaceMatrixDiagonal(A, exaggeratedDiagA); if (A.rankType == GPU) { #ifdef USE_CUDA CopyVectorH2D(exaggeratedDiagA); PermVectorCuda(A.opt2ref, b, A.localNumberOfRows); PermVectorCuda(A.opt2ref, exaggeratedDiagA, A.localNumberOfRows); ReplaceMatrixDiagonalCuda(A, exaggeratedDiagA); cusparseSpSV_updateMatrix( cusparsehandle, A.cusparseOpt.spsvDescrL, exaggeratedDiagA.values_d, CUSPARSE_SPSV_UPDATE_DIAGONAL); cusparseSpSV_updateMatrix( cusparsehandle, A.cusparseOpt.spsvDescrU, exaggeratedDiagA.values_d, CUSPARSE_SPSV_UPDATE_DIAGONAL); #endif } else { #ifdef USE_GRACE PermVectorCpu(A.opt2ref, b, A.localNumberOfRows); PermVectorCpu(A.opt2ref, exaggeratedDiagA, A.localNumberOfRows); ReplaceMatrixDiagonalCpu(A, exaggeratedDiagA); nvpl_sparse_spsv_update_matrix( nvpl_sparse_handle, A.nvplSparseOpt.spsvDescrL, exaggeratedDiagA.values, NVPL_SPARSE_SPSV_UPDATE_DIAGONAL); nvpl_sparse_spsv_update_matrix( nvpl_sparse_handle, A.nvplSparseOpt.spsvDescrU, exaggeratedDiagA.values, NVPL_SPARSE_SPSV_UPDATE_DIAGONAL); #endif } //////////////////////////////// int niters = 0; double normr = 0.0; double normr0 = 0.0; int maxIters = 50; int numberOfCgCalls = 2; double tolerance = 1.0e-12; // Set tolerance to reasonable value for grossly scaled diagonal terms testcg_data.expected_niters_no_prec = 12; // For the unpreconditioned CG call, we should take about 10 iterations, permit 12 testcg_data.expected_niters_prec = 2; // For the preconditioned case, we should take about 1 iteration, permit 2 testcg_data.niters_max_no_prec = 0; 
testcg_data.niters_max_prec = 0; for (int k = 0; k < 2; ++k) { // This loop tests both unpreconditioned and preconditioned runs int expected_niters = testcg_data.expected_niters_no_prec; if (k == 1) expected_niters = testcg_data.expected_niters_prec; for (int i = 0; i < numberOfCgCalls; ++i) { ZeroVector(x); // Zero out x int ierr = CG(A, data, b, x, maxIters, tolerance, niters, normr, normr0, ×[0], k == 1, 0); if (ierr) if (use_output_file) { HPCG_fout << "Error in call to CG: " << ierr << ".\n" << endl; } else { std::cout << "Error in call to CG: " << ierr << ".\n" << endl; } if (niters <= expected_niters) { ++testcg_data.count_pass; } else { ++testcg_data.count_fail; } if (k == 0 && niters > testcg_data.niters_max_no_prec) testcg_data.niters_max_no_prec = niters; // Keep track of largest iter count if (k == 1 && niters > testcg_data.niters_max_prec) testcg_data.niters_max_prec = niters; // Same for preconditioned run if (A.geom->rank == 0) { if (use_output_file) { HPCG_fout << "Call [" << i << "] Number of Iterations [" << niters << "] Scaled Residual [" << normr / normr0 << "]" << endl; } else { std::cout << "Call [" << i << "] Number of Iterations [" << niters << "] Scaled Residual [" << normr / normr0 << "]" << endl; } if (niters > expected_niters) if (use_output_file) { HPCG_fout << " Expected " << expected_niters << " iterations. Performed " << niters << "." << endl; } else { std::cout << " Expected " << expected_niters << " iterations. Performed " << niters << "." 
<< endl; } } } } // Restore matrix diagonal and RHS ReplaceMatrixDiagonal(A, origDiagA); if (A.rankType == GPU) { #ifdef USE_CUDA ReplaceMatrixDiagonalCuda(A, origDiagA); cusparseSpSV_updateMatrix( cusparsehandle, A.cusparseOpt.spsvDescrL, origDiagA.values_d, CUSPARSE_SPSV_UPDATE_DIAGONAL); cusparseSpSV_updateMatrix( cusparsehandle, A.cusparseOpt.spsvDescrU, origDiagA.values_d, CUSPARSE_SPSV_UPDATE_DIAGONAL); #endif } else { #ifdef USE_GRACE ReplaceMatrixDiagonalCpu(A, origDiagA); nvpl_sparse_spsv_update_matrix( nvpl_sparse_handle, A.nvplSparseOpt.spsvDescrL, origDiagA.values, NVPL_SPARSE_SPSV_UPDATE_DIAGONAL); nvpl_sparse_spsv_update_matrix( nvpl_sparse_handle, A.nvplSparseOpt.spsvDescrU, origDiagA.values, NVPL_SPARSE_SPSV_UPDATE_DIAGONAL); #endif } CopyVector(origB, b); // Delete vectors DeleteVector(origDiagA); DeleteVector(exaggeratedDiagA); DeleteVector(origB); testcg_data.normr = normr; return 0; }