243 lines
8.6 KiB
C++
243 lines
8.6 KiB
C++
|
|
//@HEADER
|
|
// ***************************************************
|
|
//
|
|
// HPCG: High Performance Conjugate Gradient Benchmark
|
|
//
|
|
// Contact:
|
|
// Michael A. Heroux ( maherou@sandia.gov)
|
|
// Jack Dongarra (dongarra@eecs.utk.edu)
|
|
// Piotr Luszczek (luszczek@eecs.utk.edu)
|
|
//
|
|
// ***************************************************
|
|
//@HEADER
|
|
|
|
/*
|
|
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
/*!
|
|
@file TestCG.cpp
|
|
|
|
HPCG routine
|
|
*/
|
|
|
|
// Changelog
|
|
//
|
|
// Version 0.4
|
|
// - Added timing of setup time for sparse MV
|
|
// - Corrected percentages reported for sparse MV with overhead
|
|
//
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
#include <fstream>
|
|
#include <iostream>
|
|
using std::endl;
|
|
#include "hpcg.hpp"
|
|
#include <vector>
|
|
|
|
#include "CG.hpp"
|
|
#include "CG_ref.hpp"
|
|
#include "TestCG.hpp"
|
|
#include "CpuKernels.hpp"
|
|
#include "CudaKernels.hpp"
|
|
|
|
extern int use_output_file;
|
|
|
|
/*!
|
|
Test the correctness of the Preconditined CG implementation by using a system matrix with a dominant diagonal.
|
|
|
|
@param[in] geom The description of the problem's geometry.
|
|
@param[in] A The known system matrix
|
|
@param[in] data the data structure with all necessary CG vectors preallocated
|
|
@param[in] b The known right hand side vector
|
|
@param[inout] x On entry: the initial guess; on exit: the new approximate solution
|
|
@param[out] testcg_data the data structure with the results of the test including pass/fail information
|
|
|
|
@return Returns zero on success and a non-zero value otherwise.
|
|
|
|
@see CG()
|
|
*/
|
|
|
|
int TestCG(SparseMatrix& A, CGData& data, Vector& b, Vector& x, TestCGData& testcg_data)
|
|
{
|
|
// Use this array for collecting timing information
|
|
std::vector<double> times(8, 0.0);
|
|
// Temporary storage for holding original diagonal and RHS
|
|
Vector origDiagA, exaggeratedDiagA, origB;
|
|
InitializeVector(origDiagA, A.localNumberOfRows, A.rankType);
|
|
InitializeVector(exaggeratedDiagA, A.localNumberOfRows, A.rankType);
|
|
InitializeVector(origB, A.localNumberOfRows, A.rankType);
|
|
CopyMatrixDiagonal(A, origDiagA);
|
|
if (A.rankType == GPU)
|
|
{
|
|
#ifdef USE_CUDA
|
|
CopyMatrixDiagonalCuda(A, origDiagA);
|
|
#endif
|
|
}
|
|
CopyVector(origDiagA, exaggeratedDiagA);
|
|
CopyVector(b, origB);
|
|
|
|
// Modify the matrix diagonal to greatly exaggerate diagonal values.
|
|
// CG should converge in about 10 iterations for this problem, regardless of problem size
|
|
for (local_int_t i = 0; i < A.localNumberOfRows; ++i)
|
|
{
|
|
global_int_t globalRowID = A.localToGlobalMap[i];
|
|
if (globalRowID < 9)
|
|
{
|
|
double scale = (globalRowID + 2) * 1.0e6;
|
|
ScaleVectorValue(exaggeratedDiagA, i, scale);
|
|
ScaleVectorValue(b, i, scale);
|
|
}
|
|
else
|
|
{
|
|
ScaleVectorValue(exaggeratedDiagA, i, 1.0e6);
|
|
ScaleVectorValue(b, i, 1.0e6);
|
|
}
|
|
}
|
|
|
|
// Reference Matrix
|
|
ReplaceMatrixDiagonal(A, exaggeratedDiagA);
|
|
|
|
if (A.rankType == GPU)
|
|
{
|
|
#ifdef USE_CUDA
|
|
CopyVectorH2D(exaggeratedDiagA);
|
|
PermVectorCuda(A.opt2ref, b, A.localNumberOfRows);
|
|
PermVectorCuda(A.opt2ref, exaggeratedDiagA, A.localNumberOfRows);
|
|
ReplaceMatrixDiagonalCuda(A, exaggeratedDiagA);
|
|
cusparseSpSV_updateMatrix(
|
|
cusparsehandle, A.cusparseOpt.spsvDescrL, exaggeratedDiagA.values_d, CUSPARSE_SPSV_UPDATE_DIAGONAL);
|
|
cusparseSpSV_updateMatrix(
|
|
cusparsehandle, A.cusparseOpt.spsvDescrU, exaggeratedDiagA.values_d, CUSPARSE_SPSV_UPDATE_DIAGONAL);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
#ifdef USE_GRACE
|
|
PermVectorCpu(A.opt2ref, b, A.localNumberOfRows);
|
|
PermVectorCpu(A.opt2ref, exaggeratedDiagA, A.localNumberOfRows);
|
|
ReplaceMatrixDiagonalCpu(A, exaggeratedDiagA);
|
|
nvpl_sparse_spsv_update_matrix(
|
|
nvpl_sparse_handle, A.nvplSparseOpt.spsvDescrL, exaggeratedDiagA.values, NVPL_SPARSE_SPSV_UPDATE_DIAGONAL);
|
|
nvpl_sparse_spsv_update_matrix(
|
|
nvpl_sparse_handle, A.nvplSparseOpt.spsvDescrU, exaggeratedDiagA.values, NVPL_SPARSE_SPSV_UPDATE_DIAGONAL);
|
|
#endif
|
|
}
|
|
|
|
////////////////////////////////
|
|
|
|
int niters = 0;
|
|
double normr = 0.0;
|
|
double normr0 = 0.0;
|
|
int maxIters = 50;
|
|
int numberOfCgCalls = 2;
|
|
double tolerance = 1.0e-12; // Set tolerance to reasonable value for grossly scaled diagonal terms
|
|
testcg_data.expected_niters_no_prec
|
|
= 12; // For the unpreconditioned CG call, we should take about 10 iterations, permit 12
|
|
testcg_data.expected_niters_prec = 2; // For the preconditioned case, we should take about 1 iteration, permit 2
|
|
testcg_data.niters_max_no_prec = 0;
|
|
testcg_data.niters_max_prec = 0;
|
|
for (int k = 0; k < 2; ++k)
|
|
{ // This loop tests both unpreconditioned and preconditioned runs
|
|
int expected_niters = testcg_data.expected_niters_no_prec;
|
|
if (k == 1)
|
|
expected_niters = testcg_data.expected_niters_prec;
|
|
for (int i = 0; i < numberOfCgCalls; ++i)
|
|
{
|
|
ZeroVector(x); // Zero out x
|
|
int ierr = CG(A, data, b, x, maxIters, tolerance, niters, normr, normr0, ×[0], k == 1, 0);
|
|
if (ierr)
|
|
if (use_output_file)
|
|
{
|
|
HPCG_fout << "Error in call to CG: " << ierr << ".\n" << endl;
|
|
}
|
|
else
|
|
{
|
|
std::cout << "Error in call to CG: " << ierr << ".\n" << endl;
|
|
}
|
|
if (niters <= expected_niters)
|
|
{
|
|
++testcg_data.count_pass;
|
|
}
|
|
else
|
|
{
|
|
++testcg_data.count_fail;
|
|
}
|
|
if (k == 0 && niters > testcg_data.niters_max_no_prec)
|
|
testcg_data.niters_max_no_prec = niters; // Keep track of largest iter count
|
|
if (k == 1 && niters > testcg_data.niters_max_prec)
|
|
testcg_data.niters_max_prec = niters; // Same for preconditioned run
|
|
if (A.geom->rank == 0)
|
|
{
|
|
if (use_output_file)
|
|
{
|
|
HPCG_fout << "Call [" << i << "] Number of Iterations [" << niters << "] Scaled Residual ["
|
|
<< normr / normr0 << "]" << endl;
|
|
}
|
|
else
|
|
{
|
|
std::cout << "Call [" << i << "] Number of Iterations [" << niters << "] Scaled Residual ["
|
|
<< normr / normr0 << "]" << endl;
|
|
}
|
|
if (niters > expected_niters)
|
|
if (use_output_file)
|
|
{
|
|
HPCG_fout << " Expected " << expected_niters << " iterations. Performed " << niters << "."
|
|
<< endl;
|
|
}
|
|
else
|
|
{
|
|
std::cout << " Expected " << expected_niters << " iterations. Performed " << niters << "."
|
|
<< endl;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Restore matrix diagonal and RHS
|
|
ReplaceMatrixDiagonal(A, origDiagA);
|
|
|
|
if (A.rankType == GPU)
|
|
{
|
|
#ifdef USE_CUDA
|
|
ReplaceMatrixDiagonalCuda(A, origDiagA);
|
|
cusparseSpSV_updateMatrix(
|
|
cusparsehandle, A.cusparseOpt.spsvDescrL, origDiagA.values_d, CUSPARSE_SPSV_UPDATE_DIAGONAL);
|
|
cusparseSpSV_updateMatrix(
|
|
cusparsehandle, A.cusparseOpt.spsvDescrU, origDiagA.values_d, CUSPARSE_SPSV_UPDATE_DIAGONAL);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
#ifdef USE_GRACE
|
|
ReplaceMatrixDiagonalCpu(A, origDiagA);
|
|
nvpl_sparse_spsv_update_matrix(
|
|
nvpl_sparse_handle, A.nvplSparseOpt.spsvDescrL, origDiagA.values, NVPL_SPARSE_SPSV_UPDATE_DIAGONAL);
|
|
nvpl_sparse_spsv_update_matrix(
|
|
nvpl_sparse_handle, A.nvplSparseOpt.spsvDescrU, origDiagA.values, NVPL_SPARSE_SPSV_UPDATE_DIAGONAL);
|
|
#endif
|
|
}
|
|
|
|
CopyVector(origB, b);
|
|
// Delete vectors
|
|
DeleteVector(origDiagA);
|
|
DeleteVector(exaggeratedDiagA);
|
|
DeleteVector(origB);
|
|
testcg_data.normr = normr;
|
|
|
|
return 0;
|
|
} |