first commit
This commit is contained in:
512
src/ReportResults.cpp
Normal file
512
src/ReportResults.cpp
Normal file
@@ -0,0 +1,512 @@
|
||||
|
||||
//@HEADER
|
||||
// ***************************************************
|
||||
//
|
||||
// HPCG: High Performance Conjugate Gradient Benchmark
|
||||
//
|
||||
// Contact:
|
||||
// Michael A. Heroux ( maherou@sandia.gov)
|
||||
// Jack Dongarra (dongarra@eecs.utk.edu)
|
||||
// Piotr Luszczek (luszczek@eecs.utk.edu)
|
||||
//
|
||||
// ***************************************************
|
||||
//@HEADER
|
||||
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*!
|
||||
@file ReportResults.cpp
|
||||
|
||||
HPCG routine
|
||||
*/
|
||||
|
||||
#ifndef HPCG_NO_MPI
#include <mpi.h>
#endif

#include "OptimizeProblem.hpp"
#include "OutputFile.hpp"
#include "ReportResults.hpp"

#include <cassert> // assert() is used below to validate the multigrid level traversal
#include <vector>

#ifdef HPCG_DEBUG
#include <fstream>
using std::endl;

#include "hpcg.hpp"
#endif
|
||||
|
||||
extern int use_output_file;
|
||||
|
||||
/*!
  Creates a YAML document and writes the information about the HPCG run, its results, and validity.

  @param[in] A                 The known system matrix
  @param[in] numberOfMgLevels  Number of levels in multigrid V cycle
  @param[in] numberOfCgSets    Number of CG runs performed
  @param[in] refMaxIters       Number of reference CG iterations performed per set
  @param[in] optMaxIters       Number of optimized CG iterations performed per set
  @param[in] times             Vector of cumulative timings for each of the phases of a preconditioned CG iteration
  @param[in] testcg_data       the data structure with the results of the CG-correctness test including pass/fail
                               information
  @param[in] testsymmetry_data the data structure with the results of the CG symmetry test including pass/fail
                               information
  @param[in] testnorms_data    the data structure with the results of the CG norm test including pass/fail information
  @param[in] global_failure    indicates whether a failure occurred during the correctness tests of CG
  @param[in] quickPath         true if the QuickPath option was selected; such results are official only for legacy
                               installed systems with confirmation from the HPCG Benchmark leaders

  @see YAML_Doc
*/
|
||||
|
||||
void ReportResults(const SparseMatrix& A, int numberOfMgLevels, int numberOfCgSets, int refMaxIters, int optMaxIters,
|
||||
double times[], const TestCGData& testcg_data, const TestSymmetryData& testsymmetry_data,
|
||||
const TestNormsData& testnorms_data, int global_failure, bool quickPath)
|
||||
{
|
||||
|
||||
double minOfficialTime = 1800; // Any official benchmark result must run at least this many seconds
|
||||
|
||||
#ifndef HPCG_NO_MPI
|
||||
double t4 = times[4];
|
||||
double t4min = 0.0;
|
||||
double t4max = 0.0;
|
||||
double t4avg = 0.0;
|
||||
MPI_Allreduce(&t4, &t4min, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
|
||||
MPI_Allreduce(&t4, &t4max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
||||
MPI_Allreduce(&t4, &t4avg, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
t4avg = t4avg / ((double) A.geom->size);
|
||||
#endif
|
||||
|
||||
if (A.geom->rank == 0)
|
||||
{ // Only PE 0 needs to compute and report timing results
|
||||
|
||||
// TODO: Put the FLOP count, Memory BW and Memory Usage models into separate functions
|
||||
|
||||
// ======================== FLOP count model =======================================
|
||||
|
||||
double fNumberOfCgSets = numberOfCgSets;
|
||||
double fniters = fNumberOfCgSets * (double) optMaxIters;
|
||||
double fnrow = A.totalNumberOfRows;
|
||||
double fnnz = A.totalNumberOfNonzeros;
|
||||
|
||||
// Op counts come from implementation of CG in CG.cpp (include 1 extra for the CG preamble ops)
|
||||
double fnops_ddot = (3.0 * fniters + fNumberOfCgSets) * 2.0 * fnrow; // 3 ddots with nrow adds and nrow mults
|
||||
double fnops_waxpby
|
||||
= (3.0 * fniters + fNumberOfCgSets) * 2.0 * fnrow; // 3 WAXPBYs with nrow adds and nrow mults
|
||||
double fnops_sparsemv = (fniters + fNumberOfCgSets) * 2.0 * fnnz; // 1 SpMV with nnz adds and nnz mults
|
||||
// Op counts from the multigrid preconditioners
|
||||
double fnops_precond = 0.0;
|
||||
const SparseMatrix* Af = &A;
|
||||
for (int i = 1; i < numberOfMgLevels; ++i)
|
||||
{
|
||||
double fnnz_Af = Af->totalNumberOfNonzeros;
|
||||
double fnumberOfPresmootherSteps = Af->mgData->numberOfPresmootherSteps;
|
||||
double fnumberOfPostsmootherSteps = Af->mgData->numberOfPostsmootherSteps;
|
||||
fnops_precond += fnumberOfPresmootherSteps * fniters * 4.0 * fnnz_Af; // number of presmoother flops
|
||||
fnops_precond += fniters * 2.0 * fnnz_Af; // cost of fine grid residual calculation
|
||||
fnops_precond += fnumberOfPostsmootherSteps * fniters * 4.0 * fnnz_Af; // number of postsmoother flops
|
||||
Af = Af->Ac; // Go to next coarse level
|
||||
}
|
||||
|
||||
fnops_precond
|
||||
+= fniters * 4.0 * ((double) Af->totalNumberOfNonzeros); // One symmetric GS sweep at the coarsest level
|
||||
double fnops = fnops_ddot + fnops_waxpby + fnops_sparsemv + fnops_precond;
|
||||
double frefnops = fnops * ((double) refMaxIters) / ((double) optMaxIters);
|
||||
|
||||
// ======================== Memory bandwidth model =======================================
|
||||
|
||||
// Read/Write counts come from implementation of CG in CG.cpp (include 1 extra for the CG preamble ops)
|
||||
double fnreads_ddot
|
||||
= (3.0 * fniters + fNumberOfCgSets) * 2.0 * fnrow * sizeof(double); // 3 ddots with 2 nrow reads
|
||||
double fnwrites_ddot = (3.0 * fniters + fNumberOfCgSets) * sizeof(double); // 3 ddots with 1 write
|
||||
double fnreads_waxpby = (3.0 * fniters + fNumberOfCgSets) * 2.0 * fnrow
|
||||
* sizeof(double); // 3 WAXPBYs with nrow adds and nrow mults
|
||||
double fnwrites_waxpby
|
||||
= (3.0 * fniters + fNumberOfCgSets) * fnrow * sizeof(double); // 3 WAXPBYs with nrow adds and nrow mults
|
||||
double fnreads_sparsemv = (fniters + fNumberOfCgSets)
|
||||
* (fnnz * (sizeof(double) + sizeof(local_int_t))
|
||||
+ fnrow * sizeof(double)); // 1 SpMV with nnz reads of values, nnz reads indices,
|
||||
// plus nrow reads of x
|
||||
double fnwrites_sparsemv = (fniters + fNumberOfCgSets) * fnrow * sizeof(double); // 1 SpMV nrow writes
|
||||
// Op counts from the multigrid preconditioners
|
||||
double fnreads_precond = 0.0;
|
||||
double fnwrites_precond = 0.0;
|
||||
Af = &A;
|
||||
for (int i = 1; i < numberOfMgLevels; ++i)
|
||||
{
|
||||
double fnnz_Af = Af->totalNumberOfNonzeros;
|
||||
double fnrow_Af = Af->totalNumberOfRows;
|
||||
double fnumberOfPresmootherSteps = Af->mgData->numberOfPresmootherSteps;
|
||||
double fnumberOfPostsmootherSteps = Af->mgData->numberOfPostsmootherSteps;
|
||||
fnreads_precond += fnumberOfPresmootherSteps * fniters
|
||||
* (2.0 * fnnz_Af * (sizeof(double) + sizeof(local_int_t))
|
||||
+ fnrow_Af * sizeof(double)); // number of presmoother reads
|
||||
fnwrites_precond
|
||||
+= fnumberOfPresmootherSteps * fniters * fnrow_Af * sizeof(double); // number of presmoother writes
|
||||
fnreads_precond += fniters
|
||||
* (fnnz_Af * (sizeof(double) + sizeof(local_int_t))
|
||||
+ fnrow_Af * sizeof(double)); // Number of reads for fine grid residual calculation
|
||||
fnwrites_precond
|
||||
+= fniters * fnnz_Af * sizeof(double); // Number of writes for fine grid residual calculation
|
||||
fnreads_precond += fnumberOfPostsmootherSteps * fniters
|
||||
* (2.0 * fnnz_Af * (sizeof(double) + sizeof(local_int_t))
|
||||
+ fnrow_Af * sizeof(double)); // number of postsmoother reads
|
||||
fnwrites_precond
|
||||
+= fnumberOfPostsmootherSteps * fniters * fnnz_Af * sizeof(double); // number of postsmoother writes
|
||||
Af = Af->Ac; // Go to next coarse level
|
||||
}
|
||||
|
||||
double fnnz_Af = Af->totalNumberOfNonzeros;
|
||||
double fnrow_Af = Af->totalNumberOfRows;
|
||||
fnreads_precond
|
||||
+= fniters * (2.0 * fnnz_Af * (sizeof(double) + sizeof(local_int_t)) + fnrow_Af * sizeof(double));
|
||||
; // One symmetric GS sweep at the coarsest level
|
||||
fnwrites_precond += fniters * fnrow_Af * sizeof(double); // One symmetric GS sweep at the coarsest level
|
||||
double fnreads = fnreads_ddot + fnreads_waxpby + fnreads_sparsemv + fnreads_precond;
|
||||
double fnwrites = fnwrites_ddot + fnwrites_waxpby + fnwrites_sparsemv + fnwrites_precond;
|
||||
double frefnreads = fnreads * ((double) refMaxIters) / ((double) optMaxIters);
|
||||
double frefnwrites = fnwrites * ((double) refMaxIters) / ((double) optMaxIters);
|
||||
|
||||
// ======================== Memory usage model =======================================
|
||||
|
||||
// Data in GenerateProblem_ref
|
||||
|
||||
double numberOfNonzerosPerRow
|
||||
= 27.0; // We are approximating a 27-point finite element/volume/difference 3D stencil
|
||||
double size = ((double) A.geom->size); // Needed for estimating size of halo
|
||||
|
||||
double fnbytes = ((double) sizeof(Geometry)); // Geometry struct in main.cpp
|
||||
fnbytes += ((double) sizeof(double) * fNumberOfCgSets); // testnorms_data in main.cpp
|
||||
|
||||
// Model for GenerateProblem_ref.cpp
|
||||
fnbytes += fnrow * sizeof(char); // array nonzerosInRow
|
||||
fnbytes += fnrow * ((double) sizeof(global_int_t*)); // mtxIndG
|
||||
fnbytes += fnrow * ((double) sizeof(local_int_t*)); // mtxIndL
|
||||
fnbytes += fnrow * ((double) sizeof(double*)); // matrixValues
|
||||
fnbytes += fnrow * ((double) sizeof(double*)); // matrixDiagonal
|
||||
fnbytes += fnrow * numberOfNonzerosPerRow * ((double) sizeof(local_int_t)); // mtxIndL[1..nrows]
|
||||
fnbytes += fnrow * numberOfNonzerosPerRow * ((double) sizeof(double)); // matrixValues[1..nrows]
|
||||
fnbytes += fnrow * numberOfNonzerosPerRow * ((double) sizeof(global_int_t)); // mtxIndG[1..nrows]
|
||||
fnbytes += fnrow * ((double) 3 * sizeof(double)); // x, b, xexact
|
||||
|
||||
// Model for CGData.hpp
|
||||
double fncol = ((global_int_t) A.localNumberOfColumns)
|
||||
* size; // Estimate of the global number of columns using the value from rank 0
|
||||
fnbytes += fnrow * ((double) 2 * sizeof(double)); // r, Ap
|
||||
fnbytes += fncol * ((double) 2 * sizeof(double)); // z, p
|
||||
|
||||
std::vector<double> fnbytesPerLevel(numberOfMgLevels); // Count byte usage per level (level 0 is main CG level)
|
||||
fnbytesPerLevel[0] = fnbytes;
|
||||
|
||||
// Benchmarker-provided model for OptimizeProblem.cpp
|
||||
double fnbytes_OptimizedProblem = OptimizeProblemMemoryUse(A);
|
||||
fnbytes += fnbytes_OptimizedProblem;
|
||||
|
||||
Af = A.Ac;
|
||||
for (int i = 1; i < numberOfMgLevels; ++i)
|
||||
{
|
||||
double fnrow_Af = Af->totalNumberOfRows;
|
||||
double fncol_Af = ((global_int_t) Af->localNumberOfColumns)
|
||||
* size; // Estimate of the global number of columns using the value from rank 0
|
||||
double fnbytes_Af = 0.0;
|
||||
// Model for GenerateCoarseProblem.cpp
|
||||
fnbytes_Af += fnrow_Af * ((double) sizeof(local_int_t)); // f2cOperator
|
||||
fnbytes_Af += fnrow_Af * ((double) sizeof(double)); // rc
|
||||
fnbytes_Af += 2.0 * fncol_Af
|
||||
* ((double) sizeof(double)); // xc, Axf are estimated based on the size of these arrays on rank 0
|
||||
fnbytes_Af += ((double) (sizeof(Geometry) + sizeof(SparseMatrix) + 3 * sizeof(Vector)
|
||||
+ sizeof(MGData))); // Account for structs geomc, Ac, rc, xc, Axf - (minor)
|
||||
|
||||
// Model for GenerateProblem.cpp (called within GenerateCoarseProblem.cpp)
|
||||
fnbytes_Af += fnrow_Af * sizeof(char); // array nonzerosInRow
|
||||
fnbytes_Af += fnrow_Af * ((double) sizeof(global_int_t*)); // mtxIndG
|
||||
fnbytes_Af += fnrow_Af * ((double) sizeof(local_int_t*)); // mtxIndL
|
||||
fnbytes_Af += fnrow_Af * ((double) sizeof(double*)); // matrixValues
|
||||
fnbytes_Af += fnrow_Af * ((double) sizeof(double*)); // matrixDiagonal
|
||||
fnbytes_Af += fnrow_Af * numberOfNonzerosPerRow * ((double) sizeof(local_int_t)); // mtxIndL[1..nrows]
|
||||
fnbytes_Af += fnrow_Af * numberOfNonzerosPerRow * ((double) sizeof(double)); // matrixValues[1..nrows]
|
||||
fnbytes_Af += fnrow_Af * numberOfNonzerosPerRow * ((double) sizeof(global_int_t)); // mtxIndG[1..nrows]
|
||||
|
||||
// Model for SetupHalo_ref.cpp
|
||||
#ifndef HPCG_NO_MPI
|
||||
fnbytes_Af += ((double) sizeof(double) * Af->totalToBeSent); // sendBuffer
|
||||
fnbytes_Af += ((double) sizeof(local_int_t) * Af->totalToBeSent); // elementsToSend
|
||||
fnbytes_Af += ((double) sizeof(int) * Af->numberOfSendNeighbors); // neighbors
|
||||
fnbytes_Af += ((double) sizeof(local_int_t) * Af->numberOfSendNeighbors); // receiveLength, sendLength
|
||||
#endif
|
||||
fnbytesPerLevel[i] = fnbytes_Af;
|
||||
fnbytes += fnbytes_Af; // Running sum
|
||||
Af = Af->Ac; // Go to next coarse level
|
||||
}
|
||||
|
||||
assert(Af == 0); // Make sure we got to the lowest grid level
|
||||
|
||||
// Count number of bytes used per equation
|
||||
double fnbytesPerEquation = fnbytes / fnrow;
|
||||
|
||||
// Instantiate YAML document
|
||||
OutputFile doc("HPCG-Benchmark", "3.1");
|
||||
doc.add("Release date", "March 28, 2019");
|
||||
|
||||
doc.add("Machine Summary", "");
|
||||
doc.get("Machine Summary")->add("Distributed Processes", A.geom->size);
|
||||
doc.get("Machine Summary")->add("Threads per processes", A.geom->numThreads);
|
||||
|
||||
doc.add("Global Problem Dimensions", "");
|
||||
doc.get("Global Problem Dimensions")->add("Global nx", A.geom->gnx);
|
||||
doc.get("Global Problem Dimensions")->add("Global ny", A.geom->gny);
|
||||
doc.get("Global Problem Dimensions")->add("Global nz", A.geom->gnz);
|
||||
|
||||
doc.add("Processor Dimensions", "");
|
||||
doc.get("Processor Dimensions")->add("npx", A.geom->npx);
|
||||
doc.get("Processor Dimensions")->add("npy", A.geom->npy);
|
||||
doc.get("Processor Dimensions")->add("npz", A.geom->npz);
|
||||
|
||||
doc.add("Local Domain Dimensions", "");
|
||||
doc.get("Local Domain Dimensions")->add("nx", A.geom->nx);
|
||||
doc.get("Local Domain Dimensions")->add("ny", A.geom->ny);
|
||||
|
||||
doc.add("########## Problem Summary ##########", "");
|
||||
|
||||
doc.add("Setup Information", "");
|
||||
doc.get("Setup Information")->add("Setup Time", times[9]);
|
||||
|
||||
doc.add("Linear System Information", "");
|
||||
doc.get("Linear System Information")->add("Number of Equations", A.totalNumberOfRows);
|
||||
doc.get("Linear System Information")->add("Number of Nonzero Terms", A.totalNumberOfNonzeros);
|
||||
|
||||
doc.add("Multigrid Information", "");
|
||||
doc.get("Multigrid Information")->add("Number of coarse grid levels", numberOfMgLevels - 1);
|
||||
Af = &A;
|
||||
doc.get("Multigrid Information")->add("Coarse Grids", "");
|
||||
for (int i = 1; i < numberOfMgLevels; ++i)
|
||||
{
|
||||
doc.get("Multigrid Information")->get("Coarse Grids")->add("Grid Level", i);
|
||||
doc.get("Multigrid Information")
|
||||
->get("Coarse Grids")
|
||||
->add("Number of Equations", Af->Ac->totalNumberOfRows);
|
||||
doc.get("Multigrid Information")
|
||||
->get("Coarse Grids")
|
||||
->add("Number of Nonzero Terms", Af->Ac->totalNumberOfNonzeros);
|
||||
doc.get("Multigrid Information")
|
||||
->get("Coarse Grids")
|
||||
->add("Number of Presmoother Steps", Af->mgData->numberOfPresmootherSteps);
|
||||
doc.get("Multigrid Information")
|
||||
->get("Coarse Grids")
|
||||
->add("Number of Postsmoother Steps", Af->mgData->numberOfPostsmootherSteps);
|
||||
Af = Af->Ac;
|
||||
}
|
||||
|
||||
doc.add("########## Memory Use Summary ##########", "");
|
||||
|
||||
doc.add("Memory Use Information", "");
|
||||
doc.get("Memory Use Information")->add("Total memory used for data (Gbytes)", fnbytes / 1000000000.0);
|
||||
doc.get("Memory Use Information")
|
||||
->add("Memory used for OptimizeProblem data (Gbytes)", fnbytes_OptimizedProblem / 1000000000.0);
|
||||
doc.get("Memory Use Information")
|
||||
->add("Bytes per equation (Total memory / Number of Equations)", fnbytesPerEquation);
|
||||
|
||||
doc.get("Memory Use Information")
|
||||
->add("Memory used for linear system and CG (Gbytes)", fnbytesPerLevel[0] / 1000000000.0);
|
||||
|
||||
doc.get("Memory Use Information")->add("Coarse Grids", "");
|
||||
for (int i = 1; i < numberOfMgLevels; ++i)
|
||||
{
|
||||
doc.get("Memory Use Information")->get("Coarse Grids")->add("Grid Level", i);
|
||||
doc.get("Memory Use Information")
|
||||
->get("Coarse Grids")
|
||||
->add("Memory used", fnbytesPerLevel[i] / 1000000000.0);
|
||||
}
|
||||
|
||||
doc.add("########## V&V Testing Summary ##########", "");
|
||||
doc.add("Spectral Convergence Tests", "");
|
||||
if (testcg_data.count_fail == 0)
|
||||
doc.get("Spectral Convergence Tests")->add("Result", "PASSED");
|
||||
else
|
||||
doc.get("Spectral Convergence Tests")->add("Result", "FAILED");
|
||||
doc.get("Spectral Convergence Tests")->add("Unpreconditioned", "");
|
||||
doc.get("Spectral Convergence Tests")
|
||||
->get("Unpreconditioned")
|
||||
->add("Maximum iteration count", testcg_data.niters_max_no_prec);
|
||||
doc.get("Spectral Convergence Tests")
|
||||
->get("Unpreconditioned")
|
||||
->add("Expected iteration count", testcg_data.expected_niters_no_prec);
|
||||
doc.get("Spectral Convergence Tests")->add("Preconditioned", "");
|
||||
doc.get("Spectral Convergence Tests")
|
||||
->get("Preconditioned")
|
||||
->add("Maximum iteration count", testcg_data.niters_max_prec);
|
||||
doc.get("Spectral Convergence Tests")
|
||||
->get("Preconditioned")
|
||||
->add("Expected iteration count", testcg_data.expected_niters_prec);
|
||||
|
||||
const char DepartureFromSymmetry[] = "Departure from Symmetry |x'Ay-y'Ax|/(2*||x||*||A||*||y||)/epsilon";
|
||||
doc.add(DepartureFromSymmetry, "");
|
||||
if (testsymmetry_data.count_fail == 0)
|
||||
doc.get(DepartureFromSymmetry)->add("Result", "PASSED");
|
||||
else
|
||||
doc.get(DepartureFromSymmetry)->add("Result", "FAILED");
|
||||
doc.get(DepartureFromSymmetry)->add("Departure for SpMV", testsymmetry_data.depsym_spmv);
|
||||
doc.get(DepartureFromSymmetry)->add("Departure for MG", testsymmetry_data.depsym_mg);
|
||||
|
||||
doc.add("########## Iterations Summary ##########", "");
|
||||
doc.add("Iteration Count Information", "");
|
||||
if (!global_failure)
|
||||
doc.get("Iteration Count Information")->add("Result", "PASSED");
|
||||
else
|
||||
doc.get("Iteration Count Information")->add("Result", "FAILED");
|
||||
doc.get("Iteration Count Information")->add("Reference CG iterations per set", refMaxIters);
|
||||
doc.get("Iteration Count Information")->add("Optimized CG iterations per set", optMaxIters);
|
||||
doc.get("Iteration Count Information")
|
||||
->add("Total number of reference iterations", refMaxIters * numberOfCgSets);
|
||||
doc.get("Iteration Count Information")
|
||||
->add("Total number of optimized iterations", optMaxIters * numberOfCgSets);
|
||||
|
||||
doc.add("########## Reproducibility Summary ##########", "");
|
||||
doc.add("Reproducibility Information", "");
|
||||
if (testnorms_data.pass)
|
||||
doc.get("Reproducibility Information")->add("Result", "PASSED");
|
||||
else
|
||||
doc.get("Reproducibility Information")->add("Result", "FAILED");
|
||||
doc.get("Reproducibility Information")->add("Scaled residual mean", testnorms_data.mean);
|
||||
doc.get("Reproducibility Information")->add("Scaled residual variance", testnorms_data.variance);
|
||||
|
||||
doc.add("########## Performance Summary (times in sec) ##########", "");
|
||||
|
||||
doc.add("Benchmark Time Summary", "");
|
||||
doc.get("Benchmark Time Summary")->add("Optimization phase", times[7]);
|
||||
doc.get("Benchmark Time Summary")->add("DDOT", times[1]);
|
||||
doc.get("Benchmark Time Summary")->add("WAXPBY", times[2]);
|
||||
doc.get("Benchmark Time Summary")->add("SpMV", times[3]);
|
||||
doc.get("Benchmark Time Summary")->add("MG", times[5]);
|
||||
doc.get("Benchmark Time Summary")->add("Total", times[0]);
|
||||
|
||||
doc.add("Floating Point Operations Summary", "");
|
||||
doc.get("Floating Point Operations Summary")->add("Raw DDOT", fnops_ddot);
|
||||
doc.get("Floating Point Operations Summary")->add("Raw WAXPBY", fnops_waxpby);
|
||||
doc.get("Floating Point Operations Summary")->add("Raw SpMV", fnops_sparsemv);
|
||||
doc.get("Floating Point Operations Summary")->add("Raw MG", fnops_precond);
|
||||
doc.get("Floating Point Operations Summary")->add("Total", fnops);
|
||||
doc.get("Floating Point Operations Summary")->add("Total with convergence overhead", frefnops);
|
||||
|
||||
doc.add("GB/s Summary", "");
|
||||
doc.get("GB/s Summary")->add("Raw Read B/W", fnreads / times[0] / 1.0E9);
|
||||
doc.get("GB/s Summary")->add("Raw Write B/W", fnwrites / times[0] / 1.0E9);
|
||||
doc.get("GB/s Summary")->add("Raw Total B/W", (fnreads + fnwrites) / (times[0]) / 1.0E9);
|
||||
doc.get("GB/s Summary")
|
||||
->add("Total with convergence and optimization phase overhead",
|
||||
(frefnreads + frefnwrites) / (times[0] + fNumberOfCgSets * (times[7] / 10.0 + times[9] / 10.0))
|
||||
/ 1.0E9);
|
||||
|
||||
doc.add("GFLOP/s Summary", "");
|
||||
doc.get("GFLOP/s Summary")->add("Raw DDOT", fnops_ddot / times[1] / 1.0E9);
|
||||
doc.get("GFLOP/s Summary")->add("Raw WAXPBY", fnops_waxpby / times[2] / 1.0E9);
|
||||
doc.get("GFLOP/s Summary")->add("Raw SpMV", fnops_sparsemv / (times[3]) / 1.0E9);
|
||||
doc.get("GFLOP/s Summary")->add("Raw MG", fnops_precond / (times[5]) / 1.0E9);
|
||||
doc.get("GFLOP/s Summary")->add("Raw Total", fnops / times[0] / 1.0E9);
|
||||
doc.get("GFLOP/s Summary")->add("Total with convergence overhead", frefnops / times[0] / 1.0E9);
|
||||
// This final GFLOP/s rating includes the overhead of problem setup and optimizing the data structures vs ten
|
||||
// sets of 50 iterations of CG
|
||||
double totalGflops = frefnops / (times[0] + fNumberOfCgSets * (times[7] / 10.0 + times[9] / 10.0)) / 1.0E9;
|
||||
double totalGflops24 = frefnops / (times[0] + fNumberOfCgSets * times[7] / 10.0) / 1.0E9;
|
||||
doc.get("GFLOP/s Summary")->add("Total with convergence and optimization phase overhead", totalGflops);
|
||||
|
||||
doc.add("User Optimization Overheads", "");
|
||||
doc.get("User Optimization Overheads")->add("Optimization phase time (sec)", (times[7]));
|
||||
doc.get("User Optimization Overheads")
|
||||
->add("Optimization phase time vs reference SpMV+MG time", times[7] / times[8]);
|
||||
|
||||
#ifndef HPCG_NO_MPI
|
||||
doc.add("DDOT Timing Variations", "");
|
||||
doc.get("DDOT Timing Variations")->add("Min DDOT MPI_Allreduce time", t4min);
|
||||
doc.get("DDOT Timing Variations")->add("Max DDOT MPI_Allreduce time", t4max);
|
||||
doc.get("DDOT Timing Variations")->add("Avg DDOT MPI_Allreduce time", t4avg);
|
||||
|
||||
// doc.get("Sparse Operations Overheads")->add("Halo exchange time (sec)", (times[6]));
|
||||
// doc.get("Sparse Operations Overheads")->add("Halo exchange as percentage of SpMV time",
|
||||
// (times[6])/totalSparseMVTime*100.0);
|
||||
#endif
|
||||
doc.add("Final Summary", "");
|
||||
bool isValidRun = (testcg_data.count_fail == 0) && (testsymmetry_data.count_fail == 0) && (testnorms_data.pass)
|
||||
&& (!global_failure);
|
||||
if (isValidRun)
|
||||
{
|
||||
doc.get("Final Summary")->add("HPCG result is VALID with a GFLOP/s rating of", totalGflops);
|
||||
doc.get("Final Summary")->add("HPCG 2.4 rating for historical reasons is", totalGflops24);
|
||||
if (!A.isDotProductOptimized)
|
||||
{
|
||||
doc.get("Final Summary")
|
||||
->add("Reference version of ComputeDotProduct used",
|
||||
"Performance results are most likely suboptimal");
|
||||
}
|
||||
if (!A.isSpmvOptimized)
|
||||
{
|
||||
doc.get("Final Summary")
|
||||
->add("Reference version of ComputeSPMV used", "Performance results are most likely suboptimal");
|
||||
}
|
||||
if (!A.isMgOptimized)
|
||||
{
|
||||
if (A.geom->numThreads > 1)
|
||||
doc.get("Final Summary")
|
||||
->add("Reference version of ComputeMG used and number of threads greater than 1",
|
||||
"Performance results are severely suboptimal");
|
||||
else // numThreads ==1
|
||||
doc.get("Final Summary")
|
||||
->add("Reference version of ComputeMG used", "Performance results are most likely suboptimal");
|
||||
}
|
||||
if (!A.isWaxpbyOptimized)
|
||||
{
|
||||
doc.get("Final Summary")
|
||||
->add("Reference version of ComputeWAXPBY used", "Performance results are most likely suboptimal");
|
||||
}
|
||||
if (times[0] >= minOfficialTime)
|
||||
{
|
||||
doc.get("Final Summary")
|
||||
->add("Please upload results from the YAML file contents to", "http://hpcg-benchmark.org");
|
||||
}
|
||||
else
|
||||
{
|
||||
doc.get("Final Summary")->add("Results are valid but execution time (sec) is", times[0]);
|
||||
if (quickPath)
|
||||
{
|
||||
doc.get("Final Summary")
|
||||
->add("You have selected the QuickPath option",
|
||||
"Results are official for legacy installed systems with confirmation from the HPCG "
|
||||
"Benchmark leaders.");
|
||||
doc.get("Final Summary")
|
||||
->add("After confirmation please upload results from the YAML file contents to",
|
||||
"http://hpcg-benchmark.org");
|
||||
}
|
||||
else
|
||||
{
|
||||
doc.get("Final Summary")
|
||||
->add("Official results execution time (sec) must be at least", minOfficialTime);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
doc.get("Final Summary")->add("HPCG result is", "INVALID.");
|
||||
doc.get("Final Summary")
|
||||
->add("Please review the YAML file contents", "You may NOT submit these results for consideration.");
|
||||
}
|
||||
|
||||
std::string yaml = doc.generate();
|
||||
#ifdef HPCG_DEBUG
|
||||
HPCG_fout << yaml;
|
||||
#endif
|
||||
}
|
||||
return;
|
||||
}
|
||||
Reference in New Issue
Block a user