//@HEADER // *************************************************** // // HPCG: High Performance Conjugate Gradient Benchmark // // Contact: // Michael A. Heroux ( maherou@sandia.gov) // Jack Dongarra (dongarra@eecs.utk.edu) // Piotr Luszczek (luszczek@eecs.utk.edu) // // *************************************************** //@HEADER /* * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /*! @file ReportResults.cpp HPCG routine */ #ifndef HPCG_NO_MPI #include #endif #include "OptimizeProblem.hpp" #include "OutputFile.hpp" #include "ReportResults.hpp" #include #ifdef HPCG_DEBUG #include using std::endl; #include "hpcg.hpp" #endif extern int use_output_file; /*! Creates a YAML file and writes the information about the HPCG run, its results, and validity. @param[in] geom The description of the problem's geometry. @param[in] A The known system matrix @param[in] numberOfMgLevels Number of levels in multigrid V cycle @param[in] numberOfCgSets Number of CG runs performed @param[in] niters Number of preconditioned CG iterations performed to lower the residual below a threshold @param[in] times Vector of cumulative timings for each of the phases of a preconditioned CG iteration @param[in] testcg_data the data structure with the results of the CG-correctness test including pass/fail information @param[in] testsymmetry_data the data structure with the results of the CG symmetry test including pass/fail information @param[in] testnorms_data the data structure with the results of the CG norm test including pass/fail information @param[in] global_failure indicates whether a failure occurred during the correctness tests of CG @see YAML_Doc */ void ReportResults(const SparseMatrix& A, int numberOfMgLevels, int numberOfCgSets, int refMaxIters, int optMaxIters, double times[], const TestCGData& testcg_data, const TestSymmetryData& testsymmetry_data, const TestNormsData& testnorms_data, int global_failure, bool quickPath) { double minOfficialTime = 1800; // Any official benchmark result must run at least this many seconds #ifndef HPCG_NO_MPI double t4 = times[4]; double t4min = 0.0; double t4max = 0.0; double t4avg = 0.0; MPI_Allreduce(&t4, &t4min, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); MPI_Allreduce(&t4, &t4max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&t4, &t4avg, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); t4avg = t4avg / ((double) A.geom->size); #endif if (A.geom->rank == 0) { // Only PE 0 needs to compute and report timing results // TODO: Put the FLOP count, Memory BW and Memory Usage models into separate functions // ======================== FLOP count model ======================================= double fNumberOfCgSets = numberOfCgSets; double fniters = fNumberOfCgSets * (double) optMaxIters; double fnrow = A.totalNumberOfRows; double fnnz = A.totalNumberOfNonzeros; // Op counts come from implementation of CG in CG.cpp (include 1 extra for the CG preamble ops) double fnops_ddot = (3.0 * fniters + fNumberOfCgSets) * 2.0 * fnrow; // 3 ddots with nrow adds and nrow mults double fnops_waxpby = (3.0 * fniters + fNumberOfCgSets) * 2.0 * fnrow; // 3 WAXPBYs with nrow adds and nrow mults double fnops_sparsemv = (fniters + fNumberOfCgSets) * 2.0 * fnnz; // 1 SpMV with nnz adds and nnz mults // Op counts from the multigrid preconditioners double fnops_precond = 0.0; const SparseMatrix* Af = &A; for (int i = 1; i < numberOfMgLevels; ++i) { double fnnz_Af = Af->totalNumberOfNonzeros; double fnumberOfPresmootherSteps = Af->mgData->numberOfPresmootherSteps; double fnumberOfPostsmootherSteps = Af->mgData->numberOfPostsmootherSteps; fnops_precond += fnumberOfPresmootherSteps * fniters * 4.0 * fnnz_Af; // number of presmoother flops fnops_precond += fniters * 2.0 * fnnz_Af; // cost of fine grid residual calculation fnops_precond += fnumberOfPostsmootherSteps * fniters * 4.0 * fnnz_Af; // number of postsmoother flops Af = Af->Ac; // Go to next coarse level } fnops_precond += fniters * 4.0 * ((double) Af->totalNumberOfNonzeros); // One symmetric GS sweep at the coarsest level double fnops = fnops_ddot + fnops_waxpby + fnops_sparsemv + fnops_precond; double frefnops = fnops * ((double) refMaxIters) / ((double) optMaxIters); // ======================== Memory bandwidth model ======================================= // Read/Write counts come from implementation of CG in CG.cpp (include 1 extra for the CG preamble ops) double fnreads_ddot = (3.0 * fniters + fNumberOfCgSets) * 2.0 * fnrow * sizeof(double); // 3 ddots with 2 nrow reads double fnwrites_ddot = (3.0 * fniters + fNumberOfCgSets) * sizeof(double); // 3 ddots with 1 write double fnreads_waxpby = (3.0 * fniters + fNumberOfCgSets) * 2.0 * fnrow * sizeof(double); // 3 WAXPBYs with nrow adds and nrow mults double fnwrites_waxpby = (3.0 * fniters + fNumberOfCgSets) * fnrow * sizeof(double); // 3 WAXPBYs with nrow adds and nrow mults double fnreads_sparsemv = (fniters + fNumberOfCgSets) * (fnnz * (sizeof(double) + sizeof(local_int_t)) + fnrow * sizeof(double)); // 1 SpMV with nnz reads of values, nnz reads indices, // plus nrow reads of x double fnwrites_sparsemv = (fniters + fNumberOfCgSets) * fnrow * sizeof(double); // 1 SpMV nrow writes // Op counts from the multigrid preconditioners double fnreads_precond = 0.0; double fnwrites_precond = 0.0; Af = &A; for (int i = 1; i < numberOfMgLevels; ++i) { double fnnz_Af = Af->totalNumberOfNonzeros; double fnrow_Af = Af->totalNumberOfRows; double fnumberOfPresmootherSteps = Af->mgData->numberOfPresmootherSteps; double fnumberOfPostsmootherSteps = Af->mgData->numberOfPostsmootherSteps; fnreads_precond += fnumberOfPresmootherSteps * fniters * (2.0 * fnnz_Af * (sizeof(double) + sizeof(local_int_t)) + fnrow_Af * sizeof(double)); // number of presmoother reads fnwrites_precond += fnumberOfPresmootherSteps * fniters * fnrow_Af * sizeof(double); // number of presmoother writes fnreads_precond += fniters * (fnnz_Af * (sizeof(double) + sizeof(local_int_t)) + fnrow_Af * sizeof(double)); // Number of reads for fine grid residual calculation fnwrites_precond += fniters * fnnz_Af * sizeof(double); // Number of writes for fine grid residual calculation fnreads_precond += fnumberOfPostsmootherSteps * fniters * (2.0 * fnnz_Af * (sizeof(double) + sizeof(local_int_t)) + fnrow_Af * sizeof(double)); // number of postsmoother reads fnwrites_precond += fnumberOfPostsmootherSteps * fniters * fnnz_Af * sizeof(double); // number of postsmoother writes Af = Af->Ac; // Go to next coarse level } double fnnz_Af = Af->totalNumberOfNonzeros; double fnrow_Af = Af->totalNumberOfRows; fnreads_precond += fniters * (2.0 * fnnz_Af * (sizeof(double) + sizeof(local_int_t)) + fnrow_Af * sizeof(double)); ; // One symmetric GS sweep at the coarsest level fnwrites_precond += fniters * fnrow_Af * sizeof(double); // One symmetric GS sweep at the coarsest level double fnreads = fnreads_ddot + fnreads_waxpby + fnreads_sparsemv + fnreads_precond; double fnwrites = fnwrites_ddot + fnwrites_waxpby + fnwrites_sparsemv + fnwrites_precond; double frefnreads = fnreads * ((double) refMaxIters) / ((double) optMaxIters); double frefnwrites = fnwrites * ((double) refMaxIters) / ((double) optMaxIters); // ======================== Memory usage model ======================================= // Data in GenerateProblem_ref double numberOfNonzerosPerRow = 27.0; // We are approximating a 27-point finite element/volume/difference 3D stencil double size = ((double) A.geom->size); // Needed for estimating size of halo double fnbytes = ((double) sizeof(Geometry)); // Geometry struct in main.cpp fnbytes += ((double) sizeof(double) * fNumberOfCgSets); // testnorms_data in main.cpp // Model for GenerateProblem_ref.cpp fnbytes += fnrow * sizeof(char); // array nonzerosInRow fnbytes += fnrow * ((double) sizeof(global_int_t*)); // mtxIndG fnbytes += fnrow * ((double) sizeof(local_int_t*)); // mtxIndL fnbytes += fnrow * ((double) sizeof(double*)); // matrixValues fnbytes += fnrow * ((double) sizeof(double*)); // matrixDiagonal fnbytes += fnrow * numberOfNonzerosPerRow * ((double) sizeof(local_int_t)); // mtxIndL[1..nrows] fnbytes += fnrow * numberOfNonzerosPerRow * ((double) sizeof(double)); // matrixValues[1..nrows] fnbytes += fnrow * numberOfNonzerosPerRow * ((double) sizeof(global_int_t)); // mtxIndG[1..nrows] fnbytes += fnrow * ((double) 3 * sizeof(double)); // x, b, xexact // Model for CGData.hpp double fncol = ((global_int_t) A.localNumberOfColumns) * size; // Estimate of the global number of columns using the value from rank 0 fnbytes += fnrow * ((double) 2 * sizeof(double)); // r, Ap fnbytes += fncol * ((double) 2 * sizeof(double)); // z, p std::vector fnbytesPerLevel(numberOfMgLevels); // Count byte usage per level (level 0 is main CG level) fnbytesPerLevel[0] = fnbytes; // Benchmarker-provided model for OptimizeProblem.cpp double fnbytes_OptimizedProblem = OptimizeProblemMemoryUse(A); fnbytes += fnbytes_OptimizedProblem; Af = A.Ac; for (int i = 1; i < numberOfMgLevels; ++i) { double fnrow_Af = Af->totalNumberOfRows; double fncol_Af = ((global_int_t) Af->localNumberOfColumns) * size; // Estimate of the global number of columns using the value from rank 0 double fnbytes_Af = 0.0; // Model for GenerateCoarseProblem.cpp fnbytes_Af += fnrow_Af * ((double) sizeof(local_int_t)); // f2cOperator fnbytes_Af += fnrow_Af * ((double) sizeof(double)); // rc fnbytes_Af += 2.0 * fncol_Af * ((double) sizeof(double)); // xc, Axf are estimated based on the size of these arrays on rank 0 fnbytes_Af += ((double) (sizeof(Geometry) + sizeof(SparseMatrix) + 3 * sizeof(Vector) + sizeof(MGData))); // Account for structs geomc, Ac, rc, xc, Axf - (minor) // Model for GenerateProblem.cpp (called within GenerateCoarseProblem.cpp) fnbytes_Af += fnrow_Af * sizeof(char); // array nonzerosInRow fnbytes_Af += fnrow_Af * ((double) sizeof(global_int_t*)); // mtxIndG fnbytes_Af += fnrow_Af * ((double) sizeof(local_int_t*)); // mtxIndL fnbytes_Af += fnrow_Af * ((double) sizeof(double*)); // matrixValues fnbytes_Af += fnrow_Af * ((double) sizeof(double*)); // matrixDiagonal fnbytes_Af += fnrow_Af * numberOfNonzerosPerRow * ((double) sizeof(local_int_t)); // mtxIndL[1..nrows] fnbytes_Af += fnrow_Af * numberOfNonzerosPerRow * ((double) sizeof(double)); // matrixValues[1..nrows] fnbytes_Af += fnrow_Af * numberOfNonzerosPerRow * ((double) sizeof(global_int_t)); // mtxIndG[1..nrows] // Model for SetupHalo_ref.cpp #ifndef HPCG_NO_MPI fnbytes_Af += ((double) sizeof(double) * Af->totalToBeSent); // sendBuffer fnbytes_Af += ((double) sizeof(local_int_t) * Af->totalToBeSent); // elementsToSend fnbytes_Af += ((double) sizeof(int) * Af->numberOfSendNeighbors); // neighbors fnbytes_Af += ((double) sizeof(local_int_t) * Af->numberOfSendNeighbors); // receiveLength, sendLength #endif fnbytesPerLevel[i] = fnbytes_Af; fnbytes += fnbytes_Af; // Running sum Af = Af->Ac; // Go to next coarse level } assert(Af == 0); // Make sure we got to the lowest grid level // Count number of bytes used per equation double fnbytesPerEquation = fnbytes / fnrow; // Instantiate YAML document OutputFile doc("HPCG-Benchmark", "3.1"); doc.add("Release date", "March 28, 2019"); doc.add("Machine Summary", ""); doc.get("Machine Summary")->add("Distributed Processes", A.geom->size); doc.get("Machine Summary")->add("Threads per processes", A.geom->numThreads); doc.add("Global Problem Dimensions", ""); doc.get("Global Problem Dimensions")->add("Global nx", A.geom->gnx); doc.get("Global Problem Dimensions")->add("Global ny", A.geom->gny); doc.get("Global Problem Dimensions")->add("Global nz", A.geom->gnz); doc.add("Processor Dimensions", ""); doc.get("Processor Dimensions")->add("npx", A.geom->npx); doc.get("Processor Dimensions")->add("npy", A.geom->npy); doc.get("Processor Dimensions")->add("npz", A.geom->npz); doc.add("Local Domain Dimensions", ""); doc.get("Local Domain Dimensions")->add("nx", A.geom->nx); doc.get("Local Domain Dimensions")->add("ny", A.geom->ny); doc.add("########## Problem Summary ##########", ""); doc.add("Setup Information", ""); doc.get("Setup Information")->add("Setup Time", times[9]); doc.add("Linear System Information", ""); doc.get("Linear System Information")->add("Number of Equations", A.totalNumberOfRows); doc.get("Linear System Information")->add("Number of Nonzero Terms", A.totalNumberOfNonzeros); doc.add("Multigrid Information", ""); doc.get("Multigrid Information")->add("Number of coarse grid levels", numberOfMgLevels - 1); Af = &A; doc.get("Multigrid Information")->add("Coarse Grids", ""); for (int i = 1; i < numberOfMgLevels; ++i) { doc.get("Multigrid Information")->get("Coarse Grids")->add("Grid Level", i); doc.get("Multigrid Information") ->get("Coarse Grids") ->add("Number of Equations", Af->Ac->totalNumberOfRows); doc.get("Multigrid Information") ->get("Coarse Grids") ->add("Number of Nonzero Terms", Af->Ac->totalNumberOfNonzeros); doc.get("Multigrid Information") ->get("Coarse Grids") ->add("Number of Presmoother Steps", Af->mgData->numberOfPresmootherSteps); doc.get("Multigrid Information") ->get("Coarse Grids") ->add("Number of Postsmoother Steps", Af->mgData->numberOfPostsmootherSteps); Af = Af->Ac; } doc.add("########## Memory Use Summary ##########", ""); doc.add("Memory Use Information", ""); doc.get("Memory Use Information")->add("Total memory used for data (Gbytes)", fnbytes / 1000000000.0); doc.get("Memory Use Information") ->add("Memory used for OptimizeProblem data (Gbytes)", fnbytes_OptimizedProblem / 1000000000.0); doc.get("Memory Use Information") ->add("Bytes per equation (Total memory / Number of Equations)", fnbytesPerEquation); doc.get("Memory Use Information") ->add("Memory used for linear system and CG (Gbytes)", fnbytesPerLevel[0] / 1000000000.0); doc.get("Memory Use Information")->add("Coarse Grids", ""); for (int i = 1; i < numberOfMgLevels; ++i) { doc.get("Memory Use Information")->get("Coarse Grids")->add("Grid Level", i); doc.get("Memory Use Information") ->get("Coarse Grids") ->add("Memory used", fnbytesPerLevel[i] / 1000000000.0); } doc.add("########## V&V Testing Summary ##########", ""); doc.add("Spectral Convergence Tests", ""); if (testcg_data.count_fail == 0) doc.get("Spectral Convergence Tests")->add("Result", "PASSED"); else doc.get("Spectral Convergence Tests")->add("Result", "FAILED"); doc.get("Spectral Convergence Tests")->add("Unpreconditioned", ""); doc.get("Spectral Convergence Tests") ->get("Unpreconditioned") ->add("Maximum iteration count", testcg_data.niters_max_no_prec); doc.get("Spectral Convergence Tests") ->get("Unpreconditioned") ->add("Expected iteration count", testcg_data.expected_niters_no_prec); doc.get("Spectral Convergence Tests")->add("Preconditioned", ""); doc.get("Spectral Convergence Tests") ->get("Preconditioned") ->add("Maximum iteration count", testcg_data.niters_max_prec); doc.get("Spectral Convergence Tests") ->get("Preconditioned") ->add("Expected iteration count", testcg_data.expected_niters_prec); const char DepartureFromSymmetry[] = "Departure from Symmetry |x'Ay-y'Ax|/(2*||x||*||A||*||y||)/epsilon"; doc.add(DepartureFromSymmetry, ""); if (testsymmetry_data.count_fail == 0) doc.get(DepartureFromSymmetry)->add("Result", "PASSED"); else doc.get(DepartureFromSymmetry)->add("Result", "FAILED"); doc.get(DepartureFromSymmetry)->add("Departure for SpMV", testsymmetry_data.depsym_spmv); doc.get(DepartureFromSymmetry)->add("Departure for MG", testsymmetry_data.depsym_mg); doc.add("########## Iterations Summary ##########", ""); doc.add("Iteration Count Information", ""); if (!global_failure) doc.get("Iteration Count Information")->add("Result", "PASSED"); else doc.get("Iteration Count Information")->add("Result", "FAILED"); doc.get("Iteration Count Information")->add("Reference CG iterations per set", refMaxIters); doc.get("Iteration Count Information")->add("Optimized CG iterations per set", optMaxIters); doc.get("Iteration Count Information") ->add("Total number of reference iterations", refMaxIters * numberOfCgSets); doc.get("Iteration Count Information") ->add("Total number of optimized iterations", optMaxIters * numberOfCgSets); doc.add("########## Reproducibility Summary ##########", ""); doc.add("Reproducibility Information", ""); if (testnorms_data.pass) doc.get("Reproducibility Information")->add("Result", "PASSED"); else doc.get("Reproducibility Information")->add("Result", "FAILED"); doc.get("Reproducibility Information")->add("Scaled residual mean", testnorms_data.mean); doc.get("Reproducibility Information")->add("Scaled residual variance", testnorms_data.variance); doc.add("########## Performance Summary (times in sec) ##########", ""); doc.add("Benchmark Time Summary", ""); doc.get("Benchmark Time Summary")->add("Optimization phase", times[7]); doc.get("Benchmark Time Summary")->add("DDOT", times[1]); doc.get("Benchmark Time Summary")->add("WAXPBY", times[2]); doc.get("Benchmark Time Summary")->add("SpMV", times[3]); doc.get("Benchmark Time Summary")->add("MG", times[5]); doc.get("Benchmark Time Summary")->add("Total", times[0]); doc.add("Floating Point Operations Summary", ""); doc.get("Floating Point Operations Summary")->add("Raw DDOT", fnops_ddot); doc.get("Floating Point Operations Summary")->add("Raw WAXPBY", fnops_waxpby); doc.get("Floating Point Operations Summary")->add("Raw SpMV", fnops_sparsemv); doc.get("Floating Point Operations Summary")->add("Raw MG", fnops_precond); doc.get("Floating Point Operations Summary")->add("Total", fnops); doc.get("Floating Point Operations Summary")->add("Total with convergence overhead", frefnops); doc.add("GB/s Summary", ""); doc.get("GB/s Summary")->add("Raw Read B/W", fnreads / times[0] / 1.0E9); doc.get("GB/s Summary")->add("Raw Write B/W", fnwrites / times[0] / 1.0E9); doc.get("GB/s Summary")->add("Raw Total B/W", (fnreads + fnwrites) / (times[0]) / 1.0E9); doc.get("GB/s Summary") ->add("Total with convergence and optimization phase overhead", (frefnreads + frefnwrites) / (times[0] + fNumberOfCgSets * (times[7] / 10.0 + times[9] / 10.0)) / 1.0E9); doc.add("GFLOP/s Summary", ""); doc.get("GFLOP/s Summary")->add("Raw DDOT", fnops_ddot / times[1] / 1.0E9); doc.get("GFLOP/s Summary")->add("Raw WAXPBY", fnops_waxpby / times[2] / 1.0E9); doc.get("GFLOP/s Summary")->add("Raw SpMV", fnops_sparsemv / (times[3]) / 1.0E9); doc.get("GFLOP/s Summary")->add("Raw MG", fnops_precond / (times[5]) / 1.0E9); doc.get("GFLOP/s Summary")->add("Raw Total", fnops / times[0] / 1.0E9); doc.get("GFLOP/s Summary")->add("Total with convergence overhead", frefnops / times[0] / 1.0E9); // This final GFLOP/s rating includes the overhead of problem setup and optimizing the data structures vs ten // sets of 50 iterations of CG double totalGflops = frefnops / (times[0] + fNumberOfCgSets * (times[7] / 10.0 + times[9] / 10.0)) / 1.0E9; double totalGflops24 = frefnops / (times[0] + fNumberOfCgSets * times[7] / 10.0) / 1.0E9; doc.get("GFLOP/s Summary")->add("Total with convergence and optimization phase overhead", totalGflops); doc.add("User Optimization Overheads", ""); doc.get("User Optimization Overheads")->add("Optimization phase time (sec)", (times[7])); doc.get("User Optimization Overheads") ->add("Optimization phase time vs reference SpMV+MG time", times[7] / times[8]); #ifndef HPCG_NO_MPI doc.add("DDOT Timing Variations", ""); doc.get("DDOT Timing Variations")->add("Min DDOT MPI_Allreduce time", t4min); doc.get("DDOT Timing Variations")->add("Max DDOT MPI_Allreduce time", t4max); doc.get("DDOT Timing Variations")->add("Avg DDOT MPI_Allreduce time", t4avg); // doc.get("Sparse Operations Overheads")->add("Halo exchange time (sec)", (times[6])); // doc.get("Sparse Operations Overheads")->add("Halo exchange as percentage of SpMV time", // (times[6])/totalSparseMVTime*100.0); #endif doc.add("Final Summary", ""); bool isValidRun = (testcg_data.count_fail == 0) && (testsymmetry_data.count_fail == 0) && (testnorms_data.pass) && (!global_failure); if (isValidRun) { doc.get("Final Summary")->add("HPCG result is VALID with a GFLOP/s rating of", totalGflops); doc.get("Final Summary")->add("HPCG 2.4 rating for historical reasons is", totalGflops24); if (!A.isDotProductOptimized) { doc.get("Final Summary") ->add("Reference version of ComputeDotProduct used", "Performance results are most likely suboptimal"); } if (!A.isSpmvOptimized) { doc.get("Final Summary") ->add("Reference version of ComputeSPMV used", "Performance results are most likely suboptimal"); } if (!A.isMgOptimized) { if (A.geom->numThreads > 1) doc.get("Final Summary") ->add("Reference version of ComputeMG used and number of threads greater than 1", "Performance results are severely suboptimal"); else // numThreads ==1 doc.get("Final Summary") ->add("Reference version of ComputeMG used", "Performance results are most likely suboptimal"); } if (!A.isWaxpbyOptimized) { doc.get("Final Summary") ->add("Reference version of ComputeWAXPBY used", "Performance results are most likely suboptimal"); } if (times[0] >= minOfficialTime) { doc.get("Final Summary") ->add("Please upload results from the YAML file contents to", "http://hpcg-benchmark.org"); } else { doc.get("Final Summary")->add("Results are valid but execution time (sec) is", times[0]); if (quickPath) { doc.get("Final Summary") ->add("You have selected the QuickPath option", "Results are official for legacy installed systems with confirmation from the HPCG " "Benchmark leaders."); doc.get("Final Summary") ->add("After confirmation please upload results from the YAML file contents to", "http://hpcg-benchmark.org"); } else { doc.get("Final Summary") ->add("Official results execution time (sec) must be at least", minOfficialTime); } } } else { doc.get("Final Summary")->add("HPCG result is", "INVALID."); doc.get("Final Summary") ->add("Please review the YAML file contents", "You may NOT submit these results for consideration."); } std::string yaml = doc.generate(); #ifdef HPCG_DEBUG HPCG_fout << yaml; #endif } return; }