//@HEADER // *************************************************** // // HPCG: High Performance Conjugate Gradient Benchmark // // Contact: // Michael A. Heroux ( maherou@sandia.gov) // Jack Dongarra (dongarra@eecs.utk.edu) // Piotr Luszczek (luszczek@eecs.utk.edu) // // *************************************************** //@HEADER /* * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /*! @file ExchangeHalo.cpp HPCG routine */ // Compile this routine only if running with MPI #ifndef HPCG_NO_MPI #include "ExchangeHalo.hpp" #include "Geometry.hpp" #include #include extern p2p_comm_mode_t P2P_Mode; /*! Communicates data that is at the border of the part of the domain assigned to this processor. @param[in] A The known system matrix @param[inout] x On entry: the local vector entries followed by entries to be communicated; on exit: the vector with non-local entries updated by other processors */ void ExchangeHalo(const SparseMatrix& A, Vector& x) { local_int_t localNumberOfRows = A.localNumberOfRows; int num_neighbors = A.numberOfSendNeighbors; local_int_t * receiveLength = A.receiveLength; local_int_t * sendLength = A.sendLength; int * neighbors = A.neighbors; double * sendBuffer = A.sendBuffer; local_int_t totalToBeSent = A.totalToBeSent; local_int_t * elementsToSend = A.elementsToSend; double * const xv = x.values; int size, rank; // Number of MPI processes, My process ID MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); // // first post receives, these are immediate receives // Do not wait for result to come, will do that at the // wait call below. // int MPI_MY_TAG = 99; MPI_Request * request = new MPI_Request[num_neighbors]; // // Externals are at end of locals // double * x_external = (double *) xv + localNumberOfRows; // Post receives first // TODO: Thread this loop for (int i = 0; i < num_neighbors; i++) { local_int_t n_recv = receiveLength[i]; MPI_Irecv(x_external, n_recv, MPI_DOUBLE, neighbors[i], MPI_MY_TAG, MPI_COMM_WORLD, request+i); x_external += n_recv; } // // Fill up send buffer // // TODO: Thread this loop for (local_int_t i=0; i