Merge pull request #19 from qiboteam/multi-node-multi-GPU

Multi node multi gpu
This commit is contained in:
liwei
2023-10-17 15:31:51 +08:00
committed by GitHub
2 changed files with 42 additions and 1 deletions

View File

@@ -94,7 +94,8 @@ class QiboCircuitToEinsum:
required_shape = self.op_shape_from_qubits(len(gate_qubits)) required_shape = self.op_shape_from_qubits(len(gate_qubits))
self.gate_tensors.append( self.gate_tensors.append(
( (
cp.asarray(gate.matrix).reshape(required_shape), cp.asarray(gate.matrix(), dtype=self.dtype).reshape(
required_shape),
gate_qubits, gate_qubits,
) )
) )

View File

@@ -1,5 +1,9 @@
from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
from cuquantum import contract from cuquantum import contract
from cuquantum import cutensornet as cutn
import multiprocessing
from cupy.cuda.runtime import getDeviceCount
import cupy as cp
from qibotn.QiboCircuitToMPS import QiboCircuitToMPS from qibotn.QiboCircuitToMPS import QiboCircuitToMPS
from qibotn.mps_contraction_helper import MPSContractionHelper from qibotn.mps_contraction_helper import MPSContractionHelper
@@ -10,6 +14,42 @@ def eval(qibo_circ, datatype):
return contract(*myconvertor.state_vector_operands()) return contract(*myconvertor.state_vector_operands())
def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
    """Contract a qibo circuit as a tensor network across MPI ranks and GPUs.

    The circuit is converted with ``QiboCircuitToEinsum``; the contraction then
    runs in two steps:

    1. Pathfinder: ``n_samples`` candidate contraction paths are sampled (using
       several CPU threads) and the least costly one is kept on the network
       object.
    2. Execution: the network is contracted along that path to give a dense
       vector representation of the tensor network.

    Args:
        qibo_circ: The qibo circuit to convert and contract.
        datatype: Data type forwarded to ``QiboCircuitToEinsum`` as ``dtype``.
        n_samples: Number of path-finder samples. Defaults to 8.

    Returns:
        A ``(result, rank)`` tuple: the value returned by
        ``Network.contract()`` and the MPI rank of the calling process.
    """
    from mpi4py import MPI  # this line initializes MPI

    # Use half of the logical cores for path finding, but never fewer than one
    # thread (cpu_count() // 2 is 0 on a single-core host).
    ncpu_threads = max(1, multiprocessing.cpu_count() // 2)

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Spread the ranks over the GPUs visible to this process, round-robin.
    device_id = rank % getDeviceCount()
    cp.cuda.Device(device_id).use()

    handle = cutn.create()
    try:
        cutn.distributed_reset_configuration(
            handle, *cutn.get_mpi_comm_pointer(comm)
        )
        network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")

        # Perform circuit conversion
        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        operands_interleave = myconvertor.state_vector_operands()

        # The context manager frees the network's resources on exit, which
        # must happen before the library handle it uses is destroyed.
        with cutn.Network(*operands_interleave, options=network_opts) as network:
            # Pathfinder: search for the optimal path; it is stored on the
            # network object for the contraction below.
            network.contract_path(
                optimize={"samples": n_samples, "threads": ncpu_threads}
            )

            # Execution: contract using the path found previously.
            result = network.contract()
    finally:
        # Release the handle even if conversion or contraction raised.
        cutn.destroy(handle)

    return result, rank
def eval_mps(qibo_circ, gate_algo, datatype): def eval_mps(qibo_circ, gate_algo, datatype):
myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype) myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
mps_helper = MPSContractionHelper(myconvertor.num_qubits) mps_helper = MPSContractionHelper(myconvertor.num_qubits)