diff --git a/src/qibotn/test_multinode.py b/src/qibotn/test_multinode.py index 0570b38..8b234ec 100644 --- a/src/qibotn/test_multinode.py +++ b/src/qibotn/test_multinode.py @@ -1,126 +1,65 @@ -import qibo -#import qibotn.cutn as cutn -from cuquantum import cutensornet as cutn - -from qibo import gates -from qibo.models import Circuit, QFT +import os +import sys +from timeit import default_timer as timer import numpy as np -from mpi4py import MPI # this line initializes MPI import cupy as cp +import cuquantum +from cuquantum import cutensornet as cutn +from qibo import gates +from qibo.models import QFT +from mpi4py import MPI # this line initializes MPI from cupy.cuda.runtime import getDeviceCount from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum -import cuquantum def qibo_qft(nqubits, swaps): circ_qibo = QFT(nqubits, swaps) state_vec = np.array(circ_qibo()) return circ_qibo, state_vec -print("QiboTN") +args = sys.argv + +if len(args) < 2: + print("Usage: python script.py [nqubits] ") + sys.exit(1) + +nqubits = int(args[1]) root = 0 comm = MPI.COMM_WORLD rank, size = comm.Get_rank(), comm.Get_size() -print("Andy: Rank ", rank," size ", size) -# Assign the device for each process. device_id = rank % getDeviceCount() cp.cuda.Device(device_id).use() +#print("Andy: Rank ", rank," size ", size, 'Device count',getDeviceCount()) + +# Check if the env var is set +if not "CUTENSORNET_COMM_LIB" in os.environ: + raise RuntimeError("need to set CUTENSORNET_COMM_LIB to the path of the MPI wrapper library") + +if not os.path.isfile(os.environ["CUTENSORNET_COMM_LIB"]): + raise RuntimeError("CUTENSORNET_COMM_LIB does not point to the path of the MPI wrapper library") datatype = 'complex128' -nqubits = 10 -''' -qibo_circ = Circuit(nqubits) -qibo_circ.add(gates.H(0)) -#qibo_circ.add(gates.CZ(3,4)) -qibo_circ.add(gates.CZ(2,4)) -#qibo_circ.add(gates.CNOT(0,4)) -#qibo_circ.add(gates.SWAP(0,4)) -qibo_circ.add(gates.H(2)) -qibo_circ.add(gates.H(4)) -''' qibo_circ = QFT(nqubits) - -''' -expr = 'ehl,gj,edhg,bif,d,c,k,iklj,cf,a->ba' -shapes = [(8, 2, 5), (5, 7), (8, 8, 2, 5), (8, 6, 3), (8,), (6,), (5,), (6, 5, 5, 7), (6, 3), (3,)] -print("Andy: expr =",expr) -if rank == root: - operands = [cp.random.rand(*shape) for shape in shapes] -else: - operands = [cp.empty(shape) for shape in shapes] -''' - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) -expr, mode_label, q_frontier, operands = myconvertor.state_vector() -shapes = [tensor.shape for tensor in operands] -print("expr ", expr) -print("Operands ", operands) -print("Shape", shapes) -# Set the operand data on root. Since we use the buffer interface APIs offered by mpi4py for communicating array -# objects, we can directly use device arrays (cupy.ndarray, for example) as we assume mpi4py is built against -# a CUDA-aware MPI. -if rank != root: - operands = [cp.empty(shape,dtype="complex128") for shape in shapes] - -''' -if rank == root: - operands = [cp.random.rand(*shape) for shape in shapes] - print("Operands random", operands) - -else: - operands = [cp.empty(shape) for shape in shapes] -''' - -for operand in operands: - print("Is CUPY array? ", cp.get_array_module(operand), " Operand size = ", operand.nbytes) - -for operand in operands: - comm.Bcast(operand, root) # Bind the communicator to the library handle handle = cutn.create() -print("Andy cutn.create()") -print("Andy ", cutn.get_mpi_comm_pointer(comm)) cutn.distributed_reset_configuration( handle, *cutn.get_mpi_comm_pointer(comm) ) -print("Andy cutn.distributed_reset_configuration") -operands_interleave = myconvertor.get_interleave_format( mode_label, q_frontier, operands) -print("new function interkeave ", operands_interleave) -print("Ori function interleave", myconvertor.state_vector_operands()) - -result = cuquantum.contract(*operands_interleave, options={'device_id' : device_id, 'handle': handle}) -#result = cuquantum.contract(expr, *operands, options={'device_id' : device_id, 'handle': handle}) - -''' - -# Create a new GPU buffer for verification -result_cp = cp.empty_like(result) - -# Sum the partial contribution from each process on root, with GPU if rank == root: - comm.Reduce(sendbuf=MPI.IN_PLACE, recvbuf=result_cp, op=MPI.SUM, root=root) -else: - comm.Reduce(sendbuf=result_cp, recvbuf=None, op=MPI.SUM, root=root) -''' + start = timer() + +result = cuquantum.contract(*myconvertor.state_vector_operands(), options={'device_id' : device_id, 'handle': handle}) + +if rank == root: + end = timer() + # Check correctness. if rank == root: - #operands = myconvertor.state_vector_operands() - #result_cp = cp.einsum(*operands, optimize=True) - #result_cp = np.einsum(*operands, optimize=True) - (qibo_circ, result_sv) = qibo_qft(nqubits, swaps=True) - print("Does the cuQuantum parallel contraction result match the cupy.einsum result?", cp.allclose(result.flatten(), result_sv)) - - -''' -result_tn = cutn.eval(qibo_circ, datatype) - -qibo.set_backend(backend="qibojit", platform="numpy") -(qibo_circ, result_sv) = qibo_qft(nqubits, swaps=True) -#print(result_tn) -#print(result_sv) - -assert np.allclose( - result_sv, result_tn.flatten()), "Resulting dense vectors do not match" -''' \ No newline at end of file + #(qibo_circ, result_sv) = qibo_qft(nqubits, swaps=True) + time = end - start + #print("Does the cuQuantum parallel contraction result match the cupy.einsum result?", cp.allclose(result.flatten(), result_sv)) + print("nqubit", nqubits, "time taken = ", time, 's') + \ No newline at end of file