From ec4784d09fe831c91c490a4b5a275c8294992596 Mon Sep 17 00:00:00 2001 From: tankya2 Date: Fri, 14 Jul 2023 12:13:27 +0800 Subject: [PATCH 01/11] added draft code for multi node --- src/qibotn/test_multinode.py | 126 +++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 src/qibotn/test_multinode.py diff --git a/src/qibotn/test_multinode.py b/src/qibotn/test_multinode.py new file mode 100644 index 0000000..0570b38 --- /dev/null +++ b/src/qibotn/test_multinode.py @@ -0,0 +1,126 @@ +import qibo +#import qibotn.cutn as cutn +from cuquantum import cutensornet as cutn + +from qibo import gates +from qibo.models import Circuit, QFT +import numpy as np +from mpi4py import MPI # this line initializes MPI +import cupy as cp +from cupy.cuda.runtime import getDeviceCount +from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum +import cuquantum + +def qibo_qft(nqubits, swaps): + circ_qibo = QFT(nqubits, swaps) + state_vec = np.array(circ_qibo()) + return circ_qibo, state_vec + +print("QiboTN") + +root = 0 +comm = MPI.COMM_WORLD +rank, size = comm.Get_rank(), comm.Get_size() +print("Andy: Rank ", rank," size ", size) +# Assign the device for each process. +device_id = rank % getDeviceCount() +cp.cuda.Device(device_id).use() + +datatype = 'complex128' +nqubits = 10 +''' +qibo_circ = Circuit(nqubits) +qibo_circ.add(gates.H(0)) +#qibo_circ.add(gates.CZ(3,4)) +qibo_circ.add(gates.CZ(2,4)) +#qibo_circ.add(gates.CNOT(0,4)) +#qibo_circ.add(gates.SWAP(0,4)) +qibo_circ.add(gates.H(2)) +qibo_circ.add(gates.H(4)) +''' +qibo_circ = QFT(nqubits) + +''' +expr = 'ehl,gj,edhg,bif,d,c,k,iklj,cf,a->ba' +shapes = [(8, 2, 5), (5, 7), (8, 8, 2, 5), (8, 6, 3), (8,), (6,), (5,), (6, 5, 5, 7), (6, 3), (3,)] +print("Andy: expr =",expr) +if rank == root: + operands = [cp.random.rand(*shape) for shape in shapes] +else: + operands = [cp.empty(shape) for shape in shapes] +''' + +myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) +expr, mode_label, q_frontier, operands = myconvertor.state_vector() +shapes = [tensor.shape for tensor in operands] +print("expr ", expr) +print("Operands ", operands) +print("Shape", shapes) +# Set the operand data on root. Since we use the buffer interface APIs offered by mpi4py for communicating array +# objects, we can directly use device arrays (cupy.ndarray, for example) as we assume mpi4py is built against +# a CUDA-aware MPI. +if rank != root: + operands = [cp.empty(shape,dtype="complex128") for shape in shapes] + +''' +if rank == root: + operands = [cp.random.rand(*shape) for shape in shapes] + print("Operands random", operands) + +else: + operands = [cp.empty(shape) for shape in shapes] +''' + +for operand in operands: + print("Is CUPY array? ", cp.get_array_module(operand), " Operand size = ", operand.nbytes) + +for operand in operands: + comm.Bcast(operand, root) + +# Bind the communicator to the library handle +handle = cutn.create() +print("Andy cutn.create()") +print("Andy ", cutn.get_mpi_comm_pointer(comm)) +cutn.distributed_reset_configuration( + handle, *cutn.get_mpi_comm_pointer(comm) +) +print("Andy cutn.distributed_reset_configuration") + +operands_interleave = myconvertor.get_interleave_format( mode_label, q_frontier, operands) +print("new function interkeave ", operands_interleave) +print("Ori function interleave", myconvertor.state_vector_operands()) + +result = cuquantum.contract(*operands_interleave, options={'device_id' : device_id, 'handle': handle}) +#result = cuquantum.contract(expr, *operands, options={'device_id' : device_id, 'handle': handle}) + +''' + +# Create a new GPU buffer for verification +result_cp = cp.empty_like(result) + +# Sum the partial contribution from each process on root, with GPU +if rank == root: + comm.Reduce(sendbuf=MPI.IN_PLACE, recvbuf=result_cp, op=MPI.SUM, root=root) +else: + comm.Reduce(sendbuf=result_cp, recvbuf=None, op=MPI.SUM, root=root) +''' +# Check correctness. +if rank == root: + #operands = myconvertor.state_vector_operands() + #result_cp = cp.einsum(*operands, optimize=True) + #result_cp = np.einsum(*operands, optimize=True) + (qibo_circ, result_sv) = qibo_qft(nqubits, swaps=True) + print("Does the cuQuantum parallel contraction result match the cupy.einsum result?", cp.allclose(result.flatten(), result_sv)) + + +''' +result_tn = cutn.eval(qibo_circ, datatype) + +qibo.set_backend(backend="qibojit", platform="numpy") +(qibo_circ, result_sv) = qibo_qft(nqubits, swaps=True) +#print(result_tn) +#print(result_sv) + +assert np.allclose( + result_sv, result_tn.flatten()), "Resulting dense vectors do not match" +''' \ No newline at end of file From 5d65149271aa9c7c0a24596de56f131a0f0351fe Mon Sep 17 00:00:00 2001 From: tankya2 Date: Fri, 21 Jul 2023 16:47:16 +0800 Subject: [PATCH 02/11] Update --- src/qibotn/test_multinode.py | 131 ++++++++++------------------------- 1 file changed, 35 insertions(+), 96 deletions(-) diff --git a/src/qibotn/test_multinode.py b/src/qibotn/test_multinode.py index 0570b38..8b234ec 100644 --- a/src/qibotn/test_multinode.py +++ b/src/qibotn/test_multinode.py @@ -1,126 +1,65 @@ -import qibo -#import qibotn.cutn as cutn -from cuquantum import cutensornet as cutn - -from qibo import gates -from qibo.models import Circuit, QFT +import os +import sys +from timeit import default_timer as timer import numpy as np -from mpi4py import MPI # this line initializes MPI import cupy as cp +import cuquantum +from cuquantum import cutensornet as cutn +from qibo import gates +from qibo.models import QFT +from mpi4py import MPI # this line initializes MPI from cupy.cuda.runtime import getDeviceCount from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum -import cuquantum def qibo_qft(nqubits, swaps): circ_qibo = QFT(nqubits, swaps) state_vec = np.array(circ_qibo()) return circ_qibo, state_vec -print("QiboTN") +args = sys.argv + +if len(args) < 2: + print("Usage: python script.py [nqubits] ") + sys.exit(1) + +nqubits = int(args[1]) root = 0 comm = MPI.COMM_WORLD rank, size = comm.Get_rank(), comm.Get_size() -print("Andy: Rank ", rank," size ", size) -# Assign the device for each process. device_id = rank % getDeviceCount() cp.cuda.Device(device_id).use() +#print("Andy: Rank ", rank," size ", size, 'Device count',getDeviceCount()) + +# Check if the env var is set +if not "CUTENSORNET_COMM_LIB" in os.environ: + raise RuntimeError("need to set CUTENSORNET_COMM_LIB to the path of the MPI wrapper library") + +if not os.path.isfile(os.environ["CUTENSORNET_COMM_LIB"]): + raise RuntimeError("CUTENSORNET_COMM_LIB does not point to the path of the MPI wrapper library") datatype = 'complex128' -nqubits = 10 -''' -qibo_circ = Circuit(nqubits) -qibo_circ.add(gates.H(0)) -#qibo_circ.add(gates.CZ(3,4)) -qibo_circ.add(gates.CZ(2,4)) -#qibo_circ.add(gates.CNOT(0,4)) -#qibo_circ.add(gates.SWAP(0,4)) -qibo_circ.add(gates.H(2)) -qibo_circ.add(gates.H(4)) -''' qibo_circ = QFT(nqubits) - -''' -expr = 'ehl,gj,edhg,bif,d,c,k,iklj,cf,a->ba' -shapes = [(8, 2, 5), (5, 7), (8, 8, 2, 5), (8, 6, 3), (8,), (6,), (5,), (6, 5, 5, 7), (6, 3), (3,)] -print("Andy: expr =",expr) -if rank == root: - operands = [cp.random.rand(*shape) for shape in shapes] -else: - operands = [cp.empty(shape) for shape in shapes] -''' - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) -expr, mode_label, q_frontier, operands = myconvertor.state_vector() -shapes = [tensor.shape for tensor in operands] -print("expr ", expr) -print("Operands ", operands) -print("Shape", shapes) -# Set the operand data on root. Since we use the buffer interface APIs offered by mpi4py for communicating array -# objects, we can directly use device arrays (cupy.ndarray, for example) as we assume mpi4py is built against -# a CUDA-aware MPI. -if rank != root: - operands = [cp.empty(shape,dtype="complex128") for shape in shapes] - -''' -if rank == root: - operands = [cp.random.rand(*shape) for shape in shapes] - print("Operands random", operands) - -else: - operands = [cp.empty(shape) for shape in shapes] -''' - -for operand in operands: - print("Is CUPY array? ", cp.get_array_module(operand), " Operand size = ", operand.nbytes) - -for operand in operands: - comm.Bcast(operand, root) # Bind the communicator to the library handle handle = cutn.create() -print("Andy cutn.create()") -print("Andy ", cutn.get_mpi_comm_pointer(comm)) cutn.distributed_reset_configuration( handle, *cutn.get_mpi_comm_pointer(comm) ) -print("Andy cutn.distributed_reset_configuration") -operands_interleave = myconvertor.get_interleave_format( mode_label, q_frontier, operands) -print("new function interkeave ", operands_interleave) -print("Ori function interleave", myconvertor.state_vector_operands()) - -result = cuquantum.contract(*operands_interleave, options={'device_id' : device_id, 'handle': handle}) -#result = cuquantum.contract(expr, *operands, options={'device_id' : device_id, 'handle': handle}) - -''' - -# Create a new GPU buffer for verification -result_cp = cp.empty_like(result) - -# Sum the partial contribution from each process on root, with GPU if rank == root: - comm.Reduce(sendbuf=MPI.IN_PLACE, recvbuf=result_cp, op=MPI.SUM, root=root) -else: - comm.Reduce(sendbuf=result_cp, recvbuf=None, op=MPI.SUM, root=root) -''' + start = timer() + +result = cuquantum.contract(*myconvertor.state_vector_operands(), options={'device_id' : device_id, 'handle': handle}) + +if rank == root: + end = timer() + # Check correctness. if rank == root: - #operands = myconvertor.state_vector_operands() - #result_cp = cp.einsum(*operands, optimize=True) - #result_cp = np.einsum(*operands, optimize=True) - (qibo_circ, result_sv) = qibo_qft(nqubits, swaps=True) - print("Does the cuQuantum parallel contraction result match the cupy.einsum result?", cp.allclose(result.flatten(), result_sv)) - - -''' -result_tn = cutn.eval(qibo_circ, datatype) - -qibo.set_backend(backend="qibojit", platform="numpy") -(qibo_circ, result_sv) = qibo_qft(nqubits, swaps=True) -#print(result_tn) -#print(result_sv) - -assert np.allclose( - result_sv, result_tn.flatten()), "Resulting dense vectors do not match" -''' \ No newline at end of file + #(qibo_circ, result_sv) = qibo_qft(nqubits, swaps=True) + time = end - start + #print("Does the cuQuantum parallel contraction result match the cupy.einsum result?", cp.allclose(result.flatten(), result_sv)) + print("nqubit", nqubits, "time taken = ", time, 's') + \ No newline at end of file From cef8fb833e3fa0fcd97be54a23c75a12c6c5bc09 Mon Sep 17 00:00:00 2001 From: tankya2 Date: Wed, 30 Aug 2023 10:39:17 +0800 Subject: [PATCH 03/11] Add eval_tn_mpi function --- src/qibotn/cutn.py | 56 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py index e6f3e8c..5790772 100644 --- a/src/qibotn/cutn.py +++ b/src/qibotn/cutn.py @@ -1,8 +1,62 @@ -# from qibotn import quimb as qiboquimb from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum from cuquantum import contract +from cuquantum import cutensornet as cutn +from mpi4py import MPI # this line initializes MPI +import multiprocessing +from cupy.cuda.runtime import getDeviceCount def eval(qibo_circ, datatype): myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) return contract(*myconvertor.state_vector_operands()) + + +def eval_tn_MPI(qibo_circ, datatype): + + ncpu_threads = multiprocessing.cpu_count() // 2 + n_samples = 8 + + root = 0 + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + + device_id = rank % getDeviceCount() + cp.cuda.Device(device_id).use() + + handle = cutn.create() + cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm)) + network_opts = cutn.NetworkOptions(handle=handle, blocking="auto") + + myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) + operands_interleave = myconvertor.state_vector_operands() + + network = cutn.Network(*operands_interleave, options=network_opts) + network.contract_path(optimize={'samples': n_samples, 'threads': ncpu_threads}) # Calculate path, info + + result = network.contract() + + cutn.destroy(handle) + + if rank == root: + return result, rank + + +if __name__ == "__main__": + + from qibo.models import QFT + import cupy as cp + import numpy as np + + num_qubits = 10 + swaps = True + circ_qibo = QFT(num_qubits, swaps) + + dtype="complex128" + sv_mpi, rank = eval_tn_MPI(circ_qibo, dtype) + + if rank == 0: + sv_reference = eval(circ_qibo, dtype) + state_vec = np.array(circ_qibo()) + print(f"State vector difference: {abs(sv_mpi-sv_reference).max():0.3e}") + assert cp.allclose(sv_mpi, sv_reference) + assert cp.allclose(sv_mpi.flatten(), state_vec) \ No newline at end of file From 000c4a1b8e5bf433bcbd2b631ddaf658252942c2 Mon Sep 17 00:00:00 2001 From: tankya2 Date: Wed, 30 Aug 2023 17:24:34 +0800 Subject: [PATCH 04/11] Remove unuse file --- src/qibotn/test_multinode.py | 65 ------------------------------------ 1 file changed, 65 deletions(-) delete mode 100644 src/qibotn/test_multinode.py diff --git a/src/qibotn/test_multinode.py b/src/qibotn/test_multinode.py deleted file mode 100644 index 8b234ec..0000000 --- a/src/qibotn/test_multinode.py +++ /dev/null @@ -1,65 +0,0 @@ -import os -import sys -from timeit import default_timer as timer -import numpy as np -import cupy as cp -import cuquantum -from cuquantum import cutensornet as cutn -from qibo import gates -from qibo.models import QFT -from mpi4py import MPI # this line initializes MPI -from cupy.cuda.runtime import getDeviceCount -from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum - -def qibo_qft(nqubits, swaps): - circ_qibo = QFT(nqubits, swaps) - state_vec = np.array(circ_qibo()) - return circ_qibo, state_vec - -args = sys.argv - -if len(args) < 2: - print("Usage: python script.py [nqubits] ") - sys.exit(1) - -nqubits = int(args[1]) - -root = 0 -comm = MPI.COMM_WORLD -rank, size = comm.Get_rank(), comm.Get_size() -device_id = rank % getDeviceCount() -cp.cuda.Device(device_id).use() -#print("Andy: Rank ", rank," size ", size, 'Device count',getDeviceCount()) - -# Check if the env var is set -if not "CUTENSORNET_COMM_LIB" in os.environ: - raise RuntimeError("need to set CUTENSORNET_COMM_LIB to the path of the MPI wrapper library") - -if not os.path.isfile(os.environ["CUTENSORNET_COMM_LIB"]): - raise RuntimeError("CUTENSORNET_COMM_LIB does not point to the path of the MPI wrapper library") - -datatype = 'complex128' -qibo_circ = QFT(nqubits) -myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - -# Bind the communicator to the library handle -handle = cutn.create() -cutn.distributed_reset_configuration( - handle, *cutn.get_mpi_comm_pointer(comm) -) - -if rank == root: - start = timer() - -result = cuquantum.contract(*myconvertor.state_vector_operands(), options={'device_id' : device_id, 'handle': handle}) - -if rank == root: - end = timer() - -# Check correctness. -if rank == root: - #(qibo_circ, result_sv) = qibo_qft(nqubits, swaps=True) - time = end - start - #print("Does the cuQuantum parallel contraction result match the cupy.einsum result?", cp.allclose(result.flatten(), result_sv)) - print("nqubit", nqubits, "time taken = ", time, 's') - \ No newline at end of file From 1c9df2647264f4419fcf31f3a785bf58523e11ed Mon Sep 17 00:00:00 2001 From: tankya2 Date: Wed, 30 Aug 2023 17:25:04 +0800 Subject: [PATCH 05/11] Update with multi node code --- src/qibotn/cutn.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py index 5790772..7d9984f 100644 --- a/src/qibotn/cutn.py +++ b/src/qibotn/cutn.py @@ -19,7 +19,6 @@ def eval_tn_MPI(qibo_circ, datatype): root = 0 comm = MPI.COMM_WORLD rank = comm.Get_rank() - device_id = rank % getDeviceCount() cp.cuda.Device(device_id).use() @@ -31,7 +30,7 @@ def eval_tn_MPI(qibo_circ, datatype): operands_interleave = myconvertor.state_vector_operands() network = cutn.Network(*operands_interleave, options=network_opts) - network.contract_path(optimize={'samples': n_samples, 'threads': ncpu_threads}) # Calculate path, info + network.contract_path(optimize={'samples': n_samples, 'threads': ncpu_threads}) # Calculate optimal path, returns path and info result = network.contract() From d12b8ab882d1a629eec60e0c69f49d7f4d3b41c3 Mon Sep 17 00:00:00 2001 From: tankya2 Date: Wed, 30 Aug 2023 17:26:07 +0800 Subject: [PATCH 06/11] Black formate --- src/qibotn/cutn.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py index 7d9984f..2fc9079 100644 --- a/src/qibotn/cutn.py +++ b/src/qibotn/cutn.py @@ -12,50 +12,50 @@ def eval(qibo_circ, datatype): def eval_tn_MPI(qibo_circ, datatype): - ncpu_threads = multiprocessing.cpu_count() // 2 n_samples = 8 - + root = 0 comm = MPI.COMM_WORLD rank = comm.Get_rank() device_id = rank % getDeviceCount() cp.cuda.Device(device_id).use() - + handle = cutn.create() cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm)) network_opts = cutn.NetworkOptions(handle=handle, blocking="auto") myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) operands_interleave = myconvertor.state_vector_operands() - + network = cutn.Network(*operands_interleave, options=network_opts) - network.contract_path(optimize={'samples': n_samples, 'threads': ncpu_threads}) # Calculate optimal path, returns path and info - + network.contract_path( + optimize={"samples": n_samples, "threads": ncpu_threads} + ) # Calculate optimal path, returns path and info + result = network.contract() - + cutn.destroy(handle) - + if rank == root: return result, rank if __name__ == "__main__": - - from qibo.models import QFT + from qibo.models import QFT import cupy as cp import numpy as np - - num_qubits = 10 + + num_qubits = 10 swaps = True circ_qibo = QFT(num_qubits, swaps) - - dtype="complex128" + + dtype = "complex128" sv_mpi, rank = eval_tn_MPI(circ_qibo, dtype) - + if rank == 0: sv_reference = eval(circ_qibo, dtype) state_vec = np.array(circ_qibo()) print(f"State vector difference: {abs(sv_mpi-sv_reference).max():0.3e}") assert cp.allclose(sv_mpi, sv_reference) - assert cp.allclose(sv_mpi.flatten(), state_vec) \ No newline at end of file + assert cp.allclose(sv_mpi.flatten(), state_vec) From f59b1b0bc7346bd1aa4d6ee086c06c350f748238 Mon Sep 17 00:00:00 2001 From: tankya2 Date: Wed, 30 Aug 2023 17:29:08 +0800 Subject: [PATCH 07/11] Update return for all ranks --- src/qibotn/cutn.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py index 2fc9079..9bc1d67 100644 --- a/src/qibotn/cutn.py +++ b/src/qibotn/cutn.py @@ -15,7 +15,6 @@ def eval_tn_MPI(qibo_circ, datatype): ncpu_threads = multiprocessing.cpu_count() // 2 n_samples = 8 - root = 0 comm = MPI.COMM_WORLD rank = comm.Get_rank() device_id = rank % getDeviceCount() @@ -37,8 +36,7 @@ def eval_tn_MPI(qibo_circ, datatype): cutn.destroy(handle) - if rank == root: - return result, rank + return result, rank if __name__ == "__main__": From fc665fcfc57144e4e559aebc0b14068ad6b9c4a9 Mon Sep 17 00:00:00 2001 From: Liwei Yang Date: Wed, 27 Sep 2023 16:41:52 +0800 Subject: [PATCH 08/11] Fix the compatibility issue with qibo-0.2.0 during circuit-to-TN conversion --- src/qibotn/QiboCircuitConvertor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/qibotn/QiboCircuitConvertor.py b/src/qibotn/QiboCircuitConvertor.py index c30cfb6..ba8212f 100644 --- a/src/qibotn/QiboCircuitConvertor.py +++ b/src/qibotn/QiboCircuitConvertor.py @@ -95,7 +95,8 @@ class QiboCircuitToEinsum: required_shape = self.op_shape_from_qubits(len(gate_qubits)) self.gate_tensors.append( ( - cp.asarray(gate.matrix).reshape(required_shape), + cp.asarray(gate.matrix(), dtype=self.dtype).reshape( + required_shape), gate_qubits, ) ) From b2a2bfedf1fd424607219c091345a7b6362ca3eb Mon Sep 17 00:00:00 2001 From: tankya2 Date: Tue, 3 Oct 2023 14:25:28 +0800 Subject: [PATCH 09/11] Removed main and added cupy import --- src/qibotn/cutn.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py index 9bc1d67..3d42eb7 100644 --- a/src/qibotn/cutn.py +++ b/src/qibotn/cutn.py @@ -4,6 +4,7 @@ from cuquantum import cutensornet as cutn from mpi4py import MPI # this line initializes MPI import multiprocessing from cupy.cuda.runtime import getDeviceCount +import cupy as cp def eval(qibo_circ, datatype): @@ -37,23 +38,3 @@ def eval_tn_MPI(qibo_circ, datatype): cutn.destroy(handle) return result, rank - - -if __name__ == "__main__": - from qibo.models import QFT - import cupy as cp - import numpy as np - - num_qubits = 10 - swaps = True - circ_qibo = QFT(num_qubits, swaps) - - dtype = "complex128" - sv_mpi, rank = eval_tn_MPI(circ_qibo, dtype) - - if rank == 0: - sv_reference = eval(circ_qibo, dtype) - state_vec = np.array(circ_qibo()) - print(f"State vector difference: {abs(sv_mpi-sv_reference).max():0.3e}") - assert cp.allclose(sv_mpi, sv_reference) - assert cp.allclose(sv_mpi.flatten(), state_vec) From 15e90ebcc77096b43b875481e40655c85614bbc4 Mon Sep 17 00:00:00 2001 From: tankya2 Date: Wed, 4 Oct 2023 11:18:05 +0800 Subject: [PATCH 10/11] Added comments --- src/qibotn/cutn.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py index 3d42eb7..5267bc0 100644 --- a/src/qibotn/cutn.py +++ b/src/qibotn/cutn.py @@ -1,7 +1,6 @@ from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum from cuquantum import contract from cuquantum import cutensornet as cutn -from mpi4py import MPI # this line initializes MPI import multiprocessing from cupy.cuda.runtime import getDeviceCount import cupy as cp @@ -12,9 +11,16 @@ def eval(qibo_circ, datatype): return contract(*myconvertor.state_vector_operands()) -def eval_tn_MPI(qibo_circ, datatype): +def eval_tn_MPI(qibo_circ, datatype, n_samples=8): + """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI. + The conversion is performed by QiboCircuitToEinsum() afterwhich it goes through 2 steps: pathfinder and execution. + The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread. + After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN. + """ + + from mpi4py import MPI # this line initializes MPI + ncpu_threads = multiprocessing.cpu_count() // 2 - n_samples = 8 comm = MPI.COMM_WORLD rank = comm.Get_rank() @@ -25,14 +31,15 @@ def eval_tn_MPI(qibo_circ, datatype): cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm)) network_opts = cutn.NetworkOptions(handle=handle, blocking="auto") + # Perform circuit conversion myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) operands_interleave = myconvertor.state_vector_operands() + # Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object. network = cutn.Network(*operands_interleave, options=network_opts) - network.contract_path( - optimize={"samples": n_samples, "threads": ncpu_threads} - ) # Calculate optimal path, returns path and info + network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads}) + # Execution: To execute the contraction using the optimal path found previously result = network.contract() cutn.destroy(handle) From f97e1f95cc1912152b290acebe0b0816e803fdac Mon Sep 17 00:00:00 2001 From: Liwei Yang Date: Tue, 17 Oct 2023 10:58:26 +0800 Subject: [PATCH 11/11] Minor typo fix --- src/qibotn/cutn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py index 5267bc0..343e2e3 100644 --- a/src/qibotn/cutn.py +++ b/src/qibotn/cutn.py @@ -13,7 +13,7 @@ def eval(qibo_circ, datatype): def eval_tn_MPI(qibo_circ, datatype, n_samples=8): """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI. - The conversion is performed by QiboCircuitToEinsum() afterwhich it goes through 2 steps: pathfinder and execution. + The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution. The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread. After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN. """