Update dense_vector_tn_MPI
This commit is contained in:
@@ -64,6 +64,7 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
|
|||||||
|
|
||||||
from cuquantum import Network
|
from cuquantum import Network
|
||||||
from mpi4py import MPI
|
from mpi4py import MPI
|
||||||
|
import cuquantum.cutensornet as cutn
|
||||||
|
|
||||||
root = 0
|
root = 0
|
||||||
comm = MPI.COMM_WORLD
|
comm = MPI.COMM_WORLD
|
||||||
@@ -71,21 +72,31 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
|
|||||||
size = comm.Get_size()
|
size = comm.Get_size()
|
||||||
|
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
|
cp.cuda.Device(device_id).use()
|
||||||
|
mempool = cp.get_default_memory_pool()
|
||||||
|
|
||||||
# Perform circuit conversion
|
# Perform circuit conversion
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
if rank == 0:
|
||||||
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
|
|
||||||
operands = myconvertor.state_vector_operands()
|
operands = myconvertor.state_vector_operands()
|
||||||
|
else:
|
||||||
|
operands = None
|
||||||
|
|
||||||
# Assign the device for each process.
|
operands = comm.bcast(operands, root)
|
||||||
device_id = rank % getDeviceCount()
|
|
||||||
|
|
||||||
# Create network object.
|
# Create network object.
|
||||||
network = Network(*operands, options={"device_id": device_id})
|
network = Network(*operands, options={"device_id": device_id})
|
||||||
|
|
||||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||||
path, info = network.contract_path(
|
path, info = network.contract_path(
|
||||||
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
|
optimize={
|
||||||
|
"samples": n_samples,
|
||||||
|
"slicing": {
|
||||||
|
"min_slices": max(32, size),
|
||||||
|
"memory_model": cutn.MemoryModel.CUTENSOR,
|
||||||
|
},
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Select the best path from all ranks.
|
# Select the best path from all ranks.
|
||||||
@@ -114,6 +125,9 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
|
|||||||
# Sum the partial contribution from each process on root.
|
# Sum the partial contribution from each process on root.
|
||||||
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
||||||
|
|
||||||
|
del network
|
||||||
|
mempool.free_all_blocks()
|
||||||
|
|
||||||
return result, rank
|
return result, rank
|
||||||
|
|
||||||
|
|
||||||
@@ -139,6 +153,7 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
|
|||||||
from cupy.cuda import nccl
|
from cupy.cuda import nccl
|
||||||
from cuquantum import Network
|
from cuquantum import Network
|
||||||
from mpi4py import MPI
|
from mpi4py import MPI
|
||||||
|
import cuquantum.cutensornet as cutn
|
||||||
|
|
||||||
root = 0
|
root = 0
|
||||||
comm_mpi = MPI.COMM_WORLD
|
comm_mpi = MPI.COMM_WORLD
|
||||||
@@ -162,7 +177,13 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
|
|||||||
|
|
||||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||||
path, info = network.contract_path(
|
path, info = network.contract_path(
|
||||||
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
|
optimize={
|
||||||
|
"samples": n_samples,
|
||||||
|
"slicing": {
|
||||||
|
"min_slices": max(32, size),
|
||||||
|
"memory_model": cutn.MemoryModel.CUTENSOR,
|
||||||
|
},
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Select the best path from all ranks.
|
# Select the best path from all ranks.
|
||||||
@@ -247,7 +268,7 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
|
|||||||
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
|
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
|
||||||
|
|
||||||
# Perform circuit conversion
|
# Perform circuit conversion
|
||||||
if rank==0:
|
if rank == 0:
|
||||||
|
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
operands = myconvertor.expectation_operands(
|
operands = myconvertor.expectation_operands(
|
||||||
@@ -255,14 +276,20 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
operands = None
|
operands = None
|
||||||
|
|
||||||
operands = comm_mpi.bcast(operands, root)
|
operands = comm_mpi.bcast(operands, root)
|
||||||
|
|
||||||
network = Network(*operands)
|
network = Network(*operands)
|
||||||
|
|
||||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||||
path, info = network.contract_path(
|
path, info = network.contract_path(
|
||||||
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size),"memory_model":cutn.MemoryModel.CUTENSOR}}
|
optimize={
|
||||||
|
"samples": n_samples,
|
||||||
|
"slicing": {
|
||||||
|
"min_slices": max(32, size),
|
||||||
|
"memory_model": cutn.MemoryModel.CUTENSOR,
|
||||||
|
},
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Select the best path from all ranks.
|
# Select the best path from all ranks.
|
||||||
@@ -299,10 +326,10 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
|
|||||||
root,
|
root,
|
||||||
stream_ptr,
|
stream_ptr,
|
||||||
)
|
)
|
||||||
|
|
||||||
del network
|
del network
|
||||||
mempool.free_all_blocks()
|
mempool.free_all_blocks()
|
||||||
|
|
||||||
return result, rank
|
return result, rank
|
||||||
|
|
||||||
|
|
||||||
@@ -337,14 +364,14 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
|
|||||||
comm = MPI.COMM_WORLD
|
comm = MPI.COMM_WORLD
|
||||||
rank = comm.Get_rank()
|
rank = comm.Get_rank()
|
||||||
size = comm.Get_size()
|
size = comm.Get_size()
|
||||||
|
|
||||||
# Assign the device for each process.
|
# Assign the device for each process.
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
cp.cuda.Device(device_id).use()
|
cp.cuda.Device(device_id).use()
|
||||||
mempool = cp.get_default_memory_pool()
|
mempool = cp.get_default_memory_pool()
|
||||||
|
|
||||||
# Perform circuit conversion
|
# Perform circuit conversion
|
||||||
if rank==0:
|
if rank == 0:
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
|
|
||||||
operands = myconvertor.expectation_operands(
|
operands = myconvertor.expectation_operands(
|
||||||
@@ -352,15 +379,21 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
operands = None
|
operands = None
|
||||||
|
|
||||||
operands = comm.bcast(operands, root)
|
operands = comm.bcast(operands, root)
|
||||||
|
|
||||||
# Create network object.
|
# Create network object.
|
||||||
network = Network(*operands, options={"device_id": device_id})
|
network = Network(*operands, options={"device_id": device_id})
|
||||||
|
|
||||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||||
path, info = network.contract_path(
|
path, info = network.contract_path(
|
||||||
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size),"memory_model":cutn.MemoryModel.CUTENSOR}}
|
optimize={
|
||||||
|
"samples": n_samples,
|
||||||
|
"slicing": {
|
||||||
|
"min_slices": max(32, size),
|
||||||
|
"memory_model": cutn.MemoryModel.CUTENSOR,
|
||||||
|
},
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Select the best path from all ranks.
|
# Select the best path from all ranks.
|
||||||
@@ -388,7 +421,7 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
|
|||||||
|
|
||||||
# Sum the partial contribution from each process on root.
|
# Sum the partial contribution from each process on root.
|
||||||
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
||||||
|
|
||||||
del network
|
del network
|
||||||
mempool.free_all_blocks()
|
mempool.free_all_blocks()
|
||||||
|
|
||||||
@@ -437,82 +470,3 @@ def pauli_string_gen(nqubits, pauli_string_pattern):
|
|||||||
char_to_add = pauli_string_pattern[i % len(pauli_string_pattern)]
|
char_to_add = pauli_string_pattern[i % len(pauli_string_pattern)]
|
||||||
result += char_to_add
|
result += char_to_add
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def expectation_pauli_tn_MPI_pathfinding(
    qibo_circ, datatype, pauli_string_pattern, n_samples=8
):
    """Time the contraction-path search of a Pauli expectation network per MPI rank.

    The circuit is converted on the root rank with QiboCircuitToEinsum(),
    using the Pauli string produced by pauli_string_gen() from
    pauli_string_pattern and the number of qubits of the circuit. The
    resulting operands are broadcast to every rank, and each rank runs the
    pathfinder (``Network.contract_path``) with ``n_samples`` samples on its
    own device. Only pathfinding is performed: the network is never
    contracted, so no expectation value is computed here.

    Parameters:
        qibo_circ: The quantum circuit object.
        datatype (str): Either single ("complex64") or double ("complex128") precision.
        pauli_string_pattern (str): Pauli string pattern.
        n_samples (int): Number of samples for pathfinding.

    Returns:
        A tuple ``(gathered_data, rank)``. On the root rank ``gathered_data``
        is a float64 array of shape ``(size, 4)`` holding one row per rank:
        ``[num_slices, opt_cost, largest_intermediate, pathfinding seconds]``.
        On every other rank ``gathered_data`` is None. ``rank`` is the MPI
        rank of the calling process.
    """
    from cuquantum import Network
    from mpi4py import MPI  # this line initializes MPI
    import cuquantum.cutensornet as cutn
    import time
    import numpy as np

    root = 0
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Assign the device for each process.
    device_id = rank % getDeviceCount()
    cp.cuda.Device(device_id).use()
    mempool = cp.get_default_memory_pool()

    # Perform circuit conversion on the root rank only, then share the
    # operands with every other rank.
    if rank == root:
        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        operands = myconvertor.expectation_operands(
            pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
        )
    else:
        operands = None

    operands = comm.bcast(operands, root)

    # Create network object.
    network = Network(*operands, options={"device_id": device_id})

    # Compute the path on all ranks with n_samples samples for
    # hyperoptimization, requesting at least max(32, size) slices.
    # perf_counter() is used because it is monotonic, unlike time.time().
    start_time = time.perf_counter()
    _, info = network.contract_path(
        optimize={
            "samples": n_samples,
            "slicing": {
                "min_slices": max(32, size),
                "memory_model": cutn.MemoryModel.CUTENSOR,
            },
        }
    )
    elapsed = time.perf_counter() - start_time

    # Explicit float64 on both sides so the Gather send and receive buffers
    # are guaranteed to agree in element type.
    local_data = np.array(
        [info.num_slices, info.opt_cost, info.largest_intermediate, elapsed],
        dtype=np.float64,
    )

    # Only the root rank needs a receive buffer for the gathered rows.
    if rank == root:
        gathered_data = np.zeros((size, 4), dtype=np.float64)
    else:
        gathered_data = None

    # Gather data from all ranks to the root rank.
    comm.Gather(local_data, gathered_data, root=root)

    del network
    mempool.free_all_blocks()

    return gathered_data, rank
|
|
||||||
|
|||||||
Reference in New Issue
Block a user