Format with black
This commit is contained in:
@@ -110,7 +110,6 @@ class QiboCircuitToEinsum:
|
|||||||
|
|
||||||
self.basis_map = {"0": state_0, "1": state_1}
|
self.basis_map = {"0": state_0, "1": state_1}
|
||||||
|
|
||||||
|
|
||||||
def init_inverse_circuit(self, circuit):
|
def init_inverse_circuit(self, circuit):
|
||||||
self.gate_tensors_inverse = []
|
self.gate_tensors_inverse = []
|
||||||
gates_qubits_inverse = []
|
gates_qubits_inverse = []
|
||||||
@@ -131,14 +130,13 @@ class QiboCircuitToEinsum:
|
|||||||
|
|
||||||
# self.active_qubits is to identify qubits with at least 1 gate acting on it in the whole circuit.
|
# self.active_qubits is to identify qubits with at least 1 gate acting on it in the whole circuit.
|
||||||
self.active_qubits_inverse = np.unique(gates_qubits_inverse)
|
self.active_qubits_inverse = np.unique(gates_qubits_inverse)
|
||||||
|
|
||||||
|
def get_pauli_gates(self, pauli_map, dtype="complex128", backend=cp):
|
||||||
def get_pauli_gates(self, pauli_map, dtype='complex128', backend=cp):
|
|
||||||
"""
|
"""
|
||||||
Populate the gates for all pauli operators.
|
Populate the gates for all pauli operators.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
pauli_map: A dictionary mapping qubits to pauli operators.
|
pauli_map: A dictionary mapping qubits to pauli operators.
|
||||||
dtype: Data type for the tensor operands.
|
dtype: Data type for the tensor operands.
|
||||||
backend: The package the tensor operands belong to.
|
backend: The package the tensor operands belong to.
|
||||||
|
|
||||||
@@ -146,70 +144,74 @@ class QiboCircuitToEinsum:
|
|||||||
A sequence of pauli gates.
|
A sequence of pauli gates.
|
||||||
"""
|
"""
|
||||||
asarray = backend.asarray
|
asarray = backend.asarray
|
||||||
pauli_i = asarray([[1,0], [0,1]], dtype=dtype)
|
pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
|
||||||
pauli_x = asarray([[0,1], [1,0]], dtype=dtype)
|
pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
|
||||||
pauli_y = asarray([[0,-1j], [1j,0]], dtype=dtype)
|
pauli_y = asarray([[0, -1j], [1j, 0]], dtype=dtype)
|
||||||
pauli_z = asarray([[1,0], [0,-1]], dtype=dtype)
|
pauli_z = asarray([[1, 0], [0, -1]], dtype=dtype)
|
||||||
|
|
||||||
operand_map = {'I': pauli_i,
|
operand_map = {"I": pauli_i, "X": pauli_x, "Y": pauli_y, "Z": pauli_z}
|
||||||
'X': pauli_x,
|
|
||||||
'Y': pauli_y,
|
|
||||||
'Z': pauli_z}
|
|
||||||
gates = []
|
gates = []
|
||||||
for qubit, pauli_char in pauli_map.items():
|
for qubit, pauli_char in pauli_map.items():
|
||||||
operand = operand_map.get(pauli_char)
|
operand = operand_map.get(pauli_char)
|
||||||
if operand is None:
|
if operand is None:
|
||||||
raise ValueError('pauli string character must be one of I/X/Y/Z')
|
raise ValueError("pauli string character must be one of I/X/Y/Z")
|
||||||
gates.append((operand, (qubit,)))
|
gates.append((operand, (qubit,)))
|
||||||
return gates
|
return gates
|
||||||
|
|
||||||
def expectation_operands(self, pauli_string):
|
def expectation_operands(self, pauli_string):
|
||||||
#assign pauli string to qubit
|
# assign pauli string to qubit
|
||||||
#_get_forward_inverse_metadata()
|
# _get_forward_inverse_metadata()
|
||||||
input_bitstring = "0" * self.circuit.nqubits #Need all qubits!
|
input_bitstring = "0" * self.circuit.nqubits # Need all qubits!
|
||||||
|
|
||||||
input_operands = self._get_bitstring_tensors(input_bitstring)
|
input_operands = self._get_bitstring_tensors(input_bitstring)
|
||||||
pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))
|
pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))
|
||||||
pauli_map = pauli_string
|
pauli_map = pauli_string
|
||||||
coned_qubits = pauli_map.keys()
|
coned_qubits = pauli_map.keys()
|
||||||
|
|
||||||
(
|
(
|
||||||
mode_labels,
|
mode_labels,
|
||||||
qubits_frontier,
|
qubits_frontier,
|
||||||
next_frontier,
|
next_frontier,
|
||||||
) = self._init_mode_labels_from_qubits(range(self.circuit.nqubits))
|
) = self._init_mode_labels_from_qubits(range(self.circuit.nqubits))
|
||||||
|
|
||||||
gate_mode_labels, gate_operands = self._parse_gates_to_mode_labels_operands(
|
gate_mode_labels, gate_operands = self._parse_gates_to_mode_labels_operands(
|
||||||
self.gate_tensors, qubits_frontier, next_frontier
|
self.gate_tensors, qubits_frontier, next_frontier
|
||||||
)
|
)
|
||||||
|
|
||||||
operands = input_operands + gate_operands
|
operands = input_operands + gate_operands
|
||||||
mode_labels += gate_mode_labels
|
mode_labels += gate_mode_labels
|
||||||
|
|
||||||
self.init_inverse_circuit(self.circuit.invert())
|
self.init_inverse_circuit(self.circuit.invert())
|
||||||
|
|
||||||
|
|
||||||
next_frontier = max(qubits_frontier.values()) + 1
|
next_frontier = max(qubits_frontier.values()) + 1
|
||||||
|
|
||||||
#input_mode_labels, input_operands, qubits_frontier, next_frontier, inverse_gates = self._get_forward_inverse_metadata(coned_qubits)
|
# input_mode_labels, input_operands, qubits_frontier, next_frontier, inverse_gates = self._get_forward_inverse_metadata(coned_qubits)
|
||||||
|
|
||||||
|
pauli_gates = self.get_pauli_gates(
|
||||||
|
pauli_map, dtype=self.dtype, backend=self.backend
|
||||||
|
)
|
||||||
|
|
||||||
pauli_gates = self.get_pauli_gates(pauli_map, dtype=self.dtype, backend=self.backend)
|
|
||||||
|
|
||||||
|
|
||||||
gates_inverse = pauli_gates + self.gate_tensors_inverse
|
gates_inverse = pauli_gates + self.gate_tensors_inverse
|
||||||
|
|
||||||
gate_mode_labels_inverse, gate_operands_inverse = self._parse_gates_to_mode_labels_operands(
|
(
|
||||||
|
gate_mode_labels_inverse,
|
||||||
|
gate_operands_inverse,
|
||||||
|
) = self._parse_gates_to_mode_labels_operands(
|
||||||
gates_inverse, qubits_frontier, next_frontier
|
gates_inverse, qubits_frontier, next_frontier
|
||||||
)
|
)
|
||||||
mode_labels = mode_labels + gate_mode_labels_inverse + [[qubits_frontier[ix]] for ix in range(self.circuit.nqubits)]
|
mode_labels = (
|
||||||
operands = operands + gate_operands_inverse + operands[:self.circuit.nqubits]
|
mode_labels
|
||||||
|
+ gate_mode_labels_inverse
|
||||||
operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
|
+ [[qubits_frontier[ix]] for ix in range(self.circuit.nqubits)]
|
||||||
|
)
|
||||||
#expec = contract(*operand_exp_interleave)
|
operands = operands + gate_operands_inverse + operands[: self.circuit.nqubits]
|
||||||
#print(expec)
|
|
||||||
|
|
||||||
'''
|
operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
|
||||||
|
|
||||||
|
# expec = contract(*operand_exp_interleave)
|
||||||
|
# print(expec)
|
||||||
|
|
||||||
|
"""
|
||||||
gate_mode_labels, gate_operands = circ_utils.parse_gates_to_mode_labels_operands(gates,
|
gate_mode_labels, gate_operands = circ_utils.parse_gates_to_mode_labels_operands(gates,
|
||||||
qubits_frontier,
|
qubits_frontier,
|
||||||
next_frontier)
|
next_frontier)
|
||||||
@@ -219,5 +221,5 @@ class QiboCircuitToEinsum:
|
|||||||
|
|
||||||
output_mode_labels = []
|
output_mode_labels = []
|
||||||
expression = circ_utils.convert_mode_labels_to_expression(mode_labels, output_mode_labels)
|
expression = circ_utils.convert_mode_labels_to_expression(mode_labels, output_mode_labels)
|
||||||
'''
|
"""
|
||||||
return operand_exp_interleave
|
return operand_exp_interleave
|
||||||
|
|||||||
@@ -19,8 +19,6 @@ class QiboTNBackend(NumpyBackend):
|
|||||||
or platform == "cu_tensornet_expectation"
|
or platform == "cu_tensornet_expectation"
|
||||||
or platform == "cu_tensornet_nccl"
|
or platform == "cu_tensornet_nccl"
|
||||||
or platform == "cu_tensornet_nccl_expectation"
|
or platform == "cu_tensornet_nccl_expectation"
|
||||||
|
|
||||||
|
|
||||||
): # pragma: no cover
|
): # pragma: no cover
|
||||||
self.platform = platform
|
self.platform = platform
|
||||||
else:
|
else:
|
||||||
@@ -72,45 +70,44 @@ class QiboTNBackend(NumpyBackend):
|
|||||||
state = cutn.eval_mps(circuit, gate_algo, self.dtype)
|
state = cutn.eval_mps(circuit, gate_algo, self.dtype)
|
||||||
|
|
||||||
if self.platform == "qu_tensornet":
|
if self.platform == "qu_tensornet":
|
||||||
|
# init_state = np.random.random(2**circuit.nqubits) + 1j * np.random.random(2**circuit.nqubits)
|
||||||
#init_state = np.random.random(2**circuit.nqubits) + 1j * np.random.random(2**circuit.nqubits)
|
# init_state = init_state / np.sqrt((np.abs(init_state) ** 2).sum())
|
||||||
#init_state = init_state / np.sqrt((np.abs(init_state) ** 2).sum())
|
|
||||||
init_state = np.zeros(2**circuit.nqubits, dtype=self.dtype)
|
init_state = np.zeros(2**circuit.nqubits, dtype=self.dtype)
|
||||||
init_state[0] = 1.0
|
init_state[0] = 1.0
|
||||||
state = quimb.eval(circuit.to_qasm(), init_state, backend="numpy")
|
state = quimb.eval(circuit.to_qasm(), init_state, backend="numpy")
|
||||||
|
|
||||||
if self.platform == "cu_tensornet_mpi":
|
if self.platform == "cu_tensornet_mpi":
|
||||||
if initial_state is not None:
|
if initial_state is not None:
|
||||||
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
||||||
|
|
||||||
#state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
|
# state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
|
||||||
state, rank = cutn.eval_tn_MPI_2(circuit, self.dtype,32)
|
state, rank = cutn.eval_tn_MPI_2(circuit, self.dtype, 32)
|
||||||
if rank > 0:
|
if rank > 0:
|
||||||
state = np.array(0)
|
state = np.array(0)
|
||||||
|
|
||||||
if self.platform == "cu_tensornet_nccl":
|
if self.platform == "cu_tensornet_nccl":
|
||||||
if initial_state is not None:
|
if initial_state is not None:
|
||||||
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
||||||
|
|
||||||
#state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
|
# state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
|
||||||
state, rank = cutn.eval_tn_nccl(circuit, self.dtype,32)
|
state, rank = cutn.eval_tn_nccl(circuit, self.dtype, 32)
|
||||||
if rank > 0:
|
if rank > 0:
|
||||||
state = np.array(0)
|
state = np.array(0)
|
||||||
|
|
||||||
if self.platform == "cu_tensornet_expectation":
|
if self.platform == "cu_tensornet_expectation":
|
||||||
if initial_state is not None:
|
if initial_state is not None:
|
||||||
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
||||||
|
|
||||||
state = cutn.eval_expectation(circuit, self.dtype)
|
state = cutn.eval_expectation(circuit, self.dtype)
|
||||||
|
|
||||||
if self.platform == "cu_tensornet_mpi_expectation":
|
if self.platform == "cu_tensornet_mpi_expectation":
|
||||||
if initial_state is not None:
|
if initial_state is not None:
|
||||||
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
||||||
|
|
||||||
#state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
|
# state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
|
||||||
#state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
|
# state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
|
||||||
state, rank = cutn.eval_tn_MPI_2_expectation(circuit, self.dtype,32)
|
state, rank = cutn.eval_tn_MPI_2_expectation(circuit, self.dtype, 32)
|
||||||
|
|
||||||
if rank > 0:
|
if rank > 0:
|
||||||
state = np.array(0)
|
state = np.array(0)
|
||||||
|
|
||||||
@@ -118,10 +115,10 @@ class QiboTNBackend(NumpyBackend):
|
|||||||
if initial_state is not None:
|
if initial_state is not None:
|
||||||
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
|
||||||
|
|
||||||
#state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
|
# state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
|
||||||
#state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
|
# state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
|
||||||
state, rank = cutn.eval_tn_nccl_expectation(circuit, self.dtype,32)
|
state, rank = cutn.eval_tn_nccl_expectation(circuit, self.dtype, 32)
|
||||||
|
|
||||||
if rank > 0:
|
if rank > 0:
|
||||||
state = np.array(0)
|
state = np.array(0)
|
||||||
|
|
||||||
|
|||||||
@@ -14,9 +14,13 @@ def eval(qibo_circ, datatype):
|
|||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
return contract(*myconvertor.state_vector_operands())
|
return contract(*myconvertor.state_vector_operands())
|
||||||
|
|
||||||
|
|
||||||
def eval_expectation(qibo_circ, datatype):
|
def eval_expectation(qibo_circ, datatype):
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
return contract(*myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits)))
|
return contract(
|
||||||
|
*myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
|
def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
|
||||||
from mpi4py import MPI # this line initializes MPI
|
from mpi4py import MPI # this line initializes MPI
|
||||||
@@ -24,73 +28,79 @@ def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
|
|||||||
from cuquantum import Network
|
from cuquantum import Network
|
||||||
|
|
||||||
# Get the hostname
|
# Get the hostname
|
||||||
#hostname = socket.gethostname()
|
# hostname = socket.gethostname()
|
||||||
|
|
||||||
root = 0
|
root = 0
|
||||||
comm = MPI.COMM_WORLD
|
comm = MPI.COMM_WORLD
|
||||||
rank = comm.Get_rank()
|
rank = comm.Get_rank()
|
||||||
size = comm.Get_size()
|
size = comm.Get_size()
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
# print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
|
|
||||||
|
|
||||||
# Perform circuit conversion
|
# Perform circuit conversion
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
# print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
||||||
operands = myconvertor.state_vector_operands()
|
operands = myconvertor.state_vector_operands()
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
# print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
||||||
|
|
||||||
# Broadcast the operand data.
|
# Broadcast the operand data.
|
||||||
#operands = comm.bcast(operands, root)
|
# operands = comm.bcast(operands, root)
|
||||||
|
|
||||||
# Assign the device for each process.
|
# Assign the device for each process.
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
|
|
||||||
#dev = cp.cuda.Device(device_id)
|
# dev = cp.cuda.Device(device_id)
|
||||||
#free_mem, total_mem = dev.mem_info
|
# free_mem, total_mem = dev.mem_info
|
||||||
#print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
|
# print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
|
||||||
|
|
||||||
# Create network object.
|
# Create network object.
|
||||||
network = Network(*operands, options={'device_id' : device_id})
|
network = Network(*operands, options={"device_id": device_id})
|
||||||
|
|
||||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||||
path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
|
path, info = network.contract_path(
|
||||||
#print(f"Process {rank} has the path with the FLOP count {info.opt_cost}.")
|
optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
|
||||||
|
)
|
||||||
|
# print(f"Process {rank} has the path with the FLOP count {info.opt_cost}.")
|
||||||
|
|
||||||
# Select the best path from all ranks.
|
# Select the best path from all ranks.
|
||||||
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
||||||
|
|
||||||
#if rank == root:
|
# if rank == root:
|
||||||
# print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
|
# print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
|
||||||
|
|
||||||
# Broadcast info from the sender to all other ranks.
|
# Broadcast info from the sender to all other ranks.
|
||||||
info = comm.bcast(info, sender)
|
info = comm.bcast(info, sender)
|
||||||
|
|
||||||
# Set path and slices.
|
# Set path and slices.
|
||||||
path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
|
path, info = network.contract_path(
|
||||||
|
optimize={"path": info.path, "slicing": info.slices}
|
||||||
|
)
|
||||||
|
|
||||||
# Calculate this process's share of the slices.
|
# Calculate this process's share of the slices.
|
||||||
num_slices = info.num_slices
|
num_slices = info.num_slices
|
||||||
chunk, extra = num_slices // size, num_slices % size
|
chunk, extra = num_slices // size, num_slices % size
|
||||||
slice_begin = rank * chunk + min(rank, extra)
|
slice_begin = rank * chunk + min(rank, extra)
|
||||||
slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
slice_end = (
|
||||||
|
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||||
|
)
|
||||||
slices = range(slice_begin, slice_end)
|
slices = range(slice_begin, slice_end)
|
||||||
|
|
||||||
#print(f"Process {rank} is processing slice range: {slices}.")
|
# print(f"Process {rank} is processing slice range: {slices}.")
|
||||||
|
|
||||||
# Contract the group of slices the process is responsible for.
|
# Contract the group of slices the process is responsible for.
|
||||||
result = network.contract(slices=slices)
|
result = network.contract(slices=slices)
|
||||||
#print(f"Process {rank} result shape is : {result.shape}.")
|
# print(f"Process {rank} result shape is : {result.shape}.")
|
||||||
#print(f"Process {rank} result size is : {result.nbytes}.")
|
# print(f"Process {rank} result size is : {result.nbytes}.")
|
||||||
|
|
||||||
# Sum the partial contribution from each process on root.
|
# Sum the partial contribution from each process on root.
|
||||||
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
||||||
|
|
||||||
return result, rank
|
return result, rank
|
||||||
|
|
||||||
|
|
||||||
def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
|
def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
|
||||||
from mpi4py import MPI # this line initializes MPI
|
from mpi4py import MPI # this line initializes MPI
|
||||||
import socket
|
import socket
|
||||||
@@ -98,18 +108,18 @@ def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
|
|||||||
from cupy.cuda import nccl
|
from cupy.cuda import nccl
|
||||||
|
|
||||||
# Get the hostname
|
# Get the hostname
|
||||||
#hostname = socket.gethostname()
|
# hostname = socket.gethostname()
|
||||||
|
|
||||||
root = 0
|
root = 0
|
||||||
comm_mpi = MPI.COMM_WORLD
|
comm_mpi = MPI.COMM_WORLD
|
||||||
rank = comm_mpi.Get_rank()
|
rank = comm_mpi.Get_rank()
|
||||||
size = comm_mpi.Get_size()
|
size = comm_mpi.Get_size()
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
# print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
|
|
||||||
cp.cuda.Device(device_id).use()
|
cp.cuda.Device(device_id).use()
|
||||||
|
|
||||||
# Set up the NCCL communicator.
|
# Set up the NCCL communicator.
|
||||||
nccl_id = nccl.get_unique_id() if rank == root else None
|
nccl_id = nccl.get_unique_id() if rank == root else None
|
||||||
nccl_id = comm_mpi.bcast(nccl_id, root)
|
nccl_id = comm_mpi.bcast(nccl_id, root)
|
||||||
@@ -117,51 +127,66 @@ def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
|
|||||||
|
|
||||||
# Perform circuit conversion
|
# Perform circuit conversion
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
# print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
||||||
operands = myconvertor.state_vector_operands()
|
operands = myconvertor.state_vector_operands()
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
# print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
||||||
|
|
||||||
network = Network(*operands)
|
network = Network(*operands)
|
||||||
|
|
||||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||||
path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
|
path, info = network.contract_path(
|
||||||
|
optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
|
||||||
|
)
|
||||||
|
|
||||||
#print(f"Process {rank} has the path with the FLOP count {info.opt_cost}.")
|
# print(f"Process {rank} has the path with the FLOP count {info.opt_cost}.")
|
||||||
|
|
||||||
# Select the best path from all ranks.
|
# Select the best path from all ranks.
|
||||||
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
||||||
|
|
||||||
#if rank == root:
|
# if rank == root:
|
||||||
# print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
|
# print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
|
||||||
|
|
||||||
# Broadcast info from the sender to all other ranks.
|
# Broadcast info from the sender to all other ranks.
|
||||||
info = comm_mpi.bcast(info, sender)
|
info = comm_mpi.bcast(info, sender)
|
||||||
|
|
||||||
# Set path and slices.
|
# Set path and slices.
|
||||||
path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
|
path, info = network.contract_path(
|
||||||
|
optimize={"path": info.path, "slicing": info.slices}
|
||||||
|
)
|
||||||
|
|
||||||
# Calculate this process's share of the slices.
|
# Calculate this process's share of the slices.
|
||||||
num_slices = info.num_slices
|
num_slices = info.num_slices
|
||||||
chunk, extra = num_slices // size, num_slices % size
|
chunk, extra = num_slices // size, num_slices % size
|
||||||
slice_begin = rank * chunk + min(rank, extra)
|
slice_begin = rank * chunk + min(rank, extra)
|
||||||
slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
slice_end = (
|
||||||
|
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||||
|
)
|
||||||
slices = range(slice_begin, slice_end)
|
slices = range(slice_begin, slice_end)
|
||||||
|
|
||||||
#print(f"Process {rank} is processing slice range: {slices}.")
|
# print(f"Process {rank} is processing slice range: {slices}.")
|
||||||
|
|
||||||
# Contract the group of slices the process is responsible for.
|
# Contract the group of slices the process is responsible for.
|
||||||
result = network.contract(slices=slices)
|
result = network.contract(slices=slices)
|
||||||
#print(f"Process {rank} result shape is : {result.shape}.")
|
# print(f"Process {rank} result shape is : {result.shape}.")
|
||||||
#print(f"Process {rank} result size is : {result.nbytes}.")
|
# print(f"Process {rank} result size is : {result.nbytes}.")
|
||||||
|
|
||||||
# Sum the partial contribution from each process on root.
|
# Sum the partial contribution from each process on root.
|
||||||
stream_ptr = cp.cuda.get_current_stream().ptr
|
stream_ptr = cp.cuda.get_current_stream().ptr
|
||||||
comm_nccl.reduce(result.data.ptr, result.data.ptr, result.size, nccl.NCCL_FLOAT64, nccl.NCCL_SUM, root, stream_ptr)
|
comm_nccl.reduce(
|
||||||
|
result.data.ptr,
|
||||||
|
result.data.ptr,
|
||||||
|
result.size,
|
||||||
|
nccl.NCCL_FLOAT64,
|
||||||
|
nccl.NCCL_SUM,
|
||||||
|
root,
|
||||||
|
stream_ptr,
|
||||||
|
)
|
||||||
|
|
||||||
return result, rank
|
return result, rank
|
||||||
|
|
||||||
|
|
||||||
def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
|
def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
|
||||||
from mpi4py import MPI # this line initializes MPI
|
from mpi4py import MPI # this line initializes MPI
|
||||||
import socket
|
import socket
|
||||||
@@ -169,18 +194,18 @@ def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
|
|||||||
from cupy.cuda import nccl
|
from cupy.cuda import nccl
|
||||||
|
|
||||||
# Get the hostname
|
# Get the hostname
|
||||||
#hostname = socket.gethostname()
|
# hostname = socket.gethostname()
|
||||||
|
|
||||||
root = 0
|
root = 0
|
||||||
comm_mpi = MPI.COMM_WORLD
|
comm_mpi = MPI.COMM_WORLD
|
||||||
rank = comm_mpi.Get_rank()
|
rank = comm_mpi.Get_rank()
|
||||||
size = comm_mpi.Get_size()
|
size = comm_mpi.Get_size()
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
# print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
|
|
||||||
cp.cuda.Device(device_id).use()
|
cp.cuda.Device(device_id).use()
|
||||||
|
|
||||||
# Set up the NCCL communicator.
|
# Set up the NCCL communicator.
|
||||||
nccl_id = nccl.get_unique_id() if rank == root else None
|
nccl_id = nccl.get_unique_id() if rank == root else None
|
||||||
nccl_id = comm_mpi.bcast(nccl_id, root)
|
nccl_id = comm_mpi.bcast(nccl_id, root)
|
||||||
@@ -188,50 +213,64 @@ def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
|
|||||||
|
|
||||||
# Perform circuit conversion
|
# Perform circuit conversion
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
# print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
||||||
operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
|
operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
|
||||||
|
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
# print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
||||||
|
|
||||||
network = Network(*operands)
|
network = Network(*operands)
|
||||||
|
|
||||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||||
path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
|
path, info = network.contract_path(
|
||||||
|
optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
|
||||||
|
)
|
||||||
|
|
||||||
#print(f"Process {rank} has the path with the FLOP count {info.opt_cost}.")
|
# print(f"Process {rank} has the path with the FLOP count {info.opt_cost}.")
|
||||||
|
|
||||||
# Select the best path from all ranks.
|
# Select the best path from all ranks.
|
||||||
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
||||||
|
|
||||||
#if rank == root:
|
# if rank == root:
|
||||||
# print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
|
# print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
|
||||||
|
|
||||||
# Broadcast info from the sender to all other ranks.
|
# Broadcast info from the sender to all other ranks.
|
||||||
info = comm_mpi.bcast(info, sender)
|
info = comm_mpi.bcast(info, sender)
|
||||||
|
|
||||||
# Set path and slices.
|
# Set path and slices.
|
||||||
path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
|
path, info = network.contract_path(
|
||||||
|
optimize={"path": info.path, "slicing": info.slices}
|
||||||
|
)
|
||||||
|
|
||||||
# Calculate this process's share of the slices.
|
# Calculate this process's share of the slices.
|
||||||
num_slices = info.num_slices
|
num_slices = info.num_slices
|
||||||
chunk, extra = num_slices // size, num_slices % size
|
chunk, extra = num_slices // size, num_slices % size
|
||||||
slice_begin = rank * chunk + min(rank, extra)
|
slice_begin = rank * chunk + min(rank, extra)
|
||||||
slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
slice_end = (
|
||||||
|
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||||
|
)
|
||||||
slices = range(slice_begin, slice_end)
|
slices = range(slice_begin, slice_end)
|
||||||
|
|
||||||
#print(f"Process {rank} is processing slice range: {slices}.")
|
# print(f"Process {rank} is processing slice range: {slices}.")
|
||||||
|
|
||||||
# Contract the group of slices the process is responsible for.
|
# Contract the group of slices the process is responsible for.
|
||||||
result = network.contract(slices=slices)
|
result = network.contract(slices=slices)
|
||||||
#print(f"Process {rank} result shape is : {result.shape}.")
|
# print(f"Process {rank} result shape is : {result.shape}.")
|
||||||
#print(f"Process {rank} result size is : {result.nbytes}.")
|
# print(f"Process {rank} result size is : {result.nbytes}.")
|
||||||
|
|
||||||
# Sum the partial contribution from each process on root.
|
# Sum the partial contribution from each process on root.
|
||||||
stream_ptr = cp.cuda.get_current_stream().ptr
|
stream_ptr = cp.cuda.get_current_stream().ptr
|
||||||
comm_nccl.reduce(result.data.ptr, result.data.ptr, result.size, nccl.NCCL_FLOAT64, nccl.NCCL_SUM, root, stream_ptr)
|
comm_nccl.reduce(
|
||||||
|
result.data.ptr,
|
||||||
|
result.data.ptr,
|
||||||
|
result.size,
|
||||||
|
nccl.NCCL_FLOAT64,
|
||||||
|
nccl.NCCL_SUM,
|
||||||
|
root,
|
||||||
|
stream_ptr,
|
||||||
|
)
|
||||||
|
|
||||||
return result, rank
|
return result, rank
|
||||||
|
|
||||||
|
|
||||||
@@ -241,128 +280,144 @@ def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
|
|||||||
from cuquantum import Network
|
from cuquantum import Network
|
||||||
|
|
||||||
# Get the hostname
|
# Get the hostname
|
||||||
#hostname = socket.gethostname()
|
# hostname = socket.gethostname()
|
||||||
|
|
||||||
root = 0
|
root = 0
|
||||||
comm = MPI.COMM_WORLD
|
comm = MPI.COMM_WORLD
|
||||||
rank = comm.Get_rank()
|
rank = comm.Get_rank()
|
||||||
size = comm.Get_size()
|
size = comm.Get_size()
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
# print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
|
|
||||||
|
|
||||||
# Perform circuit conversion
|
# Perform circuit conversion
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
# print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
||||||
operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
|
operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
# print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
||||||
|
|
||||||
# Broadcast the operand data.
|
# Broadcast the operand data.
|
||||||
#operands = comm.bcast(operands, root)
|
# operands = comm.bcast(operands, root)
|
||||||
|
|
||||||
# Assign the device for each process.
|
# Assign the device for each process.
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
|
|
||||||
#dev = cp.cuda.Device(device_id)
|
# dev = cp.cuda.Device(device_id)
|
||||||
#free_mem, total_mem = dev.mem_info
|
# free_mem, total_mem = dev.mem_info
|
||||||
#print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
|
# print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
|
||||||
|
|
||||||
# Create network object.
|
# Create network object.
|
||||||
network = Network(*operands, options={'device_id' : device_id})
|
network = Network(*operands, options={"device_id": device_id})
|
||||||
|
|
||||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||||
path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
|
path, info = network.contract_path(
|
||||||
#print(f"Process {rank} has the path with the FLOP count {info.opt_cost}.")
|
optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
|
||||||
|
)
|
||||||
|
# print(f"Process {rank} has the path with the FLOP count {info.opt_cost}.")
|
||||||
|
|
||||||
# Select the best path from all ranks.
|
# Select the best path from all ranks.
|
||||||
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
||||||
|
|
||||||
#if rank == root:
|
# if rank == root:
|
||||||
# print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
|
# print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
|
||||||
|
|
||||||
# Broadcast info from the sender to all other ranks.
|
# Broadcast info from the sender to all other ranks.
|
||||||
info = comm.bcast(info, sender)
|
info = comm.bcast(info, sender)
|
||||||
|
|
||||||
# Set path and slices.
|
# Set path and slices.
|
||||||
path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
|
path, info = network.contract_path(
|
||||||
|
optimize={"path": info.path, "slicing": info.slices}
|
||||||
|
)
|
||||||
|
|
||||||
# Calculate this process's share of the slices.
|
# Calculate this process's share of the slices.
|
||||||
num_slices = info.num_slices
|
num_slices = info.num_slices
|
||||||
chunk, extra = num_slices // size, num_slices % size
|
chunk, extra = num_slices // size, num_slices % size
|
||||||
slice_begin = rank * chunk + min(rank, extra)
|
slice_begin = rank * chunk + min(rank, extra)
|
||||||
slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
slice_end = (
|
||||||
|
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||||
|
)
|
||||||
slices = range(slice_begin, slice_end)
|
slices = range(slice_begin, slice_end)
|
||||||
|
|
||||||
#print(f"Process {rank} is processing slice range: {slices}.")
|
# print(f"Process {rank} is processing slice range: {slices}.")
|
||||||
|
|
||||||
# Contract the group of slices the process is responsible for.
|
# Contract the group of slices the process is responsible for.
|
||||||
result = network.contract(slices=slices)
|
result = network.contract(slices=slices)
|
||||||
#print(f"Process {rank} result shape is : {result.shape}.")
|
# print(f"Process {rank} result shape is : {result.shape}.")
|
||||||
#print(f"Process {rank} result size is : {result.nbytes}.")
|
# print(f"Process {rank} result size is : {result.nbytes}.")
|
||||||
|
|
||||||
# Sum the partial contribution from each process on root.
|
# Sum the partial contribution from each process on root.
|
||||||
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
||||||
|
|
||||||
return result, rank
|
return result, rank
|
||||||
|
|
||||||
|
|
||||||
def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
|
def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
|
||||||
from mpi4py import MPI # this line initializes MPI
|
from mpi4py import MPI # this line initializes MPI
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
# Get the hostname
|
# Get the hostname
|
||||||
#hostname = socket.gethostname()
|
# hostname = socket.gethostname()
|
||||||
|
|
||||||
ncpu_threads = multiprocessing.cpu_count() // 2
|
ncpu_threads = multiprocessing.cpu_count() // 2
|
||||||
|
|
||||||
comm = MPI.COMM_WORLD
|
comm = MPI.COMM_WORLD
|
||||||
rank = comm.Get_rank()
|
rank = comm.Get_rank()
|
||||||
size = comm.Get_size()
|
size = comm.Get_size()
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
# print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
cp.cuda.Device(device_id).use()
|
cp.cuda.Device(device_id).use()
|
||||||
|
|
||||||
handle = cutn.create()
|
handle = cutn.create()
|
||||||
network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
|
network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft network opts",mem_avail, "rank =",rank)
|
# print("Mem avail: aft network opts",mem_avail, "rank =",rank)
|
||||||
cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
|
cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
|
# print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
|
||||||
# Perform circuit conversion
|
# Perform circuit conversion
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
operands_interleave = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
|
operands_interleave = myconvertor.expectation_operands(
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
PauliStringGen(qibo_circ.nqubits)
|
||||||
#print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
)
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
# print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
||||||
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
|
# print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
||||||
|
|
||||||
# Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
|
# Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
|
||||||
network = cutn.Network(*operands_interleave, options=network_opts)
|
network = cutn.Network(*operands_interleave, options=network_opts)
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
|
# print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
|
||||||
path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
|
path, opt_info = network.contract_path(
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
optimize={
|
||||||
#print("Mem avail: aft contract path",mem_avail, "rank =",rank)
|
"samples": n_samples,
|
||||||
|
"threads": ncpu_threads,
|
||||||
|
"slicing": {"min_slices": max(16, size)},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
|
# print("Mem avail: aft contract path",mem_avail, "rank =",rank)
|
||||||
# Execution: To execute the contraction using the optimal path found previously
|
# Execution: To execute the contraction using the optimal path found previously
|
||||||
#print("opt_cost",opt_info.opt_cost, "Process =",rank)
|
# print("opt_cost",opt_info.opt_cost, "Process =",rank)
|
||||||
|
|
||||||
|
num_slices = opt_info.num_slices # Andy
|
||||||
num_slices = opt_info.num_slices#Andy
|
chunk, extra = num_slices // size, num_slices % size # Andy
|
||||||
chunk, extra = num_slices // size, num_slices % size#Andy
|
slice_begin = rank * chunk + min(rank, extra) # Andy
|
||||||
slice_begin = rank * chunk + min(rank, extra)#Andy
|
slice_end = (
|
||||||
slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
|
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||||
slices = range(slice_begin, slice_end)#Andy
|
) # Andy
|
||||||
|
slices = range(slice_begin, slice_end) # Andy
|
||||||
result = network.contract(slices=slices)
|
result = network.contract(slices=slices)
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft contract",mem_avail, "rank =",rank)
|
# print("Mem avail: aft contract",mem_avail, "rank =",rank)
|
||||||
cutn.destroy(handle)
|
cutn.destroy(handle)
|
||||||
|
|
||||||
return result, rank
|
return result, rank
|
||||||
|
|
||||||
|
|
||||||
def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
|
def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
|
||||||
"""Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
|
"""Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
|
||||||
The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
|
The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
|
||||||
@@ -372,45 +427,52 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
|
|||||||
|
|
||||||
from mpi4py import MPI # this line initializes MPI
|
from mpi4py import MPI # this line initializes MPI
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
# Get the hostname
|
# Get the hostname
|
||||||
#hostname = socket.gethostname()
|
# hostname = socket.gethostname()
|
||||||
|
|
||||||
ncpu_threads = multiprocessing.cpu_count() // 2
|
ncpu_threads = multiprocessing.cpu_count() // 2
|
||||||
|
|
||||||
comm = MPI.COMM_WORLD
|
comm = MPI.COMM_WORLD
|
||||||
rank = comm.Get_rank()
|
rank = comm.Get_rank()
|
||||||
size = comm.Get_size()
|
size = comm.Get_size()
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
# print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
|
||||||
device_id = rank % getDeviceCount()
|
device_id = rank % getDeviceCount()
|
||||||
cp.cuda.Device(device_id).use()
|
cp.cuda.Device(device_id).use()
|
||||||
|
|
||||||
handle = cutn.create()
|
handle = cutn.create()
|
||||||
network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
|
network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft network opts",mem_avail, "rank =",rank)
|
# print("Mem avail: aft network opts",mem_avail, "rank =",rank)
|
||||||
cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
|
cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
|
# print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
|
||||||
# Perform circuit conversion
|
# Perform circuit conversion
|
||||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
# print("Mem avail: aft convetor",mem_avail, "rank =",rank)
|
||||||
operands_interleave = myconvertor.state_vector_operands()
|
operands_interleave = myconvertor.state_vector_operands()
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
# print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
|
||||||
|
|
||||||
# Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
|
# Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
|
||||||
network = cutn.Network(*operands_interleave, options=network_opts)
|
network = cutn.Network(*operands_interleave, options=network_opts)
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
|
# print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
|
||||||
network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
|
network.contract_path(
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
optimize={
|
||||||
#print("Mem avail: aft contract path",mem_avail, "rank =",rank)
|
"samples": n_samples,
|
||||||
|
"threads": ncpu_threads,
|
||||||
|
"slicing": {"min_slices": max(16, size)},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
|
# print("Mem avail: aft contract path",mem_avail, "rank =",rank)
|
||||||
# Execution: To execute the contraction using the optimal path found previously
|
# Execution: To execute the contraction using the optimal path found previously
|
||||||
#print("opt_cost",opt_info.opt_cost, "Process =",rank)
|
# print("opt_cost",opt_info.opt_cost, "Process =",rank)
|
||||||
|
|
||||||
'''
|
"""
|
||||||
path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
|
path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
|
||||||
|
|
||||||
num_slices = opt_info.num_slices#Andy
|
num_slices = opt_info.num_slices#Andy
|
||||||
@@ -419,11 +481,11 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
|
|||||||
slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
|
slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
|
||||||
slices = range(slice_begin, slice_end)#Andy
|
slices = range(slice_begin, slice_end)#Andy
|
||||||
result = network.contract(slices=slices)
|
result = network.contract(slices=slices)
|
||||||
'''
|
"""
|
||||||
result = network.contract()
|
result = network.contract()
|
||||||
|
|
||||||
#mem_avail = cp.cuda.Device().mem_info[0]
|
# mem_avail = cp.cuda.Device().mem_info[0]
|
||||||
#print("Mem avail: aft contract",mem_avail, "rank =",rank)
|
# print("Mem avail: aft contract",mem_avail, "rank =",rank)
|
||||||
cutn.destroy(handle)
|
cutn.destroy(handle)
|
||||||
|
|
||||||
return result, rank
|
return result, rank
|
||||||
@@ -437,18 +499,18 @@ def eval_mps(qibo_circ, gate_algo, datatype):
|
|||||||
myconvertor.mps_tensors, {"handle": myconvertor.handle}
|
myconvertor.mps_tensors, {"handle": myconvertor.handle}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def PauliStringGen(nqubits):
|
def PauliStringGen(nqubits):
|
||||||
|
|
||||||
if nqubits <= 0:
|
if nqubits <= 0:
|
||||||
return "Invalid input. N should be a positive integer."
|
return "Invalid input. N should be a positive integer."
|
||||||
|
|
||||||
#characters = 'IXYZ'
|
# characters = 'IXYZ'
|
||||||
characters = 'XXXZ'
|
characters = "XXXZ"
|
||||||
|
|
||||||
result = ''
|
result = ""
|
||||||
|
|
||||||
for i in range(nqubits):
|
for i in range(nqubits):
|
||||||
char_to_add = characters[i % len(characters)]
|
char_to_add = characters[i % len(characters)]
|
||||||
result += char_to_add
|
result += char_to_add
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|||||||
Reference in New Issue
Block a user