Updates to include expectation calculation

This commit is contained in:
tankya2
2024-01-24 11:40:51 +08:00
committed by Liwei Yang
parent 2a6b3a54f0
commit 0420cbfa52
4 changed files with 567 additions and 7 deletions

View File

@@ -21,6 +21,7 @@ class QiboCircuitToEinsum:
self.dtype = getattr(self.backend, dtype) self.dtype = getattr(self.backend, dtype)
self.init_basis_map(self.backend, dtype) self.init_basis_map(self.backend, dtype)
self.init_intermediate_circuit(circuit) self.init_intermediate_circuit(circuit)
self.circuit = circuit
def state_vector_operands(self): def state_vector_operands(self):
input_bitstring = "0" * len(self.active_qubits) input_bitstring = "0" * len(self.active_qubits)
@@ -109,3 +110,115 @@ class QiboCircuitToEinsum:
state_1 = asarray([0, 1], dtype=dtype) state_1 = asarray([0, 1], dtype=dtype)
self.basis_map = {"0": state_0, "1": state_1} self.basis_map = {"0": state_0, "1": state_1}
def init_inverse_circuit(self, circuit):
    """Collect gate tensors and active qubits of ``circuit`` on the instance.

    The gates in ``circuit.queue`` are visited in order and two attributes
    are (re)written:

    * ``self.gate_tensors_inverse`` -- list of ``(tensor, qubits)`` pairs,
      where ``qubits`` is the gate's control qubits followed by its target
      qubits and ``tensor`` is ``gate.matrix()`` converted with
      ``cp.asarray`` and reshaped to
      ``self.op_shape_from_qubits(len(qubits))``.
    * ``self.active_qubits_inverse`` -- ``np.unique`` of every qubit id
      acted on by at least one gate.

    Args:
        circuit: Circuit to parse. ``expectation_operands`` passes
            ``self.circuit.invert()`` here.
    """
    inverse_tensors = self.gate_tensors_inverse = []
    touched_qubits = []
    for gate in circuit.queue:
        qubits = gate.control_qubits + gate.target_qubits
        touched_qubits.extend(qubits)
        # Store each gate as (matrix tensor, qubit ids), the same layout the
        # cuQuantum Cirq parser uses:
        # https://github.com/NVIDIA/cuQuantum/blob/6b6339358f859ea930907b79854b90b2db71ab92/python/cuquantum/cutensornet/_internal/circuit_parser_utils_cirq.py#L32
        tensor_shape = self.op_shape_from_qubits(len(qubits))
        tensor = cp.asarray(gate.matrix()).reshape(tensor_shape)
        inverse_tensors.append((tensor, qubits))
    # Qubits with at least one gate acting on them anywhere in the circuit.
    self.active_qubits_inverse = np.unique(touched_qubits)
def get_pauli_gates(self, pauli_map, dtype='complex128', backend=cp):
    """Build the single-qubit Pauli operands described by ``pauli_map``.

    Args:
        pauli_map: Mapping from qubit to one of the characters
            ``'I'``, ``'X'``, ``'Y'`` or ``'Z'``.
        dtype: Data type handed to ``backend.asarray`` for every matrix.
        backend: Array package providing ``asarray``.

    Returns:
        A list of ``(matrix, (qubit,))`` tuples, one per entry of
        ``pauli_map`` in its iteration order.

    Raises:
        ValueError: If a character is not one of I/X/Y/Z.
    """
    to_array = backend.asarray
    paulis = {
        'I': to_array([[1, 0], [0, 1]], dtype=dtype),
        'X': to_array([[0, 1], [1, 0]], dtype=dtype),
        'Y': to_array([[0, -1j], [1j, 0]], dtype=dtype),
        'Z': to_array([[1, 0], [0, -1]], dtype=dtype),
    }

    gates = []
    for qubit, label in pauli_map.items():
        if label not in paulis:
            raise ValueError('pauli string character must be one of I/X/Y/Z')
        gates.append((paulis[label], (qubit,)))
    return gates
def expectation_operands(self, pauli_string):
    """Assemble interleaved operands for a Pauli-string contraction.

    The network is built in this order: one ``"0"`` input tensor per
    circuit qubit, the forward gates in ``self.gate_tensors``, one Pauli
    gate per qubit, the gates of ``self.circuit.invert()``, and finally the
    input tensors again attached to the last label of each qubit. Every
    mode label is therefore shared between operands; presumably the
    contraction yields the expectation value of the Pauli string --
    TODO confirm against the caller.

    Calls ``self.init_inverse_circuit(self.circuit.invert())`` and reads
    ``self.gate_tensors_inverse`` afterwards.

    Args:
        pauli_string: Sequence of Pauli characters; entry ``i`` is assigned
            to qubit ``i``. It is zipped with ``range(nqubits)``, so extra
            entries are ignored and missing ones leave that qubit without
            a Pauli gate.

    Returns:
        A flat list alternating ``operand, mode_labels`` for every tensor.
    """
    nqubits = self.circuit.nqubits

    # All qubits get an input tensor, not only the active ones.
    input_operands = self._get_bitstring_tensors("0" * nqubits)
    pauli_map = dict(zip(range(nqubits), pauli_string))

    mode_labels, qubits_frontier, next_frontier = self._init_mode_labels_from_qubits(
        range(nqubits)
    )

    # Forward half: the circuit's own gates.
    forward_labels, forward_operands = self._parse_gates_to_mode_labels_operands(
        self.gate_tensors, qubits_frontier, next_frontier
    )
    operands = input_operands + forward_operands
    mode_labels += forward_labels

    # Backward half: Pauli gates followed by the inverted circuit.
    self.init_inverse_circuit(self.circuit.invert())
    # Restart label allocation just past the largest label on any frontier
    # (presumably the frontier dict was advanced in place by the call above
    # -- TODO confirm).
    next_frontier = max(qubits_frontier.values()) + 1
    pauli_gates = self.get_pauli_gates(
        pauli_map, dtype=self.dtype, backend=self.backend
    )
    backward_labels, backward_operands = self._parse_gates_to_mode_labels_operands(
        pauli_gates + self.gate_tensors_inverse, qubits_frontier, next_frontier
    )

    # Close the network by re-attaching the input tensors to the final
    # frontier label of each qubit.
    closing_labels = [[qubits_frontier[qubit]] for qubit in range(nqubits)]
    mode_labels = mode_labels + backward_labels + closing_labels
    operands = operands + backward_operands + operands[:nqubits]

    interleaved = []
    for operand, labels in zip(operands, mode_labels):
        interleaved.extend((operand, labels))
    return interleaved

View File

@@ -21,7 +21,7 @@ class QiboCircuitToMPS:
self.handle = cutn.create() self.handle = cutn.create()
self.dtype = dtype self.dtype = dtype
self.mps_tensors = initial(self.num_qubits, dtype=dtype) self.mps_tensors = initial(self.num_qubits, dtype=dtype)
circuitconvertor = QiboCircuitToEinsum(circ_qibo) circuitconvertor = QiboCircuitToEinsum(circ_qibo, dtype=dtype)
for gate, qubits in circuitconvertor.gate_tensors: for gate, qubits in circuitconvertor.gate_tensors:
# mapping from qubits to qubit indices # mapping from qubits to qubit indices

View File

@@ -14,6 +14,13 @@ class QiboTNBackend(NumpyBackend):
platform == "cu_tensornet" platform == "cu_tensornet"
or platform == "cu_mps" or platform == "cu_mps"
or platform == "qu_tensornet" or platform == "qu_tensornet"
or platform == "cu_tensornet_mpi"
or platform == "cu_tensornet_mpi_expectation"
or platform == "cu_tensornet_expectation"
or platform == "cu_tensornet_nccl"
or platform == "cu_tensornet_nccl_expectation"
): # pragma: no cover ): # pragma: no cover
self.platform = platform self.platform = platform
else: else:
@@ -72,6 +79,52 @@ class QiboTNBackend(NumpyBackend):
init_state[0] = 1.0 init_state[0] = 1.0
state = quimb.eval(circuit.to_qasm(), init_state, backend="numpy") state = quimb.eval(circuit.to_qasm(), init_state, backend="numpy")
if self.platform == "cu_tensornet_mpi":
if initial_state is not None:
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
#state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
state, rank = cutn.eval_tn_MPI_2(circuit, self.dtype,32)
if rank > 0:
state = np.array(0)
if self.platform == "cu_tensornet_nccl":
if initial_state is not None:
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
#state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
state, rank = cutn.eval_tn_nccl(circuit, self.dtype,32)
if rank > 0:
state = np.array(0)
if self.platform == "cu_tensornet_expectation":
if initial_state is not None:
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
state = cutn.eval_expectation(circuit, self.dtype)
if self.platform == "cu_tensornet_mpi_expectation":
if initial_state is not None:
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
#state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
#state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
state, rank = cutn.eval_tn_MPI_2_expectation(circuit, self.dtype,32)
if rank > 0:
state = np.array(0)
if self.platform == "cu_tensornet_nccl_expectation":
if initial_state is not None:
raise_error(NotImplementedError, "QiboTN cannot support initial state.")
#state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
#state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
state, rank = cutn.eval_tn_nccl_expectation(circuit, self.dtype,32)
if rank > 0:
state = np.array(0)
if return_array: if return_array:
return state.flatten() return state.flatten()
else: else:

View File

@@ -13,6 +13,354 @@ def eval(qibo_circ, datatype):
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(*myconvertor.state_vector_operands()) return contract(*myconvertor.state_vector_operands())
def eval_expectation(qibo_circ, datatype):
    """Contract the expectation network of ``qibo_circ`` in one call.

    Args:
        qibo_circ: Circuit handed to ``QiboCircuitToEinsum``; its
            ``nqubits`` sets the length of the generated Pauli string.
        datatype: Passed as ``dtype`` to ``QiboCircuitToEinsum``.

    Returns:
        Whatever ``contract`` returns for the operands produced by
        ``expectation_operands`` with the string from ``PauliStringGen``.
    """
    converter = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    pauli_string = PauliStringGen(qibo_circ.nqubits)
    operands = converter.expectation_operands(pauli_string)
    return contract(*operands)
def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
    """Contract a circuit's state-vector network with slices split over MPI ranks.

    Every rank builds the same network and runs its own path search. The
    path with the lowest ``opt_cost`` is broadcast to all ranks, each rank
    contracts a contiguous share of the slices, and the partial results are
    summed onto rank 0 with ``MPI.SUM``.

    Args:
        qibo_circ: Circuit handed to ``QiboCircuitToEinsum``.
        datatype: Passed as ``dtype`` to ``QiboCircuitToEinsum``.
        n_samples: Number of hyper-optimizer samples each rank uses for its
            path search.

    Returns:
        ``(result, rank)``. ``result`` is the value returned by
        ``comm.reduce``: the summed contraction on rank 0 (mpi4py returns
        ``None`` on the other ranks).
    """
    from mpi4py import MPI  # this line initializes MPI
    from cuquantum import Network

    root = 0
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Perform circuit conversion.
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    operands = myconvertor.state_vector_operands()

    # Assign the device for each process.
    device_id = rank % getDeviceCount()
    network = Network(*operands, options={"device_id": device_id})

    # Search for a path on every rank. Slicing is forced so that there is at
    # least one slice per rank to contract in parallel.
    _, info = network.contract_path(
        optimize={
            "samples": n_samples,
            "slicing": {"min_slices": max(32, size)},
        }
    )

    # Pick the rank holding the cheapest path and adopt its path and slicing
    # everywhere, so all ranks contract the same decomposition.
    _, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
    info = comm.bcast(info, sender)
    _, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )

    # Calculate this process's share of the slices: the first ``extra``
    # ranks take one slice more than the rest. For the last rank the end
    # index evaluates to ``num_slices``.
    num_slices = info.num_slices
    chunk, extra = divmod(num_slices, size)
    slice_begin = rank * chunk + min(rank, extra)
    slice_end = (rank + 1) * chunk + min(rank + 1, extra)
    slices = range(slice_begin, slice_end)

    # Contract the group of slices the process is responsible for.
    result = network.contract(slices=slices)

    # Sum the partial contribution from each process on root.
    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
    return result, rank
def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
    """Contract a circuit's state-vector network over ranks, reducing with NCCL.

    MPI is used to bootstrap the NCCL communicator and to agree on the
    cheapest contraction path. Each rank then contracts a contiguous share
    of the slices and the partial results are summed in place onto rank 0
    with an NCCL reduce.

    Args:
        qibo_circ: Circuit handed to ``QiboCircuitToEinsum``.
        datatype: Passed as ``dtype`` to ``QiboCircuitToEinsum``.
        n_samples: Number of hyper-optimizer samples each rank uses for its
            path search.

    Returns:
        ``(result, rank)``. ``result`` is this rank's contraction buffer; on
        rank 0 it holds the sum over all ranks after the reduce.

    Raises:
        TypeError: If the contraction result has a dtype that cannot be
            mapped to an NCCL floating-point type.
    """
    from mpi4py import MPI  # this line initializes MPI
    from cuquantum import Network
    from cupy.cuda import nccl

    root = 0
    comm_mpi = MPI.COMM_WORLD
    rank = comm_mpi.Get_rank()
    size = comm_mpi.Get_size()

    device_id = rank % getDeviceCount()
    cp.cuda.Device(device_id).use()

    # Set up the NCCL communicator: root creates the id, MPI distributes it.
    nccl_id = nccl.get_unique_id() if rank == root else None
    nccl_id = comm_mpi.bcast(nccl_id, root)
    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)

    # Perform circuit conversion.
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    operands = myconvertor.state_vector_operands()
    network = Network(*operands)

    # Search for a path on every rank. Slicing is forced so that there is at
    # least one slice per rank to contract in parallel.
    _, info = network.contract_path(
        optimize={
            "samples": n_samples,
            "slicing": {"min_slices": max(32, size)},
        }
    )

    # Pick the rank holding the cheapest path and adopt its path and slicing
    # everywhere, so all ranks contract the same decomposition.
    _, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
    info = comm_mpi.bcast(info, sender)
    _, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )

    # Calculate this process's share of the slices: the first ``extra``
    # ranks take one slice more than the rest. For the last rank the end
    # index evaluates to ``num_slices``.
    num_slices = info.num_slices
    chunk, extra = divmod(num_slices, size)
    slice_begin = rank * chunk + min(rank, extra)
    slice_end = (rank + 1) * chunk + min(rank + 1, extra)
    slices = range(slice_begin, slice_end)

    # Contract the group of slices the process is responsible for.
    result = network.contract(slices=slices)

    # NCCL has no complex data types, so a complex buffer is reduced as
    # interleaved (real, imag) values of the matching real type; the element
    # count doubles accordingly. Summation is element-wise, so this is exact.
    nccl_layouts = {
        "float32": (nccl.NCCL_FLOAT32, 1),
        "float64": (nccl.NCCL_FLOAT64, 1),
        "complex64": (nccl.NCCL_FLOAT32, 2),
        "complex128": (nccl.NCCL_FLOAT64, 2),
    }
    try:
        nccl_dtype, reals_per_item = nccl_layouts[result.dtype.name]
    except KeyError:
        raise TypeError(
            f"cannot NCCL-reduce a result of dtype {result.dtype}"
        ) from None

    # Sum the partial contribution from each process on root (in place).
    stream_ptr = cp.cuda.get_current_stream().ptr
    comm_nccl.reduce(
        result.data.ptr,
        result.data.ptr,
        result.size * reals_per_item,
        nccl_dtype,
        nccl.NCCL_SUM,
        root,
        stream_ptr,
    )
    return result, rank
def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
    """Contract a circuit's expectation network over ranks, reducing with NCCL.

    The operands come from ``expectation_operands`` with the Pauli string
    produced by ``PauliStringGen(qibo_circ.nqubits)``. MPI is used to
    bootstrap the NCCL communicator and to agree on the cheapest contraction
    path. Each rank then contracts a contiguous share of the slices and the
    partial results are summed in place onto rank 0 with an NCCL reduce.

    Args:
        qibo_circ: Circuit handed to ``QiboCircuitToEinsum``.
        datatype: Passed as ``dtype`` to ``QiboCircuitToEinsum``.
        n_samples: Number of hyper-optimizer samples each rank uses for its
            path search.

    Returns:
        ``(result, rank)``. ``result`` is this rank's contraction buffer; on
        rank 0 it holds the sum over all ranks after the reduce.

    Raises:
        TypeError: If the contraction result has a dtype that cannot be
            mapped to an NCCL floating-point type.
    """
    from mpi4py import MPI  # this line initializes MPI
    from cuquantum import Network
    from cupy.cuda import nccl

    root = 0
    comm_mpi = MPI.COMM_WORLD
    rank = comm_mpi.Get_rank()
    size = comm_mpi.Get_size()

    device_id = rank % getDeviceCount()
    cp.cuda.Device(device_id).use()

    # Set up the NCCL communicator: root creates the id, MPI distributes it.
    nccl_id = nccl.get_unique_id() if rank == root else None
    nccl_id = comm_mpi.bcast(nccl_id, root)
    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)

    # Perform circuit conversion.
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
    network = Network(*operands)

    # Search for a path on every rank. Slicing is forced so that there is at
    # least one slice per rank to contract in parallel.
    _, info = network.contract_path(
        optimize={
            "samples": n_samples,
            "slicing": {"min_slices": max(32, size)},
        }
    )

    # Pick the rank holding the cheapest path and adopt its path and slicing
    # everywhere, so all ranks contract the same decomposition.
    _, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
    info = comm_mpi.bcast(info, sender)
    _, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )

    # Calculate this process's share of the slices: the first ``extra``
    # ranks take one slice more than the rest. For the last rank the end
    # index evaluates to ``num_slices``.
    num_slices = info.num_slices
    chunk, extra = divmod(num_slices, size)
    slice_begin = rank * chunk + min(rank, extra)
    slice_end = (rank + 1) * chunk + min(rank + 1, extra)
    slices = range(slice_begin, slice_end)

    # Contract the group of slices the process is responsible for.
    result = network.contract(slices=slices)

    # NCCL has no complex data types, so a complex buffer is reduced as
    # interleaved (real, imag) values of the matching real type; the element
    # count doubles accordingly. Summation is element-wise, so this is exact.
    nccl_layouts = {
        "float32": (nccl.NCCL_FLOAT32, 1),
        "float64": (nccl.NCCL_FLOAT64, 1),
        "complex64": (nccl.NCCL_FLOAT32, 2),
        "complex128": (nccl.NCCL_FLOAT64, 2),
    }
    try:
        nccl_dtype, reals_per_item = nccl_layouts[result.dtype.name]
    except KeyError:
        raise TypeError(
            f"cannot NCCL-reduce a result of dtype {result.dtype}"
        ) from None

    # Sum the partial contribution from each process on root (in place).
    stream_ptr = cp.cuda.get_current_stream().ptr
    comm_nccl.reduce(
        result.data.ptr,
        result.data.ptr,
        result.size * reals_per_item,
        nccl_dtype,
        nccl.NCCL_SUM,
        root,
        stream_ptr,
    )
    return result, rank
def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
    """Contract a circuit's expectation network with slices split over MPI ranks.

    The operands come from ``expectation_operands`` with the Pauli string
    produced by ``PauliStringGen(qibo_circ.nqubits)``. Every rank builds the
    same network and runs its own path search. The path with the lowest
    ``opt_cost`` is broadcast to all ranks, each rank contracts a contiguous
    share of the slices, and the partial results are summed onto rank 0 with
    ``MPI.SUM``.

    Args:
        qibo_circ: Circuit handed to ``QiboCircuitToEinsum``.
        datatype: Passed as ``dtype`` to ``QiboCircuitToEinsum``.
        n_samples: Number of hyper-optimizer samples each rank uses for its
            path search.

    Returns:
        ``(result, rank)``. ``result`` is the value returned by
        ``comm.reduce``: the summed contraction on rank 0 (mpi4py returns
        ``None`` on the other ranks).
    """
    from mpi4py import MPI  # this line initializes MPI
    from cuquantum import Network

    root = 0
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Perform circuit conversion.
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))

    # Assign the device for each process.
    device_id = rank % getDeviceCount()
    network = Network(*operands, options={"device_id": device_id})

    # Search for a path on every rank. Slicing is forced so that there is at
    # least one slice per rank to contract in parallel.
    _, info = network.contract_path(
        optimize={
            "samples": n_samples,
            "slicing": {"min_slices": max(32, size)},
        }
    )

    # Pick the rank holding the cheapest path and adopt its path and slicing
    # everywhere, so all ranks contract the same decomposition.
    _, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
    info = comm.bcast(info, sender)
    _, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )

    # Calculate this process's share of the slices: the first ``extra``
    # ranks take one slice more than the rest. For the last rank the end
    # index evaluates to ``num_slices``.
    num_slices = info.num_slices
    chunk, extra = divmod(num_slices, size)
    slice_begin = rank * chunk + min(rank, extra)
    slice_end = (rank + 1) * chunk + min(rank + 1, extra)
    slices = range(slice_begin, slice_end)

    # Contract the group of slices the process is responsible for.
    result = network.contract(slices=slices)

    # Sum the partial contribution from each process on root.
    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
    return result, rank
def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
    """Contract this rank's slices of a circuit's expectation network.

    A cuTensorNet handle is created, configured with the MPI communicator
    via ``distributed_reset_configuration``, and used for a network built
    from ``expectation_operands`` with the Pauli string produced by
    ``PauliStringGen(qibo_circ.nqubits)``. The handle is destroyed before
    returning, also when an error is raised.

    NOTE(review): this function contracts only the slice range assigned to
    the calling rank and performs no explicit reduction across ranks.
    Whether the distributed configuration on the handle already combines
    the partial results is not visible here -- confirm before relying on
    the returned value.

    Args:
        qibo_circ: Circuit handed to ``QiboCircuitToEinsum``.
        datatype: Passed as ``dtype`` to ``QiboCircuitToEinsum``.
        n_samples: Number of hyper-optimizer samples for the path search.

    Returns:
        ``(result, rank)`` where ``result`` is what ``network.contract``
        returned on this rank.
    """
    from mpi4py import MPI  # this line initializes MPI

    # Use half of the reported CPUs as path-finder threads.
    ncpu_threads = multiprocessing.cpu_count() // 2

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    device_id = rank % getDeviceCount()
    cp.cuda.Device(device_id).use()

    handle = cutn.create()
    try:
        network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
        cutn.distributed_reset_configuration(
            handle, *cutn.get_mpi_comm_pointer(comm)
        )

        # Perform circuit conversion.
        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        operands_interleave = myconvertor.expectation_operands(
            PauliStringGen(qibo_circ.nqubits)
        )

        # Pathfinder: search for the contraction path. Slicing is forced so
        # that there is at least one slice per rank.
        network = cutn.Network(*operands_interleave, options=network_opts)
        _, opt_info = network.contract_path(
            optimize={
                "samples": n_samples,
                "threads": ncpu_threads,
                "slicing": {"min_slices": max(16, size)},
            }
        )

        # This process's share of the slices: the first ``extra`` ranks take
        # one slice more than the rest. For the last rank the end index
        # evaluates to ``num_slices``.
        num_slices = opt_info.num_slices
        chunk, extra = divmod(num_slices, size)
        slice_begin = rank * chunk + min(rank, extra)
        slice_end = (rank + 1) * chunk + min(rank + 1, extra)
        slices = range(slice_begin, slice_end)

        # Execution: contract using the path found above.
        result = network.contract(slices=slices)
    finally:
        # Release the library handle even if conversion, path finding or
        # contraction failed.
        cutn.destroy(handle)

    return result, rank
def eval_tn_MPI(qibo_circ, datatype, n_samples=8): def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
"""Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI. """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
@@ -22,29 +370,59 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
""" """
from mpi4py import MPI # this line initializes MPI from mpi4py import MPI # this line initializes MPI
import socket
# Get the hostname
#hostname = socket.gethostname()
ncpu_threads = multiprocessing.cpu_count() // 2 ncpu_threads = multiprocessing.cpu_count() // 2
comm = MPI.COMM_WORLD comm = MPI.COMM_WORLD
rank = comm.Get_rank() rank = comm.Get_rank()
size = comm.Get_size()
#mem_avail = cp.cuda.Device().mem_info[0]
#print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
device_id = rank % getDeviceCount() device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use() cp.cuda.Device(device_id).use()
handle = cutn.create() handle = cutn.create()
cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
network_opts = cutn.NetworkOptions(handle=handle, blocking="auto") network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
#mem_avail = cp.cuda.Device().mem_info[0]
#print("Mem avail: aft network opts",mem_avail, "rank =",rank)
cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
#mem_avail = cp.cuda.Device().mem_info[0]
#print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
# Perform circuit conversion # Perform circuit conversion
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
#mem_avail = cp.cuda.Device().mem_info[0]
#print("Mem avail: aft convetor",mem_avail, "rank =",rank)
operands_interleave = myconvertor.state_vector_operands() operands_interleave = myconvertor.state_vector_operands()
#mem_avail = cp.cuda.Device().mem_info[0]
#print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
# Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object. # Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
network = cutn.Network(*operands_interleave, options=network_opts) network = cutn.Network(*operands_interleave, options=network_opts)
network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads}) #mem_avail = cp.cuda.Device().mem_info[0]
#print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
#mem_avail = cp.cuda.Device().mem_info[0]
#print("Mem avail: aft contract path",mem_avail, "rank =",rank)
# Execution: To execute the contraction using the optimal path found previously # Execution: To execute the contraction using the optimal path found previously
#print("opt_cost",opt_info.opt_cost, "Process =",rank)
'''
path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
num_slices = opt_info.num_slices#Andy
chunk, extra = num_slices // size, num_slices % size#Andy
slice_begin = rank * chunk + min(rank, extra)#Andy
slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
slices = range(slice_begin, slice_end)#Andy
result = network.contract(slices=slices)
'''
result = network.contract() result = network.contract()
#mem_avail = cp.cuda.Device().mem_info[0]
#print("Mem avail: aft contract",mem_avail, "rank =",rank)
cutn.destroy(handle) cutn.destroy(handle)
return result, rank return result, rank
@@ -57,3 +435,19 @@ def eval_mps(qibo_circ, gate_algo, datatype):
return mps_helper.contract_state_vector( return mps_helper.contract_state_vector(
myconvertor.mps_tensors, {"handle": myconvertor.handle} myconvertor.mps_tensors, {"handle": myconvertor.handle}
) )
def PauliStringGen(nqubits, pattern="XXXZ"):
    """Generate a Pauli string of length ``nqubits`` by cycling ``pattern``.

    Character ``i`` of the result is ``pattern[i % len(pattern)]``.

    Args:
        nqubits: Number of characters to generate; must be positive.
        pattern: Non-empty string of Pauli characters that is repeated to
            fill the result. Defaults to ``"XXXZ"``.

    Returns:
        The generated Pauli string.

    Raises:
        ValueError: If ``nqubits`` is not positive or ``pattern`` is empty.
    """
    if nqubits <= 0:
        # Raise instead of returning the message: a returned message would
        # be consumed by callers as if it were a Pauli string.
        raise ValueError("Invalid input. N should be a positive integer.")
    if not pattern:
        raise ValueError("pattern must contain at least one Pauli character.")
    period = len(pattern)
    return "".join(pattern[i % period] for i in range(nqubits))