Merge pull request #74 from qiboteam/update_tn

Update tn
This commit is contained in:
Andy Tan
2025-09-08 10:13:43 +08:00
committed by GitHub
6 changed files with 854 additions and 371 deletions

View File

@@ -1,21 +1,28 @@
import numpy as np import numpy as np
from qibo import hamiltonians
from qibo.backends import NumpyBackend from qibo.backends import NumpyBackend
from qibo.config import raise_error from qibo.config import raise_error
from qibo.result import QuantumState
from qibotn.backends.abstract import QibotnBackend from qibotn.backends.abstract import QibotnBackend
from qibotn.result import TensorNetworkResult
CUDA_TYPES = {}
class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
# CI does not test for GPU # CI does not test for GPU
"""Creates CuQuantum backend for QiboTN.""" """Creates CuQuantum backend for QiboTN."""
def __init__(self, runcard): def __init__(self, runcard=None):
super().__init__() super().__init__()
from cuquantum import cutensornet as cutn # pylint: disable=import-error from cuquantum import __version__ # pylint: disable=import-error
self.name = "qibotn"
self.platform = "cutensornet"
self.versions["cuquantum"] = __version__
self.supports_multigpu = True
self.configure_tn_simulation(runcard)
def configure_tn_simulation(self, runcard):
self.rank = None
if runcard is not None: if runcard is not None:
self.MPI_enabled = runcard.get("MPI_enabled", False) self.MPI_enabled = runcard.get("MPI_enabled", False)
self.NCCL_enabled = runcard.get("NCCL_enabled", False) self.NCCL_enabled = runcard.get("NCCL_enabled", False)
@@ -23,15 +30,17 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
expectation_enabled_value = runcard.get("expectation_enabled") expectation_enabled_value = runcard.get("expectation_enabled")
if expectation_enabled_value is True: if expectation_enabled_value is True:
self.expectation_enabled = True self.expectation_enabled = True
self.pauli_string_pattern = "XXXZ" self.observable = None
elif expectation_enabled_value is False: elif expectation_enabled_value is False:
self.expectation_enabled = False self.expectation_enabled = False
elif isinstance(expectation_enabled_value, dict): elif isinstance(expectation_enabled_value, dict):
self.expectation_enabled = True self.expectation_enabled = True
expectation_enabled_dict = runcard.get("expectation_enabled", {}) self.observable = runcard.get("expectation_enabled", {})
self.pauli_string_pattern = expectation_enabled_dict.get( elif isinstance(
"pauli_string_pattern", None expectation_enabled_value, hamiltonians.SymbolicHamiltonian
) ):
self.expectation_enabled = True
self.observable = expectation_enabled_value
else: else:
raise TypeError("expectation_enabled has an unexpected type") raise TypeError("expectation_enabled has an unexpected type")
@@ -59,44 +68,6 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
self.NCCL_enabled = False self.NCCL_enabled = False
self.expectation_enabled = False self.expectation_enabled = False
self.name = "qibotn"
self.cuquantum = cuquantum
self.cutn = cutn
self.platform = "cutensornet"
self.versions["cuquantum"] = self.cuquantum.__version__
self.supports_multigpu = True
self.handle = self.cutn.create()
global CUDA_TYPES
CUDA_TYPES = {
"complex64": (
self.cuquantum.cudaDataType.CUDA_C_32F,
self.cuquantum.ComputeType.COMPUTE_32F,
),
"complex128": (
self.cuquantum.cudaDataType.CUDA_C_64F,
self.cuquantum.ComputeType.COMPUTE_64F,
),
}
def __del__(self):
if hasattr(self, "cutn"):
self.cutn.destroy(self.handle)
def cuda_type(self, dtype="complex64"):
"""Get CUDA Type.
Parameters:
dtype (str, optional): Either single ("complex64") or double (complex128) precision. Defaults to "complex64".
Returns:
CUDA Type: tuple of cuquantum.cudaDataType and cuquantum.ComputeType
"""
if dtype in CUDA_TYPES:
return CUDA_TYPES[dtype]
else:
raise TypeError("Type can be either complex64 or complex128")
def execute_circuit( def execute_circuit(
self, circuit, initial_state=None, nshots=None, return_array=False self, circuit, initial_state=None, nshots=None, return_array=False
): # pragma: no cover ): # pragma: no cover
@@ -136,8 +107,8 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
and self.NCCL_enabled == False and self.NCCL_enabled == False
and self.expectation_enabled == False and self.expectation_enabled == False
): ):
state, rank = eval.dense_vector_tn_MPI(circuit, self.dtype, 32) state, self.rank = eval.dense_vector_tn_MPI(circuit, self.dtype, 32)
if rank > 0: if self.rank > 0:
state = np.array(0) state = np.array(0)
elif ( elif (
self.MPI_enabled == False self.MPI_enabled == False
@@ -145,8 +116,8 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
and self.NCCL_enabled == True and self.NCCL_enabled == True
and self.expectation_enabled == False and self.expectation_enabled == False
): ):
state, rank = eval.dense_vector_tn_nccl(circuit, self.dtype, 32) state, self.rank = eval.dense_vector_tn_nccl(circuit, self.dtype, 32)
if rank > 0: if self.rank > 0:
state = np.array(0) state = np.array(0)
elif ( elif (
self.MPI_enabled == False self.MPI_enabled == False
@@ -154,19 +125,17 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
and self.NCCL_enabled == False and self.NCCL_enabled == False
and self.expectation_enabled == True and self.expectation_enabled == True
): ):
state = eval.expectation_pauli_tn( state = eval.expectation_tn(circuit, self.dtype, self.observable)
circuit, self.dtype, self.pauli_string_pattern
)
elif ( elif (
self.MPI_enabled == True self.MPI_enabled == True
and self.MPS_enabled == False and self.MPS_enabled == False
and self.NCCL_enabled == False and self.NCCL_enabled == False
and self.expectation_enabled == True and self.expectation_enabled == True
): ):
state, rank = eval.expectation_pauli_tn_MPI( state, self.rank = eval.expectation_tn_MPI(
circuit, self.dtype, self.pauli_string_pattern, 32 circuit, self.dtype, self.observable, 32
) )
if rank > 0: if self.rank > 0:
state = np.array(0) state = np.array(0)
elif ( elif (
self.MPI_enabled == False self.MPI_enabled == False
@@ -174,15 +143,27 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
and self.NCCL_enabled == True and self.NCCL_enabled == True
and self.expectation_enabled == True and self.expectation_enabled == True
): ):
state, rank = eval.expectation_pauli_tn_nccl( state, self.rank = eval.expectation_tn_nccl(
circuit, self.dtype, self.pauli_string_pattern, 32 circuit, self.dtype, self.observable, 32
) )
if rank > 0: if self.rank > 0:
state = np.array(0) state = np.array(0)
else: else:
raise_error(NotImplementedError, "Compute type not supported.") raise_error(NotImplementedError, "Compute type not supported.")
if return_array: if self.expectation_enabled:
return state.flatten() return state.flatten().real
else: else:
return QuantumState(state.flatten()) if return_array:
statevector = state.flatten()
else:
statevector = state
return TensorNetworkResult(
nqubits=circuit.nqubits,
backend=self,
measures=None,
measured_probabilities=None,
prob_type=None,
statevector=statevector,
)

View File

@@ -195,12 +195,12 @@ class QiboCircuitToEinsum:
gates.append((operand, (qubit,))) gates.append((operand, (qubit,)))
return gates return gates
def expectation_operands(self, pauli_string): def expectation_operands(self, ham_gates):
"""Create the operands for pauli string expectation computation in the """Create the operands for pauli string expectation computation in the
interleave format. interleave format.
Parameters: Parameters:
pauli_string: A string representating the list of pauli gates. ham_gates: A list of gates derived from Qibo hamiltonian object.
Returns: Returns:
Operands for the contraction in the interleave format. Operands for the contraction in the interleave format.
@@ -208,8 +208,6 @@ class QiboCircuitToEinsum:
input_bitstring = "0" * self.circuit.nqubits input_bitstring = "0" * self.circuit.nqubits
input_operands = self._get_bitstring_tensors(input_bitstring) input_operands = self._get_bitstring_tensors(input_bitstring)
pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))
pauli_map = pauli_string
( (
mode_labels, mode_labels,
@@ -228,11 +226,7 @@ class QiboCircuitToEinsum:
next_frontier = max(qubits_frontier.values()) + 1 next_frontier = max(qubits_frontier.values()) + 1
pauli_gates = self.get_pauli_gates( gates_inverse = ham_gates + self.gate_tensors_inverse
pauli_map, dtype=self.dtype, backend=self.backend
)
gates_inverse = pauli_gates + self.gate_tensors_inverse
( (
gate_mode_labels_inverse, gate_mode_labels_inverse,

View File

@@ -1,45 +1,238 @@
import cupy as cp import cupy as cp
import cuquantum.cutensornet as cutn
from cupy.cuda import nccl
from cupy.cuda.runtime import getDeviceCount from cupy.cuda.runtime import getDeviceCount
from cuquantum import contract from cuquantum import Network, contract
from mpi4py import MPI
from qibo import hamiltonians
from qibo.symbols import I, X, Y, Z
from qibotn.circuit_convertor import QiboCircuitToEinsum from qibotn.circuit_convertor import QiboCircuitToEinsum
from qibotn.circuit_to_mps import QiboCircuitToMPS from qibotn.circuit_to_mps import QiboCircuitToMPS
from qibotn.mps_contraction_helper import MPSContractionHelper from qibotn.mps_contraction_helper import MPSContractionHelper
def dense_vector_tn(qibo_circ, datatype): def check_observable(observable, circuit_nqubit):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to """Checks the type of observable and returns the appropriate Hamiltonian."""
dense vector. if observable is None:
return build_observable(circuit_nqubit)
elif isinstance(observable, dict):
return create_hamiltonian_from_dict(observable, circuit_nqubit)
elif isinstance(observable, hamiltonians.SymbolicHamiltonian):
# TODO: check if the observable is compatible with the circuit
return observable
else:
raise TypeError("Invalid observable type.")
Parameters:
qibo_circ: The quantum circuit object. def build_observable(circuit_nqubit):
datatype (str): Either single ("complex64") or double (complex128) precision. """Helper function to construct a target observable."""
hamiltonian_form = 0
for i in range(circuit_nqubit):
hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit)
hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
return hamiltonian
def create_hamiltonian_from_dict(data, circuit_nqubit):
"""Create a Qibo SymbolicHamiltonian from a dictionary representation.
Ensures that each Hamiltonian term explicitly acts on all circuit qubits
by adding identity (`I`) gates where needed.
Args:
data (dict): Dictionary containing Hamiltonian terms.
circuit_nqubit (int): Total number of qubits in the quantum circuit.
Returns: Returns:
Dense vector of quantum circuit. hamiltonians.SymbolicHamiltonian: The constructed Hamiltonian.
""" """
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) PAULI_GATES = {"X": X, "Y": Y, "Z": Z}
return contract(*myconvertor.state_vector_operands())
terms = []
for term in data["terms"]:
coeff = term["coefficient"]
operators = term["operators"] # List of tuples like [("Z", 0), ("X", 1)]
# Convert the operator list into a dictionary {qubit_index: gate}
operator_dict = {q: PAULI_GATES[g] for g, q in operators}
# Build the full term ensuring all qubits are covered
full_term_expr = [
operator_dict[q](q) if q in operator_dict else I(q)
for q in range(circuit_nqubit)
]
# Multiply all operators together to form a single term
term_expr = full_term_expr[0]
for op in full_term_expr[1:]:
term_expr *= op
# Scale by the coefficient
final_term = coeff * term_expr
terms.append(final_term)
if not terms:
raise ValueError("No valid Hamiltonian terms were added.")
# Combine all terms
hamiltonian_form = sum(terms)
return hamiltonians.SymbolicHamiltonian(hamiltonian_form)
def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern): def get_ham_gates(pauli_map, dtype="complex128", backend=cp):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to """Populate the gates for all pauli operators.
expectation of given Pauli string.
Parameters: Parameters:
qibo_circ: The quantum circuit object. pauli_map: A dictionary mapping qubits to pauli operators.
datatype (str): Either single ("complex64") or double (complex128) precision. dtype: Data type for the tensor operands.
pauli_string_pattern(str): pauli string pattern. backend: The package the tensor operands belong to.
Returns: Returns:
Expectation of quantum circuit due to pauli string. A sequence of pauli gates.
""" """
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) asarray = backend.asarray
return contract( pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
*myconvertor.expectation_operands( pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) pauli_y = asarray([[0, -1j], [1j, 0]], dtype=dtype)
pauli_z = asarray([[1, 0], [0, -1]], dtype=dtype)
operand_map = {"I": pauli_i, "X": pauli_x, "Y": pauli_y, "Z": pauli_z}
gates = []
for qubit, pauli_char, coeff in pauli_map:
operand = operand_map.get(pauli_char)
if operand is None:
raise ValueError("pauli string character must be one of I/X/Y/Z")
operand = coeff * operand
gates.append((operand, (qubit,)))
return gates
def extract_gates_and_qubits(hamiltonian):
    """Flatten a symbolic Hamiltonian into per-term lists of single-qubit factors.

    Parameters:
        hamiltonian (qibo.hamiltonians.SymbolicHamiltonian): Hamiltonian whose
            ``terms`` are walked. Any other type is rejected.

    Returns:
        list of lists of tuples: one inner list per Hamiltonian term. Each
        entry is ``(qubit, gate_name, coefficient)`` where ``gate_name`` is the
        first character of ``str(factor)`` and ``qubit`` is the integer parsed
        from the remaining characters. The term coefficient is attached to the
        first factor only; every later factor of the same term carries ``1.0``
        so that the product over a term contains the coefficient exactly once.

    Raises:
        ValueError: If ``hamiltonian`` is not a ``SymbolicHamiltonian``.
    """
    if not isinstance(hamiltonian, hamiltonians.SymbolicHamiltonian):
        raise ValueError("Unsupported Hamiltonian type. Must be SymbolicHamiltonian.")

    extracted_terms = []
    for term in hamiltonian.terms:
        coeff = term.coefficient
        gate_qubit_list = []
        for position, factor in enumerate(term.factors):
            # presumably str(factor) looks like "X0" / "Z12": one gate letter
            # followed by the qubit index -- TODO confirm against qibo symbols
            label = str(factor)
            gate_name = label[0]
            qubit = int(label[1:])
            # Only the first factor carries the term coefficient.
            weight = coeff if position == 0 else 1.0
            gate_qubit_list.append((qubit, gate_name, weight))
        # NOTE(review): a term without factors yields an empty list here, so
        # its coefficient is not represented in the output -- confirm whether
        # constant terms can reach this function.
        extracted_terms.append(gate_qubit_list)
    return extracted_terms
def initialize_mpi():
    """Set up the MPI world communicator and bind this process to a GPU.

    The device index is the process rank modulo the number of visible CUDA
    devices, and that device is made current through CuPy.

    Returns:
        tuple: ``(comm, rank, size, device_id)``.
    """
    world = MPI.COMM_WORLD
    my_rank = world.Get_rank()
    world_size = world.Get_size()
    gpu_index = my_rank % getDeviceCount()
    cp.cuda.Device(gpu_index).use()
    return world, my_rank, world_size, gpu_index
def initialize_nccl(comm_mpi, rank, size):
    """Create an NCCL communicator shared by all ranks.

    Rank 0 generates the NCCL unique id; the id is then broadcast over
    ``comm_mpi`` from root 0 so every rank builds its communicator from the
    same id.

    Parameters:
        comm_mpi: MPI communicator used for the broadcast.
        rank (int): Rank of the calling process.
        size (int): Number of participating processes.

    Returns:
        nccl.NcclCommunicator: Communicator built from ``size``, the shared id
        and ``rank``.
    """
    unique_id = None
    if rank == 0:
        unique_id = nccl.get_unique_id()
    unique_id = comm_mpi.bcast(unique_id, root=0)
    return nccl.NcclCommunicator(size, unique_id, rank)
def get_operands(qibo_circ, datatype, rank, comm):
    """Convert the circuit on rank 0 and broadcast the operands to all ranks.

    Parameters:
        qibo_circ: The quantum circuit object.
        datatype (str): Data type forwarded to ``QiboCircuitToEinsum``.
        rank (int): Rank of the calling process; only rank 0 converts.
        comm: MPI communicator used for the broadcast.

    Returns:
        The state-vector operands as returned by ``comm.bcast`` from root 0.
    """
    local_operands = None
    if rank == 0:
        convertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        local_operands = convertor.state_vector_operands()
    return comm.bcast(local_operands, root=0)
def compute_optimal_path(network, n_samples, size, comm):
    """Search a contraction path on every rank and share the cheapest one.

    Each rank runs ``network.contract_path`` with ``n_samples`` samples and a
    slicing request of at least ``max(32, size)`` slices. The rank holding the
    smallest ``opt_cost`` is found with an ``MPI.MINLOC`` all-reduce and its
    ``info`` object is broadcast to everybody.

    Parameters:
        network: Network object exposing ``contract_path``.
        n_samples (int): Value passed as the ``samples`` optimizer option.
        size (int): Number of processes; lower bound for ``min_slices``.
        comm: MPI communicator used for the reduction and the broadcast.

    Returns:
        The ``info`` object broadcast from the rank with the lowest cost.
    """
    slicing_options = {
        "min_slices": max(32, size),
        "memory_model": cutn.MemoryModel.CUTENSOR,
    }
    optimizer_options = {"samples": n_samples, "slicing": slicing_options}
    # The path itself is not needed here; only ``info`` is shared.
    _, info = network.contract_path(optimize=optimizer_options)
    _, best_rank = comm.allreduce(
        sendobj=(info.opt_cost, comm.Get_rank()), op=MPI.MINLOC
    )
    return comm.bcast(info, best_rank)
def compute_slices(info, rank, size):
    """Return the contiguous range of slice indices assigned to ``rank``.

    ``info.num_slices`` is split into ``size`` chunks; the first
    ``num_slices % size`` ranks receive one extra slice each. The last rank
    always ends at ``num_slices``.

    Parameters:
        info: Object exposing an integer ``num_slices`` attribute.
        rank (int): Rank of the calling process.
        size (int): Number of processes sharing the slices.

    Returns:
        range: Half-open range of slice indices for this rank.
    """
    total = info.num_slices
    base, remainder = divmod(total, size)
    first = rank * base + min(rank, remainder)
    if rank == size - 1:
        stop = total
    else:
        stop = (rank + 1) * base + min(rank + 1, remainder)
    return range(first, stop)
def reduce_result(result, comm, method="MPI", root=0):
    """Sum partial results from all processes onto ``root``.

    Parameters:
        result: Partial contraction result of the calling process.
        comm: Communicator matching ``method`` (MPI or NCCL).
        method (str): Either ``"MPI"`` or ``"NCCL"``.
        root (int): Rank that receives the reduced value.

    Returns:
        For ``"MPI"``, whatever ``comm.reduce`` returns. For ``"NCCL"``, the
        same ``result`` object, which is used as both send and receive buffer
        of the reduction.

    Raises:
        TypeError: If ``method`` is ``"NCCL"`` and ``result.dtype`` is neither
            complex128 nor complex64.
        ValueError: If ``method`` is not ``"MPI"`` or ``"NCCL"``.
    """
    if method == "MPI":
        return comm.reduce(sendobj=result, op=MPI.SUM, root=root)
    if method != "NCCL":
        raise ValueError(f"Unknown reduce method: {method}")

    stream_ptr = cp.cuda.get_current_stream().ptr
    # A complex element is reduced as two real numbers of half its width.
    if result.dtype == cp.complex128:
        nccl_type = nccl.NCCL_FLOAT64
    elif result.dtype == cp.complex64:
        nccl_type = nccl.NCCL_FLOAT32
    else:
        raise TypeError(f"Unsupported dtype for NCCL reduce: {result.dtype}")
    count = result.size * 2

    comm.reduce(
        result.data.ptr,
        result.data.ptr,
        count,
        nccl_type,
        nccl.NCCL_SUM,
        root,
        stream_ptr,
    )
    return result
def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8): def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
@@ -61,60 +254,16 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
Returns: Returns:
Dense vector of quantum circuit. Dense vector of quantum circuit.
""" """
comm, rank, size, device_id = initialize_mpi()
from cuquantum import Network operands = get_operands(qibo_circ, datatype, rank, comm)
from mpi4py import MPI
root = 0
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
device_id = rank % getDeviceCount()
# Perform circuit conversion
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
# Assign the device for each process.
device_id = rank % getDeviceCount()
# Create network object.
network = Network(*operands, options={"device_id": device_id}) network = Network(*operands, options={"device_id": device_id})
info = compute_optimal_path(network, n_samples, size, comm)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
)
# Select the best path from all ranks.
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm.bcast(info, sender)
# Set path and slices.
path, info = network.contract_path( path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices} optimize={"path": info.path, "slicing": info.slices}
) )
slices = compute_slices(info, rank, size)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices) result = network.contract(slices=slices)
return reduce_result(result, comm, method="MPI"), rank
# Sum the partial contribution from each process on root.
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
return result, rank
def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8): def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
@@ -136,74 +285,35 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
Returns: Returns:
Dense vector of quantum circuit. Dense vector of quantum circuit.
""" """
from cupy.cuda import nccl comm_mpi, rank, size, device_id = initialize_mpi()
from cuquantum import Network comm_nccl = initialize_nccl(comm_mpi, rank, size)
from mpi4py import MPI operands = get_operands(qibo_circ, datatype, rank, comm_mpi)
root = 0
comm_mpi = MPI.COMM_WORLD
rank = comm_mpi.Get_rank()
size = comm_mpi.Get_size()
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
# Set up the NCCL communicator.
nccl_id = nccl.get_unique_id() if rank == root else None
nccl_id = comm_mpi.bcast(nccl_id, root)
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
# Perform circuit conversion
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
network = Network(*operands) network = Network(*operands)
info = compute_optimal_path(network, n_samples, size, comm_mpi)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
)
# Select the best path from all ranks.
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm_mpi.bcast(info, sender)
# Set path and slices.
path, info = network.contract_path( path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices} optimize={"path": info.path, "slicing": info.slices}
) )
slices = compute_slices(info, rank, size)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices) result = network.contract(slices=slices)
return reduce_result(result, comm_nccl, method="NCCL"), rank
# Sum the partial contribution from each process on root.
stream_ptr = cp.cuda.get_current_stream().ptr
comm_nccl.reduce(
result.data.ptr,
result.data.ptr,
result.size,
nccl.NCCL_FLOAT64,
nccl.NCCL_SUM,
root,
stream_ptr,
)
return result, rank
def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8): def dense_vector_tn(qibo_circ, datatype):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
dense vector.
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
Returns:
Dense vector of quantum circuit.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(*myconvertor.state_vector_operands())
def expectation_tn_nccl(qibo_circ, datatype, observable, n_samples=8):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to """Convert qibo circuit to tensornet (TN) format and perform contraction to
expectation of given Pauli string using multi node and multi GPU through expectation of given Pauli string using multi node and multi GPU through
NCCL. NCCL.
@@ -226,76 +336,53 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
Returns: Returns:
Expectation of quantum circuit due to pauli string. Expectation of quantum circuit due to pauli string.
""" """
from cupy.cuda import nccl
from cuquantum import Network
from mpi4py import MPI
root = 0 comm_mpi, rank, size, device_id = initialize_mpi()
comm_mpi = MPI.COMM_WORLD
rank = comm_mpi.Get_rank()
size = comm_mpi.Get_size()
device_id = rank % getDeviceCount() comm_nccl = initialize_nccl(comm_mpi, rank, size)
cp.cuda.Device(device_id).use() observable = check_observable(observable, qibo_circ.nqubits)
# Set up the NCCL communicator. ham_gate_map = extract_gates_and_qubits(observable)
nccl_id = nccl.get_unique_id() if rank == root else None
nccl_id = comm_mpi.bcast(nccl_id, root)
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
# Perform circuit conversion if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.expectation_operands(
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
)
network = Network(*operands) exp = 0
for each_ham in ham_gate_map:
ham_gates = get_ham_gates(each_ham)
# Perform circuit conversion
if rank == 0:
operands = myconvertor.expectation_operands(ham_gates)
else:
operands = None
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. operands = comm_mpi.bcast(operands, root=0)
path, info = network.contract_path(
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
)
# Select the best path from all ranks. network = Network(*operands)
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks. # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
info = comm_mpi.bcast(info, sender) info = compute_optimal_path(network, n_samples, size, comm_mpi)
# Set path and slices. # Recompute path with the selected optimal settings
path, info = network.contract_path( path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices} optimize={"path": info.path, "slicing": info.slices}
) )
# Calculate this process's share of the slices. slices = compute_slices(info, rank, size)
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for. # Contract the group of slices the process is responsible for.
result = network.contract(slices=slices) result = network.contract(slices=slices)
# Sum the partial contribution from each process on root. # Sum the partial contribution from each process on root.
stream_ptr = cp.cuda.get_current_stream().ptr result = reduce_result(result, comm_nccl, method="NCCL", root=0)
comm_nccl.reduce(
result.data.ptr,
result.data.ptr,
result.size,
nccl.NCCL_FLOAT64,
nccl.NCCL_SUM,
root,
stream_ptr,
)
return result, rank exp += result
return exp, rank
def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_samples=8): def expectation_tn_MPI(qibo_circ, datatype, observable, n_samples=8):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to """Convert qibo circuit to tensornet (TN) format and perform contraction to
expectation of given Pauli string using multi node and multi GPU through expectation of given Pauli string using multi node and multi GPU through
MPI. MPI.
@@ -318,61 +405,76 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
Returns: Returns:
Expectation of quantum circuit due to pauli string. Expectation of quantum circuit due to pauli string.
""" """
from cuquantum import Network # Initialize MPI and device
from mpi4py import MPI # this line initializes MPI comm, rank, size, device_id = initialize_mpi()
root = 0 observable = check_observable(observable, qibo_circ.nqubits)
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
device_id = rank % getDeviceCount() ham_gate_map = extract_gates_and_qubits(observable)
# Perform circuit conversion if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
exp = 0
for each_ham in ham_gate_map:
ham_gates = get_ham_gates(each_ham)
# Perform circuit conversion
# Perform circuit conversion
if rank == 0:
operands = myconvertor.expectation_operands(ham_gates)
else:
operands = None
operands = comm.bcast(operands, root=0)
# Create network object.
network = Network(*operands, options={"device_id": device_id})
# Compute optimal contraction path
info = compute_optimal_path(network, n_samples, size, comm)
# Set path and slices.
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Compute slice range for each rank
slices = compute_slices(info, rank, size)
# Perform contraction
result = network.contract(slices=slices)
# Sum the partial contribution from each process on root.
result = reduce_result(result, comm, method="MPI", root=0)
if rank == 0:
exp += result
return exp, rank
def expectation_tn(qibo_circ, datatype, observable):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
expectation of given Pauli string.
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
pauli_string_pattern(str): pauli string pattern.
Returns:
Expectation of quantum circuit due to pauli string.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.expectation_operands( observable = check_observable(observable, qibo_circ.nqubits)
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
)
# Assign the device for each process. ham_gate_map = extract_gates_and_qubits(observable)
device_id = rank % getDeviceCount() exp = 0
for each_ham in ham_gate_map:
# Create network object. ham_gates = get_ham_gates(each_ham)
network = Network(*operands, options={"device_id": device_id}) expectation_operands = myconvertor.expectation_operands(ham_gates)
exp += contract(*expectation_operands)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. return exp
path, info = network.contract_path(
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
)
# Select the best path from all ranks.
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm.bcast(info, sender)
# Set path and slices.
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices)
# Sum the partial contribution from each process on root.
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
return result, rank
def dense_vector_mps(qibo_circ, gate_algo, datatype): def dense_vector_mps(qibo_circ, gate_algo, datatype):
@@ -393,27 +495,3 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype):
return mps_helper.contract_state_vector( return mps_helper.contract_state_vector(
myconvertor.mps_tensors, {"handle": myconvertor.handle} myconvertor.mps_tensors, {"handle": myconvertor.handle}
) )
def pauli_string_gen(nqubits, pauli_string_pattern):
    """Repeat a Pauli pattern cyclically until it covers ``nqubits`` qubits.

    Parameters:
        nqubits(int): Number of qubits of Quantum Circuit
        pauli_string_pattern(str): Strings representing sequence of pauli gates.

    Returns:
        String of length ``nqubits`` built by cycling through the pattern.
        Example: pattern: "XZ", number of qubit: 7, output = XZXZXZX

    NOTE(review): for ``nqubits <= 0`` a human-readable message is returned
    instead of raising; callers receive it as if it were a Pauli string.
    """
    if nqubits <= 0:
        return "Invalid input. N should be a positive integer."

    return "".join(
        pauli_string_pattern[position % len(pauli_string_pattern)]
        for position in range(nqubits)
    )

View File

@@ -9,16 +9,16 @@ import pytest
# backends to be tested # backends to be tested
# TODO: add cutensornet and quimb here as well # TODO: add cutensornet and quimb here as well
BACKENDS = ["qmatchatea"] BACKENDS = ["cutensornet"]
# BACKENDS = ["qmatchatea"]
def get_backend(backend_name): def get_backend(backend_name):
from qibotn.backends.cutensornet import CuTensorNet
from qibotn.backends.qmatchatea import QMatchaTeaBackend from qibotn.backends.qmatchatea import QMatchaTeaBackend
NAME2BACKEND = { NAME2BACKEND = {"qmatchatea": QMatchaTeaBackend, "cutensornet": CuTensorNet}
"qmatchatea": QMatchaTeaBackend,
}
return NAME2BACKEND[backend_name]() return NAME2BACKEND[backend_name]()

View File

@@ -1,11 +1,13 @@
from timeit import default_timer as timer import math
import config
import cupy as cp import cupy as cp
import numpy as np
import pytest import pytest
import qibo import qibo
from qibo import construct_backend, hamiltonians
from qibo.models import QFT from qibo.models import QFT
from qibo.symbols import X, Z
ABS_TOL = 1e-7
def qibo_qft(nqubits, swaps): def qibo_qft(nqubits, swaps):
@@ -14,37 +16,73 @@ def qibo_qft(nqubits, swaps):
return circ_qibo, state_vec return circ_qibo, state_vec
def time(func): def build_observable(nqubits):
start = timer() """Helper function to construct a target observable."""
res = func() hamiltonian_form = 0
end = timer() for i in range(nqubits):
time = end - start hamiltonian_form += 0.5 * X(i % nqubits) * Z((i + 1) % nqubits)
return time, res
hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
return hamiltonian, hamiltonian_form
def build_observable_dict(nqubits):
"""Construct a target observable as a dictionary representation.
Returns a dictionary suitable for `create_hamiltonian_from_dict`.
"""
terms = []
for i in range(nqubits):
term = {
"coefficient": 0.5,
"operators": [("X", i % nqubits), ("Z", (i + 1) % nqubits)],
}
terms.append(term)
return {"terms": terms}
@pytest.mark.gpu @pytest.mark.gpu
@pytest.mark.parametrize("nqubits", [1, 2, 5, 10]) @pytest.mark.parametrize("nqubits", [1, 2, 5, 10])
def test_eval(nqubits: int, dtype="complex128"): def test_eval(nqubits: int, dtype="complex128"):
"""Evaluate QASM with cuQuantum. """
Args: Args:
nqubits (int): Total number of qubits in the system. nqubits (int): Total number of qubits in the system.
dtype (str): The data type for precision, 'complex64' for single, dtype (str): The data type for precision, 'complex64' for single,
'complex128' for double. 'complex128' for double.
""" """
import qibotn.eval
# Test qibo # Test qibo
qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform) qibo.set_backend(backend="numpy")
qibo_time, (qibo_circ, result_sv) = time(lambda: qibo_qft(nqubits, swaps=True)) qibo_circ, result_sv = qibo_qft(nqubits, swaps=True)
result_sv_cp = cp.asarray(result_sv)
# Test Cuquantum # Test cutensornet
cutn_time, result_tn = time( backend = construct_backend(backend="qibotn", platform="cutensornet")
lambda: qibotn.eval.dense_vector_tn(qibo_circ, dtype).flatten() # Test with no settings specified. Default is dense vector calculation without MPI or NCCL.
result_tn = backend.execute_circuit(circuit=qibo_circ)
print(
f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}"
) )
assert cp.allclose(
result_sv_cp, result_tn.statevector.flatten()
), "Resulting dense vectors do not match"
assert 1e-2 * qibo_time < cutn_time < 1e2 * qibo_time # Test with explicit settings specified.
assert np.allclose(result_sv, result_tn), "Resulting dense vectors do not match" comp_set_w_bool = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": False,
}
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
print(
f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}"
)
assert cp.allclose(
result_sv_cp, result_tn.statevector.flatten()
), "Resulting dense vectors do not match"
@pytest.mark.gpu @pytest.mark.gpu
@@ -57,28 +95,105 @@ def test_mps(nqubits: int, dtype="complex128"):
dtype (str): The data type for precision, 'complex64' for single, dtype (str): The data type for precision, 'complex64' for single,
'complex128' for double. 'complex128' for double.
""" """
import qibotn.eval
# Test qibo # Test qibo
qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform) qibo.set_backend(backend="numpy")
qibo_circ, result_sv = qibo_qft(nqubits, swaps=True)
qibo_time, (circ_qibo, result_sv) = time(lambda: qibo_qft(nqubits, swaps=True))
result_sv_cp = cp.asarray(result_sv) result_sv_cp = cp.asarray(result_sv)
# Test of MPS # Test cutensornet
gate_algo = { backend = construct_backend(backend="qibotn", platform="cutensornet")
"qr_method": False, # Test with simple MPS settings specified using bool. Uses the default MPS parameters.
"svd_method": { comp_set_w_bool = {
"partition": "UV", "MPI_enabled": False,
"abs_cutoff": 1e-12, "MPS_enabled": True,
}, "NCCL_enabled": False,
"expectation_enabled": False,
} }
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
print(
f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}"
)
assert cp.allclose(
result_tn.statevector.flatten(), result_sv_cp
), "Resulting dense vectors do not match"
cutn_time, result_tn = time( # Test with explicit MPS computation settings specified using Dict. Users able to specify parameters like qr_method etc.
lambda: qibotn.eval.dense_vector_mps(circ_qibo, gate_algo, dtype).flatten() comp_set_w_MPS_config_para = {
"MPI_enabled": False,
"MPS_enabled": {
"qr_method": False,
"svd_method": {
"partition": "UV",
"abs_cutoff": 1e-12,
},
},
"NCCL_enabled": False,
"expectation_enabled": False,
}
backend.configure_tn_simulation(comp_set_w_MPS_config_para)
result_tn = backend.execute_circuit(circuit=qibo_circ)
print(
f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}"
)
assert cp.allclose(
result_tn.statevector.flatten(), result_sv_cp
), "Resulting dense vectors do not match"
@pytest.mark.parametrize("nqubits", [2, 5, 10])
def test_expectation(nqubits: int, dtype="complex128"):
# Test qibo
qibo_circ, state_vec_qibo = qibo_qft(nqubits, swaps=True)
ham, ham_form = build_observable(nqubits)
numpy_backend = construct_backend("numpy")
exact_expval = numpy_backend.calculate_expectation_state(
hamiltonian=ham,
state=state_vec_qibo,
normalize=False,
) )
print(f"State vector difference: {abs(result_tn - result_sv_cp).max():0.3e}") # Test cutensornet
backend = construct_backend(backend="qibotn", platform="cutensornet")
assert cp.allclose(result_tn, result_sv_cp) # Test with simple settings using bool. Uses default Hamilitonian for expectation calculation.
comp_set_w_bool = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": True,
}
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
assert math.isclose(
exact_expval.item(), result_tn.real.get().item(), abs_tol=ABS_TOL
)
# Test with user defined hamiltonian using "hamiltonians.SymbolicHamiltonian" object.
comp_set_w_hamiltonian_obj = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": ham,
}
backend.configure_tn_simulation(comp_set_w_hamiltonian_obj)
result_tn = backend.execute_circuit(circuit=qibo_circ)
assert math.isclose(
exact_expval.item(), result_tn.real.get().item(), abs_tol=ABS_TOL
)
# Test with user defined hamiltonian using Dictionary object form of hamiltonian.
ham_dict = build_observable_dict(nqubits)
comp_set_w_hamiltonian_dict = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": ham_dict,
}
backend.configure_tn_simulation(comp_set_w_hamiltonian_dict)
result_tn = backend.execute_circuit(circuit=qibo_circ)
assert math.isclose(
exact_expval.item(), result_tn.real.get().item(), abs_tol=ABS_TOL
)

View File

@@ -0,0 +1,315 @@
# mpirun --allow-run-as-root -np 2 python -m pytest --with-mpi test_cuquantum_cutensor_mpi_backend.py
import math
import cupy as cp
import numpy as np
import pytest
import qibo
from qibo import construct_backend, hamiltonians
from qibo.models import QFT
from qibo.symbols import X, Z
# Absolute tolerance passed to math.isclose when comparing expectation values.
ABS_TOL = 1e-7
def qibo_qft(nqubits, swaps):
    """Create a qibo QFT circuit and execute it.

    Returns:
        tuple: the circuit and the value of ``circuit().state(numpy=True)``.
    """
    qft_circuit = QFT(nqubits, swaps)
    return qft_circuit, qft_circuit().state(numpy=True)
def build_observable(nqubits):
    """Helper function to construct a target observable.

    The symbolic form is the sum over every qubit ``q`` of
    ``0.5 * X(q) * Z(q + 1)``, where the ``Z`` index wraps around modulo
    ``nqubits``.

    Returns:
        tuple: the ``SymbolicHamiltonian`` built from the form, and the form.
    """
    # ``sum`` starts from 0, matching the integer seed of the accumulation.
    symbolic_form = sum(
        0.5 * X(qubit) * Z((qubit + 1) % nqubits) for qubit in range(nqubits)
    )
    return hamiltonians.SymbolicHamiltonian(form=symbolic_form), symbolic_form
def build_observable_dict(nqubits):
    """Construct a target observable as a dictionary representation.

    One term is produced per qubit ``q``: coefficient ``0.5`` with an ``X`` on
    ``q`` and a ``Z`` on ``(q + 1) % nqubits``.

    Returns a dictionary suitable for `create_hamiltonian_from_dict`.
    """
    return {
        "terms": [
            {
                "coefficient": 0.5,
                "operators": [("X", qubit), ("Z", (qubit + 1) % nqubits)],
            }
            for qubit in range(nqubits)
        ]
    }
@pytest.mark.gpu
@pytest.mark.mpi
@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10])
def test_eval_mpi(nqubits: int, dtype="complex128"):
    """Compare the MPI-enabled cutensornet state vector with qibo's numpy one.

    Args:
        nqubits (int): Total number of qubits in the system.
        dtype (str): The data type for precision, 'complex64' for single,
            'complex128' for double. Not referenced in the test body.
    """
    # Reference state vector from qibo's numpy backend, copied into cupy.
    qibo.set_backend(backend="numpy")
    qibo_circ, reference_state = qibo_qft(nqubits, swaps=True)
    result_sv_cp = cp.asarray(reference_state)

    # cutensornet run with only MPI switched on.
    backend = construct_backend(backend="qibotn", platform="cutensornet")
    backend.configure_tn_simulation(
        {
            "MPI_enabled": True,
            "MPS_enabled": False,
            "NCCL_enabled": False,
            "expectation_enabled": False,
        }
    )
    tn_outcome = backend.execute_circuit(circuit=qibo_circ)
    result_tn_cp = cp.asarray(tn_outcome.statevector.flatten())

    print(f"State vector difference: {abs(result_tn_cp - result_sv_cp).max():0.3e}")

    if backend.rank != 0:
        # Every non-root rank must report a single element equal to 0.
        assert (
            isinstance(result_tn_cp, cp.ndarray)
            and result_tn_cp.size == 1
            and result_tn_cp.item() == 0
        ), f"Rank {backend.rank}: result_tn_cp should be scalar/array with 0, got {result_tn_cp}"
        return

    assert cp.allclose(
        result_sv_cp, result_tn_cp
    ), "Resulting dense vectors do not match"
@pytest.mark.gpu
@pytest.mark.mpi
@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10])
def test_expectation_mpi(nqubits: int, dtype="complex128"):
    """Check MPI-enabled expectation values against qibo's numpy backend.

    The same circuit is executed three times, once per supported form of the
    ``expectation_enabled`` setting. On rank 0 the result must match the
    reference expectation value within ``ABS_TOL``; on every other rank it
    must be a one-element integer array holding 0.

    Args:
        nqubits (int): Total number of qubits in the system.
        dtype (str): Not referenced in the test body.
    """
    # Test qibo
    qibo_circ, state_vec_qibo = qibo_qft(nqubits, swaps=True)
    ham, _ = build_observable(nqubits)
    numpy_backend = construct_backend("numpy")
    exact_expval = numpy_backend.calculate_expectation_state(
        hamiltonian=ham,
        state=state_vec_qibo,
        normalize=False,
    )

    # Test cutensornet
    backend = construct_backend(backend="qibotn", platform="cutensornet")

    expectation_settings = (
        # Simple bool setting. Uses default Hamiltonian for expectation calculation.
        True,
        # User defined hamiltonian as a "hamiltonians.SymbolicHamiltonian" object.
        ham,
        # User defined hamiltonian in its dictionary form.
        build_observable_dict(nqubits),
    )

    for expectation_enabled in expectation_settings:
        backend.configure_tn_simulation(
            {
                "MPI_enabled": True,
                "MPS_enabled": False,
                "NCCL_enabled": False,
                "expectation_enabled": expectation_enabled,
            }
        )
        result_tn = backend.execute_circuit(circuit=qibo_circ)

        if backend.rank == 0:
            # Compare numerical values
            assert math.isclose(
                exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL
            ), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}"
        else:
            # Rank > 0: must be hardcoded [0] (int)
            assert (
                isinstance(result_tn, (np.ndarray, cp.ndarray))
                and result_tn.size == 1
                and np.issubdtype(result_tn.dtype, np.integer)
                and result_tn.item() == 0
            ), f"Rank {backend.rank}: expected int array [0], got {result_tn}"
@pytest.mark.gpu
@pytest.mark.mpi
@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10])
def test_eval_nccl(nqubits: int, dtype="complex128"):
    """Compare the NCCL-enabled cutensornet state vector with qibo's numpy one.

    Args:
        nqubits (int): Total number of qubits in the system.
        dtype (str): The data type for precision, 'complex64' for single,
            'complex128' for double. Not referenced in the test body.
    """
    # Reference state vector from qibo's numpy backend, copied into cupy.
    qibo.set_backend(backend="numpy")
    qibo_circ, reference_state = qibo_qft(nqubits, swaps=True)
    result_sv_cp = cp.asarray(reference_state)

    # cutensornet run with only NCCL switched on.
    backend = construct_backend(backend="qibotn", platform="cutensornet")
    backend.configure_tn_simulation(
        {
            "MPI_enabled": False,
            "MPS_enabled": False,
            "NCCL_enabled": True,
            "expectation_enabled": False,
        }
    )
    tn_outcome = backend.execute_circuit(circuit=qibo_circ)
    result_tn_cp = cp.asarray(tn_outcome.statevector.flatten())

    if backend.rank != 0:
        # Every non-root rank must report a single element equal to 0.
        assert (
            isinstance(result_tn_cp, cp.ndarray)
            and result_tn_cp.size == 1
            and result_tn_cp.item() == 0
        ), f"Rank {backend.rank}: result_tn_cp should be scalar/array with 0, got {result_tn_cp}"
        return

    assert cp.allclose(
        result_sv_cp, result_tn_cp
    ), "Resulting dense vectors do not match"
@pytest.mark.gpu
@pytest.mark.mpi
@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10])
def test_expectation_NCCL(nqubits: int, dtype="complex128"):
    """Check NCCL-enabled expectation values against qibo's numpy backend.

    The same circuit is executed three times, once per supported form of the
    ``expectation_enabled`` setting. On rank 0 the result must match the
    reference expectation value within ``ABS_TOL``; on every other rank it
    must be a one-element integer array holding 0.

    Args:
        nqubits (int): Total number of qubits in the system.
        dtype (str): Not referenced in the test body.
    """
    # Test qibo
    qibo_circ, state_vec_qibo = qibo_qft(nqubits, swaps=True)
    ham, _ = build_observable(nqubits)
    numpy_backend = construct_backend("numpy")
    exact_expval = numpy_backend.calculate_expectation_state(
        hamiltonian=ham,
        state=state_vec_qibo,
        normalize=False,
    )

    # Test cutensornet
    backend = construct_backend(backend="qibotn", platform="cutensornet")

    expectation_settings = (
        # Simple bool setting. Uses default Hamiltonian for expectation calculation.
        True,
        # User defined hamiltonian as a "hamiltonians.SymbolicHamiltonian" object.
        ham,
        # User defined hamiltonian in its dictionary form.
        build_observable_dict(nqubits),
    )

    for expectation_enabled in expectation_settings:
        backend.configure_tn_simulation(
            {
                "MPI_enabled": False,
                "MPS_enabled": False,
                "NCCL_enabled": True,
                "expectation_enabled": expectation_enabled,
            }
        )
        result_tn = backend.execute_circuit(circuit=qibo_circ)

        if backend.rank == 0:
            # Compare numerical values
            assert math.isclose(
                exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL
            ), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}"
        else:
            # Rank > 0: must be hardcoded [0] (int)
            assert (
                isinstance(result_tn, (np.ndarray, cp.ndarray))
                and result_tn.size == 1
                and np.issubdtype(result_tn.dtype, np.integer)
                and result_tn.item() == 0
            ), f"Rank {backend.rank}: expected int array [0], got {result_tn}"