From 8e78420c379cb0124e342affc6779fb38377f1c1 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Fri, 3 Nov 2023 16:48:14 +0800
Subject: [PATCH 01/85] add backend file

---
 src/qibotn/backends.py | 69 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 src/qibotn/backends.py

diff --git a/src/qibotn/backends.py b/src/qibotn/backends.py
new file mode 100644
index 0000000..98294ec
--- /dev/null
+++ b/src/qibotn/backends.py
@@ -0,0 +1,69 @@
+from qibo.backends import NumpyBackend
+from qibo.config import raise_error
+from qibotn import cutn
+from qibotn import quimb
+from qibo.states import CircuitResult
+
+
+class QiboTNBackend(NumpyBackend):
+    def __init__(self, platform):
+        super().__init__()
+        self.name = "qibotn"
+        if (
+            platform == "cu_tensornet"
+            or platform == "cu_mps"
+            or platform == "qu_tensornet"
+        ):  # pragma: no cover
+            self.platform = platform
+        else:
+            raise_error(
+                NotImplementedError, "QiboTN cannot support the specified backend."
+            )
+
+    def apply_gate(self, gate, state, nqubits):  # pragma: no cover
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def apply_gate_density_matrix(self, gate, state, nqubits):  # pragma: no cover
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def assign_measurements(self, measurement_map, circuit_result):
+        raise_error(NotImplementedError, "Not implemented in QiboTN.")
+
+    def execute_circuit(
+        self, circuit, initial_state=None, nshots=None, return_array=False
+    ):  # pragma: no cover
+        """Executes a quantum circuit.
+
+        Args:
+            circuit (:class:`qibo.models.circuit.Circuit`): Circuit to execute.
+            initial_state (:class:`qibo.models.circuit.Circuit`): Circuit to prepare the initial state.
+                If ``None`` the default ``|00...0>`` state is used.
+
+        Returns:
+            xxx.
+
+        """
+        if initial_state is not None:
+            raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+
+        if self.platform == "cu_tensornet":
+            state = cutn.eval(circuit, self.dtype)
+
+        if self.platform == "cu_mps":
+            gate_algo = {
+                "qr_method": False,
+                "svd_method": {
+                    "partition": "UV",
+                    "abs_cutoff": 1e-12,
+                },
+            }  # make this user input
+            state = cutn.eval_mps(circuit, gate_algo, self.dtype)
+
+        if self.platform == "qu_tensornet":
+            state = quimb.eval(circuit.to_qasm(), initial_state, backend="numpy")
+
+        if return_array:
+            return state.flatten()
+        else:
+            circuit._final_state = CircuitResult(self, circuit, state.flatten(), nshots)
+            return circuit._final_state

From 3f046beb1ca5570e5ba8c1b7c0ad8ec0d85d9eb0 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Fri, 3 Nov 2023 17:08:03 +0800
Subject: [PATCH 02/85] Restrict initial state check to cu_tensornet and cu_mps platforms

---
 src/qibotn/backends.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/qibotn/backends.py b/src/qibotn/backends.py
index 98294ec..f1d0baf 100644
--- a/src/qibotn/backends.py
+++ b/src/qibotn/backends.py
@@ -43,13 +43,17 @@ class QiboTNBackend(NumpyBackend):
             xxx.
 
         """
-        if initial_state is not None:
-            raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
         if self.platform == "cu_tensornet":
+            if initial_state is not None:
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+
             state = cutn.eval(circuit, self.dtype)
 
         if self.platform == "cu_mps":
+            if initial_state is not None:
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+
             gate_algo = {
                 "qr_method": False,
                 "svd_method": {

From 2d48d3ddbe1c38b5e57422284492f86cbdce416c Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 7 Nov 2023 17:08:57 +0800
Subject: [PATCH 03/85] Added initial state for Quimb

---
 src/qibotn/backends.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/qibotn/backends.py b/src/qibotn/backends.py
index f1d0baf..3193e6b 100644
--- a/src/qibotn/backends.py
+++ b/src/qibotn/backends.py
@@ -3,6 +3,7 @@ from qibo.config import raise_error
 from qibotn import cutn
 from qibotn import quimb
 from qibo.states import CircuitResult
+import numpy as np
 
 
 class QiboTNBackend(NumpyBackend):
@@ -30,7 +31,7 @@ class QiboTNBackend(NumpyBackend):
         raise_error(NotImplementedError, "Not implemented in QiboTN.")
 
     def execute_circuit(
-        self, circuit, initial_state=None, nshots=None, return_array=False
+        self, circuit, initial_state=None, nshots=None, return_array=True
     ):  # pragma: no cover
         """Executes a quantum circuit.
 
@@ -64,7 +65,12 @@ class QiboTNBackend(NumpyBackend):
             state = cutn.eval_mps(circuit, gate_algo, self.dtype)
 
         if self.platform == "qu_tensornet":
-            state = quimb.eval(circuit.to_qasm(), initial_state, backend="numpy")
+            
+            #init_state = np.random.random(2**circuit.nqubits) + 1j * np.random.random(2**circuit.nqubits)
+            #init_state = init_state / np.sqrt((np.abs(init_state) ** 2).sum())
+            init_state = np.zeros(2**circuit.nqubits, dtype=self.dtype)
+            init_state[0] = 1.0
+            state = quimb.eval(circuit.to_qasm(), init_state, backend="numpy")
 
         if return_array:
             return state.flatten()

From 2a6b3a54f0fc7fd1a7881d5fd663e74013aa30d8 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Thu, 9 Nov 2023 00:18:27 +0800
Subject: [PATCH 04/85] Revert return_array default to False

---
 src/qibotn/backends.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/backends.py b/src/qibotn/backends.py
index 3193e6b..9d39973 100644
--- a/src/qibotn/backends.py
+++ b/src/qibotn/backends.py
@@ -31,7 +31,7 @@ class QiboTNBackend(NumpyBackend):
         raise_error(NotImplementedError, "Not implemented in QiboTN.")
 
     def execute_circuit(
-        self, circuit, initial_state=None, nshots=None, return_array=True
+        self, circuit, initial_state=None, nshots=None, return_array=False
     ):  # pragma: no cover
         """Executes a quantum circuit.
 

From 0420cbfa520f91fbd24e8d24e7f98585e911ce41 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 24 Jan 2024 11:40:51 +0800
Subject: [PATCH 05/85] Updates to include expectation calculation

---
 src/qibotn/QiboCircuitConvertor.py | 113 ++++++++
 src/qibotn/QiboCircuitToMPS.py     |   2 +-
 src/qibotn/backends.py             |  53 ++++
 src/qibotn/cutn.py                 | 406 ++++++++++++++++++++++++++++-
 4 files changed, 567 insertions(+), 7 deletions(-)

diff --git a/src/qibotn/QiboCircuitConvertor.py b/src/qibotn/QiboCircuitConvertor.py
index d72a09c..11aaa71 100644
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/QiboCircuitConvertor.py
@@ -21,6 +21,7 @@ class QiboCircuitToEinsum:
         self.dtype = getattr(self.backend, dtype)
         self.init_basis_map(self.backend, dtype)
         self.init_intermediate_circuit(circuit)
+        self.circuit = circuit
 
     def state_vector_operands(self):
         input_bitstring = "0" * len(self.active_qubits)
@@ -109,3 +110,115 @@ class QiboCircuitToEinsum:
         state_1 = asarray([0, 1], dtype=dtype)
 
         self.basis_map = {"0": state_0, "1": state_1}
+
+
+    def init_inverse_circuit(self, circuit):
+        self.gate_tensors_inverse = []
+        gates_qubits_inverse = []
+
+        for gate in circuit.queue:
+            gate_qubits = gate.control_qubits + gate.target_qubits
+            gates_qubits_inverse.extend(gate_qubits)
+
+            # self.gate_tensors is to extract into a list the gate matrix together with the qubit id that it is acting on
+            # https://github.com/NVIDIA/cuQuantum/blob/6b6339358f859ea930907b79854b90b2db71ab92/python/cuquantum/cutensornet/_internal/circuit_parser_utils_cirq.py#L32
+            required_shape = self.op_shape_from_qubits(len(gate_qubits))
+            self.gate_tensors_inverse.append(
+                (
+                    cp.asarray(gate.matrix()).reshape(required_shape),
+                    gate_qubits,
+                )
+            )
+
+        # self.active_qubits is to identify qubits with at least 1 gate acting on it in the whole circuit.
+        self.active_qubits_inverse = np.unique(gates_qubits_inverse)
+        
+        
+    def get_pauli_gates(self, pauli_map, dtype='complex128', backend=cp):
+        """
+        Populate the gates for all pauli operators.
+
+        Args:
+            pauli_map: A dictionary mapping qubits to pauli operators. 
+            dtype: Data type for the tensor operands.
+            backend: The package the tensor operands belong to.
+
+        Returns:
+            A sequence of pauli gates.
+        """
+        asarray = backend.asarray
+        pauli_i = asarray([[1,0], [0,1]], dtype=dtype)
+        pauli_x = asarray([[0,1], [1,0]], dtype=dtype)
+        pauli_y = asarray([[0,-1j], [1j,0]], dtype=dtype)
+        pauli_z = asarray([[1,0], [0,-1]], dtype=dtype)
+        
+        operand_map = {'I': pauli_i,
+                    'X': pauli_x,
+                    'Y': pauli_y,
+                    'Z': pauli_z}
+        gates = []
+        for qubit, pauli_char in pauli_map.items():
+            operand = operand_map.get(pauli_char)
+            if operand is None:
+                raise ValueError('pauli string character must be one of I/X/Y/Z')
+            gates.append((operand, (qubit,)))
+        return gates
+
+    def expectation_operands(self, pauli_string):
+        #assign pauli string to qubit
+        #_get_forward_inverse_metadata()
+        input_bitstring = "0" * self.circuit.nqubits #Need all qubits!
+
+        input_operands = self._get_bitstring_tensors(input_bitstring)
+        pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))        
+        pauli_map = pauli_string
+        coned_qubits = pauli_map.keys()
+        
+        (
+            mode_labels,
+            qubits_frontier,
+            next_frontier,
+        ) = self._init_mode_labels_from_qubits(range(self.circuit.nqubits))
+        
+        gate_mode_labels, gate_operands = self._parse_gates_to_mode_labels_operands(
+            self.gate_tensors, qubits_frontier, next_frontier
+        )
+        
+        operands = input_operands + gate_operands
+        mode_labels += gate_mode_labels
+        
+        self.init_inverse_circuit(self.circuit.invert())
+        
+        
+        next_frontier = max(qubits_frontier.values()) + 1
+
+        #input_mode_labels, input_operands, qubits_frontier, next_frontier, inverse_gates = self._get_forward_inverse_metadata(coned_qubits)
+
+        pauli_gates = self.get_pauli_gates(pauli_map, dtype=self.dtype, backend=self.backend)
+        
+        
+        gates_inverse = pauli_gates + self.gate_tensors_inverse
+        
+        gate_mode_labels_inverse, gate_operands_inverse = self._parse_gates_to_mode_labels_operands(
+            gates_inverse, qubits_frontier, next_frontier
+        )
+        mode_labels = mode_labels + gate_mode_labels_inverse + [[qubits_frontier[ix]] for ix in range(self.circuit.nqubits)]
+        operands = operands + gate_operands_inverse + operands[:self.circuit.nqubits]
+        
+        operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
+        
+        #expec = contract(*operand_exp_interleave)
+        #print(expec)
+
+        '''
+        gate_mode_labels, gate_operands = circ_utils.parse_gates_to_mode_labels_operands(gates, 
+                                                                                         qubits_frontier, 
+                                                                                         next_frontier)
+        
+        mode_labels = input_mode_labels + gate_mode_labels + [[qubits_frontier[ix]] for ix in self.qubits]
+        operands = input_operands + gate_operands + input_operands[:n_qubits]
+
+        output_mode_labels = []
+        expression = circ_utils.convert_mode_labels_to_expression(mode_labels, output_mode_labels)
+        '''
+        return operand_exp_interleave
\ No newline at end of file
diff --git a/src/qibotn/QiboCircuitToMPS.py b/src/qibotn/QiboCircuitToMPS.py
index d51093f..816b17c 100644
--- a/src/qibotn/QiboCircuitToMPS.py
+++ b/src/qibotn/QiboCircuitToMPS.py
@@ -21,7 +21,7 @@ class QiboCircuitToMPS:
         self.handle = cutn.create()
         self.dtype = dtype
         self.mps_tensors = initial(self.num_qubits, dtype=dtype)
-        circuitconvertor = QiboCircuitToEinsum(circ_qibo)
+        circuitconvertor = QiboCircuitToEinsum(circ_qibo, dtype=dtype)
 
         for gate, qubits in circuitconvertor.gate_tensors:
             # mapping from qubits to qubit indices
diff --git a/src/qibotn/backends.py b/src/qibotn/backends.py
index 9d39973..4b28431 100644
--- a/src/qibotn/backends.py
+++ b/src/qibotn/backends.py
@@ -14,6 +14,13 @@ class QiboTNBackend(NumpyBackend):
             platform == "cu_tensornet"
             or platform == "cu_mps"
             or platform == "qu_tensornet"
+            or platform == "cu_tensornet_mpi"
+            or platform == "cu_tensornet_mpi_expectation"
+            or platform == "cu_tensornet_expectation"
+            or platform == "cu_tensornet_nccl"
+            or platform == "cu_tensornet_nccl_expectation"
+
+
         ):  # pragma: no cover
             self.platform = platform
         else:
@@ -71,6 +78,52 @@ class QiboTNBackend(NumpyBackend):
             init_state = np.zeros(2**circuit.nqubits, dtype=self.dtype)
             init_state[0] = 1.0
             state = quimb.eval(circuit.to_qasm(), init_state, backend="numpy")
+            
+        if self.platform == "cu_tensornet_mpi":
+            if initial_state is not None:
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+
+            #state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
+            state, rank = cutn.eval_tn_MPI_2(circuit, self.dtype,32)
+            if rank > 0:
+                state = np.array(0)
+             
+        if self.platform == "cu_tensornet_nccl":
+            if initial_state is not None:
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+
+            #state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
+            state, rank = cutn.eval_tn_nccl(circuit, self.dtype,32)
+            if rank > 0:
+                state = np.array(0)
+        
+        if self.platform == "cu_tensornet_expectation":
+            if initial_state is not None:
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+                
+            state = cutn.eval_expectation(circuit, self.dtype)
+        
+        if self.platform == "cu_tensornet_mpi_expectation":
+            if initial_state is not None:
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+
+            #state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
+            #state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
+            state, rank = cutn.eval_tn_MPI_2_expectation(circuit, self.dtype,32)
+            
+            if rank > 0:
+                state = np.array(0)
+
+        if self.platform == "cu_tensornet_nccl_expectation":
+            if initial_state is not None:
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+
+            #state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
+            #state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
+            state, rank = cutn.eval_tn_nccl_expectation(circuit, self.dtype,32)
+            
+            if rank > 0:
+                state = np.array(0)
 
         if return_array:
             return state.flatten()
diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py
index eb0e0d4..67d70c4 100644
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -13,6 +13,354 @@ def eval(qibo_circ, datatype):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(*myconvertor.state_vector_operands())
 
+def eval_expectation(qibo_circ, datatype):
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    return contract(*myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits)))
+
+def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
+    from mpi4py import MPI  # this line initializes MPI
+    import socket
+    from cuquantum import Network
+
+    # Get the hostname
+    #hostname = socket.gethostname()
+    
+    root = 0
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    device_id = rank % getDeviceCount()
+    
+    
+    # Perform circuit conversion
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    operands = myconvertor.state_vector_operands()
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+    
+    # Broadcast the operand data.
+    #operands = comm.bcast(operands, root)
+        
+    # Assign the device for each process.
+    device_id = rank % getDeviceCount()
+    
+    #dev = cp.cuda.Device(device_id)
+    #free_mem, total_mem = dev.mem_info
+    #print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
+
+    # Create network object.
+    network = Network(*operands, options={'device_id' : device_id})
+
+    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
+    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+    #print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
+
+    # Select the best path from all ranks.
+    opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
+
+    #if rank == root:
+    #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
+
+    # Broadcast info from the sender to all other ranks.
+    info = comm.bcast(info, sender)
+
+    # Set path and slices.
+    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+
+    # Calculate this process's share of the slices.
+    num_slices = info.num_slices
+    chunk, extra = num_slices // size, num_slices % size
+    slice_begin = rank * chunk + min(rank, extra)
+    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slices = range(slice_begin, slice_end)
+
+    #print(f"Process {rank} is processing slice range: {slices}.")
+
+    # Contract the group of slices the process is responsible for.
+    result = network.contract(slices=slices)
+    #print(f"Process {rank} result shape is : {result.shape}.")
+    #print(f"Process {rank} result size is : {result.nbytes}.")
+
+    # Sum the partial contribution from each process on root.
+    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
+    
+    return result, rank
+
+def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
+    from mpi4py import MPI  # this line initializes MPI
+    import socket
+    from cuquantum import Network
+    from cupy.cuda import nccl
+
+    # Get the hostname
+    #hostname = socket.gethostname()
+    
+    root = 0
+    comm_mpi = MPI.COMM_WORLD
+    rank = comm_mpi.Get_rank()
+    size = comm_mpi.Get_size()
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    device_id = rank % getDeviceCount()
+    
+    cp.cuda.Device(device_id).use()
+    
+    # Set up the NCCL communicator.
+    nccl_id = nccl.get_unique_id() if rank == root else None
+    nccl_id = comm_mpi.bcast(nccl_id, root)
+    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
+
+    # Perform circuit conversion
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    operands = myconvertor.state_vector_operands()
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+
+    network = Network(*operands)
+
+    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
+    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+
+    #print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
+
+    # Select the best path from all ranks.
+    opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
+
+    #if rank == root:
+    #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
+
+    # Broadcast info from the sender to all other ranks.
+    info = comm_mpi.bcast(info, sender)
+
+    # Set path and slices.
+    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+
+    # Calculate this process's share of the slices.
+    num_slices = info.num_slices
+    chunk, extra = num_slices // size, num_slices % size
+    slice_begin = rank * chunk + min(rank, extra)
+    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slices = range(slice_begin, slice_end)
+
+    #print(f"Process {rank} is processing slice range: {slices}.")
+
+    # Contract the group of slices the process is responsible for.
+    result = network.contract(slices=slices)
+    #print(f"Process {rank} result shape is : {result.shape}.")
+    #print(f"Process {rank} result size is : {result.nbytes}.")
+
+    # Sum the partial contribution from each process on root.
+    stream_ptr = cp.cuda.get_current_stream().ptr
+    comm_nccl.reduce(result.data.ptr, result.data.ptr, result.size, nccl.NCCL_FLOAT64, nccl.NCCL_SUM, root, stream_ptr)
+    
+    return result, rank
+
+def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
+    from mpi4py import MPI  # this line initializes MPI
+    import socket
+    from cuquantum import Network
+    from cupy.cuda import nccl
+
+    # Get the hostname
+    #hostname = socket.gethostname()
+    
+    root = 0
+    comm_mpi = MPI.COMM_WORLD
+    rank = comm_mpi.Get_rank()
+    size = comm_mpi.Get_size()
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    device_id = rank % getDeviceCount()
+    
+    cp.cuda.Device(device_id).use()
+    
+    # Set up the NCCL communicator.
+    nccl_id = nccl.get_unique_id() if rank == root else None
+    nccl_id = comm_mpi.bcast(nccl_id, root)
+    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
+
+    # Perform circuit conversion
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
+
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+
+    network = Network(*operands)
+
+    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
+    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+
+    #print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
+
+    # Select the best path from all ranks.
+    opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
+
+    #if rank == root:
+    #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
+
+    # Broadcast info from the sender to all other ranks.
+    info = comm_mpi.bcast(info, sender)
+
+    # Set path and slices.
+    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+
+    # Calculate this process's share of the slices.
+    num_slices = info.num_slices
+    chunk, extra = num_slices // size, num_slices % size
+    slice_begin = rank * chunk + min(rank, extra)
+    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slices = range(slice_begin, slice_end)
+
+    #print(f"Process {rank} is processing slice range: {slices}.")
+
+    # Contract the group of slices the process is responsible for.
+    result = network.contract(slices=slices)
+    #print(f"Process {rank} result shape is : {result.shape}.")
+    #print(f"Process {rank} result size is : {result.nbytes}.")
+
+    # Sum the partial contribution from each process on root.
+    stream_ptr = cp.cuda.get_current_stream().ptr
+    comm_nccl.reduce(result.data.ptr, result.data.ptr, result.size, nccl.NCCL_FLOAT64, nccl.NCCL_SUM, root, stream_ptr)
+    
+    return result, rank
+
+
+def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
+    from mpi4py import MPI  # this line initializes MPI
+    import socket
+    from cuquantum import Network
+
+    # Get the hostname
+    #hostname = socket.gethostname()
+    
+    root = 0
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    device_id = rank % getDeviceCount()
+    
+    
+    # Perform circuit conversion
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+    
+    # Broadcast the operand data.
+    #operands = comm.bcast(operands, root)
+        
+    # Assign the device for each process.
+    device_id = rank % getDeviceCount()
+    
+    #dev = cp.cuda.Device(device_id)
+    #free_mem, total_mem = dev.mem_info
+    #print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
+
+    # Create network object.
+    network = Network(*operands, options={'device_id' : device_id})
+
+    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
+    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+    #print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
+
+    # Select the best path from all ranks.
+    opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
+
+    #if rank == root:
+    #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
+
+    # Broadcast info from the sender to all other ranks.
+    info = comm.bcast(info, sender)
+
+    # Set path and slices.
+    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+
+    # Calculate this process's share of the slices.
+    num_slices = info.num_slices
+    chunk, extra = num_slices // size, num_slices % size
+    slice_begin = rank * chunk + min(rank, extra)
+    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slices = range(slice_begin, slice_end)
+
+    #print(f"Process {rank} is processing slice range: {slices}.")
+
+    # Contract the group of slices the process is responsible for.
+    result = network.contract(slices=slices)
+    #print(f"Process {rank} result shape is : {result.shape}.")
+    #print(f"Process {rank} result size is : {result.nbytes}.")
+
+    # Sum the partial contribution from each process on root.
+    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
+    
+    return result, rank
+
+
+def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
+    from mpi4py import MPI  # this line initializes MPI
+    import socket
+    # Get the hostname
+    #hostname = socket.gethostname()
+    
+    ncpu_threads = multiprocessing.cpu_count() // 2
+    
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    device_id = rank % getDeviceCount()
+    cp.cuda.Device(device_id).use()
+
+    handle = cutn.create()
+    network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft network opts",mem_avail, "rank =",rank)
+    cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
+    # Perform circuit conversion
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    operands_interleave = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+
+    # Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
+    network = cutn.Network(*operands_interleave, options=network_opts)
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
+    path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft contract path",mem_avail, "rank =",rank)
+    # Execution: To execute the contraction using the optimal path found previously
+    #print("opt_cost",opt_info.opt_cost, "Process =",rank)
+
+    
+    num_slices = opt_info.num_slices#Andy
+    chunk, extra = num_slices // size, num_slices % size#Andy
+    slice_begin = rank * chunk + min(rank, extra)#Andy
+    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
+    slices = range(slice_begin, slice_end)#Andy
+    result = network.contract(slices=slices)
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft contract",mem_avail, "rank =",rank)
+    cutn.destroy(handle)
+
+    return result, rank
 
 def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
     """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
@@ -22,29 +370,59 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
     """
 
     from mpi4py import MPI  # this line initializes MPI
-
+    import socket
+    # Get the hostname
+    #hostname = socket.gethostname()
+    
     ncpu_threads = multiprocessing.cpu_count() // 2
-
+    
     comm = MPI.COMM_WORLD
     rank = comm.Get_rank()
+    size = comm.Get_size()
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
     device_id = rank % getDeviceCount()
     cp.cuda.Device(device_id).use()
 
     handle = cutn.create()
-    cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
     network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
-
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft network opts",mem_avail, "rank =",rank)
+    cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands_interleave = myconvertor.state_vector_operands()
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
     # Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
     network = cutn.Network(*operands_interleave, options=network_opts)
-    network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads})
-
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
+    network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft contract path",mem_avail, "rank =",rank)
     # Execution: To execute the contraction using the optimal path found previously
+    #print("opt_cost",opt_info.opt_cost, "Process =",rank)
+
+    '''
+    path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
+
+    num_slices = opt_info.num_slices#Andy
+    chunk, extra = num_slices // size, num_slices % size#Andy
+    slice_begin = rank * chunk + min(rank, extra)#Andy
+    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
+    slices = range(slice_begin, slice_end)#Andy
+    result = network.contract(slices=slices)
+    '''
     result = network.contract()
 
+    #mem_avail = cp.cuda.Device().mem_info[0]
+    #print("Mem avail: aft contract",mem_avail, "rank =",rank)
     cutn.destroy(handle)
 
     return result, rank
@@ -57,3 +435,19 @@ def eval_mps(qibo_circ, gate_algo, datatype):
     return mps_helper.contract_state_vector(
         myconvertor.mps_tensors, {"handle": myconvertor.handle}
     )
+
+def PauliStringGen(nqubits):
+    
+    if nqubits <= 0:
+        return "Invalid input. N should be a positive integer."
+
+    #characters = 'IXYZ'
+    characters = 'XXXZ'
+
+    result = ''
+
+    for i in range(nqubits):
+        char_to_add = characters[i % len(characters)]
+        result += char_to_add
+
+    return result
\ No newline at end of file

From 37212a388d09bda535ea1c0d6fc1a95a0ed9b783 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 24 Jan 2024 11:47:32 +0800
Subject: [PATCH 06/85] Format with black

---
 src/qibotn/QiboCircuitConvertor.py |  87 +++----
 src/qibotn/backends.py             |  41 ++--
 src/qibotn/cutn.py                 | 366 +++++++++++++++++------------
 3 files changed, 277 insertions(+), 217 deletions(-)

diff --git a/src/qibotn/QiboCircuitConvertor.py b/src/qibotn/QiboCircuitConvertor.py
index 11aaa71..d3a0569 100644
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/QiboCircuitConvertor.py
@@ -95,8 +95,7 @@ class QiboCircuitToEinsum:
             required_shape = self.op_shape_from_qubits(len(gate_qubits))
             self.gate_tensors.append(
                 (
-                    cp.asarray(gate.matrix(), dtype=self.dtype).reshape(
-                        required_shape),
+                    cp.asarray(gate.matrix(), dtype=self.dtype).reshape(required_shape),
                     gate_qubits,
                 )
             )
@@ -111,7 +110,6 @@ class QiboCircuitToEinsum:
 
         self.basis_map = {"0": state_0, "1": state_1}
 
-
     def init_inverse_circuit(self, circuit):
         self.gate_tensors_inverse = []
         gates_qubits_inverse = []
@@ -132,14 +130,13 @@ class QiboCircuitToEinsum:
 
         # self.active_qubits is to identify qubits with at least 1 gate acting on it in the whole circuit.
         self.active_qubits_inverse = np.unique(gates_qubits_inverse)
-        
-        
-    def get_pauli_gates(self, pauli_map, dtype='complex128', backend=cp):
+
+    def get_pauli_gates(self, pauli_map, dtype="complex128", backend=cp):
         """
         Populate the gates for all pauli operators.
 
         Args:
-            pauli_map: A dictionary mapping qubits to pauli operators. 
+            pauli_map: A dictionary mapping qubits to pauli operators.
             dtype: Data type for the tensor operands.
             backend: The package the tensor operands belong to.
 
@@ -147,70 +144,74 @@ class QiboCircuitToEinsum:
             A sequence of pauli gates.
         """
         asarray = backend.asarray
-        pauli_i = asarray([[1,0], [0,1]], dtype=dtype)
-        pauli_x = asarray([[0,1], [1,0]], dtype=dtype)
-        pauli_y = asarray([[0,-1j], [1j,0]], dtype=dtype)
-        pauli_z = asarray([[1,0], [0,-1]], dtype=dtype)
-        
-        operand_map = {'I': pauli_i,
-                    'X': pauli_x,
-                    'Y': pauli_y,
-                    'Z': pauli_z}
+        pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
+        pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
+        pauli_y = asarray([[0, -1j], [1j, 0]], dtype=dtype)
+        pauli_z = asarray([[1, 0], [0, -1]], dtype=dtype)
+
+        operand_map = {"I": pauli_i, "X": pauli_x, "Y": pauli_y, "Z": pauli_z}
         gates = []
         for qubit, pauli_char in pauli_map.items():
             operand = operand_map.get(pauli_char)
             if operand is None:
-                raise ValueError('pauli string character must be one of I/X/Y/Z')
+                raise ValueError("pauli string character must be one of I/X/Y/Z")
             gates.append((operand, (qubit,)))
         return gates
 
     def expectation_operands(self, pauli_string):
-        #assign pauli string to qubit
-        #_get_forward_inverse_metadata()
-        input_bitstring = "0" * self.circuit.nqubits #Need all qubits!
+        # assign pauli string to qubit
+        # _get_forward_inverse_metadata()
+        input_bitstring = "0" * self.circuit.nqubits  # Need all qubits!
 
         input_operands = self._get_bitstring_tensors(input_bitstring)
-        pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))        
+        pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))
         pauli_map = pauli_string
         coned_qubits = pauli_map.keys()
-        
+
         (
             mode_labels,
             qubits_frontier,
             next_frontier,
         ) = self._init_mode_labels_from_qubits(range(self.circuit.nqubits))
-        
+
         gate_mode_labels, gate_operands = self._parse_gates_to_mode_labels_operands(
             self.gate_tensors, qubits_frontier, next_frontier
         )
-        
+
         operands = input_operands + gate_operands
         mode_labels += gate_mode_labels
-        
+
         self.init_inverse_circuit(self.circuit.invert())
-        
-        
+
         next_frontier = max(qubits_frontier.values()) + 1
 
-        #input_mode_labels, input_operands, qubits_frontier, next_frontier, inverse_gates = self._get_forward_inverse_metadata(coned_qubits)
+        # input_mode_labels, input_operands, qubits_frontier, next_frontier, inverse_gates = self._get_forward_inverse_metadata(coned_qubits)
+
+        pauli_gates = self.get_pauli_gates(
+            pauli_map, dtype=self.dtype, backend=self.backend
+        )
 
-        pauli_gates = self.get_pauli_gates(pauli_map, dtype=self.dtype, backend=self.backend)
-        
-        
         gates_inverse = pauli_gates + self.gate_tensors_inverse
-        
-        gate_mode_labels_inverse, gate_operands_inverse = self._parse_gates_to_mode_labels_operands(
+
+        (
+            gate_mode_labels_inverse,
+            gate_operands_inverse,
+        ) = self._parse_gates_to_mode_labels_operands(
             gates_inverse, qubits_frontier, next_frontier
         )
-        mode_labels = mode_labels + gate_mode_labels_inverse + [[qubits_frontier[ix]] for ix in range(self.circuit.nqubits)]
-        operands = operands + gate_operands_inverse + operands[:self.circuit.nqubits]
-        
-        operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
-        
-        #expec = contract(*operand_exp_interleave)
-        #print(expec)
+        mode_labels = (
+            mode_labels
+            + gate_mode_labels_inverse
+            + [[qubits_frontier[ix]] for ix in range(self.circuit.nqubits)]
+        )
+        operands = operands + gate_operands_inverse + operands[: self.circuit.nqubits]
 
-        '''
+        operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
+
+        # expec = contract(*operand_exp_interleave)
+        # print(expec)
+
+        """
         gate_mode_labels, gate_operands = circ_utils.parse_gates_to_mode_labels_operands(gates, 
                                                                                          qubits_frontier, 
                                                                                          next_frontier)
@@ -220,5 +221,5 @@ class QiboCircuitToEinsum:
 
         output_mode_labels = []
         expression = circ_utils.convert_mode_labels_to_expression(mode_labels, output_mode_labels)
-        '''
-        return operand_exp_interleave
\ No newline at end of file
+        """
+        return operand_exp_interleave
diff --git a/src/qibotn/backends.py b/src/qibotn/backends.py
index 4b28431..3728a99 100644
--- a/src/qibotn/backends.py
+++ b/src/qibotn/backends.py
@@ -19,8 +19,6 @@ class QiboTNBackend(NumpyBackend):
             or platform == "cu_tensornet_expectation"
             or platform == "cu_tensornet_nccl"
             or platform == "cu_tensornet_nccl_expectation"
-
-
         ):  # pragma: no cover
             self.platform = platform
         else:
@@ -72,45 +70,44 @@ class QiboTNBackend(NumpyBackend):
             state = cutn.eval_mps(circuit, gate_algo, self.dtype)
 
         if self.platform == "qu_tensornet":
-            
-            #init_state = np.random.random(2**circuit.nqubits) + 1j * np.random.random(2**circuit.nqubits)
-            #init_state = init_state / np.sqrt((np.abs(init_state) ** 2).sum())
+            # init_state = np.random.random(2**circuit.nqubits) + 1j * np.random.random(2**circuit.nqubits)
+            # init_state = init_state / np.sqrt((np.abs(init_state) ** 2).sum())
             init_state = np.zeros(2**circuit.nqubits, dtype=self.dtype)
             init_state[0] = 1.0
             state = quimb.eval(circuit.to_qasm(), init_state, backend="numpy")
-            
+
         if self.platform == "cu_tensornet_mpi":
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            #state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
-            state, rank = cutn.eval_tn_MPI_2(circuit, self.dtype,32)
+            # state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
+            state, rank = cutn.eval_tn_MPI_2(circuit, self.dtype, 32)
             if rank > 0:
                 state = np.array(0)
-             
+
         if self.platform == "cu_tensornet_nccl":
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            #state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
-            state, rank = cutn.eval_tn_nccl(circuit, self.dtype,32)
+            # state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
+            state, rank = cutn.eval_tn_nccl(circuit, self.dtype, 32)
             if rank > 0:
                 state = np.array(0)
-        
+
         if self.platform == "cu_tensornet_expectation":
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-                
+
             state = cutn.eval_expectation(circuit, self.dtype)
-        
+
         if self.platform == "cu_tensornet_mpi_expectation":
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            #state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
-            #state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
-            state, rank = cutn.eval_tn_MPI_2_expectation(circuit, self.dtype,32)
-            
+            # state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
+            # state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
+            state, rank = cutn.eval_tn_MPI_2_expectation(circuit, self.dtype, 32)
+
             if rank > 0:
                 state = np.array(0)
 
@@ -118,10 +115,10 @@ class QiboTNBackend(NumpyBackend):
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            #state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
-            #state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
-            state, rank = cutn.eval_tn_nccl_expectation(circuit, self.dtype,32)
-            
+            # state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
+            # state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
+            state, rank = cutn.eval_tn_nccl_expectation(circuit, self.dtype, 32)
+
             if rank > 0:
                 state = np.array(0)
 
diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py
index 67d70c4..aca33ff 100644
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -13,9 +13,13 @@ def eval(qibo_circ, datatype):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(*myconvertor.state_vector_operands())
 
+
 def eval_expectation(qibo_circ, datatype):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    return contract(*myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits)))
+    return contract(
+        *myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
+    )
+
 
 def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
@@ -23,73 +27,79 @@ def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
     from cuquantum import Network
 
     # Get the hostname
-    #hostname = socket.gethostname()
-    
+    # hostname = socket.gethostname()
+
     root = 0
     comm = MPI.COMM_WORLD
     rank = comm.Get_rank()
     size = comm.Get_size()
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
     device_id = rank % getDeviceCount()
-    
-    
+
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands = myconvertor.state_vector_operands()
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
-    
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+
     # Broadcast the operand data.
-    #operands = comm.bcast(operands, root)
-        
+    # operands = comm.bcast(operands, root)
+
     # Assign the device for each process.
     device_id = rank % getDeviceCount()
-    
-    #dev = cp.cuda.Device(device_id)
-    #free_mem, total_mem = dev.mem_info
-    #print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
+
+    # dev = cp.cuda.Device(device_id)
+    # free_mem, total_mem = dev.mem_info
+    # print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
 
     # Create network object.
-    network = Network(*operands, options={'device_id' : device_id})
+    network = Network(*operands, options={"device_id": device_id})
 
     # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
-    #print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
+    path, info = network.contract_path(
+        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+    )
+    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
 
     # Select the best path from all ranks.
     opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
 
-    #if rank == root:
+    # if rank == root:
     #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
 
     # Broadcast info from the sender to all other ranks.
     info = comm.bcast(info, sender)
 
     # Set path and slices.
-    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+    path, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )
 
     # Calculate this process's share of the slices.
     num_slices = info.num_slices
     chunk, extra = num_slices // size, num_slices % size
     slice_begin = rank * chunk + min(rank, extra)
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
     slices = range(slice_begin, slice_end)
 
-    #print(f"Process {rank} is processing slice range: {slices}.")
+    # print(f"Process {rank} is processing slice range: {slices}.")
 
     # Contract the group of slices the process is responsible for.
     result = network.contract(slices=slices)
-    #print(f"Process {rank} result shape is : {result.shape}.")
-    #print(f"Process {rank} result size is : {result.nbytes}.")
+    # print(f"Process {rank} result shape is : {result.shape}.")
+    # print(f"Process {rank} result size is : {result.nbytes}.")
 
     # Sum the partial contribution from each process on root.
     result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
-    
+
     return result, rank
 
+
 def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
@@ -97,18 +107,18 @@ def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
     from cupy.cuda import nccl
 
     # Get the hostname
-    #hostname = socket.gethostname()
-    
+    # hostname = socket.gethostname()
+
     root = 0
     comm_mpi = MPI.COMM_WORLD
     rank = comm_mpi.Get_rank()
     size = comm_mpi.Get_size()
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
     device_id = rank % getDeviceCount()
-    
+
     cp.cuda.Device(device_id).use()
-    
+
     # Set up the NCCL communicator.
     nccl_id = nccl.get_unique_id() if rank == root else None
     nccl_id = comm_mpi.bcast(nccl_id, root)
@@ -116,51 +126,66 @@ def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
 
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands = myconvertor.state_vector_operands()
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
     network = Network(*operands)
 
     # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+    path, info = network.contract_path(
+        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+    )
 
-    #print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
+    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
 
     # Select the best path from all ranks.
     opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
 
-    #if rank == root:
+    # if rank == root:
     #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
 
     # Broadcast info from the sender to all other ranks.
     info = comm_mpi.bcast(info, sender)
 
     # Set path and slices.
-    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+    path, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )
 
     # Calculate this process's share of the slices.
     num_slices = info.num_slices
     chunk, extra = num_slices // size, num_slices % size
     slice_begin = rank * chunk + min(rank, extra)
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
     slices = range(slice_begin, slice_end)
 
-    #print(f"Process {rank} is processing slice range: {slices}.")
+    # print(f"Process {rank} is processing slice range: {slices}.")
 
     # Contract the group of slices the process is responsible for.
     result = network.contract(slices=slices)
-    #print(f"Process {rank} result shape is : {result.shape}.")
-    #print(f"Process {rank} result size is : {result.nbytes}.")
+    # print(f"Process {rank} result shape is : {result.shape}.")
+    # print(f"Process {rank} result size is : {result.nbytes}.")
 
     # Sum the partial contribution from each process on root.
     stream_ptr = cp.cuda.get_current_stream().ptr
-    comm_nccl.reduce(result.data.ptr, result.data.ptr, result.size, nccl.NCCL_FLOAT64, nccl.NCCL_SUM, root, stream_ptr)
-    
+    comm_nccl.reduce(
+        result.data.ptr,
+        result.data.ptr,
+        result.size,
+        nccl.NCCL_FLOAT64,
+        nccl.NCCL_SUM,
+        root,
+        stream_ptr,
+    )
+
     return result, rank
 
+
 def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
@@ -168,18 +193,18 @@ def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
     from cupy.cuda import nccl
 
     # Get the hostname
-    #hostname = socket.gethostname()
-    
+    # hostname = socket.gethostname()
+
     root = 0
     comm_mpi = MPI.COMM_WORLD
     rank = comm_mpi.Get_rank()
     size = comm_mpi.Get_size()
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
     device_id = rank % getDeviceCount()
-    
+
     cp.cuda.Device(device_id).use()
-    
+
     # Set up the NCCL communicator.
     nccl_id = nccl.get_unique_id() if rank == root else None
     nccl_id = comm_mpi.bcast(nccl_id, root)
@@ -187,50 +212,64 @@ def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
 
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
 
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
     network = Network(*operands)
 
     # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+    path, info = network.contract_path(
+        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+    )
 
-    #print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
+    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
 
     # Select the best path from all ranks.
     opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
 
-    #if rank == root:
+    # if rank == root:
     #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
 
     # Broadcast info from the sender to all other ranks.
     info = comm_mpi.bcast(info, sender)
 
     # Set path and slices.
-    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+    path, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )
 
     # Calculate this process's share of the slices.
     num_slices = info.num_slices
     chunk, extra = num_slices // size, num_slices % size
     slice_begin = rank * chunk + min(rank, extra)
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
     slices = range(slice_begin, slice_end)
 
-    #print(f"Process {rank} is processing slice range: {slices}.")
+    # print(f"Process {rank} is processing slice range: {slices}.")
 
     # Contract the group of slices the process is responsible for.
     result = network.contract(slices=slices)
-    #print(f"Process {rank} result shape is : {result.shape}.")
-    #print(f"Process {rank} result size is : {result.nbytes}.")
+    # print(f"Process {rank} result shape is : {result.shape}.")
+    # print(f"Process {rank} result size is : {result.nbytes}.")
 
     # Sum the partial contribution from each process on root.
     stream_ptr = cp.cuda.get_current_stream().ptr
-    comm_nccl.reduce(result.data.ptr, result.data.ptr, result.size, nccl.NCCL_FLOAT64, nccl.NCCL_SUM, root, stream_ptr)
-    
+    comm_nccl.reduce(
+        result.data.ptr,
+        result.data.ptr,
+        result.size,
+        nccl.NCCL_FLOAT64,
+        nccl.NCCL_SUM,
+        root,
+        stream_ptr,
+    )
+
     return result, rank
 
 
@@ -240,128 +279,144 @@ def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
     from cuquantum import Network
 
     # Get the hostname
-    #hostname = socket.gethostname()
-    
+    # hostname = socket.gethostname()
+
     root = 0
     comm = MPI.COMM_WORLD
     rank = comm.Get_rank()
     size = comm.Get_size()
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
     device_id = rank % getDeviceCount()
-    
-    
+
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
-    
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+
     # Broadcast the operand data.
-    #operands = comm.bcast(operands, root)
-        
+    # operands = comm.bcast(operands, root)
+
     # Assign the device for each process.
     device_id = rank % getDeviceCount()
-    
-    #dev = cp.cuda.Device(device_id)
-    #free_mem, total_mem = dev.mem_info
-    #print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
+
+    # dev = cp.cuda.Device(device_id)
+    # free_mem, total_mem = dev.mem_info
+    # print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
 
     # Create network object.
-    network = Network(*operands, options={'device_id' : device_id})
+    network = Network(*operands, options={"device_id": device_id})
 
     # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
-    #print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
+    path, info = network.contract_path(
+        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+    )
+    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
 
     # Select the best path from all ranks.
     opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
 
-    #if rank == root:
+    # if rank == root:
     #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
 
     # Broadcast info from the sender to all other ranks.
     info = comm.bcast(info, sender)
 
     # Set path and slices.
-    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+    path, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )
 
     # Calculate this process's share of the slices.
     num_slices = info.num_slices
     chunk, extra = num_slices // size, num_slices % size
     slice_begin = rank * chunk + min(rank, extra)
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
     slices = range(slice_begin, slice_end)
 
-    #print(f"Process {rank} is processing slice range: {slices}.")
+    # print(f"Process {rank} is processing slice range: {slices}.")
 
     # Contract the group of slices the process is responsible for.
     result = network.contract(slices=slices)
-    #print(f"Process {rank} result shape is : {result.shape}.")
-    #print(f"Process {rank} result size is : {result.nbytes}.")
+    # print(f"Process {rank} result shape is : {result.shape}.")
+    # print(f"Process {rank} result size is : {result.nbytes}.")
 
     # Sum the partial contribution from each process on root.
     result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
-    
+
     return result, rank
 
 
 def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
+
     # Get the hostname
-    #hostname = socket.gethostname()
-    
+    # hostname = socket.gethostname()
+
     ncpu_threads = multiprocessing.cpu_count() // 2
-    
+
     comm = MPI.COMM_WORLD
     rank = comm.Get_rank()
     size = comm.Get_size()
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
     device_id = rank % getDeviceCount()
     cp.cuda.Device(device_id).use()
 
     handle = cutn.create()
     network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft network opts",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft network opts",mem_avail, "rank =",rank)
     cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    operands_interleave = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+    operands_interleave = myconvertor.expectation_operands(
+        PauliStringGen(qibo_circ.nqubits)
+    )
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
     # Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
     network = cutn.Network(*operands_interleave, options=network_opts)
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
-    path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft contract path",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
+    path, opt_info = network.contract_path(
+        optimize={
+            "samples": n_samples,
+            "threads": ncpu_threads,
+            "slicing": {"min_slices": max(16, size)},
+        }
+    )
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft contract path",mem_avail, "rank =",rank)
     # Execution: To execute the contraction using the optimal path found previously
-    #print("opt_cost",opt_info.opt_cost, "Process =",rank)
+    # print("opt_cost",opt_info.opt_cost, "Process =",rank)
 
-    
-    num_slices = opt_info.num_slices#Andy
-    chunk, extra = num_slices // size, num_slices % size#Andy
-    slice_begin = rank * chunk + min(rank, extra)#Andy
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
-    slices = range(slice_begin, slice_end)#Andy
+    num_slices = opt_info.num_slices  # Andy
+    chunk, extra = num_slices // size, num_slices % size  # Andy
+    slice_begin = rank * chunk + min(rank, extra)  # Andy
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )  # Andy
+    slices = range(slice_begin, slice_end)  # Andy
     result = network.contract(slices=slices)
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft contract",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft contract",mem_avail, "rank =",rank)
     cutn.destroy(handle)
 
     return result, rank
 
+
 def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
     """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
     The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
@@ -371,45 +426,52 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
 
     from mpi4py import MPI  # this line initializes MPI
     import socket
+
     # Get the hostname
-    #hostname = socket.gethostname()
-    
+    # hostname = socket.gethostname()
+
     ncpu_threads = multiprocessing.cpu_count() // 2
-    
+
     comm = MPI.COMM_WORLD
     rank = comm.Get_rank()
     size = comm.Get_size()
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
     device_id = rank % getDeviceCount()
     cp.cuda.Device(device_id).use()
 
     handle = cutn.create()
     network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft network opts",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft network opts",mem_avail, "rank =",rank)
     cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft convetor",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands_interleave = myconvertor.state_vector_operands()
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
     # Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
     network = cutn.Network(*operands_interleave, options=network_opts)
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
-    network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft contract path",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
+    network.contract_path(
+        optimize={
+            "samples": n_samples,
+            "threads": ncpu_threads,
+            "slicing": {"min_slices": max(16, size)},
+        }
+    )
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft contract path",mem_avail, "rank =",rank)
     # Execution: To execute the contraction using the optimal path found previously
-    #print("opt_cost",opt_info.opt_cost, "Process =",rank)
+    # print("opt_cost",opt_info.opt_cost, "Process =",rank)
 
-    '''
+    """
     path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
 
     num_slices = opt_info.num_slices#Andy
@@ -418,16 +480,16 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
     slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
     slices = range(slice_begin, slice_end)#Andy
     result = network.contract(slices=slices)
-    '''
+    """
     result = network.contract()
 
-    #mem_avail = cp.cuda.Device().mem_info[0]
-    #print("Mem avail: aft contract",mem_avail, "rank =",rank)
+    # mem_avail = cp.cuda.Device().mem_info[0]
+    # print("Mem avail: aft contract",mem_avail, "rank =",rank)
     cutn.destroy(handle)
 
     return result, rank
 
-  
+
 def eval_mps(qibo_circ, gate_algo, datatype):
     myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
     mps_helper = MPSContractionHelper(myconvertor.num_qubits)
@@ -436,18 +498,18 @@ def eval_mps(qibo_circ, gate_algo, datatype):
         myconvertor.mps_tensors, {"handle": myconvertor.handle}
     )
 
+
 def PauliStringGen(nqubits):
-    
     if nqubits <= 0:
         return "Invalid input. N should be a positive integer."
 
-    #characters = 'IXYZ'
-    characters = 'XXXZ'
+    # characters = 'IXYZ'
+    characters = "XXXZ"
 
-    result = ''
+    result = ""
 
     for i in range(nqubits):
         char_to_add = characters[i % len(characters)]
         result += char_to_add
 
-    return result
\ No newline at end of file
+    return result

From b4b2fec1b3f21477023ff73d7b2d6c41f67bdc89 Mon Sep 17 00:00:00 2001
From: Liwei Yang <yang0345@e.ntu.edu.sg>
Date: Wed, 24 Jan 2024 17:50:54 +0800
Subject: [PATCH 07/85] Add CPU and GPU into backends

---
 src/qibotn/backends/__init__.py |   2 +
 src/qibotn/backends/cpu.py      | 302 ++++++++++++++++++++++++++++++++
 src/qibotn/backends/gpu.py      |  39 +++++
 3 files changed, 343 insertions(+)
 create mode 100644 src/qibotn/backends/__init__.py
 create mode 100644 src/qibotn/backends/cpu.py
 create mode 100644 src/qibotn/backends/gpu.py

diff --git a/src/qibotn/backends/__init__.py b/src/qibotn/backends/__init__.py
new file mode 100644
index 0000000..ebc3a20
--- /dev/null
+++ b/src/qibotn/backends/__init__.py
@@ -0,0 +1,2 @@
+from qibotn.backends.cpu import NumbaBackend
+from qibotn.backends.gpu import CuTensorNet
diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
new file mode 100644
index 0000000..22afcdc
--- /dev/null
+++ b/src/qibotn/backends/cpu.py
@@ -0,0 +1,302 @@
+import numpy as np
+from qibo.backends.numpy import NumpyBackend
+from qibo.config import log
+from qibo.gates.abstract import ParametrizedGate
+from qibo.gates.channels import ReadoutErrorChannel
+from qibo.gates.special import FusedGate
+
+from qibojit.backends.matrices import CustomMatrices
+
+GATE_OPS = {
+    "X": "apply_x",
+    "CNOT": "apply_x",
+    "TOFFOLI": "apply_x",
+    "Y": "apply_y",
+    "Z": "apply_z",
+    "CZ": "apply_z",
+    "U1": "apply_z_pow",
+    "CU1": "apply_z_pow",
+    "SWAP": "apply_swap",
+    "fSim": "apply_fsim",
+    "GeneralizedfSim": "apply_fsim",
+}
+
+
+class NumbaBackend(NumpyBackend):
+    def __init__(self):
+        """Set up the numba CPU platform: metadata, kernels and thread count."""
+        super().__init__()
+        import sys
+
+        import psutil
+        from numba import __version__ as numba_version
+        # The compiled kernels live in qibojit; without this import the
+        # ``gates``/``ops`` names used below are unbound (NameError).
+        from qibojit.custom_operators import gates, ops
+
+        from qibotn import __version__ as qibotn_version
+
+        self.name = "qibotn"
+        self.platform = "numba"
+        self.versions.update({"qibotn": qibotn_version, "numba": numba_version})
+        self.numeric_types = (
+            int,
+            float,
+            complex,
+            np.int32,
+            np.int64,
+            np.float32,
+            np.float64,
+            np.complex64,
+            np.complex128,
+        )
+        self.tensor_types = (np.ndarray,)
+        self.device = "/CPU:0"
+        self.custom_matrices = CustomMatrices(self.dtype)
+        self.gates = gates
+        self.ops = ops
+        self.measure_frequencies_op = ops.measure_frequencies
+        self.multi_qubit_kernels = {
+            3: self.gates.apply_three_qubit_gate_kernel,
+            4: self.gates.apply_four_qubit_gate_kernel,
+            5: self.gates.apply_five_qubit_gate_kernel,
+        }
+        # Size the thread pool: physical cores on macOS, affinity mask elsewhere.
+        if sys.platform == "darwin":  # pragma: no cover
+            self.set_threads(psutil.cpu_count(logical=False))
+        else:
+            self.set_threads(len(psutil.Process().cpu_affinity()))
+
+    def set_precision(self, precision):
+        if precision != self.precision:
+            super().set_precision(precision)
+            if self.custom_matrices:
+                self.custom_matrices = CustomMatrices(self.dtype)
+
+    def set_threads(self, nthreads):
+        import numba
+
+        numba.set_num_threads(nthreads)
+        self.nthreads = nthreads
+
+    # def cast(self, x, dtype=None, copy=False): Inherited from ``NumpyBackend``
+
+    # def to_numpy(self, x): Inherited from ``NumpyBackend``
+
+    def zero_state(self, nqubits):
+        size = 2**nqubits
+        state = np.empty((size,), dtype=self.dtype)
+        return self.ops.initial_state_vector(state)
+
+    def zero_density_matrix(self, nqubits):
+        size = 2**nqubits
+        state = np.empty((size, size), dtype=self.dtype)
+        return self.ops.initial_density_matrix(state)
+
+    # def plus_state(self, nqubits): Inherited from ``NumpyBackend``
+
+    # def plus_density_matrix(self, nqubits): Inherited from ``NumpyBackend``
+
+    # def asmatrix_special(self, gate): Inherited from ``NumpyBackend``
+
+    # def control_matrix(self, gate): Inherited from ``NumpyBackend``
+
+    def one_qubit_base(self, state, nqubits, target, kernel, gate, qubits):
+        ncontrols = len(qubits) - 1 if qubits is not None else 0
+        m = nqubits - target - 1
+        nstates = 1 << (nqubits - ncontrols - 1)
+        if ncontrols:
+            kernel = getattr(self.gates, "multicontrol_{}_kernel".format(kernel))
+            return kernel(state, gate, qubits, nstates, m)
+        kernel = getattr(self.gates, "{}_kernel".format(kernel))
+        return kernel(state, gate, nstates, m)
+
+    def two_qubit_base(self, state, nqubits, target1, target2, kernel, gate, qubits):
+        ncontrols = len(qubits) - 2 if qubits is not None else 0
+        if target1 > target2:
+            swap_targets = True
+            m1 = nqubits - target1 - 1
+            m2 = nqubits - target2 - 1
+        else:
+            swap_targets = False
+            m1 = nqubits - target2 - 1
+            m2 = nqubits - target1 - 1
+        nstates = 1 << (nqubits - 2 - ncontrols)
+        if ncontrols:
+            kernel = getattr(self.gates, "multicontrol_{}_kernel".format(kernel))
+            return kernel(state, gate, qubits, nstates, m1, m2, swap_targets)
+        kernel = getattr(self.gates, "{}_kernel".format(kernel))
+        return kernel(state, gate, nstates, m1, m2, swap_targets)
+
+    def multi_qubit_base(self, state, nqubits, targets, gate, qubits):
+        if qubits is None:
+            qubits = np.array(sorted(nqubits - q - 1 for q in targets), dtype="int32")
+        nstates = 1 << (nqubits - len(qubits))
+        targets = np.array(
+            [1 << (nqubits - t - 1) for t in targets[::-1]], dtype="int64"
+        )
+        if len(targets) > 5:
+            kernel = self.gates.apply_multi_qubit_gate_kernel
+        else:
+            kernel = self.multi_qubit_kernels.get(len(targets))
+        return kernel(state, gate, qubits, nstates, targets)
+
+    @staticmethod
+    def _create_qubits_tensor(gate, nqubits):
+        # TODO: Treat density matrices
+        qubits = [nqubits - q - 1 for q in gate.control_qubits]
+        qubits.extend(nqubits - q - 1 for q in gate.target_qubits)
+        return np.array(sorted(qubits), dtype="int32")
+
+    def _as_custom_matrix(self, gate):
+        name = gate.__class__.__name__
+        if isinstance(gate, ParametrizedGate):
+            return getattr(self.custom_matrices, name)(*gate.parameters)
+        elif isinstance(gate, FusedGate):  # pragma: no cover
+            # fusion is tested in qibo tests
+            return self.asmatrix_fused(gate)
+        else:
+            return getattr(self.custom_matrices, name)
+
+    def apply_gate(self, gate, state, nqubits):
+        matrix = self._as_custom_matrix(gate)
+        qubits = self._create_qubits_tensor(gate, nqubits)
+        targets = gate.target_qubits
+        state = self.cast(state)
+        if len(targets) == 1:
+            op = GATE_OPS.get(gate.__class__.__name__, "apply_gate")
+            return self.one_qubit_base(state, nqubits, *targets, op, matrix, qubits)
+        elif len(targets) == 2:
+            op = GATE_OPS.get(gate.__class__.__name__, "apply_two_qubit_gate")
+            return self.two_qubit_base(state, nqubits, *targets, op, matrix, qubits)
+        else:
+            return self.multi_qubit_base(state, nqubits, targets, matrix, qubits)
+
+    def apply_gate_density_matrix(self, gate, state, nqubits, inverse=False):
+        name = gate.__class__.__name__
+        if name == "Y":
+            return self._apply_ygate_density_matrix(gate, state, nqubits)
+        if inverse:
+            # used to reset the state when applying channels
+            # see :meth:`qibojit.backend.NumpyBackend.apply_channel_density_matrix` below
+            matrix = np.linalg.inv(gate.asmatrix(self))
+            matrix = self.cast(matrix)
+        else:
+            matrix = self._as_custom_matrix(gate)
+        qubits = self._create_qubits_tensor(gate, nqubits)
+        qubits_dm = qubits + nqubits
+        targets = gate.target_qubits
+        targets_dm = tuple(q + nqubits for q in targets)
+
+        state = self.cast(state)
+        shape = state.shape
+        if len(targets) == 1:
+            op = GATE_OPS.get(name, "apply_gate")
+            state = self.one_qubit_base(
+                state.ravel(), 2 * nqubits, *targets, op, matrix, qubits_dm
+            )
+            state = self.one_qubit_base(
+                state, 2 * nqubits, *targets_dm, op, np.conj(matrix), qubits
+            )
+        elif len(targets) == 2:
+            op = GATE_OPS.get(name, "apply_two_qubit_gate")
+            state = self.two_qubit_base(
+                state.ravel(), 2 * nqubits, *targets, op, matrix, qubits_dm
+            )
+            state = self.two_qubit_base(
+                state, 2 * nqubits, *targets_dm, op, np.conj(matrix), qubits
+            )
+        else:
+            state = self.multi_qubit_base(
+                state.ravel(), 2 * nqubits, targets, matrix, qubits_dm
+            )
+            state = self.multi_qubit_base(
+                state, 2 * nqubits, targets_dm, np.conj(matrix), qubits
+            )
+        return np.reshape(state, shape)
+
+    def _apply_ygate_density_matrix(self, gate, state, nqubits):
+        matrix = self._as_custom_matrix(gate)
+        qubits = self._create_qubits_tensor(gate, nqubits)
+        qubits_dm = qubits + nqubits
+        targets = gate.target_qubits
+        targets_dm = tuple(q + nqubits for q in targets)
+        state = self.cast(state)
+        shape = state.shape
+        state = self.one_qubit_base(
+            state.ravel(), 2 * nqubits, *targets, "apply_y", matrix, qubits_dm
+        )
+        # force using ``apply_gate`` kernel so that conjugate is properly applied
+        state = self.one_qubit_base(
+            state, 2 * nqubits, *targets_dm, "apply_gate", np.conj(matrix), qubits
+        )
+        return np.reshape(state, shape)
+
+    # def apply_channel(self, gate): Inherited from ``NumpyBackend``
+
+    def apply_channel_density_matrix(self, channel, state, nqubits):
+        """Return ``(1 - coefficient_sum) * state`` plus each gate's weighted action."""
+        state = self.cast(state)
+        is_readout = isinstance(channel, ReadoutErrorChannel)
+        backup = self.cast(state, copy=True) if is_readout else None
+        mixed = (1 - channel.coefficient_sum) * state
+        for weight, gate in zip(channel.coefficients, channel.gates):
+            state = self.apply_gate_density_matrix(gate, state, nqubits)
+            mixed += weight * state
+            if is_readout:  # restore the input from the copy taken above
+                state = self.cast(backup, copy=True)
+            else:  # undo the gate by applying its inverse
+                state = self.apply_gate_density_matrix(
+                    gate, state, nqubits, inverse=True
+                )
+        return mixed
+
+    def collapse_state(self, state, qubits, shot, nqubits, normalize=True):
+        state = self.cast(state)
+        qubits = self.cast([nqubits - q - 1 for q in reversed(qubits)], dtype="int32")
+        if normalize:
+            return self.ops.collapse_state_normalized(state, qubits, int(shot), nqubits)
+        else:
+            return self.ops.collapse_state(state, qubits, int(shot), nqubits)
+
+    def collapse_density_matrix(self, state, qubits, shot, nqubits, normalize=True):
+        state = self.cast(state)
+        shape = state.shape
+        dm_qubits = [q + nqubits for q in qubits]
+        state = self.collapse_state(state.ravel(), dm_qubits, shot, 2 * nqubits, False)
+        state = self.collapse_state(state, qubits, shot, 2 * nqubits, False)
+        state = self.np.reshape(state, shape)
+        if normalize:
+            state = state / self.np.trace(state)
+        return state
+
+    # def calculate_probabilities(self, state, qubits, nqubits): Inherited from ``NumpyBackend``
+
+    # def sample_shots(self, probabilities, nshots): Inherited from ``NumpyBackend``
+
+    # def aggregate_shots(self, shots): Inherited from ``NumpyBackend``
+
+    # def samples_to_binary(self, samples, nqubits): Inherited from ``NumpyBackend``
+
+    # def samples_to_decimal(self, samples, nqubits): Inherited from ``NumpyBackend``
+
+    def sample_frequencies(self, probabilities, nshots):
+        from qibo.config import SHOT_METROPOLIS_THRESHOLD
+        # Small shot counts are delegated to the inherited implementation.
+        if nshots < SHOT_METROPOLIS_THRESHOLD:
+            return super().sample_frequencies(probabilities, nshots)
+        # Otherwise sample with the custom ``measure_frequencies`` kernel.
+        import collections
+
+        seed = np.random.randint(0, int(1e8), dtype="int64")
+        nqubits = int(np.log2(tuple(probabilities.shape)[0]))
+        frequencies = np.zeros(2**nqubits, dtype="int64")
+        # always use the numba CPU kernel: this op is not implemented on GPU
+        frequencies = self.measure_frequencies_op(
+            frequencies, probabilities, nshots, nqubits, seed, self.nthreads
+        )
+        return collections.Counter({i: f for i, f in enumerate(frequencies) if f > 0})
+
+    # def calculate_frequencies(self, samples): Inherited from ``NumpyBackend``
+
+    # def assert_allclose(self, value, target, rtol=1e-7, atol=0.0): Inherited from ``NumpyBackend``
diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
new file mode 100644
index 0000000..bca295d
--- /dev/null
+++ b/src/qibotn/backends/gpu.py
@@ -0,0 +1,39 @@
+from qibo.backends.numpy import NumpyBackend
+
+
+class CuTensorNet(NumpyBackend):  # pragma: no cover
+    # CI does not test for GPU
+
+    def __init__(self):
+        super().__init__()
+        import cuquantum  # pylint: disable=import-error
+        from cuquantum import cutensornet as cutn  # pylint: disable=import-error
+
+        self.cuquantum = cuquantum
+        self.cutn = cutn
+        self.platform = "cutensornet"
+        self.versions["cuquantum"] = self.cuquantum.__version__
+        self.supports_multigpu = True
+        self.handle = self.cutn.create()
+
+    def __del__(self):
+        if hasattr(self, "cutn"):
+            self.cutn.destroy(self.handle)
+
+    def set_precision(self, precision):
+        if precision != self.precision:
+            super().set_precision(precision)
+
+    def get_cuda_type(self, dtype="complex64"):
+        if dtype == "complex128":
+            return (
+                self.cuquantum.cudaDataType.CUDA_C_64F,
+                self.cuquantum.ComputeType.COMPUTE_64F,
+            )
+        elif dtype == "complex64":
+            return (
+                self.cuquantum.cudaDataType.CUDA_C_32F,
+                self.cuquantum.ComputeType.COMPUTE_32F,
+            )
+        else:
+            raise TypeError("Type can be either complex64 or complex128")

From f8c2b88dc08b4171b911ec6839be21851fa9aed0 Mon Sep 17 00:00:00 2001
From: Liwei Yang <yang0345@e.ntu.edu.sg>
Date: Fri, 26 Jan 2024 17:56:11 +0800
Subject: [PATCH 08/85] Finish CuTensorNet class for cuQuantum cutensornet

---
 src/qibotn/backends/gpu.py | 144 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 143 insertions(+), 1 deletion(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index bca295d..e44b666 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -1,21 +1,38 @@
+import numpy as np
+
 from qibo.backends.numpy import NumpyBackend
+from qibo.result import CircuitResult
+from qibo.config import raise_error
 
 
 class CuTensorNet(NumpyBackend):  # pragma: no cover
     # CI does not test for GPU
 
-    def __init__(self):
+    def __init__(self, MPI_enabled=False, MPS_enabled=False, NCCL_enabled=False):
         super().__init__()
         import cuquantum  # pylint: disable=import-error
         from cuquantum import cutensornet as cutn  # pylint: disable=import-error
 
+        self.name = "qibotn"
         self.cuquantum = cuquantum
         self.cutn = cutn
         self.platform = "cutensornet"
         self.versions["cuquantum"] = self.cuquantum.__version__
         self.supports_multigpu = True
+        self.MPI_enabled = MPI_enabled
+        self.MPS_enabled = MPS_enabled
+        self.NCCL_enabled = NCCL_enabled
         self.handle = self.cutn.create()
 
+    def apply_gate(self, gate, state, nqubits):  # pragma: no cover
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def apply_gate_density_matrix(self, gate, state, nqubits):  # pragma: no cover
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def assign_measurements(self, measurement_map, circuit_result):
+        raise_error(NotImplementedError, "Not implemented in QiboTN.")
+
     def __del__(self):
         if hasattr(self, "cutn"):
             self.cutn.destroy(self.handle)
@@ -37,3 +54,128 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             )
         else:
             raise TypeError("Type can be either complex64 or complex128")
+
+    def execute_circuit(
+        self, circuit, initial_state=None, nshots=None, return_array=False
+    ):  # pragma: no cover
+        """Executes a quantum circuit.
+
+        Args:
+            circuit (:class:`qibo.models.circuit.Circuit`): Circuit to execute.
+            initial_state (:class:`qibo.models.circuit.Circuit`): Circuit to prepare the initial state.
+                If ``None`` the default ``|00...0>`` state is used.
+
+        Returns:
+            xxx.
+
+        """
+
+        import qibotn.cutn
+
+        cutn = qibotn.cutn
+        MPI_enabled = self.MPI_enabled
+        MPS_enabled = self.MPS_enabled
+        NCCL_enabled = self.NCCL_enabled
+
+        if (
+            MPI_enabled == False
+            and MPS_enabled == False
+            and NCCL_enabled == False
+        ):
+            if initial_state is not None:
+                raise_error(NotImplementedError,
+                            "QiboTN cannot support initial state.")
+
+            state = cutn.eval(circuit, self.dtype)
+
+        if (
+            MPI_enabled == False
+            and MPS_enabled == True
+            and NCCL_enabled == False
+        ):
+            if initial_state is not None:
+                raise_error(NotImplementedError,
+                            "QiboTN cannot support initial state.")
+
+            gate_algo = {
+                "qr_method": False,
+                "svd_method": {
+                    "partition": "UV",
+                    "abs_cutoff": 1e-12,
+                },
+            }  # make this user input
+            state = cutn.eval_mps(circuit, gate_algo, self.dtype)
+
+        if (
+            MPI_enabled == True
+            and MPS_enabled == False
+            and NCCL_enabled == False
+        ):
+            if initial_state is not None:
+                raise_error(NotImplementedError,
+                            "QiboTN cannot support initial state.")
+
+            state, rank = cutn.eval_tn_MPI_2(circuit, self.dtype, 32)
+            if rank > 0:
+                state = np.array(0)
+
+        if (
+            MPI_enabled == False
+            and MPS_enabled == False
+            and NCCL_enabled == True
+        ):
+            if initial_state is not None:
+                raise_error(NotImplementedError,
+                            "QiboTN cannot support initial state.")
+
+            state, rank = cutn.eval_tn_nccl(circuit, self.dtype, 32)
+            if rank > 0:
+                state = np.array(0)
+
+        if (
+            MPI_enabled == False
+            and MPS_enabled == False
+            and NCCL_enabled == False
+        ):
+            if initial_state is not None:
+                raise_error(NotImplementedError,
+                            "QiboTN cannot support initial state.")
+
+            state = cutn.eval_expectation(circuit, self.dtype)
+
+        if (
+            MPI_enabled == True
+            and MPS_enabled == False
+            and NCCL_enabled == False
+        ):
+            if initial_state is not None:
+                raise_error(NotImplementedError,
+                            "QiboTN cannot support initial state.")
+
+            state, rank = cutn.eval_tn_MPI_2_expectation(
+                circuit, self.dtype, 32)
+
+            if rank > 0:
+                state = np.array(0)
+
+        if (
+            MPI_enabled == False
+            and MPS_enabled == False
+            and NCCL_enabled == True
+        ):
+            if initial_state is not None:
+                raise_error(NotImplementedError,
+                            "QiboTN cannot support initial state.")
+
+            state, rank = cutn.eval_tn_nccl_expectation(
+                circuit, self.dtype, 32)
+
+            if rank > 0:
+                state = np.array(0)
+
+        if return_array:
+            return state.flatten()
+        else:
+            circuit._final_state = CircuitResult(
+                self, circuit, state.flatten(), nshots)
+            return circuit._final_state

From d3c9266dcfa9fd5ad9376c11aaafe2a3d2bb67fc Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 10:12:09 +0800
Subject: [PATCH 09/85] Rename function call

---
 src/qibotn/backends/gpu.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index e44b666..e2087db 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -115,7 +115,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state, rank = cutn.eval_tn_MPI_2(circuit, self.dtype, 32)
+            state, rank = cutn.eval_tn_MPI(circuit, self.dtype, 32)
             if rank > 0:
                 state = np.array(0)
 
@@ -152,7 +152,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state, rank = cutn.eval_tn_MPI_2_expectation(
+            state, rank = cutn.eval_tn_MPI_expectation(
                 circuit, self.dtype, 32)
 
             if rank > 0:

From 293af81b43879465f1027dcd6f7ae80be85570dc Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 10:22:39 +0800
Subject: [PATCH 10/85] Copy eval_tn_MPI docstring and notes to eval_tn_MPI_2

---
 src/qibotn/cutn.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py
index aca33ff..e4ff2a6 100644
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -22,6 +22,12 @@ def eval_expectation(qibo_circ, datatype):
 
 
 def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
+    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
+    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
+    After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
+    """
+    
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network
@@ -96,6 +102,17 @@ def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
 
     # Sum the partial contribution from each process on root.
     result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
+    
+    """
+    path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
+
+    num_slices = opt_info.num_slices#Andy
+    chunk, extra = num_slices // size, num_slices % size#Andy
+    slice_begin = rank * chunk + min(rank, extra)#Andy
+    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
+    slices = range(slice_begin, slice_end)#Andy
+    result = network.contract(slices=slices)
+    """
 
     return result, rank
 

From 1035629745341e35a50ca23b508bc375d016deaf Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 10:23:33 +0800
Subject: [PATCH 11/85] Remove eval_tn_mpi

---
 src/qibotn/cutn.py | 75 ----------------------------------------------
 1 file changed, 75 deletions(-)

diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py
index e4ff2a6..e37785b 100644
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -433,80 +433,6 @@ def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
 
     return result, rank
 
-
-def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
-    """
-
-    from mpi4py import MPI  # this line initializes MPI
-    import socket
-
-    # Get the hostname
-    # hostname = socket.gethostname()
-
-    ncpu_threads = multiprocessing.cpu_count() // 2
-
-    comm = MPI.COMM_WORLD
-    rank = comm.Get_rank()
-    size = comm.Get_size()
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
-    device_id = rank % getDeviceCount()
-    cp.cuda.Device(device_id).use()
-
-    handle = cutn.create()
-    network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft network opts",mem_avail, "rank =",rank)
-    cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
-    # Perform circuit conversion
-    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
-    operands_interleave = myconvertor.state_vector_operands()
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
-
-    # Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
-    network = cutn.Network(*operands_interleave, options=network_opts)
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
-    network.contract_path(
-        optimize={
-            "samples": n_samples,
-            "threads": ncpu_threads,
-            "slicing": {"min_slices": max(16, size)},
-        }
-    )
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft contract path",mem_avail, "rank =",rank)
-    # Execution: To execute the contraction using the optimal path found previously
-    # print("opt_cost",opt_info.opt_cost, "Process =",rank)
-
-    """
-    path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
-
-    num_slices = opt_info.num_slices#Andy
-    chunk, extra = num_slices // size, num_slices % size#Andy
-    slice_begin = rank * chunk + min(rank, extra)#Andy
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
-    slices = range(slice_begin, slice_end)#Andy
-    result = network.contract(slices=slices)
-    """
-    result = network.contract()
-
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft contract",mem_avail, "rank =",rank)
-    cutn.destroy(handle)
-
-    return result, rank
-
-
 def eval_mps(qibo_circ, gate_algo, datatype):
     myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
     mps_helper = MPSContractionHelper(myconvertor.num_qubits)
@@ -515,7 +441,6 @@ def eval_mps(qibo_circ, gate_algo, datatype):
         myconvertor.mps_tensors, {"handle": myconvertor.handle}
     )
 
-
 def PauliStringGen(nqubits):
     if nqubits <= 0:
         return "Invalid input. N should be a positive integer."

From 66aaf0e073f55ee54c0795b81ebff8d26831f24f Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 10:24:11 +0800
Subject: [PATCH 12/85] Rename eval_tn_MPI_2 to eval_tn_MPI

---
 src/qibotn/cutn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py
index e37785b..84eda78 100644
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -21,7 +21,7 @@ def eval_expectation(qibo_circ, datatype):
     )
 
 
-def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
+def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
     """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
     The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
     The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.

From fea2b1150f679a19d6cc852c85da41def0d47366 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 10:34:10 +0800
Subject: [PATCH 13/85] Remove eval_tn_MPI_expectation

---
 src/qibotn/cutn.py | 65 ----------------------------------------------
 1 file changed, 65 deletions(-)

diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py
index 84eda78..36d6a1c 100644
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -368,71 +368,6 @@ def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
 
     return result, rank
 
-
-def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
-    from mpi4py import MPI  # this line initializes MPI
-    import socket
-
-    # Get the hostname
-    # hostname = socket.gethostname()
-
-    ncpu_threads = multiprocessing.cpu_count() // 2
-
-    comm = MPI.COMM_WORLD
-    rank = comm.Get_rank()
-    size = comm.Get_size()
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
-    device_id = rank % getDeviceCount()
-    cp.cuda.Device(device_id).use()
-
-    handle = cutn.create()
-    network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft network opts",mem_avail, "rank =",rank)
-    cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
-    # Perform circuit conversion
-    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    operands_interleave = myconvertor.expectation_operands(
-        PauliStringGen(qibo_circ.nqubits)
-    )
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
-
-    # Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
-    network = cutn.Network(*operands_interleave, options=network_opts)
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
-    path, opt_info = network.contract_path(
-        optimize={
-            "samples": n_samples,
-            "threads": ncpu_threads,
-            "slicing": {"min_slices": max(16, size)},
-        }
-    )
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft contract path",mem_avail, "rank =",rank)
-    # Execution: To execute the contraction using the optimal path found previously
-    # print("opt_cost",opt_info.opt_cost, "Process =",rank)
-
-    num_slices = opt_info.num_slices  # Andy
-    chunk, extra = num_slices // size, num_slices % size  # Andy
-    slice_begin = rank * chunk + min(rank, extra)  # Andy
-    slice_end = (
-        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
-    )  # Andy
-    slices = range(slice_begin, slice_end)  # Andy
-    result = network.contract(slices=slices)
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft contract",mem_avail, "rank =",rank)
-    cutn.destroy(handle)
-
-    return result, rank
-
 def eval_mps(qibo_circ, gate_algo, datatype):
     myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
     mps_helper = MPSContractionHelper(myconvertor.num_qubits)

From 827c285656580f12d50e4ab42bf76f5942dedfff Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 10:34:47 +0800
Subject: [PATCH 14/85] Rename eval_tn_MPI_2_expectation to
 eval_tn_MPI_expectation

---
 src/qibotn/cutn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py
index 36d6a1c..3df40e1 100644
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -290,7 +290,7 @@ def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
+def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network

From 6181161133680ef3056ac69c95cc415089f1e6fa Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 10:41:26 +0800
Subject: [PATCH 15/85] Format with black

---
 src/qibotn/cutn.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py
index 3df40e1..39cca9c 100644
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -27,7 +27,7 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
     The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
     After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
     """
-    
+
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network
@@ -102,7 +102,7 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
 
     # Sum the partial contribution from each process on root.
     result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
-    
+
     """
     path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
 
@@ -368,6 +368,7 @@ def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
 
     return result, rank
 
+
 def eval_mps(qibo_circ, gate_algo, datatype):
     myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
     mps_helper = MPSContractionHelper(myconvertor.num_qubits)
@@ -376,6 +377,7 @@ def eval_mps(qibo_circ, gate_algo, datatype):
         myconvertor.mps_tensors, {"handle": myconvertor.handle}
     )
 
+
 def PauliStringGen(nqubits):
     if nqubits <= 0:
         return "Invalid input. N should be a positive integer."

From 44ac0648d510b58a7934ca8590cba1474fd030ab Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 10:43:28 +0800
Subject: [PATCH 16/85] Rename to better reflect function of file

---
 src/qibotn/backends/gpu.py      | 4 ++--
 src/qibotn/{cutn.py => eval.py} | 0
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename src/qibotn/{cutn.py => eval.py} (100%)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index e2087db..96eaab3 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -70,9 +70,9 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
 
         """
 
-        import qibotn.cutn
+        import qibotn.src.qibotn.eval
 
-        cutn = qibotn.cutn
+        cutn = qibotn.eval
         MPI_enabled = self.MPI_enabled
         MPS_enabled = self.MPS_enabled
         NCCL_enabled = self.NCCL_enabled
diff --git a/src/qibotn/cutn.py b/src/qibotn/eval.py
similarity index 100%
rename from src/qibotn/cutn.py
rename to src/qibotn/eval.py

From e11214060f1ff540443bd468d626c5eb02bdf959 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 10:55:22 +0800
Subject: [PATCH 17/85] Add condition to trigger expectation calculation

---
 src/qibotn/backends/gpu.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index 96eaab3..2d28987 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -8,7 +8,7 @@ from qibo.config import raise_error
 class CuTensorNet(NumpyBackend):  # pragma: no cover
     # CI does not test for GPU
 
-    def __init__(self, MPI_enabled=False, MPS_enabled=False, NCCL_enabled=False):
+    def __init__(self, MPI_enabled=False, MPS_enabled=False, NCCL_enabled=False, expectation_enabled=False):
         super().__init__()
         import cuquantum  # pylint: disable=import-error
         from cuquantum import cutensornet as cutn  # pylint: disable=import-error
@@ -22,6 +22,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
         self.MPI_enabled = MPI_enabled
         self.MPS_enabled = MPS_enabled
         self.NCCL_enabled = NCCL_enabled
+        self.expectation_enabled = expectation_enabled
         self.handle = self.cutn.create()
 
     def apply_gate(self, gate, state, nqubits):  # pragma: no cover
@@ -76,11 +77,13 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
         MPI_enabled = self.MPI_enabled
         MPS_enabled = self.MPS_enabled
         NCCL_enabled = self.NCCL_enabled
+        expectation_enabled = self.expectation_enabled
 
         if (
             MPI_enabled == False
             and MPS_enabled == False
             and NCCL_enabled == False
+            and expectation_enabled == False
         ):
             if initial_state is not None:
                 raise_error(NotImplementedError,
@@ -92,6 +95,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             MPI_enabled == False
             and MPS_enabled == True
             and NCCL_enabled == False
+            and expectation_enabled == False
         ):
             if initial_state is not None:
                 raise_error(NotImplementedError,
@@ -110,6 +114,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             MPI_enabled == True
             and MPS_enabled == False
             and NCCL_enabled == False
+            and expectation_enabled == False
         ):
             if initial_state is not None:
                 raise_error(NotImplementedError,
@@ -123,6 +128,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             MPI_enabled == False
             and MPS_enabled == False
             and NCCL_enabled == True
+            and expectation_enabled == False
         ):
             if initial_state is not None:
                 raise_error(NotImplementedError,
@@ -136,6 +142,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             MPI_enabled == False
             and MPS_enabled == False
             and NCCL_enabled == False
+            and expectation_enabled == True
         ):
             if initial_state is not None:
                 raise_error(NotImplementedError,
@@ -147,6 +154,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             MPI_enabled == True
             and MPS_enabled == False
             and NCCL_enabled == False
+            and expectation_enabled == True
         ):
             if initial_state is not None:
                 raise_error(NotImplementedError,
@@ -162,6 +170,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             MPI_enabled == False
             and MPS_enabled == False
             and NCCL_enabled == True
+            and expectation_enabled == True
         ):
             if initial_state is not None:
                 raise_error(NotImplementedError,

From 34125d131cbee4ac0d393727e20fbe0683706e89 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 14:37:33 +0800
Subject: [PATCH 18/85] Change import from cutn to eval [skip CI]

---
 tests/test_cuquantum_cutensor_backend.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_cuquantum_cutensor_backend.py b/tests/test_cuquantum_cutensor_backend.py
index 3a87206..5802074 100644
--- a/tests/test_cuquantum_cutensor_backend.py
+++ b/tests/test_cuquantum_cutensor_backend.py
@@ -32,7 +32,7 @@ def test_eval(nqubits: int, dtype="complex128"):
         dtype (str): The data type for precision, 'complex64' for single,
             'complex128' for double.
     """
-    import qibotn.cutn
+    import qibotn.eval
 
     # Test qibo
     qibo.set_backend(backend=config.qibo.backend,
@@ -59,7 +59,7 @@ def test_mps(nqubits: int, dtype="complex128"):
         dtype (str): The data type for precision, 'complex64' for single,
             'complex128' for double.
     """
-    import qibotn.cutn
+    import qibotn.eval
 
     # Test qibo
     qibo.set_backend(backend=config.qibo.backend,
@@ -78,7 +78,7 @@ def test_mps(nqubits: int, dtype="complex128"):
                  }}
 
     cutn_time, result_tn = time(
-        lambda: qibotn.cutn.eval_mps(circ_qibo, gate_algo, dtype).flatten())
+        lambda: qibotn.eval.eval_mps(circ_qibo, gate_algo, dtype).flatten())
 
     print(
         f"State vector difference: {abs(result_tn - result_sv_cp).max():0.3e}")

From 93331aa12c864a4a7d53f856836efd1ecdbaec52 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 15:40:11 +0800
Subject: [PATCH 19/85] Rename function name to be more descriptive [skip CI]

---
 src/qibotn/eval.py                       | 14 +++++++-------
 tests/test_cuquantum_cutensor_backend.py |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index 39cca9c..a283ff3 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -9,19 +9,19 @@ from qibotn.QiboCircuitToMPS import QiboCircuitToMPS
 from qibotn.mps_contraction_helper import MPSContractionHelper
 
 
-def eval(qibo_circ, datatype):
+def dense_vector_tn(qibo_circ, datatype):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(*myconvertor.state_vector_operands())
 
 
-def eval_expectation(qibo_circ, datatype):
+def expectation_tn(qibo_circ, datatype):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(
         *myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
     )
 
 
-def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
+def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
     """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
     The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
     The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
@@ -117,7 +117,7 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
+def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network
@@ -203,7 +203,7 @@ def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
+def expectation_tn_nccl(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network
@@ -290,7 +290,7 @@ def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
+def expectation_tn_MPI(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network
@@ -369,7 +369,7 @@ def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def eval_mps(qibo_circ, gate_algo, datatype):
+def dense_vector_mps(qibo_circ, gate_algo, datatype):
     myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
     mps_helper = MPSContractionHelper(myconvertor.num_qubits)
 
diff --git a/tests/test_cuquantum_cutensor_backend.py b/tests/test_cuquantum_cutensor_backend.py
index 5802074..3de5c17 100644
--- a/tests/test_cuquantum_cutensor_backend.py
+++ b/tests/test_cuquantum_cutensor_backend.py
@@ -42,7 +42,7 @@ def test_eval(nqubits: int, dtype="complex128"):
 
     # Test Cuquantum
     cutn_time, result_tn = time(
-        lambda: qibotn.cutn.eval(qibo_circ, dtype).flatten())
+        lambda: qibotn.eval.dense_vector_tn(qibo_circ, dtype).flatten())
 
     assert 1e-2 * qibo_time < cutn_time < 1e2 * qibo_time
     assert np.allclose(
@@ -78,7 +78,7 @@ def test_mps(nqubits: int, dtype="complex128"):
                  }}
 
     cutn_time, result_tn = time(
-        lambda: qibotn.eval.eval_mps(circ_qibo, gate_algo, dtype).flatten())
+        lambda: qibotn.eval.dense_vector_mps(circ_qibo, gate_algo, dtype).flatten())
 
     print(
         f"State vector difference: {abs(result_tn - result_sv_cp).max():0.3e}")

From 391d25230706f9ea9be12315a3d96968f7c367ef Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 15:54:53 +0800
Subject: [PATCH 20/85] Update function call name [skip CI]

---
 src/qibotn/backends/gpu.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index 2d28987..47f667f 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -71,9 +71,8 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
 
         """
 
-        import qibotn.src.qibotn.eval
+        import qibotn.eval as eval
 
-        cutn = qibotn.eval
         MPI_enabled = self.MPI_enabled
         MPS_enabled = self.MPS_enabled
         NCCL_enabled = self.NCCL_enabled
@@ -89,7 +88,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state = cutn.eval(circuit, self.dtype)
+            state = eval.dense_vector_tn(circuit, self.dtype)
 
         if (
             MPI_enabled == False
@@ -108,7 +107,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                     "abs_cutoff": 1e-12,
                 },
             }  # make this user input
-            state = cutn.eval_mps(circuit, gate_algo, self.dtype)
+            state = eval.dense_vector_mps(circuit, gate_algo, self.dtype)
 
         if (
             MPI_enabled == True
@@ -120,7 +119,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state, rank = cutn.eval_tn_MPI(circuit, self.dtype, 32)
+            state, rank = eval.dense_vector_tn_MPI(circuit, self.dtype, 32)
             if rank > 0:
                 state = np.array(0)
 
@@ -134,7 +133,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state, rank = cutn.eval_tn_nccl(circuit, self.dtype, 32)
+            state, rank = eval.dense_vector_tn_nccl(circuit, self.dtype, 32)
             if rank > 0:
                 state = np.array(0)
 
@@ -148,7 +147,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state = cutn.eval_expectation(circuit, self.dtype)
+            state = eval.expectation_tn(circuit, self.dtype)
 
         if (
             MPI_enabled == True
@@ -160,7 +159,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state, rank = cutn.eval_tn_MPI_expectation(
+            state, rank = eval.expectation_tn_MPI(
                 circuit, self.dtype, 32)
 
             if rank > 0:
@@ -176,7 +175,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state, rank = cutn.eval_tn_nccl_expectation(
+            state, rank = eval.expectation_tn_nccl(
                 circuit, self.dtype, 32)
 
             if rank > 0:

From 9ae7dc18f53adfd3e0958020cbefc440934a1a6e Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 30 Jan 2024 17:49:30 +0800
Subject: [PATCH 21/85] Change execute_circuit inputs

---
 src/qibotn/backends/gpu.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index 47f667f..5f41344 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -1,14 +1,14 @@
 import numpy as np
 
 from qibo.backends.numpy import NumpyBackend
-from qibo.result import CircuitResult
+from qibo.states import CircuitResult
 from qibo.config import raise_error
 
 
 class CuTensorNet(NumpyBackend):  # pragma: no cover
     # CI does not test for GPU
 
-    def __init__(self, MPI_enabled=False, MPS_enabled=False, NCCL_enabled=False, expectation_enabled=False):
+    def __init__(self):
         super().__init__()
         import cuquantum  # pylint: disable=import-error
         from cuquantum import cutensornet as cutn  # pylint: disable=import-error
@@ -19,10 +19,6 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
         self.platform = "cutensornet"
         self.versions["cuquantum"] = self.cuquantum.__version__
         self.supports_multigpu = True
-        self.MPI_enabled = MPI_enabled
-        self.MPS_enabled = MPS_enabled
-        self.NCCL_enabled = NCCL_enabled
-        self.expectation_enabled = expectation_enabled
         self.handle = self.cutn.create()
 
     def apply_gate(self, gate, state, nqubits):  # pragma: no cover
@@ -57,7 +53,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             raise TypeError("Type can be either complex64 or complex128")
 
     def execute_circuit(
-        self, circuit, initial_state=None, nshots=None, return_array=False
+        self, circuit, test, MPI_enabled=False, MPS_enabled=False, NCCL_enabled=False, expectation_enabled=False, initial_state=None, nshots=None, return_array=False
     ):  # pragma: no cover
         """Executes a quantum circuit.
 
@@ -72,12 +68,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
         """
 
         import qibotn.eval as eval
-
-        MPI_enabled = self.MPI_enabled
-        MPS_enabled = self.MPS_enabled
-        NCCL_enabled = self.NCCL_enabled
-        expectation_enabled = self.expectation_enabled
-
+        print("Test", test)
         if (
             MPI_enabled == False
             and MPS_enabled == False

From 6216a3291ea426f51ada1944550c7022667b7f3d Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 31 Jan 2024 10:02:34 +0800
Subject: [PATCH 22/85] Rename expectation function name

---
 src/qibotn/backends/gpu.py | 12 ++++++++----
 src/qibotn/eval.py         |  6 +++---
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index 5f41344..6301bf7 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -53,7 +53,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             raise TypeError("Type can be either complex64 or complex128")
 
     def execute_circuit(
-        self, circuit, test, MPI_enabled=False, MPS_enabled=False, NCCL_enabled=False, expectation_enabled=False, initial_state=None, nshots=None, return_array=False
+        self, circuit, MPI_enabled=False, MPS_enabled=False, NCCL_enabled=False, expectation_enabled=False, initial_state=None, nshots=None, return_array=False
     ):  # pragma: no cover
         """Executes a quantum circuit.
 
@@ -68,7 +68,11 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
         """
 
         import qibotn.eval as eval
-        print("Test", test)
+        print("MPI_enabled", MPI_enabled)
+        print("MPS_enabled", MPS_enabled)
+        print("NCCL_enabled", NCCL_enabled)
+        print("expectation_enabled", expectation_enabled)
+        
         if (
             MPI_enabled == False
             and MPS_enabled == False
@@ -150,7 +154,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state, rank = eval.expectation_tn_MPI(
+            state, rank = eval.expectation_pauli_tn_MPI(
                 circuit, self.dtype, 32)
 
             if rank > 0:
@@ -166,7 +170,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
                 raise_error(NotImplementedError,
                             "QiboTN cannot support initial state.")
 
-            state, rank = eval.expectation_tn_nccl(
+            state, rank = eval.expectation_pauli_tn_nccl(
                 circuit, self.dtype, 32)
 
             if rank > 0:
diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index a283ff3..5bb5ba1 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -14,7 +14,7 @@ def dense_vector_tn(qibo_circ, datatype):
     return contract(*myconvertor.state_vector_operands())
 
 
-def expectation_tn(qibo_circ, datatype):
+def expectation_pauli_tn(qibo_circ, datatype):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(
         *myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
@@ -203,7 +203,7 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def expectation_tn_nccl(qibo_circ, datatype, n_samples=8):
+def expectation_pauli_tn_nccl(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network
@@ -290,7 +290,7 @@ def expectation_tn_nccl(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def expectation_tn_MPI(qibo_circ, datatype, n_samples=8):
+def expectation_pauli_tn_MPI(qibo_circ, datatype, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network

From 168018593cd07eab93e4c09b76d44f45b81c5208 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 31 Jan 2024 10:52:09 +0800
Subject: [PATCH 23/85] Use runcard to select compute type as there is no other
 way to pass in parameters without changing the structure

---
 src/qibotn/backends/gpu.py | 130 +++++++++++++++++++------------------
 1 file changed, 68 insertions(+), 62 deletions(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index 6301bf7..ad82b75 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -8,11 +8,24 @@ from qibo.config import raise_error
 class CuTensorNet(NumpyBackend):  # pragma: no cover
     # CI does not test for GPU
 
-    def __init__(self):
+    def __init__(self, runcard):
         super().__init__()
         import cuquantum  # pylint: disable=import-error
         from cuquantum import cutensornet as cutn  # pylint: disable=import-error
 
+        if runcard is not None:
+            print("inside runcard")
+            # Parse the runcard or use its values to set flags
+            self.MPI_enabled = runcard.get("MPI_enabled", False)
+            self.MPS_enabled = runcard.get("MPS_enabled", False)
+            self.NCCL_enabled = runcard.get("NCCL_enabled", False)
+            self.expectation_enabled = runcard.get("expectation_enabled", False)
+        else:
+            self.MPI_enabled = False
+            self.MPS_enabled = False
+            self.NCCL_enabled = False
+            self.expectation_enabled = False
+
         self.name = "qibotn"
         self.cuquantum = cuquantum
         self.cutn = cutn
@@ -53,7 +66,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             raise TypeError("Type can be either complex64 or complex128")
 
     def execute_circuit(
-        self, circuit, MPI_enabled=False, MPS_enabled=False, NCCL_enabled=False, expectation_enabled=False, initial_state=None, nshots=None, return_array=False
+        self, circuit, initial_state=None, nshots=None, return_array=False
     ):  # pragma: no cover
         """Executes a quantum circuit.
 
@@ -68,32 +81,31 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
         """
 
         import qibotn.eval as eval
-        print("MPI_enabled", MPI_enabled)
-        print("MPS_enabled", MPS_enabled)
-        print("NCCL_enabled", NCCL_enabled)
-        print("expectation_enabled", expectation_enabled)
-        
+
+        print("MPI_enabled", self.MPI_enabled)
+        print("MPS_enabled", self.MPS_enabled)
+        print("NCCL_enabled", self.NCCL_enabled)
+        print("expectation_enabled", self.expectation_enabled)
+
         if (
-            MPI_enabled == False
-            and MPS_enabled == False
-            and NCCL_enabled == False
-            and expectation_enabled == False
+            self.MPI_enabled == False
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == False
         ):
             if initial_state is not None:
-                raise_error(NotImplementedError,
-                            "QiboTN cannot support initial state.")
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
             state = eval.dense_vector_tn(circuit, self.dtype)
 
-        if (
-            MPI_enabled == False
-            and MPS_enabled == True
-            and NCCL_enabled == False
-            and expectation_enabled == False
+        elif (
+            self.MPI_enabled == False
+            and self.MPS_enabled == True
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == False
         ):
             if initial_state is not None:
-                raise_error(NotImplementedError,
-                            "QiboTN cannot support initial state.")
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
             gate_algo = {
                 "qr_method": False,
@@ -104,81 +116,75 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             }  # make this user input
             state = eval.dense_vector_mps(circuit, gate_algo, self.dtype)
 
-        if (
-            MPI_enabled == True
-            and MPS_enabled == False
-            and NCCL_enabled == False
-            and expectation_enabled == False
+        elif (
+            self.MPI_enabled == True
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == False
         ):
             if initial_state is not None:
-                raise_error(NotImplementedError,
-                            "QiboTN cannot support initial state.")
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
             state, rank = eval.dense_vector_tn_MPI(circuit, self.dtype, 32)
             if rank > 0:
                 state = np.array(0)
 
-        if (
-            MPI_enabled == False
-            and MPS_enabled == False
-            and NCCL_enabled == True
-            and expectation_enabled == False
+        elif (
+            self.MPI_enabled == False
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == True
+            and self.expectation_enabled == False
         ):
             if initial_state is not None:
-                raise_error(NotImplementedError,
-                            "QiboTN cannot support initial state.")
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
             state, rank = eval.dense_vector_tn_nccl(circuit, self.dtype, 32)
             if rank > 0:
                 state = np.array(0)
 
-        if (
-            MPI_enabled == False
-            and MPS_enabled == False
-            and NCCL_enabled == False
-            and expectation_enabled == True
+        elif (
+            self.MPI_enabled == False
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == True
         ):
             if initial_state is not None:
-                raise_error(NotImplementedError,
-                            "QiboTN cannot support initial state.")
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            state = eval.expectation_tn(circuit, self.dtype)
+            state = eval.expectation_pauli_tn(circuit, self.dtype)
 
-        if (
-            MPI_enabled == True
-            and MPS_enabled == False
-            and NCCL_enabled == False
-            and expectation_enabled == True
+        elif (
+            self.MPI_enabled == True
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == True
         ):
             if initial_state is not None:
-                raise_error(NotImplementedError,
-                            "QiboTN cannot support initial state.")
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            state, rank = eval.expectation_pauli_tn_MPI(
-                circuit, self.dtype, 32)
+            state, rank = eval.expectation_pauli_tn_MPI(circuit, self.dtype, 32)
 
             if rank > 0:
                 state = np.array(0)
 
-        if (
-            MPI_enabled == False
-            and MPS_enabled == False
-            and NCCL_enabled == True
-            and expectation_enabled == True
+        elif (
+            self.MPI_enabled == False
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == True
+            and self.expectation_enabled == True
         ):
             if initial_state is not None:
-                raise_error(NotImplementedError,
-                            "QiboTN cannot support initial state.")
+                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            state, rank = eval.expectation_pauli_tn_nccl(
-                circuit, self.dtype, 32)
+            state, rank = eval.expectation_pauli_tn_nccl(circuit, self.dtype, 32)
 
             if rank > 0:
                 state = np.array(0)
+        else:
+            raise_error(NotImplementedError, "Backend not supported.")
 
         if return_array:
             return state.flatten()
         else:
-            circuit._final_state = CircuitResult(
-                self, circuit, state.flatten(), nshots)
+            circuit._final_state = CircuitResult(self, circuit, state.flatten(), nshots)
             return circuit._final_state

From e7776f55bf61faa084e14e36579b01a1f4a93877 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 31 Jan 2024 10:56:47 +0800
Subject: [PATCH 24/85] Remove print [skip CI]

---
 src/qibotn/backends/gpu.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index ad82b75..d758d5a 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -14,8 +14,6 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
         from cuquantum import cutensornet as cutn  # pylint: disable=import-error
 
         if runcard is not None:
-            print("inside runcard")
-            # Parse the runcard or use its values to set flags
             self.MPI_enabled = runcard.get("MPI_enabled", False)
             self.MPS_enabled = runcard.get("MPS_enabled", False)
             self.NCCL_enabled = runcard.get("NCCL_enabled", False)
@@ -82,11 +80,6 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
 
         import qibotn.eval as eval
 
-        print("MPI_enabled", self.MPI_enabled)
-        print("MPS_enabled", self.MPS_enabled)
-        print("NCCL_enabled", self.NCCL_enabled)
-        print("expectation_enabled", self.expectation_enabled)
-
         if (
             self.MPI_enabled == False
             and self.MPS_enabled == False
@@ -181,7 +174,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             if rank > 0:
                 state = np.array(0)
         else:
-            raise_error(NotImplementedError, "Backend not supported.")
+            raise_error(NotImplementedError, "Compute type not supported.")
 
         if return_array:
             return state.flatten()

From 9b8058d99e3824cbe1d8398c811211fb59b02c16 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 31 Jan 2024 14:56:21 +0800
Subject: [PATCH 25/85] Allow user to specify Pauli string pattern for
 expectation calculation [skip CI]

---
 src/qibotn/backends/gpu.py | 33 ++++++++++++++++++++++++++++-----
 src/qibotn/eval.py         | 20 ++++++++++----------
 2 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index d758d5a..c133f4e 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -12,12 +12,35 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
         super().__init__()
         import cuquantum  # pylint: disable=import-error
         from cuquantum import cutensornet as cutn  # pylint: disable=import-error
-
+        
+        self.pauli_string_pattern = "XXXZ"
         if runcard is not None:
             self.MPI_enabled = runcard.get("MPI_enabled", False)
             self.MPS_enabled = runcard.get("MPS_enabled", False)
             self.NCCL_enabled = runcard.get("NCCL_enabled", False)
-            self.expectation_enabled = runcard.get("expectation_enabled", False)
+                        
+            expectation_enabled_value = runcard.get('expectation_enabled')
+
+            if expectation_enabled_value is True:
+                self.expectation_enabled = True
+
+                print("expectation_enabled is",self.expectation_enabled)
+            elif expectation_enabled_value is False:
+                self.expectation_enabled = False
+
+                print("expectation_enabled is",self.expectation_enabled)
+            elif isinstance(expectation_enabled_value, dict):
+                self.expectation_enabled = True
+                expectation_enabled_dict = runcard.get('expectation_enabled', {})
+
+                self.pauli_string_pattern = expectation_enabled_dict.get('pauli_string_pattern', None)
+
+                print("expectation_enabled is a dictionary",self.expectation_enabled,self.pauli_string_pattern )
+            else:
+                raise TypeError("expectation_enabled has an unexpected type")
+
+
+
         else:
             self.MPI_enabled = False
             self.MPS_enabled = False
@@ -144,7 +167,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            state = eval.expectation_pauli_tn(circuit, self.dtype)
+            state = eval.expectation_pauli_tn(circuit, self.dtype, self.pauli_string_pattern)
 
         elif (
             self.MPI_enabled == True
@@ -155,7 +178,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            state, rank = eval.expectation_pauli_tn_MPI(circuit, self.dtype, 32)
+            state, rank = eval.expectation_pauli_tn_MPI(circuit, self.dtype, self.pauli_string_pattern, 32)
 
             if rank > 0:
                 state = np.array(0)
@@ -169,7 +192,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            state, rank = eval.expectation_pauli_tn_nccl(circuit, self.dtype, 32)
+            state, rank = eval.expectation_pauli_tn_nccl(circuit, self.dtype, self.pauli_string_pattern, 32)
 
             if rank > 0:
                 state = np.array(0)
diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index 5bb5ba1..c8b5671 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -14,10 +14,10 @@ def dense_vector_tn(qibo_circ, datatype):
     return contract(*myconvertor.state_vector_operands())
 
 
-def expectation_pauli_tn(qibo_circ, datatype):
+def expectation_pauli_tn(qibo_circ, datatype, pauli_string):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(
-        *myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
+        *myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits, pauli_string))
     )
 
 
@@ -203,7 +203,7 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def expectation_pauli_tn_nccl(qibo_circ, datatype, n_samples=8):
+def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network
@@ -231,7 +231,7 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, n_samples=8):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
-    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
+    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits, pauli_string))
 
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
@@ -290,7 +290,7 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def expectation_pauli_tn_MPI(qibo_circ, datatype, n_samples=8):
+def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string, n_samples=8):
     from mpi4py import MPI  # this line initializes MPI
     import socket
     from cuquantum import Network
@@ -310,7 +310,7 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, n_samples=8):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
-    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
+    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits, pauli_string))
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
@@ -378,17 +378,17 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype):
     )
 
 
-def PauliStringGen(nqubits):
+def PauliStringGen(nqubits, pauli_string):
     if nqubits <= 0:
         return "Invalid input. N should be a positive integer."
 
-    # characters = 'IXYZ'
-    characters = "XXXZ"
+    characters = pauli_string
+    #characters = "XXXZ"
 
     result = ""
 
     for i in range(nqubits):
         char_to_add = characters[i % len(characters)]
         result += char_to_add
-
+    print("pauli string", result)
     return result

From a3f3538f04c0281a20c3036a9064ffc1bc6809a3 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 31 Jan 2024 16:47:10 +0800
Subject: [PATCH 26/85] Update to allow user to set MPS parameters and to set
 Pauli string pattern

---
 src/qibotn/backends/gpu.py | 60 +++++++++++++++++++++-----------------
 src/qibotn/eval.py         | 14 ++++++---
 2 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index c133f4e..5777fe9 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -12,34 +12,43 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
         super().__init__()
         import cuquantum  # pylint: disable=import-error
         from cuquantum import cutensornet as cutn  # pylint: disable=import-error
-        
-        self.pauli_string_pattern = "XXXZ"
+
         if runcard is not None:
             self.MPI_enabled = runcard.get("MPI_enabled", False)
-            self.MPS_enabled = runcard.get("MPS_enabled", False)
             self.NCCL_enabled = runcard.get("NCCL_enabled", False)
-                        
-            expectation_enabled_value = runcard.get('expectation_enabled')
 
+            expectation_enabled_value = runcard.get("expectation_enabled")
             if expectation_enabled_value is True:
                 self.expectation_enabled = True
-
-                print("expectation_enabled is",self.expectation_enabled)
+                self.pauli_string_pattern = "XXXZ"
             elif expectation_enabled_value is False:
                 self.expectation_enabled = False
-
-                print("expectation_enabled is",self.expectation_enabled)
             elif isinstance(expectation_enabled_value, dict):
                 self.expectation_enabled = True
-                expectation_enabled_dict = runcard.get('expectation_enabled', {})
-
-                self.pauli_string_pattern = expectation_enabled_dict.get('pauli_string_pattern', None)
-
-                print("expectation_enabled is a dictionary",self.expectation_enabled,self.pauli_string_pattern )
+                expectation_enabled_dict = runcard.get("expectation_enabled", {})
+                self.pauli_string_pattern = expectation_enabled_dict.get(
+                    "pauli_string_pattern", None
+                )
             else:
                 raise TypeError("expectation_enabled has an unexpected type")
 
-
+            mps_enabled_value = runcard.get("MPS_enabled")
+            if mps_enabled_value is True:
+                self.MPS_enabled = True
+                self.gate_algo = {
+                    "qr_method": False,
+                    "svd_method": {
+                        "partition": "UV",
+                        "abs_cutoff": 1e-12,
+                    },
+                }
+            elif mps_enabled_value is False:
+                self.MPS_enabled = False
+            elif isinstance(mps_enabled_value, dict):
+                self.MPS_enabled = True
+                self.gate_algo = runcard.get("MPS_enabled", {})
+            else:
+                raise TypeError("MPS_enabled has an unexpected type")
 
         else:
             self.MPI_enabled = False
@@ -123,14 +132,7 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            gate_algo = {
-                "qr_method": False,
-                "svd_method": {
-                    "partition": "UV",
-                    "abs_cutoff": 1e-12,
-                },
-            }  # make this user input
-            state = eval.dense_vector_mps(circuit, gate_algo, self.dtype)
+            state = eval.dense_vector_mps(circuit, self.gate_algo, self.dtype)
 
         elif (
             self.MPI_enabled == True
@@ -167,7 +169,9 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            state = eval.expectation_pauli_tn(circuit, self.dtype, self.pauli_string_pattern)
+            state = eval.expectation_pauli_tn(
+                circuit, self.dtype, self.pauli_string_pattern
+            )
 
         elif (
             self.MPI_enabled == True
@@ -178,7 +182,9 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            state, rank = eval.expectation_pauli_tn_MPI(circuit, self.dtype, self.pauli_string_pattern, 32)
+            state, rank = eval.expectation_pauli_tn_MPI(
+                circuit, self.dtype, self.pauli_string_pattern, 32
+            )
 
             if rank > 0:
                 state = np.array(0)
@@ -192,7 +198,9 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
             if initial_state is not None:
                 raise_error(NotImplementedError, "QiboTN cannot support initial state.")
 
-            state, rank = eval.expectation_pauli_tn_nccl(circuit, self.dtype, self.pauli_string_pattern, 32)
+            state, rank = eval.expectation_pauli_tn_nccl(
+                circuit, self.dtype, self.pauli_string_pattern, 32
+            )
 
             if rank > 0:
                 state = np.array(0)
diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index c8b5671..afa6cbd 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -17,7 +17,9 @@ def dense_vector_tn(qibo_circ, datatype):
 def expectation_pauli_tn(qibo_circ, datatype, pauli_string):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(
-        *myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits, pauli_string))
+        *myconvertor.expectation_operands(
+            PauliStringGen(qibo_circ.nqubits, pauli_string)
+        )
     )
 
 
@@ -231,7 +233,9 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string, n_samples=8):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
-    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits, pauli_string))
+    operands = myconvertor.expectation_operands(
+        PauliStringGen(qibo_circ.nqubits, pauli_string)
+    )
 
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
@@ -310,7 +314,9 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string, n_samples=8):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
-    operands = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits, pauli_string))
+    operands = myconvertor.expectation_operands(
+        PauliStringGen(qibo_circ.nqubits, pauli_string)
+    )
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
@@ -383,7 +389,7 @@ def PauliStringGen(nqubits, pauli_string):
         return "Invalid input. N should be a positive integer."
 
     characters = pauli_string
-    #characters = "XXXZ"
+    # characters = "XXXZ"
 
     result = ""
 

From 67fdafedede0d7be5b4dec250fb595b2107bbc31 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 31 Jan 2024 16:49:34 +0800
Subject: [PATCH 27/85] Remove backend

---
 src/qibotn/backends.py | 129 -----------------------------------------
 1 file changed, 129 deletions(-)
 delete mode 100644 src/qibotn/backends.py

diff --git a/src/qibotn/backends.py b/src/qibotn/backends.py
deleted file mode 100644
index 3728a99..0000000
--- a/src/qibotn/backends.py
+++ /dev/null
@@ -1,129 +0,0 @@
-from qibo.backends import NumpyBackend
-from qibo.config import raise_error
-from qibotn import cutn
-from qibotn import quimb
-from qibo.states import CircuitResult
-import numpy as np
-
-
-class QiboTNBackend(NumpyBackend):
-    def __init__(self, platform):
-        super().__init__()
-        self.name = "qibotn"
-        if (
-            platform == "cu_tensornet"
-            or platform == "cu_mps"
-            or platform == "qu_tensornet"
-            or platform == "cu_tensornet_mpi"
-            or platform == "cu_tensornet_mpi_expectation"
-            or platform == "cu_tensornet_expectation"
-            or platform == "cu_tensornet_nccl"
-            or platform == "cu_tensornet_nccl_expectation"
-        ):  # pragma: no cover
-            self.platform = platform
-        else:
-            raise_error(
-                NotImplementedError, "QiboTN cannot support the specified backend."
-            )
-
-    def apply_gate(self, gate, state, nqubits):  # pragma: no cover
-        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
-
-    def apply_gate_density_matrix(self, gate, state, nqubits):  # pragma: no cover
-        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
-
-    def assign_measurements(self, measurement_map, circuit_result):
-        raise_error(NotImplementedError, "Not implemented in QiboTN.")
-
-    def execute_circuit(
-        self, circuit, initial_state=None, nshots=None, return_array=False
-    ):  # pragma: no cover
-        """Executes a quantum circuit.
-
-        Args:
-            circuit (:class:`qibo.models.circuit.Circuit`): Circuit to execute.
-            initial_state (:class:`qibo.models.circuit.Circuit`): Circuit to prepare the initial state.
-                If ``None`` the default ``|00...0>`` state is used.
-
-        Returns:
-            xxx.
-
-        """
-
-        if self.platform == "cu_tensornet":
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
-            state = cutn.eval(circuit, self.dtype)
-
-        if self.platform == "cu_mps":
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
-            gate_algo = {
-                "qr_method": False,
-                "svd_method": {
-                    "partition": "UV",
-                    "abs_cutoff": 1e-12,
-                },
-            }  # make this user input
-            state = cutn.eval_mps(circuit, gate_algo, self.dtype)
-
-        if self.platform == "qu_tensornet":
-            # init_state = np.random.random(2**circuit.nqubits) + 1j * np.random.random(2**circuit.nqubits)
-            # init_state = init_state / np.sqrt((np.abs(init_state) ** 2).sum())
-            init_state = np.zeros(2**circuit.nqubits, dtype=self.dtype)
-            init_state[0] = 1.0
-            state = quimb.eval(circuit.to_qasm(), init_state, backend="numpy")
-
-        if self.platform == "cu_tensornet_mpi":
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
-            # state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
-            state, rank = cutn.eval_tn_MPI_2(circuit, self.dtype, 32)
-            if rank > 0:
-                state = np.array(0)
-
-        if self.platform == "cu_tensornet_nccl":
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
-            # state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
-            state, rank = cutn.eval_tn_nccl(circuit, self.dtype, 32)
-            if rank > 0:
-                state = np.array(0)
-
-        if self.platform == "cu_tensornet_expectation":
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
-            state = cutn.eval_expectation(circuit, self.dtype)
-
-        if self.platform == "cu_tensornet_mpi_expectation":
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
-            # state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
-            # state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
-            state, rank = cutn.eval_tn_MPI_2_expectation(circuit, self.dtype, 32)
-
-            if rank > 0:
-                state = np.array(0)
-
-        if self.platform == "cu_tensornet_nccl_expectation":
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
-            # state, rank = cutn.eval_tn_MPI(circuit, self.dtype,32)
-            # state, rank = cutn.eval_tn_MPI_expectation(circuit, self.dtype,32)
-            state, rank = cutn.eval_tn_nccl_expectation(circuit, self.dtype, 32)
-
-            if rank > 0:
-                state = np.array(0)
-
-        if return_array:
-            return state.flatten()
-        else:
-            circuit._final_state = CircuitResult(self, circuit, state.flatten(), nshots)
-            return circuit._final_state

From d621eb2f6e019db07dc1fe55afe646fd1b36a73e Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 31 Jan 2024 17:34:15 +0800
Subject: [PATCH 28/85] Update sample code [skip CI]

---
 README.md | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/README.md b/README.md
index 4628c1c..a1fea8d 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,58 @@
 Qibotn is the tensor-network translation module for Qibo to support large-scale simulation of quantum circuits and acceleration.
 
 To get started, `python setup.py install` to install the tools and dependencies.
+
+# Sample Codes
+## Single Node
+
+<pre>
+```
+import numpy as np
+from qibo import Circuit, gates
+import qibo
+
+'''
+computation_settings = {
+    'MPI_enabled': False,
+    'MPS_enabled': False,
+    'NCCL_enabled': False,
+    'expectation_enabled': {
+        'pauli_string_pattern': "IXZ"
+    }
+}
+'''
+
+computation_settings = {
+    'MPI_enabled': False,
+    'MPS_enabled': {
+                "qr_method": False,
+                "svd_method": {
+                    "partition": "UV",
+                    "abs_cutoff": 1e-12,
+                },
+            } ,
+    'NCCL_enabled': False,
+    'expectation_enabled': False
+}
+
+# computation_settings = {
+#     'MPI_enabled': False,
+#     'MPS_enabled': True,
+#     'NCCL_enabled': False,
+#     'expectation_enabled': False
+# }
+
+qibo.set_backend(backend="qibotn", runcard=computation_settings)
+
+# Construct the circuit
+c = Circuit(2)
+# Add some gates
+c.add(gates.H(0))
+c.add(gates.H(1))
+
+# Execute the circuit and obtain the final state
+result = c()
+
+print(result.state())
+```
+</pre>
\ No newline at end of file

From 83ffcfa19b6a45627c5803b4e555335a22d126e9 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 31 Jan 2024 17:42:10 +0800
Subject: [PATCH 29/85] Update

---
 README.md | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index a1fea8d..1bf2f7c 100644
--- a/README.md
+++ b/README.md
@@ -11,16 +11,10 @@ import numpy as np
 from qibo import Circuit, gates
 import qibo
 
-'''
-computation_settings = {
-    'MPI_enabled': False,
-    'MPS_enabled': False,
-    'NCCL_enabled': False,
-    'expectation_enabled': {
-        'pauli_string_pattern': "IXZ"
-    }
-}
-'''
+# Below are three ways of setting computation_settings.
+# Note that the MPS_enabled and expectation_enabled parameters accept either a boolean or a dictionary in the format shown below.
+# If computation_settings is not specified, the default settings are used, in which every flag is False.
+# This triggers the dense vector computation of the tensornet.
 
 computation_settings = {
     'MPI_enabled': False,
@@ -35,6 +29,14 @@ computation_settings = {
     'expectation_enabled': False
 }
 
+# computation_settings = {
+#    'MPI_enabled': False,
+#    'MPS_enabled': False,
+#    'NCCL_enabled': False,
+#    'expectation_enabled': {
+#        'pauli_string_pattern': "IXZ"}
+# }
+
 # computation_settings = {
 #     'MPI_enabled': False,
 #     'MPS_enabled': True,

From 460f5e76210e684803b4ec8150d29f2bc6a25642 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Wed, 31 Jan 2024 17:49:56 +0800
Subject: [PATCH 30/85] Update sample codes

---
 README.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1bf2f7c..09281c8 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ To get started, `python setup.py install` to install the tools and dependencies.
 
 # Sample Codes
 ## Single Node
-
+The code below shows an example of how to activate the Cuquantum TensorNetwork backend of Qibo.
 <pre>
 ```
 import numpy as np
@@ -57,4 +57,13 @@ result = c()
 
 print(result.state())
 ```
+</pre>
+
+## Multi-Node
+Multi-node execution is enabled by setting either the MPI_enabled or NCCL_enabled flag to True in the computation settings. The script below launches a multi-node run on 4 GPUs in a cluster.
+
+<pre>
+```
+mpirun -n 4 --mca opal_common_ucx_opal_mem_hooks 1 --mca orte_base_help_aggregate 0 -mca btl ^openib  -hostfile $node_list python test.py
+```
 </pre>
\ No newline at end of file

From 6f4ffa777a3050582680c4a2d849c5f56e96fa7d Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Thu, 1 Feb 2024 11:35:55 +0800
Subject: [PATCH 31/85] Add more documentation

---
 README.md          | 13 +++++++++++++
 src/qibotn/eval.py | 18 ++++++++++--------
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 09281c8..68f2830 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,19 @@ Qibotn is the tensor-network translation module for Qibo to support large-scale
 
 To get started, `python setup.py install` to install the tools and dependencies.
 
+# Computation Supported
+
+- Tensornet (TN)
+    - TN contraction to dense vector
+    - TN contraction to dense vector with Message Passing Interface (MPI)
+    - TN contraction to dense vector with NCCL
+    - TN contraction to expectation of given Pauli string
+    - TN contraction to expectation of given Pauli string with Message Passing Interface (MPI)
+    - TN contraction to expectation of given Pauli string with NCCL
+
+- Matrix Product State (MPS)
+    - MPS contraction to dense vector
+
 # Sample Codes
 ## Single Node
 The code below shows an example of how to activate the Cuquantum TensorNetwork backend of Qibo.
diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index afa6cbd..a9aeaac 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -18,7 +18,7 @@ def expectation_pauli_tn(qibo_circ, datatype, pauli_string):
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(
         *myconvertor.expectation_operands(
-            PauliStringGen(qibo_circ.nqubits, pauli_string)
+            pauli_string_gen(qibo_circ.nqubits, pauli_string)
         )
     )
 
@@ -234,7 +234,7 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string, n_samples=8):
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands = myconvertor.expectation_operands(
-        PauliStringGen(qibo_circ.nqubits, pauli_string)
+        pauli_string_gen(qibo_circ.nqubits, pauli_string)
     )
 
     # mem_avail = cp.cuda.Device().mem_info[0]
@@ -315,7 +315,7 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string, n_samples=8):
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands = myconvertor.expectation_operands(
-        PauliStringGen(qibo_circ.nqubits, pauli_string)
+        pauli_string_gen(qibo_circ.nqubits, pauli_string)
     )
     # mem_avail = cp.cuda.Device().mem_info[0]
     # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
@@ -376,6 +376,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string, n_samples=8):
 
 
 def dense_vector_mps(qibo_circ, gate_algo, datatype):
+    """Convert qibo circuit to matrix product state (MPS) format and perform contraction to dense vector.
+    """
     myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
     mps_helper = MPSContractionHelper(myconvertor.num_qubits)
 
@@ -384,17 +386,17 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype):
     )
 
 
-def PauliStringGen(nqubits, pauli_string):
+def pauli_string_gen(nqubits, pauli_string_pattern):
+    """ Used internally to generate the string based on given pattern and number of qubit.
+    Example: pattern: "XZ", number of qubit: 7, output = XZXZXZX
+    """
     if nqubits <= 0:
         return "Invalid input. N should be a positive integer."
 
-    characters = pauli_string
-    # characters = "XXXZ"
-
     result = ""
 
     for i in range(nqubits):
-        char_to_add = characters[i % len(characters)]
+        char_to_add = pauli_string_pattern[i % len(pauli_string_pattern)]
         result += char_to_add
     print("pauli string", result)
     return result

From 31fe1ef0c91f7c1cc1ba7d02aff578a27222c32c Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Thu, 1 Feb 2024 14:42:17 +0800
Subject: [PATCH 32/85] Tidy up

---
 src/qibotn/MPSUtils.py               |   2 +
 src/qibotn/QiboCircuitConvertor.py   |  22 +---
 src/qibotn/eval.py                   | 160 +++++++--------------------
 src/qibotn/mps_contraction_helper.py |   4 +-
 4 files changed, 49 insertions(+), 139 deletions(-)

diff --git a/src/qibotn/MPSUtils.py b/src/qibotn/MPSUtils.py
index fd1b4c7..4f84f67 100644
--- a/src/qibotn/MPSUtils.py
+++ b/src/qibotn/MPSUtils.py
@@ -2,6 +2,8 @@ import cupy as cp
 from cuquantum.cutensornet.experimental import contract_decompose
 from cuquantum import contract
 
+# Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
+
 
 def initial(num_qubits, dtype):
     """
diff --git a/src/qibotn/QiboCircuitConvertor.py b/src/qibotn/QiboCircuitConvertor.py
index d3a0569..c59745b 100644
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/QiboCircuitConvertor.py
@@ -1,6 +1,8 @@
 import cupy as cp
 import numpy as np
 
+# Reference: https://github.com/NVIDIA/cuQuantum/tree/main/python/samples/cutensornet/circuit_converter
+
 
 class QiboCircuitToEinsum:
     """Convert a circuit to a Tensor Network (TN) representation.
@@ -159,9 +161,7 @@ class QiboCircuitToEinsum:
         return gates
 
     def expectation_operands(self, pauli_string):
-        # assign pauli string to qubit
-        # _get_forward_inverse_metadata()
-        input_bitstring = "0" * self.circuit.nqubits  # Need all qubits!
+        input_bitstring = "0" * self.circuit.nqubits 
 
         input_operands = self._get_bitstring_tensors(input_bitstring)
         pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))
@@ -185,8 +185,6 @@ class QiboCircuitToEinsum:
 
         next_frontier = max(qubits_frontier.values()) + 1
 
-        # input_mode_labels, input_operands, qubits_frontier, next_frontier, inverse_gates = self._get_forward_inverse_metadata(coned_qubits)
-
         pauli_gates = self.get_pauli_gates(
             pauli_map, dtype=self.dtype, backend=self.backend
         )
@@ -208,18 +206,4 @@ class QiboCircuitToEinsum:
 
         operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
 
-        # expec = contract(*operand_exp_interleave)
-        # print(expec)
-
-        """
-        gate_mode_labels, gate_operands = circ_utils.parse_gates_to_mode_labels_operands(gates, 
-                                                                                         qubits_frontier, 
-                                                                                         next_frontier)
-        
-        mode_labels = input_mode_labels + gate_mode_labels + [[qubits_frontier[ix]] for ix in self.qubits]
-        operands = input_operands + gate_operands + input_operands[:n_qubits]
-
-        output_mode_labels = []
-        expression = circ_utils.convert_mode_labels_to_expression(mode_labels, output_mode_labels)
-        """
         return operand_exp_interleave
diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index a9aeaac..b73418a 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -1,7 +1,5 @@
 from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
 from cuquantum import contract
-from cuquantum import cutensornet as cutn
-import multiprocessing
 from cupy.cuda.runtime import getDeviceCount
 import cupy as cp
 
@@ -10,15 +8,17 @@ from qibotn.mps_contraction_helper import MPSContractionHelper
 
 
 def dense_vector_tn(qibo_circ, datatype):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to dense vector."""
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(*myconvertor.state_vector_operands())
 
 
-def expectation_pauli_tn(qibo_circ, datatype, pauli_string):
+def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string."""
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(
         *myconvertor.expectation_operands(
-            pauli_string_gen(qibo_circ.nqubits, pauli_string)
+            pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
         )
     )
 
@@ -30,54 +30,35 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
     After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
     """
 
-    from mpi4py import MPI  # this line initializes MPI
-    import socket
+    from mpi4py import MPI
     from cuquantum import Network
 
-    # Get the hostname
-    # hostname = socket.gethostname()
-
     root = 0
     comm = MPI.COMM_WORLD
     rank = comm.Get_rank()
     size = comm.Get_size()
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+
     device_id = rank % getDeviceCount()
 
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
-    operands = myconvertor.state_vector_operands()
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
-    # Broadcast the operand data.
-    # operands = comm.bcast(operands, root)
+    operands = myconvertor.state_vector_operands()
 
     # Assign the device for each process.
     device_id = rank % getDeviceCount()
 
-    # dev = cp.cuda.Device(device_id)
-    # free_mem, total_mem = dev.mem_info
-    # print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
-
     # Create network object.
     network = Network(*operands, options={"device_id": device_id})
 
     # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
     path, info = network.contract_path(
-        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+        optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
     )
-    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
 
     # Select the best path from all ranks.
     opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
 
-    # if rank == root:
-    #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
-
     # Broadcast info from the sender to all other ranks.
     info = comm.bcast(info, sender)
 
@@ -95,45 +76,30 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
     )
     slices = range(slice_begin, slice_end)
 
-    # print(f"Process {rank} is processing slice range: {slices}.")
-
     # Contract the group of slices the process is responsible for.
     result = network.contract(slices=slices)
-    # print(f"Process {rank} result shape is : {result.shape}.")
-    # print(f"Process {rank} result size is : {result.nbytes}.")
 
     # Sum the partial contribution from each process on root.
     result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
 
-    """
-    path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
-
-    num_slices = opt_info.num_slices#Andy
-    chunk, extra = num_slices // size, num_slices % size#Andy
-    slice_begin = rank * chunk + min(rank, extra)#Andy
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
-    slices = range(slice_begin, slice_end)#Andy
-    result = network.contract(slices=slices)
-    """
-
     return result, rank
 
 
 def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
-    from mpi4py import MPI  # this line initializes MPI
-    import socket
+    """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through NCCL.
+    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
+    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
+    After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
+    """
+    from mpi4py import MPI
     from cuquantum import Network
     from cupy.cuda import nccl
 
-    # Get the hostname
-    # hostname = socket.gethostname()
-
     root = 0
     comm_mpi = MPI.COMM_WORLD
     rank = comm_mpi.Get_rank()
     size = comm_mpi.Get_size()
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+
     device_id = rank % getDeviceCount()
 
     cp.cuda.Device(device_id).use()
@@ -145,27 +111,18 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
 
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands = myconvertor.state_vector_operands()
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
     network = Network(*operands)
 
     # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
     path, info = network.contract_path(
-        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+        optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
     )
 
-    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
-
     # Select the best path from all ranks.
     opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
 
-    # if rank == root:
-    #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
-
     # Broadcast info from the sender to all other ranks.
     info = comm_mpi.bcast(info, sender)
 
@@ -183,12 +140,8 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
     )
     slices = range(slice_begin, slice_end)
 
-    # print(f"Process {rank} is processing slice range: {slices}.")
-
     # Contract the group of slices the process is responsible for.
     result = network.contract(slices=slices)
-    # print(f"Process {rank} result shape is : {result.shape}.")
-    # print(f"Process {rank} result size is : {result.nbytes}.")
 
     # Sum the partial contribution from each process on root.
     stream_ptr = cp.cuda.get_current_stream().ptr
@@ -205,21 +158,22 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
     return result, rank
 
 
-def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string, n_samples=8):
-    from mpi4py import MPI  # this line initializes MPI
-    import socket
+def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string using multi node and multi GPU through NCCL.
+    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
+    The pauli_string_pattern is used to generate the pauli string corresponding to the number of qubits of the system.
+    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
+    After pathfinding the optimal path is used in the actual contraction to give an expectation value.
+    """
+    from mpi4py import MPI
     from cuquantum import Network
     from cupy.cuda import nccl
 
-    # Get the hostname
-    # hostname = socket.gethostname()
-
     root = 0
     comm_mpi = MPI.COMM_WORLD
     rank = comm_mpi.Get_rank()
     size = comm_mpi.Get_size()
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+
     device_id = rank % getDeviceCount()
 
     cp.cuda.Device(device_id).use()
@@ -231,30 +185,20 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string, n_samples=8):
 
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
     operands = myconvertor.expectation_operands(
-        pauli_string_gen(qibo_circ.nqubits, pauli_string)
+        pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
     )
 
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
-
     network = Network(*operands)
 
     # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
     path, info = network.contract_path(
-        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+        optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
     )
 
-    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
-
     # Select the best path from all ranks.
     opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
 
-    # if rank == root:
-    #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
-
     # Broadcast info from the sender to all other ranks.
     info = comm_mpi.bcast(info, sender)
 
@@ -272,12 +216,8 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string, n_samples=8):
     )
     slices = range(slice_begin, slice_end)
 
-    # print(f"Process {rank} is processing slice range: {slices}.")
-
     # Contract the group of slices the process is responsible for.
     result = network.contract(slices=slices)
-    # print(f"Process {rank} result shape is : {result.shape}.")
-    # print(f"Process {rank} result size is : {result.nbytes}.")
 
     # Sum the partial contribution from each process on root.
     stream_ptr = cp.cuda.get_current_stream().ptr
@@ -294,57 +234,44 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string, n_samples=8):
     return result, rank
 
 
-def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string, n_samples=8):
+def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string using multi node and multi GPU through MPI.
+    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
+    The pauli_string_pattern is used to generate the pauli string corresponding to the number of qubits of the system.
+    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
+    After pathfinding the optimal path is used in the actual contraction to give an expectation value.
+    """
     from mpi4py import MPI  # this line initializes MPI
-    import socket
     from cuquantum import Network
 
-    # Get the hostname
-    # hostname = socket.gethostname()
-
     root = 0
     comm = MPI.COMM_WORLD
     rank = comm.Get_rank()
     size = comm.Get_size()
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
+
     device_id = rank % getDeviceCount()
 
     # Perform circuit conversion
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
-    operands = myconvertor.expectation_operands(
-        pauli_string_gen(qibo_circ.nqubits, pauli_string)
-    )
-    # mem_avail = cp.cuda.Device().mem_info[0]
-    # print("Mem avail: aft operand interleave",mem_avail, "rank =",rank)
 
-    # Broadcast the operand data.
-    # operands = comm.bcast(operands, root)
+    operands = myconvertor.expectation_operands(
+        pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
+    )
 
     # Assign the device for each process.
     device_id = rank % getDeviceCount()
 
-    # dev = cp.cuda.Device(device_id)
-    # free_mem, total_mem = dev.mem_info
-    # print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)
-
     # Create network object.
     network = Network(*operands, options={"device_id": device_id})
 
     # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
     path, info = network.contract_path(
-        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+        optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
     )
-    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")
 
     # Select the best path from all ranks.
     opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
 
-    # if rank == root:
-    #    print(f"Process {sender} has the path with the lowest FLOP count {opt_cost}.")
-
     # Broadcast info from the sender to all other ranks.
     info = comm.bcast(info, sender)
 
@@ -362,12 +289,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string, n_samples=8):
     )
     slices = range(slice_begin, slice_end)
 
-    # print(f"Process {rank} is processing slice range: {slices}.")
-
     # Contract the group of slices the process is responsible for.
     result = network.contract(slices=slices)
-    # print(f"Process {rank} result shape is : {result.shape}.")
-    # print(f"Process {rank} result size is : {result.nbytes}.")
 
     # Sum the partial contribution from each process on root.
     result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
@@ -376,8 +299,7 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string, n_samples=8):
 
 
 def dense_vector_mps(qibo_circ, gate_algo, datatype):
-    """Convert qibo circuit to matrix product state (MPS) format and perform contraction to dense vector.
-    """
+    """Convert qibo circuit to matrix product state (MPS) format and perform contraction to dense vector."""
     myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
     mps_helper = MPSContractionHelper(myconvertor.num_qubits)
 
@@ -387,7 +309,7 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype):
 
 
 def pauli_string_gen(nqubits, pauli_string_pattern):
-    """ Used internally to generate the string based on given pattern and number of qubit.
+    """Used internally to generate the string based on given pattern and number of qubit.
     Example: pattern: "XZ", number of qubit: 7, output = XZXZXZX
     """
     if nqubits <= 0:
diff --git a/src/qibotn/mps_contraction_helper.py b/src/qibotn/mps_contraction_helper.py
index ee8e4e4..29d5e25 100644
--- a/src/qibotn/mps_contraction_helper.py
+++ b/src/qibotn/mps_contraction_helper.py
@@ -1,5 +1,7 @@
 from cuquantum import contract, contract_path, CircuitToEinsum, tensor
 
+# Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
+
 
 class MPSContractionHelper:
     """
@@ -85,7 +87,7 @@ class MPSContractionHelper:
         self, mps_tensors, operator, qubits, options=None, normalize=False
     ):
         """
-        Contract the corresponding tensor network to form the state vector representation of the MPS.
+        Contract the corresponding tensor network to form the expectation of the MPS.
 
         Args:
             mps_tensors: A list of rank-3 ndarray-like tensor objects.

From 6a6cd2a90b0738413fc796373a9a8273e4903202 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Thu, 1 Feb 2024 14:45:55 +0800
Subject: [PATCH 33/85] Remove print [skip CI]

---
 src/qibotn/eval.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index b73418a..96fd488 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -320,5 +320,4 @@ def pauli_string_gen(nqubits, pauli_string_pattern):
     for i in range(nqubits):
         char_to_add = pauli_string_pattern[i % len(pauli_string_pattern)]
         result += char_to_add
-    print("pauli string", result)
     return result

From 88fb4340bef10aba601bac24836c1487fc905dc0 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Thu, 1 Feb 2024 15:21:04 +0800
Subject: [PATCH 34/85] Update multi-node launch instructions in README

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 68f2830..25b499c 100644
--- a/README.md
+++ b/README.md
@@ -73,10 +73,10 @@ print(result.state())
 </pre>
 
 ## Multi-Node
-Multi-node is enabled by setting either the MPI or NCCL enabled flag to True in the computation settings. Below shows the script to launch multi node on 4 GPU in cluster. 
+Multi-node is enabled by setting either the MPI or NCCL enabled flag to True in the computation settings. Below shows the script to launch on 2 nodes with 2 GPUs each. $node_list contains the IP of the nodes assigned.
 
 <pre>
 ```
-mpirun -n 4 --mca opal_common_ucx_opal_mem_hooks 1 --mca orte_base_help_aggregate 0 -mca btl ^openib  -hostfile $node_list python test.py
+mpirun -n 4 -hostfile $node_list python test.py
 ```
 </pre>
\ No newline at end of file

From 347cb354424482a0c2ae14dba2d32c37436a9069 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Fri, 2 Feb 2024 12:53:48 +0800
Subject: [PATCH 35/85] Update README: condense supported computations, use fenced code blocks

---
 README.md | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 25b499c..2e9b09c 100644
--- a/README.md
+++ b/README.md
@@ -4,22 +4,22 @@ To get started, `python setup.py install` to install the tools and dependencies.
 
 # Computation Supported
 
-- Tensornet (TN)
-    - TN contraction to dense vector
-    - TN contraction to dense vector with Message Passing Interface (MPI)
-    - TN contraction to dense vector with NCCL
-    - TN contraction to expectation of given Pauli string
-    - TN contraction to expectation of given Pauli string with Message Passing Interface (MPI)
-    - TN contraction to expectation of given Pauli string with NCCL
+1. Tensornet (TN) with contractions to:
+    - dense vector
+    - expecation of given Pauli string
 
-- Matrix Product State (MPS)
-    - MPS contraction to dense vector
+    For each TN case:
+    - single node
+    - multi node with Message Passing Interface (MPI)
+    - multi node with NCCL
+
+2. Tensornet (TN) with contractions to:
+    - dense vector (single node)
 
 # Sample Codes
 ## Single Node
 The code below shows an example of how to activate the Cuquantum TensorNetwork backend of Qibo.
-<pre>
-```
+```py
 import numpy as np
 from qibo import Circuit, gates
 import qibo
@@ -29,7 +29,6 @@ import qibo
 # If computation_settings is not specified, the default setting is used in which all booleans will be False. 
 # This will trigger the dense vector computation of the tensornet.
 
-computation_settings = {
     'MPI_enabled': False,
     'MPS_enabled': {
                 "qr_method": False,
@@ -70,13 +69,11 @@ result = c()
 
 print(result.state())
 ```
-</pre>
 
 ## Multi-Node
 Multi-node is enabled by setting either the MPI or NCCL enabled flag to True in the computation settings. Below shows the script to launch on 2 nodes with 2 GPUs each. $node_list contains the IP of the nodes assigned.
 
-<pre>
-```
+
+```sh
 mpirun -n 4 -hostfile $node_list python test.py
-```
-</pre>
\ No newline at end of file
+```
\ No newline at end of file

From bc487022b3646333f7005e3a6d967c8ad58d88fb Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Fri, 2 Feb 2024 13:02:09 +0800
Subject: [PATCH 36/85] Update computation setting

---
 README.md | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 2e9b09c..a5b4400 100644
--- a/README.md
+++ b/README.md
@@ -24,11 +24,12 @@ import numpy as np
 from qibo import Circuit, gates
 import qibo
 
-# Below shows 3 ways of setting the computation_settings
+# Below shows how to set the computation_settings
 # Note that for MPS_enabled and expectation_enabled parameters the accepted inputs are boolean or a dictionary with the format shown below.
 # If computation_settings is not specified, the default setting is used in which all booleans will be False. 
 # This will trigger the dense vector computation of the tensornet.
 
+computation_settings = {
     'MPI_enabled': False,
     'MPS_enabled': {
                 "qr_method": False,
@@ -41,20 +42,6 @@ import qibo
     'expectation_enabled': False
 }
 
-# computation_settings = {
-#    'MPI_enabled': False,
-#    'MPS_enabled': False,
-#    'NCCL_enabled': False,
-#    'expectation_enabled': {
-#        'pauli_string_pattern': "IXZ"
-# }
-
-# computation_settings = {
-#     'MPI_enabled': False,
-#     'MPS_enabled': True,
-#     'NCCL_enabled': False,
-#     'expectation_enabled': False
-# }
 
 qibo.set_backend(backend="qibotn", runcard=computation_settings)
 
@@ -70,6 +57,27 @@ result = c()
 print(result.state())
 ```
 
+Other examples of setting the computation_settings
+
+```py
+# Expectation computation with specific Pauli String pattern
+computation_settings = {
+   'MPI_enabled': False,
+   'MPS_enabled': False,
+   'NCCL_enabled': False,
+   'expectation_enabled': {
+       'pauli_string_pattern': "IXZ"
+}
+
+# Dense vector computation using multi node through MPI
+computation_settings = {
+    'MPI_enabled': False,
+    'MPS_enabled': True,
+    'NCCL_enabled': False,
+    'expectation_enabled': False
+}
+```
+
 ## Multi-Node
 Multi-node is enabled by setting either the MPI or NCCL enabled flag to True in the computation settings. Below shows the script to launch on 2 nodes with 2 GPUs each. $node_list contains the IP of the nodes assigned.
 

From 2d5fa9e520535bf701c93166e67493157dcf3ccd Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Mon, 5 Feb 2024 10:41:23 +0800
Subject: [PATCH 37/85] Correct computation_settings

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index a5b4400..54ff75d 100644
--- a/README.md
+++ b/README.md
@@ -71,8 +71,8 @@ computation_settings = {
 
 # Dense vector computation using multi node through MPI
 computation_settings = {
-    'MPI_enabled': False,
-    'MPS_enabled': True,
+    'MPI_enabled': True,
+    'MPS_enabled': False,
     'NCCL_enabled': False,
     'expectation_enabled': False
 }

From 20f32c6c5173b09733976f4ed31126e42312dbca Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Tue, 6 Feb 2024 10:39:10 +0800
Subject: [PATCH 38/85] Remove not required Numba backend

---
 src/qibotn/backends/__init__.py |   1 -
 src/qibotn/backends/cpu.py      | 302 --------------------------------
 2 files changed, 303 deletions(-)
 delete mode 100644 src/qibotn/backends/cpu.py

diff --git a/src/qibotn/backends/__init__.py b/src/qibotn/backends/__init__.py
index ebc3a20..26a0b9d 100644
--- a/src/qibotn/backends/__init__.py
+++ b/src/qibotn/backends/__init__.py
@@ -1,2 +1 @@
-from qibotn.backends.cpu import NumbaBackend
 from qibotn.backends.gpu import CuTensorNet
diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
deleted file mode 100644
index 22afcdc..0000000
--- a/src/qibotn/backends/cpu.py
+++ /dev/null
@@ -1,302 +0,0 @@
-import numpy as np
-from qibo.backends.numpy import NumpyBackend
-from qibo.config import log
-from qibo.gates.abstract import ParametrizedGate
-from qibo.gates.channels import ReadoutErrorChannel
-from qibo.gates.special import FusedGate
-
-from qibojit.backends.matrices import CustomMatrices
-
-GATE_OPS = {
-    "X": "apply_x",
-    "CNOT": "apply_x",
-    "TOFFOLI": "apply_x",
-    "Y": "apply_y",
-    "Z": "apply_z",
-    "CZ": "apply_z",
-    "U1": "apply_z_pow",
-    "CU1": "apply_z_pow",
-    "SWAP": "apply_swap",
-    "fSim": "apply_fsim",
-    "GeneralizedfSim": "apply_fsim",
-}
-
-
-class NumbaBackend(NumpyBackend):
-    def __init__(self):
-        super().__init__()
-        import sys
-
-        import psutil
-        from numba import __version__ as numba_version
-
-        from qibotn import __version__ as qibotn_version
-
-        self.name = "qibotn"
-        self.platform = "numba"
-        self.versions.update(
-            {
-                "qibotn": qibotn_version,
-                "numba": numba_version,
-            }
-        )
-        self.numeric_types = (
-            int,
-            float,
-            complex,
-            np.int32,
-            np.int64,
-            np.float32,
-            np.float64,
-            np.complex64,
-            np.complex128,
-        )
-        self.tensor_types = (np.ndarray,)
-        self.device = "/CPU:0"
-        self.custom_matrices = CustomMatrices(self.dtype)
-        self.gates = gates
-        self.ops = ops
-        self.measure_frequencies_op = ops.measure_frequencies
-        self.multi_qubit_kernels = {
-            3: self.gates.apply_three_qubit_gate_kernel,
-            4: self.gates.apply_four_qubit_gate_kernel,
-            5: self.gates.apply_five_qubit_gate_kernel,
-        }
-        if sys.platform == "darwin":  # pragma: no cover
-            self.set_threads(psutil.cpu_count(logical=False))
-        else:
-            self.set_threads(len(psutil.Process().cpu_affinity()))
-
-    def set_precision(self, precision):
-        if precision != self.precision:
-            super().set_precision(precision)
-            if self.custom_matrices:
-                self.custom_matrices = CustomMatrices(self.dtype)
-
-    def set_threads(self, nthreads):
-        import numba
-
-        numba.set_num_threads(nthreads)
-        self.nthreads = nthreads
-
-    # def cast(self, x, dtype=None, copy=False): Inherited from ``NumpyBackend``
-
-    # def to_numpy(self, x): Inherited from ``NumpyBackend``
-
-    def zero_state(self, nqubits):
-        size = 2**nqubits
-        state = np.empty((size,), dtype=self.dtype)
-        return self.ops.initial_state_vector(state)
-
-    def zero_density_matrix(self, nqubits):
-        size = 2**nqubits
-        state = np.empty((size, size), dtype=self.dtype)
-        return self.ops.initial_density_matrix(state)
-
-    # def plus_state(self, nqubits): Inherited from ``NumpyBackend``
-
-    # def plus_density_matrix(self, nqubits): Inherited from ``NumpyBackend``
-
-    # def asmatrix_special(self, gate): Inherited from ``NumpyBackend``
-
-    # def control_matrix(self, gate): Inherited from ``NumpyBackend``
-
-    def one_qubit_base(self, state, nqubits, target, kernel, gate, qubits):
-        ncontrols = len(qubits) - 1 if qubits is not None else 0
-        m = nqubits - target - 1
-        nstates = 1 << (nqubits - ncontrols - 1)
-        if ncontrols:
-            kernel = getattr(self.gates, "multicontrol_{}_kernel".format(kernel))
-            return kernel(state, gate, qubits, nstates, m)
-        kernel = getattr(self.gates, "{}_kernel".format(kernel))
-        return kernel(state, gate, nstates, m)
-
-    def two_qubit_base(self, state, nqubits, target1, target2, kernel, gate, qubits):
-        ncontrols = len(qubits) - 2 if qubits is not None else 0
-        if target1 > target2:
-            swap_targets = True
-            m1 = nqubits - target1 - 1
-            m2 = nqubits - target2 - 1
-        else:
-            swap_targets = False
-            m1 = nqubits - target2 - 1
-            m2 = nqubits - target1 - 1
-        nstates = 1 << (nqubits - 2 - ncontrols)
-        if ncontrols:
-            kernel = getattr(self.gates, "multicontrol_{}_kernel".format(kernel))
-            return kernel(state, gate, qubits, nstates, m1, m2, swap_targets)
-        kernel = getattr(self.gates, "{}_kernel".format(kernel))
-        return kernel(state, gate, nstates, m1, m2, swap_targets)
-
-    def multi_qubit_base(self, state, nqubits, targets, gate, qubits):
-        if qubits is None:
-            qubits = np.array(sorted(nqubits - q - 1 for q in targets), dtype="int32")
-        nstates = 1 << (nqubits - len(qubits))
-        targets = np.array(
-            [1 << (nqubits - t - 1) for t in targets[::-1]], dtype="int64"
-        )
-        if len(targets) > 5:
-            kernel = self.gates.apply_multi_qubit_gate_kernel
-        else:
-            kernel = self.multi_qubit_kernels.get(len(targets))
-        return kernel(state, gate, qubits, nstates, targets)
-
-    @staticmethod
-    def _create_qubits_tensor(gate, nqubits):
-        # TODO: Treat density matrices
-        qubits = [nqubits - q - 1 for q in gate.control_qubits]
-        qubits.extend(nqubits - q - 1 for q in gate.target_qubits)
-        return np.array(sorted(qubits), dtype="int32")
-
-    def _as_custom_matrix(self, gate):
-        name = gate.__class__.__name__
-        if isinstance(gate, ParametrizedGate):
-            return getattr(self.custom_matrices, name)(*gate.parameters)
-        elif isinstance(gate, FusedGate):  # pragma: no cover
-            # fusion is tested in qibo tests
-            return self.asmatrix_fused(gate)
-        else:
-            return getattr(self.custom_matrices, name)
-
-    def apply_gate(self, gate, state, nqubits):
-        matrix = self._as_custom_matrix(gate)
-        qubits = self._create_qubits_tensor(gate, nqubits)
-        targets = gate.target_qubits
-        state = self.cast(state)
-        if len(targets) == 1:
-            op = GATE_OPS.get(gate.__class__.__name__, "apply_gate")
-            return self.one_qubit_base(state, nqubits, *targets, op, matrix, qubits)
-        elif len(targets) == 2:
-            op = GATE_OPS.get(gate.__class__.__name__, "apply_two_qubit_gate")
-            return self.two_qubit_base(state, nqubits, *targets, op, matrix, qubits)
-        else:
-            return self.multi_qubit_base(state, nqubits, targets, matrix, qubits)
-
-    def apply_gate_density_matrix(self, gate, state, nqubits, inverse=False):
-        name = gate.__class__.__name__
-        if name == "Y":
-            return self._apply_ygate_density_matrix(gate, state, nqubits)
-        if inverse:
-            # used to reset the state when applying channels
-            # see :meth:`qibojit.backend.NumpyBackend.apply_channel_density_matrix` below
-            matrix = np.linalg.inv(gate.asmatrix(self))
-            matrix = self.cast(matrix)
-        else:
-            matrix = self._as_custom_matrix(gate)
-        qubits = self._create_qubits_tensor(gate, nqubits)
-        qubits_dm = qubits + nqubits
-        targets = gate.target_qubits
-        targets_dm = tuple(q + nqubits for q in targets)
-
-        state = self.cast(state)
-        shape = state.shape
-        if len(targets) == 1:
-            op = GATE_OPS.get(name, "apply_gate")
-            state = self.one_qubit_base(
-                state.ravel(), 2 * nqubits, *targets, op, matrix, qubits_dm
-            )
-            state = self.one_qubit_base(
-                state, 2 * nqubits, *targets_dm, op, np.conj(matrix), qubits
-            )
-        elif len(targets) == 2:
-            op = GATE_OPS.get(name, "apply_two_qubit_gate")
-            state = self.two_qubit_base(
-                state.ravel(), 2 * nqubits, *targets, op, matrix, qubits_dm
-            )
-            state = self.two_qubit_base(
-                state, 2 * nqubits, *targets_dm, op, np.conj(matrix), qubits
-            )
-        else:
-            state = self.multi_qubit_base(
-                state.ravel(), 2 * nqubits, targets, matrix, qubits_dm
-            )
-            state = self.multi_qubit_base(
-                state, 2 * nqubits, targets_dm, np.conj(matrix), qubits
-            )
-        return np.reshape(state, shape)
-
-    def _apply_ygate_density_matrix(self, gate, state, nqubits):
-        matrix = self._as_custom_matrix(gate)
-        qubits = self._create_qubits_tensor(gate, nqubits)
-        qubits_dm = qubits + nqubits
-        targets = gate.target_qubits
-        targets_dm = tuple(q + nqubits for q in targets)
-        state = self.cast(state)
-        shape = state.shape
-        state = self.one_qubit_base(
-            state.ravel(), 2 * nqubits, *targets, "apply_y", matrix, qubits_dm
-        )
-        # force using ``apply_gate`` kernel so that conjugate is properly applied
-        state = self.one_qubit_base(
-            state, 2 * nqubits, *targets_dm, "apply_gate", np.conj(matrix), qubits
-        )
-        return np.reshape(state, shape)
-
-    # def apply_channel(self, gate): Inherited from ``NumpyBackend``
-
-    def apply_channel_density_matrix(self, channel, state, nqubits):
-        state = self.cast(state)
-        if isinstance(channel, ReadoutErrorChannel) is True:
-            state_copy = self.cast(state, copy=True)
-        new_state = (1 - channel.coefficient_sum) * state
-        for coeff, gate in zip(channel.coefficients, channel.gates):
-            state = self.apply_gate_density_matrix(gate, state, nqubits)
-            new_state += coeff * state
-            # reset the state
-            if isinstance(channel, ReadoutErrorChannel) is True:
-                state = self.cast(state_copy, copy=True)
-            else:
-                state = self.apply_gate_density_matrix(
-                    gate, state, nqubits, inverse=True
-                )
-        return new_state
-
-    def collapse_state(self, state, qubits, shot, nqubits, normalize=True):
-        state = self.cast(state)
-        qubits = self.cast([nqubits - q - 1 for q in reversed(qubits)], dtype="int32")
-        if normalize:
-            return self.ops.collapse_state_normalized(state, qubits, int(shot), nqubits)
-        else:
-            return self.ops.collapse_state(state, qubits, int(shot), nqubits)
-
-    def collapse_density_matrix(self, state, qubits, shot, nqubits, normalize=True):
-        state = self.cast(state)
-        shape = state.shape
-        dm_qubits = [q + nqubits for q in qubits]
-        state = self.collapse_state(state.ravel(), dm_qubits, shot, 2 * nqubits, False)
-        state = self.collapse_state(state, qubits, shot, 2 * nqubits, False)
-        state = self.np.reshape(state, shape)
-        if normalize:
-            state = state / self.np.trace(state)
-        return state
-
-    # def calculate_probabilities(self, state, qubits, nqubits): Inherited from ``NumpyBackend``
-
-    # def sample_shots(self, probabilities, nshots): Inherited from ``NumpyBackend``
-
-    # def aggregate_shots(self, shots): Inherited from ``NumpyBackend``
-
-    # def samples_to_binary(self, samples, nqubits): Inherited from ``NumpyBackend``
-
-    # def samples_to_decimal(self, samples, nqubits): Inherited from ``NumpyBackend``
-
-    def sample_frequencies(self, probabilities, nshots):
-        from qibo.config import SHOT_METROPOLIS_THRESHOLD
-
-        if nshots < SHOT_METROPOLIS_THRESHOLD:
-            return super().sample_frequencies(probabilities, nshots)
-
-        import collections
-
-        seed = np.random.randint(0, int(1e8), dtype="int64")
-        nqubits = int(np.log2(tuple(probabilities.shape)[0]))
-        frequencies = np.zeros(2**nqubits, dtype="int64")
-        # always fall back to numba CPU backend because for ops not implemented on GPU
-        frequencies = self.measure_frequencies_op(
-            frequencies, probabilities, nshots, nqubits, seed, self.nthreads
-        )
-        return collections.Counter({i: f for i, f in enumerate(frequencies) if f > 0})
-
-    # def calculate_frequencies(self, samples): Inherited from ``NumpyBackend``
-
-    # def assert_allclose(self, value, target, rtol=1e-7, atol=0.0): Inherited from ``NumpyBackend``

From 921828106a8cd46a96f33f1b10e96773dd0f23ca Mon Sep 17 00:00:00 2001
From: yangliwei <yangliwei.uestc@gmail.com>
Date: Tue, 6 Feb 2024 16:30:24 +0800
Subject: [PATCH 39/85] Add the tensor network library list

---
 README.md | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 54ff75d..117d6a2 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,21 @@
-Qibotn is the tensor-network translation module for Qibo to support large-scale simulation of quantum circuits and acceleration.
+Qibotn is the tensor network translation module for Qibo to support large-scale simulation of quantum circuits and acceleration.
 
 To get started, `python setup.py install` to install the tools and dependencies.
 
-# Computation Supported
+# Supported Computation
 
-1. Tensornet (TN) with contractions to:
-    - dense vector
-    - expecation of given Pauli string
+Tensor network contractions to:
+- dense vectors
+- expecation values of given Pauli string
 
-    For each TN case:
-    - single node
-    - multi node with Message Passing Interface (MPI)
-    - multi node with NCCL
+The supported configuration are:
+- single node
+- multi node with Message Passing Interface (MPI)
+- multi node with NVIDIA Collective Communications Library (NCCL)
+
+Currently the supported libraries are:
+ - [cuQuantum](https://github.com/NVIDIA/cuQuantum)
+ - [quimb](https://quimb.readthedocs.io/en/latest/)
 
 2. Tensornet (TN) with contractions to:
     - dense vector (single node)

From 6a0459ee838c6a346881c847f3baf04aaf2328ae Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Tue, 6 Feb 2024 17:48:53 +0800
Subject: [PATCH 40/85] Added class for quimb backend

---
 src/qibotn/backends/cpu.py | 144 +++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 src/qibotn/backends/cpu.py

diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
new file mode 100644
index 0000000..d826ba1
--- /dev/null
+++ b/src/qibotn/backends/cpu.py
@@ -0,0 +1,144 @@
+import numpy as np
+
+from qibo.backends.numpy import NumpyBackend
+from qibo.states import CircuitResult
+from qibo.config import raise_error
+
+
+class QuTensorNet(NumpyBackend):  
+
+    def __init__(self, runcard):
+        super().__init__()
+        import quimb  # pylint: disable=import-error
+
+        if runcard is not None:
+            self.MPI_enabled = runcard.get("MPI_enabled", False)
+            self.NCCL_enabled = runcard.get("NCCL_enabled", False)
+            self.expectation_enabled_value = runcard.get("expectation_enabled", False)
+
+
+            mps_enabled_value = runcard.get("MPS_enabled")
+            if mps_enabled_value is True:
+                self.MPS_enabled = True
+            elif mps_enabled_value is False:
+                self.MPS_enabled = False
+            else:
+                raise TypeError("MPS_enabled has an unexpected type")
+
+        else:
+            self.MPI_enabled = False
+            self.MPS_enabled = False
+            self.NCCL_enabled = False
+            self.expectation_enabled = False
+
+        self.name = "qibotn"
+        self.quimb = quimb
+        self.platform = "qutensornet"
+        self.versions["quimb"] = self.quimb.__version__
+   
+
+    def apply_gate(self, gate, state, nqubits):  # pragma: no cover
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def apply_gate_density_matrix(self, gate, state, nqubits):  # pragma: no cover
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def assign_measurements(self, measurement_map, circuit_result):
+        raise_error(NotImplementedError, "Not implemented in QiboTN.")
+
+
+    def set_precision(self, precision):
+        if precision != self.precision:
+            super().set_precision(precision)
+
+
+
+    def execute_circuit(
+        self, circuit, initial_state=None, nshots=None, return_array=False
+    ):  # pragma: no cover
+        """Executes a quantum circuit.
+
+        Args:
+            circuit (:class:`qibo.models.circuit.Circuit`): Circuit to execute.
+            initial_state (:class:`qibo.models.circuit.Circuit`): Circuit to prepare the initial state.
+                If ``None`` the default ``|00...0>`` state is used.
+
+        Returns:
+            xxx.
+
+        """
+
+        import qibotn.eval_qu as eval_qu
+
+        if (
+            self.MPI_enabled == False
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == False
+        ):
+
+            state = eval.dense_vector_tn_qu(circuit, init_state, is_mps=False, backend="numpy")
+
+        elif (
+            self.MPI_enabled == False
+            and self.MPS_enabled == True
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == False
+        ):
+            
+
+            state = eval.dense_vector_tn_qu(circuit, init_state, is_mps=True, backend="numpy")
+
+        elif (
+            self.MPI_enabled == True
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == False
+        ):
+           
+            raise_error(NotImplementedError, "QiboTN quimb backend cannot support MPI.")
+
+
+        elif (
+            self.MPI_enabled == False
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == True
+            and self.expectation_enabled == False
+        ):
+           
+            raise_error(NotImplementedError, "QiboTN quimb backend cannot support NCCL.")
+
+
+        elif (
+            self.MPI_enabled == False
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == True
+        ):
+           
+            raise_error(NotImplementedError, "QiboTN quimb backend cannot support expectation")
+
+
+        elif (
+            self.MPI_enabled == True
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == False
+            and self.expectation_enabled == True
+        ):
+            raise_error(NotImplementedError, "QiboTN quimb backend cannot support expectation")
+
+        elif (
+            self.MPI_enabled == False
+            and self.MPS_enabled == False
+            and self.NCCL_enabled == True
+            and self.expectation_enabled == True
+        ):
+            raise_error(NotImplementedError, "QiboTN quimb backend cannot support expectation")
+        else:
+            raise_error(NotImplementedError, "Compute type not supported.")
+
+        if return_array:
+            return state.flatten()
+        else:
+            circuit._final_state = CircuitResult(self, circuit, state.flatten(), nshots)
+            return circuit._final_state

From 4fe1684144a9533a8157da183c3151d981d41416 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Tue, 6 Feb 2024 17:52:55 +0800
Subject: [PATCH 41/85] Added quimb backend

---
 src/qibotn/backends/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/qibotn/backends/__init__.py b/src/qibotn/backends/__init__.py
index 26a0b9d..f927932 100644
--- a/src/qibotn/backends/__init__.py
+++ b/src/qibotn/backends/__init__.py
@@ -1 +1,2 @@
 from qibotn.backends.gpu import CuTensorNet
+from qibotn.backends.cpu import QuTensorNet

From a7574192b63d40c5636da5ebcce022ec75beccf1 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Tue, 6 Feb 2024 17:54:24 +0800
Subject: [PATCH 42/85] renamed function 'eval' to 'dense_vector_tn_qu'

---
 src/qibotn/eval_qu.py | 49 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 src/qibotn/eval_qu.py

diff --git a/src/qibotn/eval_qu.py b/src/qibotn/eval_qu.py
new file mode 100644
index 0000000..9dc639c
--- /dev/null
+++ b/src/qibotn/eval_qu.py
@@ -0,0 +1,49 @@
+import numpy as np
+import quimb.tensor as qtn
+from qibo.models import Circuit as QiboCircuit
+
+
+def from_qibo(circuit: QiboCircuit, is_mps: False, psi0=None, method='svd',
+              cutoff=1e-6, cutoff_mode='abs'):
+    nqubits = circuit.nqubits
+    gate_opt = {}
+    if is_mps:
+        tncirc = qtn.CircuitMPS(nqubits, psi0=psi0)
+        gate_opt["method"] = method
+        gate_opt["cutoff"] = cutoff
+        gate_opt["cutoff_mode"] = cutoff_mode
+    else:
+        tncirc = qtn.Circuit(nqubits, psi0=psi0)
+
+    for gate in circuit.queue:
+        tncirc.apply_gate(
+            gate.name,
+            *gate.parameters,
+            *gate.qubits,
+            parametrize=False if is_mps else (len(gate.parameters) > 0),
+            **gate_opt
+        )
+
+    return tncirc
+
+
+def init_state_tn(nqubits, init_state_sv):
+    dims = tuple(2 * np.ones(nqubits, dtype=int))
+
+    return qtn.tensor_1d.MatrixProductState.from_dense(init_state_sv, dims)
+
+
+def dense_vector_tn_qu(qasm: str, init_state, is_mps, backend="numpy"):
+    """Evaluate QASM with Quimb
+
+    backend (quimb): numpy, cupy, jax. Passed to ``opt_einsum``.
+
+    """
+    circuit = QiboCircuit.from_qasm(qasm)
+    if init_state is not None:
+        init_state = init_state_tn(circuit.nqubits, init_state)
+    circ_quimb = from_qibo(circuit, is_mps, psi0=init_state)
+    interim = circ_quimb.psi.full_simplify(seq="DRC")
+    amplitudes = interim.to_dense(backend=backend).flatten()
+
+    return amplitudes

From 5ee13f7190030d07e49dcb2ac82077a6317a6be8 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Tue, 6 Feb 2024 17:56:29 +0800
Subject: [PATCH 43/85] renamed function 'eval' to 'dense_vector_tn_qu' in
 pytest

---
 tests/test_qasm_quimb_backend.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_qasm_quimb_backend.py b/tests/test_qasm_quimb_backend.py
index 5417115..e7dc6e6 100644
--- a/tests/test_qasm_quimb_backend.py
+++ b/tests/test_qasm_quimb_backend.py
@@ -27,7 +27,7 @@ def test_eval(nqubits: int, tolerance: float, is_mps: bool):
     # TODO: remove completely, or at least delegate to the backend
     # implementation
     os.environ["QUIMB_NUM_PROCS"] = str(os.cpu_count())
-    import qibotn.quimb
+    import qibotn.eval_qu
 
     init_state = create_init_state(nqubits=nqubits)
     init_state_tn = copy.deepcopy(init_state)
@@ -45,7 +45,7 @@ def test_eval(nqubits: int, tolerance: float, is_mps: bool):
     qasm_circ = qibo_circ.to_qasm()
 
     # Test quimb
-    result_tn = qibotn.quimb.eval(
+    result_tn = qibotn.eval_qu.dense_vector_tn_qu(
             qasm_circ, init_state_tn, is_mps, backend=config.quimb.backend
         )
    

From 8e73b8c172c15775f5c4761c6af67d74a8646e0b Mon Sep 17 00:00:00 2001
From: yangliwei <yangliwei.uestc@gmail.com>
Date: Wed, 7 Feb 2024 16:48:18 +0800
Subject: [PATCH 44/85] Minor update in the supported configurations and tensor
 network library list

---
 README.md | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 117d6a2..74e1e3a 100644
--- a/README.md
+++ b/README.md
@@ -8,17 +8,15 @@ Tensor network contractions to:
 - dense vectors
 - expecation values of given Pauli string
 
-The supported configuration are:
-- single node
-- multi node with Message Passing Interface (MPI)
-- multi node with NVIDIA Collective Communications Library (NCCL)
+The supported configurations are:
+- single-node CPU
+- single-node GPU or GPUs
+- multi-node multi-GPU with Message Passing Interface (MPI)
+- multi-node multi-GPU with NVIDIA Collective Communications Library (NCCL)
 
-Currently the supported libraries are:
- - [cuQuantum](https://github.com/NVIDIA/cuQuantum)
- - [quimb](https://quimb.readthedocs.io/en/latest/)
-
-2. Tensornet (TN) with contractions to:
-    - dense vector (single node)
+Currently, the supported tensor network libraries are:
+ - [cuQuantum](https://github.com/NVIDIA/cuQuantum), an NVIDIA SDK of optimized libraries and tools for accelerating quantum computing workflows.
+ - [quimb](https://quimb.readthedocs.io/en/latest/), an easy but fast python library for ‘quantum information many-body’ calculations, focusing primarily on tensor networks.
 
 # Sample Codes
 ## Single Node

From 853f721a2ed9b02ccb7bf951bf3dad8647d83cae Mon Sep 17 00:00:00 2001
From: yangliwei <yangliwei.uestc@gmail.com>
Date: Wed, 7 Feb 2024 17:51:44 +0800
Subject: [PATCH 45/85] Format update using black formatter

---
 src/qibotn/backends/cpu.py | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
index d826ba1..b6b5cf6 100644
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -5,7 +5,7 @@ from qibo.states import CircuitResult
 from qibo.config import raise_error
 
 
-class QuTensorNet(NumpyBackend):  
+class QuTensorNet(NumpyBackend):
 
     def __init__(self, runcard):
         super().__init__()
@@ -16,7 +16,6 @@ class QuTensorNet(NumpyBackend):
             self.NCCL_enabled = runcard.get("NCCL_enabled", False)
             self.expectation_enabled_value = runcard.get("expectation_enabled", False)
 
-
             mps_enabled_value = runcard.get("MPS_enabled")
             if mps_enabled_value is True:
                 self.MPS_enabled = True
@@ -35,7 +34,6 @@ class QuTensorNet(NumpyBackend):
         self.quimb = quimb
         self.platform = "qutensornet"
         self.versions["quimb"] = self.quimb.__version__
-   
 
     def apply_gate(self, gate, state, nqubits):  # pragma: no cover
         raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
@@ -46,13 +44,10 @@ class QuTensorNet(NumpyBackend):
     def assign_measurements(self, measurement_map, circuit_result):
         raise_error(NotImplementedError, "Not implemented in QiboTN.")
 
-
     def set_precision(self, precision):
         if precision != self.precision:
             super().set_precision(precision)
 
-
-
     def execute_circuit(
         self, circuit, initial_state=None, nshots=None, return_array=False
     ):  # pragma: no cover
@@ -77,7 +72,9 @@ class QuTensorNet(NumpyBackend):
             and self.expectation_enabled == False
         ):
 
-            state = eval.dense_vector_tn_qu(circuit, init_state, is_mps=False, backend="numpy")
+            state = eval.dense_vector_tn_qu(
+                circuit, init_state, is_mps=False, backend="numpy"
+            )
 
         elif (
             self.MPI_enabled == False
@@ -85,9 +82,10 @@ class QuTensorNet(NumpyBackend):
             and self.NCCL_enabled == False
             and self.expectation_enabled == False
         ):
-            
 
-            state = eval.dense_vector_tn_qu(circuit, init_state, is_mps=True, backend="numpy")
+            state = eval.dense_vector_tn_qu(
+                circuit, init_state, is_mps=True, backend="numpy"
+            )
 
         elif (
             self.MPI_enabled == True
@@ -95,19 +93,19 @@ class QuTensorNet(NumpyBackend):
             and self.NCCL_enabled == False
             and self.expectation_enabled == False
         ):
-           
+
             raise_error(NotImplementedError, "QiboTN quimb backend cannot support MPI.")
 
-
         elif (
             self.MPI_enabled == False
             and self.MPS_enabled == False
             and self.NCCL_enabled == True
             and self.expectation_enabled == False
         ):
-           
-            raise_error(NotImplementedError, "QiboTN quimb backend cannot support NCCL.")
 
+            raise_error(
+                NotImplementedError, "QiboTN quimb backend cannot support NCCL."
+            )
 
         elif (
             self.MPI_enabled == False
@@ -115,9 +113,10 @@ class QuTensorNet(NumpyBackend):
             and self.NCCL_enabled == False
             and self.expectation_enabled == True
         ):
-           
-            raise_error(NotImplementedError, "QiboTN quimb backend cannot support expectation")
 
+            raise_error(
+                NotImplementedError, "QiboTN quimb backend cannot support expectation"
+            )
 
         elif (
             self.MPI_enabled == True
@@ -125,7 +124,9 @@ class QuTensorNet(NumpyBackend):
             and self.NCCL_enabled == False
             and self.expectation_enabled == True
         ):
-            raise_error(NotImplementedError, "QiboTN quimb backend cannot support expectation")
+            raise_error(
+                NotImplementedError, "QiboTN quimb backend cannot support expectation"
+            )
 
         elif (
             self.MPI_enabled == False
@@ -133,7 +134,9 @@ class QuTensorNet(NumpyBackend):
             and self.NCCL_enabled == True
             and self.expectation_enabled == True
         ):
-            raise_error(NotImplementedError, "QiboTN quimb backend cannot support expectation")
+            raise_error(
+                NotImplementedError, "QiboTN quimb backend cannot support expectation"
+            )
         else:
             raise_error(NotImplementedError, "Compute type not supported.")
 

From c370c920a3c6c689fe2bae787ff916929dd38ebe Mon Sep 17 00:00:00 2001
From: yangliwei <yangliwei.uestc@gmail.com>
Date: Wed, 7 Feb 2024 17:59:10 +0800
Subject: [PATCH 46/85] Format update using black formatter

---
 tests/test_cuquantum_cutensor_backend.py | 36 +++++++++++-------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/tests/test_cuquantum_cutensor_backend.py b/tests/test_cuquantum_cutensor_backend.py
index 3de5c17..57b3f34 100644
--- a/tests/test_cuquantum_cutensor_backend.py
+++ b/tests/test_cuquantum_cutensor_backend.py
@@ -35,18 +35,16 @@ def test_eval(nqubits: int, dtype="complex128"):
     import qibotn.eval
 
     # Test qibo
-    qibo.set_backend(backend=config.qibo.backend,
-                     platform=config.qibo.platform)
-    qibo_time, (qibo_circ, result_sv) = time(
-        lambda: qibo_qft(nqubits, swaps=True))
+    qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform)
+    qibo_time, (qibo_circ, result_sv) = time(lambda: qibo_qft(nqubits, swaps=True))
 
     # Test Cuquantum
     cutn_time, result_tn = time(
-        lambda: qibotn.eval.dense_vector_tn(qibo_circ, dtype).flatten())
+        lambda: qibotn.eval.dense_vector_tn(qibo_circ, dtype).flatten()
+    )
 
     assert 1e-2 * qibo_time < cutn_time < 1e2 * qibo_time
-    assert np.allclose(
-        result_sv, result_tn), "Resulting dense vectors do not match"
+    assert np.allclose(result_sv, result_tn), "Resulting dense vectors do not match"
 
 
 @pytest.mark.gpu
@@ -62,25 +60,25 @@ def test_mps(nqubits: int, dtype="complex128"):
     import qibotn.eval
 
     # Test qibo
-    qibo.set_backend(backend=config.qibo.backend,
-                     platform=config.qibo.platform)
+    qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform)
 
-    qibo_time, (circ_qibo, result_sv) = time(
-        lambda: qibo_qft(nqubits, swaps=True))
+    qibo_time, (circ_qibo, result_sv) = time(lambda: qibo_qft(nqubits, swaps=True))
 
     result_sv_cp = cp.asarray(result_sv)
 
     # Test of MPS
-    gate_algo = {'qr_method': False,
-                 'svd_method': {
-                     'partition': 'UV',
-                     'abs_cutoff': 1e-12,
-                 }}
+    gate_algo = {
+        "qr_method": False,
+        "svd_method": {
+            "partition": "UV",
+            "abs_cutoff": 1e-12,
+        },
+    }
 
     cutn_time, result_tn = time(
-        lambda: qibotn.eval.dense_vector_mps(circ_qibo, gate_algo, dtype).flatten())
+        lambda: qibotn.eval.dense_vector_mps(circ_qibo, gate_algo, dtype).flatten()
+    )
 
-    print(
-        f"State vector difference: {abs(result_tn - result_sv_cp).max():0.3e}")
+    print(f"State vector difference: {abs(result_tn - result_sv_cp).max():0.3e}")
 
     assert cp.allclose(result_tn, result_sv_cp)

From d4e75b94a403842e00b5b8267404571de17071e8 Mon Sep 17 00:00:00 2001
From: Alessandro Candido <candido.ale@gmail.com>
Date: Wed, 7 Feb 2024 11:10:44 +0100
Subject: [PATCH 47/85] ci: Update pre-commit configurations, introduce
 pre-commit ci

---
 .pre-commit-config.yaml | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 177c408..9d8b368 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,25 +1,48 @@
-# See https://pre-commit.com for more information
-# See https://pre-commit.com/hooks.html for more hooks
+ci:
+  autofix_prs: true
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-yaml
       - id: check-toml
-      - id: check-merge-conflict
       - id: debug-statements
   - repo: https://github.com/psf/black
-    rev: 23.1.0
+    rev: 24.1.1
     hooks:
       - id: black
   - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
       - id: isort
         args: ["--profile", "black"]
+  - repo: https://github.com/PyCQA/docformatter
+    rev: v1.7.5
+    hooks:
+      - id: docformatter
+        additional_dependencies: [tomli]
+        args: [--in-place, --config, ./pyproject.toml]
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
+    rev: v3.15.0
     hooks:
       - id: pyupgrade
+  - repo: https://github.com/hadialqattan/pycln
+    rev: v2.4.0
+    hooks:
+      - id: pycln
+        args:
+          - --config=pyproject.toml
+          - --all
+  - repo: https://github.com/adamchainz/blacken-docs
+    rev: 1.16.0
+    hooks:
+      - id: blacken-docs
+  - repo: https://github.com/pycqa/pydocstyle
+    rev: 6.3.0
+    hooks:
+      - id: pydocstyle
+        args:
+          - --select=D103,D200,D206,D300,D301
+        files: ^src/

From c46909a7466c58826ea9907f1ce8a480451d45f3 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 11:46:32 +0800
Subject: [PATCH 48/85] removed quimb.py as it is duplicate of eval_qu.py

---
 src/qibotn/quimb.py | 48 ---------------------------------------------
 1 file changed, 48 deletions(-)
 delete mode 100644 src/qibotn/quimb.py

diff --git a/src/qibotn/quimb.py b/src/qibotn/quimb.py
deleted file mode 100644
index a9ac510..0000000
--- a/src/qibotn/quimb.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import numpy as np
-import quimb.tensor as qtn
-from qibo.models import Circuit as QiboCircuit
-
-
-def from_qibo(circuit: QiboCircuit, is_mps: False, psi0=None, method='svd',
-              cutoff=1e-6, cutoff_mode='abs'):
-    nqubits = circuit.nqubits
-    gate_opt = {}
-    if is_mps:
-        tncirc = qtn.CircuitMPS(nqubits, psi0=psi0)
-        gate_opt["method"] = method
-        gate_opt["cutoff"] = cutoff
-        gate_opt["cutoff_mode"] = cutoff_mode
-    else:
-        tncirc = qtn.Circuit(nqubits, psi0=psi0)
-
-    for gate in circuit.queue:
-        tncirc.apply_gate(
-            gate.name,
-            *gate.parameters,
-            *gate.qubits,
-            parametrize=False if is_mps else (len(gate.parameters) > 0),
-            **gate_opt
-        )
-
-    return tncirc
-
-
-def init_state_tn(nqubits, init_state_sv):
-    dims = tuple(2 * np.ones(nqubits, dtype=int))
-
-    return qtn.tensor_1d.MatrixProductState.from_dense(init_state_sv, dims)
-
-
-def eval(qasm: str, init_state, is_mps, backend="numpy"):
-    """Evaluate QASM with Quimb
-
-    backend (quimb): numpy, cupy, jax. Passed to ``opt_einsum``.
-
-    """
-    circuit = QiboCircuit.from_qasm(qasm)
-    init_state_mps = init_state_tn(circuit.nqubits, init_state)
-    circ_quimb = from_qibo(circuit, is_mps, psi0=init_state_mps)
-    interim = circ_quimb.psi.full_simplify(seq="DRC")
-    amplitudes = interim.to_dense(backend=backend).flatten()
-
-    return amplitudes

From 546cac6956be1da01e6699ce0ba6713c19f738ed Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 11:47:25 +0800
Subject: [PATCH 49/85] removed __main__.py as it is no longer used

---
 src/qibotn/__main__.py | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 src/qibotn/__main__.py

diff --git a/src/qibotn/__main__.py b/src/qibotn/__main__.py
deleted file mode 100644
index 0476be5..0000000
--- a/src/qibotn/__main__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import argparse
-
-import qibotn.quimb
-
-
-def parser():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--nqubits", default=10, type=int, help="Number of quibits in the circuits."
-    )
-    return parser.parse_args()
-
-
-def main(args: argparse.Namespace):
-    print("Testing for %d nqubits" % (args.nqubits))
-    qibotn.quimb.eval(args.nqubits, args.qasm_circ, args.init_state)
-
-
-if __name__ == "__main__":
-    main(parser())

From 84a6a035ba88f07129b9651df79662467de0f071 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 12:06:38 +0800
Subject: [PATCH 50/85] added missing docstring

---
 src/qibotn/eval_qu.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/qibotn/eval_qu.py b/src/qibotn/eval_qu.py
index 9dc639c..78d84ef 100644
--- a/src/qibotn/eval_qu.py
+++ b/src/qibotn/eval_qu.py
@@ -5,6 +5,8 @@ from qibo.models import Circuit as QiboCircuit
 
 def from_qibo(circuit: QiboCircuit, is_mps: False, psi0=None, method='svd',
               cutoff=1e-6, cutoff_mode='abs'):
+    """Create a tensornetwork representation of the circuit"""
+
     nqubits = circuit.nqubits
     gate_opt = {}
     if is_mps:
@@ -28,6 +30,9 @@ def from_qibo(circuit: QiboCircuit, is_mps: False, psi0=None, method='svd',
 
 
 def init_state_tn(nqubits, init_state_sv):
+
+    """Create a matrixproductstate directly from a dense vector"""
+
     dims = tuple(2 * np.ones(nqubits, dtype=int))
 
     return qtn.tensor_1d.MatrixProductState.from_dense(init_state_sv, dims)

From 2422f1face53e38a90d1e858f0d6628b1d6e3fd4 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 12:07:30 +0800
Subject: [PATCH 51/85] added missing docstring

---
 tests/test_qasm_quimb_backend.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/test_qasm_quimb_backend.py b/tests/test_qasm_quimb_backend.py
index e7dc6e6..e01cde4 100644
--- a/tests/test_qasm_quimb_backend.py
+++ b/tests/test_qasm_quimb_backend.py
@@ -23,6 +23,14 @@ def qibo_qft(nqubits, init_state, swaps):
 @pytest.mark.parametrize("nqubits, tolerance, is_mps",
                          [(1, 1e-6, True), (2, 1e-6, False), (5, 1e-3, True), (10, 1e-3, False)])
 def test_eval(nqubits: int, tolerance: float, is_mps: bool):
+
+    """Evaluate circuit with Quimb backend.
+
+    Args:
+        nqubits (int): Total number of qubits in the system.
+        tolerance (float): Maximum limit allowed for difference in results
+        is_mps (bool): True if state is MPS and False for tensor network structure
+    """
     # hack quimb to use the correct number of processes
     # TODO: remove completely, or at least delegate to the backend
     # implementation
@@ -35,9 +43,7 @@ def test_eval(nqubits: int, tolerance: float, is_mps: bool):
     # Test qibo
     qibo.set_backend(backend=config.qibo.backend,
                      platform=config.qibo.platform)
-    #qibo_time, (qibo_circ, result_sv) = time(
-        #lambda: qibo_qft(nqubits, init_state, swaps=True)
-    #)
+   
     qibo_circ, result_sv= qibo_qft(nqubits, init_state, swaps=True)
     
 

From 336702a555c5c862ff431e9ce83b9d34375ca810 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 13:09:19 +0800
Subject: [PATCH 52/85] Remove unused variable

---
 src/qibotn/QiboCircuitConvertor.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/qibotn/QiboCircuitConvertor.py b/src/qibotn/QiboCircuitConvertor.py
index c59745b..5ac2fee 100644
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/QiboCircuitConvertor.py
@@ -166,7 +166,6 @@ class QiboCircuitToEinsum:
         input_operands = self._get_bitstring_tensors(input_bitstring)
         pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))
         pauli_map = pauli_string
-        coned_qubits = pauli_map.keys()
 
         (
             mode_labels,

From 91152d87dbba92970224b6164149b07736c1c3b8 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 13:57:10 +0800
Subject: [PATCH 53/85] Update sample codes to include Quimb

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 74e1e3a..b7532ac 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,9 @@ computation_settings = {
 }
 
 
-qibo.set_backend(backend="qibotn", runcard=computation_settings)
+qibo.set_backend(backend="qibotn", platform="cutensornet", runcard=computation_settings)  #cuQuantum
+# qibo.set_backend(backend="qibotn", platform="qutensornet", runcard=computation_settings) #quimb
+
 
 # Construct the circuit
 c = Circuit(2)

From 4937488e198b2cc40f30d9d4fb627b1c0dfb4c40 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 14:17:35 +0800
Subject: [PATCH 54/85] fixed attribute name, import alias and undefined init_state in cpu.py

---
 src/qibotn/backends/cpu.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
index b6b5cf6..2483d98 100644
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -14,7 +14,7 @@ class QuTensorNet(NumpyBackend):
         if runcard is not None:
             self.MPI_enabled = runcard.get("MPI_enabled", False)
             self.NCCL_enabled = runcard.get("NCCL_enabled", False)
-            self.expectation_enabled_value = runcard.get("expectation_enabled", False)
+            self.expectation_enabled = runcard.get("expectation_enabled", False)
 
             mps_enabled_value = runcard.get("MPS_enabled")
             if mps_enabled_value is True:
@@ -63,7 +63,7 @@ class QuTensorNet(NumpyBackend):
 
         """
 
-        import qibotn.eval_qu as eval_qu
+        import qibotn.eval_qu as eval
 
         if (
             self.MPI_enabled == False
@@ -73,7 +73,7 @@ class QuTensorNet(NumpyBackend):
         ):
 
             state = eval.dense_vector_tn_qu(
-                circuit, init_state, is_mps=False, backend="numpy"
+                circuit, initial_state=None, is_mps=False, backend="numpy"
             )
 
         elif (
@@ -84,7 +84,7 @@ class QuTensorNet(NumpyBackend):
         ):
 
             state = eval.dense_vector_tn_qu(
-                circuit, init_state, is_mps=True, backend="numpy"
+                circuit, initial_state=None, is_mps=True, backend="numpy"
             )
 
         elif (

From 929df9199faf86a13153e022ee68e452ef3e44b6 Mon Sep 17 00:00:00 2001
From: nitinshivaraman <nitin.shivaraman@gmail.com>
Date: Thu, 8 Feb 2024 15:34:14 +0800
Subject: [PATCH 55/85] Rename test_qasm_quimb_backend.py to
 test_quimb_backend.py for better readability

---
 tests/{test_qasm_quimb_backend.py => test_quimb_backend.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{test_qasm_quimb_backend.py => test_quimb_backend.py} (100%)

diff --git a/tests/test_qasm_quimb_backend.py b/tests/test_quimb_backend.py
similarity index 100%
rename from tests/test_qasm_quimb_backend.py
rename to tests/test_quimb_backend.py

From 4c692c14d0f89b390612bab12e8b0bb6871b7308 Mon Sep 17 00:00:00 2001
From: yangliwei <yangliwei.uestc@gmail.com>
Date: Thu, 8 Feb 2024 16:16:30 +0800
Subject: [PATCH 56/85] Minor naming update

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b7532ac..2e8dd94 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Tensor network contractions to:
 - dense vectors
 - expecation values of given Pauli string
 
-The supported configurations are:
+The supported HPC configurations are:
 - single-node CPU
 - single-node GPU or GPUs
 - multi-node multi-GPU with Message Passing Interface (MPI)

From 1668eb7baa741948a3f224e7ebde715e4da79115 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 16:17:39 +0800
Subject: [PATCH 57/85] fixed error caused by passing circuit instead of its qasm str

---
 src/qibotn/backends/cpu.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
index 2483d98..2df7883 100644
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -73,7 +73,7 @@ class QuTensorNet(NumpyBackend):
         ):
 
             state = eval.dense_vector_tn_qu(
-                circuit, initial_state=None, is_mps=False, backend="numpy"
+                circuit.to_qasm(), initial_state=None, is_mps=False, backend="numpy"
             )
 
         elif (
@@ -84,7 +84,7 @@ class QuTensorNet(NumpyBackend):
         ):
 
             state = eval.dense_vector_tn_qu(
-                circuit, initial_state=None, is_mps=True, backend="numpy"
+                circuit.to_qasm(), initial_state=None, is_mps=True, backend="numpy"
             )
 
         elif (

From aaabd855a2acdf630a912d8c8aa4d25332fc3fcb Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 16:19:17 +0800
Subject: [PATCH 58/85] Add in tensor network types

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2e8dd94..cdf65d4 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,11 @@ To get started, `python setup.py install` to install the tools and dependencies.
 
 # Supported Computation
 
-Tensor network contractions to:
+Tensor Network Types:
+- Tensornet (TN)
+- Matrix Product States (MPS)
+
+Tensor Network contractions to:
 - dense vectors
 - expecation values of given Pauli string
 

From 604f11d2896c68e1ed5607452a51f222529af840 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 16:19:59 +0800
Subject: [PATCH 59/85] fixed error caused by initial_state

---
 src/qibotn/eval_qu.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/qibotn/eval_qu.py b/src/qibotn/eval_qu.py
index 78d84ef..579a42a 100644
--- a/src/qibotn/eval_qu.py
+++ b/src/qibotn/eval_qu.py
@@ -38,17 +38,17 @@ def init_state_tn(nqubits, init_state_sv):
     return qtn.tensor_1d.MatrixProductState.from_dense(init_state_sv, dims)
 
 
-def dense_vector_tn_qu(qasm: str, init_state, is_mps, backend="numpy"):
+def dense_vector_tn_qu(qasm: str, initial_state, is_mps,  backend="numpy"):
     """Evaluate QASM with Quimb
 
     backend (quimb): numpy, cupy, jax. Passed to ``opt_einsum``.
 
     """
     circuit = QiboCircuit.from_qasm(qasm)
-    if init_state is not None:
-        init_state = init_state_tn(circuit.nqubits, init_state)
-    circ_quimb = from_qibo(circuit, is_mps, psi0=init_state)
+    if initial_state is not None:
+        initial_state = init_state_tn(circuit.nqubits, initial_state)
+    circ_quimb = from_qibo(circuit, is_mps, psi0=initial_state)
     interim = circ_quimb.psi.full_simplify(seq="DRC")
-    amplitudes = interim.to_dense(backend=backend).flatten()
+    amplitudes = interim.to_dense(backend=backend)
 
     return amplitudes

From 773953e18b6c3b3ecea85f6ad3dca2e37c04d02e Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 16:22:38 +0800
Subject: [PATCH 60/85] added flatten of state in pytest instead of using
 within eval_qu

---
 tests/test_quimb_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_quimb_backend.py b/tests/test_quimb_backend.py
index e01cde4..81a0e2b 100644
--- a/tests/test_quimb_backend.py
+++ b/tests/test_quimb_backend.py
@@ -53,7 +53,7 @@ def test_eval(nqubits: int, tolerance: float, is_mps: bool):
     # Test quimb
     result_tn = qibotn.eval_qu.dense_vector_tn_qu(
             qasm_circ, init_state_tn, is_mps, backend=config.quimb.backend
-        )
+        ).flatten()
    
 
     assert np.allclose(result_sv, result_tn,

From 8ee168c3108ec3e9e8dac50e881795d2e4534fd9 Mon Sep 17 00:00:00 2001
From: yangliwei <yangliwei.uestc@gmail.com>
Date: Thu, 8 Feb 2024 16:31:22 +0800
Subject: [PATCH 61/85] Minor update for consistency

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index cdf65d4..070ccb3 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ Currently, the supported tensor network libraries are:
  - [quimb](https://quimb.readthedocs.io/en/latest/), an easy but fast python library for ‘quantum information many-body’ calculations, focusing primarily on tensor networks.
 
 # Sample Codes
-## Single Node
+## Single-Node Example
 The code below shows an example of how to activate the Cuquantum TensorNetwork backend of Qibo.
 ```py
 import numpy as np
@@ -86,7 +86,7 @@ computation_settings = {
 }
 ```
 
-## Multi-Node
+## Multi-Node Example
 Multi-node is enabled by setting either the MPI or NCCL enabled flag to True in the computation settings. Below shows the script to launch on 2 nodes with 2 GPUs each. $node_list contains the IP of the nodes assigned.
 
 

From fbd995d0d1e734834e28346e360637939ce919a3 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 16:34:46 +0800
Subject: [PATCH 62/85] minor update of initial state

---
 src/qibotn/backends/cpu.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
index 2df7883..7115b39 100644
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -73,7 +73,7 @@ class QuTensorNet(NumpyBackend):
         ):
 
             state = eval.dense_vector_tn_qu(
-                circuit.to_qasm(), initial_state=None, is_mps=False, backend="numpy"
+                circuit.to_qasm(), initial_state, is_mps=False, backend="numpy"
             )
 
         elif (
@@ -84,7 +84,7 @@ class QuTensorNet(NumpyBackend):
         ):
 
             state = eval.dense_vector_tn_qu(
-                circuit.to_qasm(), initial_state=None, is_mps=True, backend="numpy"
+                circuit.to_qasm(), initial_state, is_mps=True, backend="numpy"
             )
 
         elif (

From 4982fbf9262aaac28550b4ddc696da1989b7f04f Mon Sep 17 00:00:00 2001
From: Alessandro Candido <candido.ale@gmail.com>
Date: Wed, 7 Feb 2024 11:13:55 +0100
Subject: [PATCH 63/85] build: Add Nix files

---
 .envrc     |   9 ++
 .gitignore |   1 +
 flake.lock | 323 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 flake.nix  |  62 ++++++++++
 4 files changed, 395 insertions(+)
 create mode 100644 .envrc
 create mode 100644 flake.lock
 create mode 100644 flake.nix

diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..01f5f41
--- /dev/null
+++ b/.envrc
@@ -0,0 +1,9 @@
+if ! has nix_direnv_version || ! nix_direnv_version 2.2.1; then
+  source_url "https://raw.githubusercontent.com/nix-community/nix-direnv/2.2.1/direnvrc" "sha256-zelF0vLbEl5uaqrfIzbgNzJWGmLzCmYAkInj/LNxvKs="
+fi
+
+nix_direnv_watch_file flake.nix
+nix_direnv_watch_file flake.lock
+if ! use flake . --impure; then
+  echo "devenv could not be built. The devenv environment was not loaded. Make the necessary changes to devenv.nix and hit enter to try again." >&2
+fi
diff --git a/.gitignore b/.gitignore
index d903da0..7f051b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,3 +159,4 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+.devenv
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..0f48ef0
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,323 @@
+{
+  "nodes": {
+    "devenv": {
+      "inputs": {
+        "flake-compat": "flake-compat",
+        "nix": "nix",
+        "nixpkgs": "nixpkgs",
+        "pre-commit-hooks": "pre-commit-hooks"
+      },
+      "locked": {
+        "lastModified": 1707004164,
+        "narHash": "sha256-9Hr8onWtvLk5A8vCEkaE9kxA0D7PR62povFokM1oL5Q=",
+        "owner": "cachix",
+        "repo": "devenv",
+        "rev": "0e68853bb27981a4ffd7a7225b59ed84f7180fc7",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "repo": "devenv",
+        "type": "github"
+      }
+    },
+    "flake-compat": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1673956053,
+        "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-compat_2": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1696426674,
+        "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1685518550,
+        "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "flake-utils_2": {
+      "inputs": {
+        "systems": "systems_2"
+      },
+      "locked": {
+        "lastModified": 1701680307,
+        "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "4022d587cbbfd70fe950c1e2083a02621806a725",
+        "type": "github"
+      },
+      "original": {
+        "id": "flake-utils",
+        "type": "indirect"
+      }
+    },
+    "gitignore": {
+      "inputs": {
+        "nixpkgs": [
+          "devenv",
+          "pre-commit-hooks",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1660459072,
+        "narHash": "sha256-8DFJjXG8zqoONA1vXtgeKXy68KdJL5UaXR8NtVMUbx8=",
+        "owner": "hercules-ci",
+        "repo": "gitignore.nix",
+        "rev": "a20de23b925fd8264fd7fad6454652e142fd7f73",
+        "type": "github"
+      },
+      "original": {
+        "owner": "hercules-ci",
+        "repo": "gitignore.nix",
+        "type": "github"
+      }
+    },
+    "lowdown-src": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1633514407,
+        "narHash": "sha256-Dw32tiMjdK9t3ETl5fzGrutQTzh2rufgZV4A/BbxuD4=",
+        "owner": "kristapsdz",
+        "repo": "lowdown",
+        "rev": "d2c2b44ff6c27b936ec27358a2653caaef8f73b8",
+        "type": "github"
+      },
+      "original": {
+        "owner": "kristapsdz",
+        "repo": "lowdown",
+        "type": "github"
+      }
+    },
+    "nix": {
+      "inputs": {
+        "lowdown-src": "lowdown-src",
+        "nixpkgs": [
+          "devenv",
+          "nixpkgs"
+        ],
+        "nixpkgs-regression": "nixpkgs-regression"
+      },
+      "locked": {
+        "lastModified": 1676545802,
+        "narHash": "sha256-EK4rZ+Hd5hsvXnzSzk2ikhStJnD63odF7SzsQ8CuSPU=",
+        "owner": "domenkozar",
+        "repo": "nix",
+        "rev": "7c91803598ffbcfe4a55c44ac6d49b2cf07a527f",
+        "type": "github"
+      },
+      "original": {
+        "owner": "domenkozar",
+        "ref": "relaxed-flakes",
+        "repo": "nix",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1678875422,
+        "narHash": "sha256-T3o6NcQPwXjxJMn2shz86Chch4ljXgZn746c2caGxd8=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "126f49a01de5b7e35a43fd43f891ecf6d3a51459",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixpkgs-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "nixpkgs-python": {
+      "inputs": {
+        "flake-compat": "flake-compat_2",
+        "flake-utils": "flake-utils_2",
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1707114737,
+        "narHash": "sha256-ZXqv2epXAjDjfWbYn+yy4VOmW+C7SuUBoiZkkDoSqA4=",
+        "owner": "cachix",
+        "repo": "nixpkgs-python",
+        "rev": "f34ed02276bc08fe1c91c1bf0ef3589d68028878",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "repo": "nixpkgs-python",
+        "type": "github"
+      }
+    },
+    "nixpkgs-regression": {
+      "locked": {
+        "lastModified": 1643052045,
+        "narHash": "sha256-uGJ0VXIhWKGXxkeNnq4TvV3CIOkUJ3PAoLZ3HMzNVMw=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2",
+        "type": "github"
+      }
+    },
+    "nixpkgs-stable": {
+      "locked": {
+        "lastModified": 1685801374,
+        "narHash": "sha256-otaSUoFEMM+LjBI1XL/xGB5ao6IwnZOXc47qhIgJe8U=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "c37ca420157f4abc31e26f436c1145f8951ff373",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-23.05",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "nixpkgs_2": {
+      "locked": {
+        "lastModified": 1707092692,
+        "narHash": "sha256-ZbHsm+mGk/izkWtT4xwwqz38fdlwu7nUUKXTOmm4SyE=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "faf912b086576fd1a15fca610166c98d47bc667e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "pre-commit-hooks": {
+      "inputs": {
+        "flake-compat": [
+          "devenv",
+          "flake-compat"
+        ],
+        "flake-utils": "flake-utils",
+        "gitignore": "gitignore",
+        "nixpkgs": [
+          "devenv",
+          "nixpkgs"
+        ],
+        "nixpkgs-stable": "nixpkgs-stable"
+      },
+      "locked": {
+        "lastModified": 1704725188,
+        "narHash": "sha256-qq8NbkhRZF1vVYQFt1s8Mbgo8knj+83+QlL5LBnYGpI=",
+        "owner": "cachix",
+        "repo": "pre-commit-hooks.nix",
+        "rev": "ea96f0c05924341c551a797aaba8126334c505d2",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "repo": "pre-commit-hooks.nix",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "devenv": "devenv",
+        "nixpkgs": "nixpkgs_2",
+        "nixpkgs-python": "nixpkgs-python",
+        "systems": "systems_3"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_2": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_3": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..92f53ee
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,62 @@
+{
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+    systems.url = "github:nix-systems/default";
+    devenv.url = "github:cachix/devenv";
+    nixpkgs-python = {
+      url = "github:cachix/nixpkgs-python";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+  };
+
+  outputs = {
+    self,
+    nixpkgs,
+    devenv,
+    systems,
+    ...
+  } @ inputs: let
+    forEachSystem = nixpkgs.lib.genAttrs (import systems);
+  in {
+    # packages = forEachSystem (system: {
+    #   default =
+    #     nixpkgs.legacyPackages.${system}.poetry2nix.mkPoetryApplication
+    #     {
+    #       projectDir = self;
+    #       preferWheels = true;
+    #     };
+    # });
+
+    devShells =
+      forEachSystem
+      (system: let
+        pkgs = nixpkgs.legacyPackages.${system};
+      in {
+        default = devenv.lib.mkShell {
+          inherit inputs pkgs;
+
+          modules = [
+            {
+              packages = with pkgs; [pre-commit poethepoet stdenv.cc.cc.lib];
+
+              languages.python = {
+                enable = true;
+                # poetry = {
+                #   enable = true;
+                #   install.enable = true;
+                #   install.groups = ["dev" "tests"];
+                #   install.allExtras = true;
+                # };
+                version = "3.11";
+              };
+            }
+          ];
+        };
+      });
+  };
+
+  nixConfig = {
+    extra-trusted-public-keys = "devenv.cachix.org-1:w1cLUi8dv3hnoSPGAuibQv+f9TZLr6cv/Hm9XgU50cw=";
+    extra-substituters = "https://devenv.cachix.org";
+  };
+}

From c3a4a544b5e899af7f20c3a0f3fc5e373062515f Mon Sep 17 00:00:00 2001
From: Alessandro Candido <candido.ale@gmail.com>
Date: Wed, 7 Feb 2024 11:20:58 +0100
Subject: [PATCH 64/85] fix: Fix dependency issues by adding extra hook
 dependencies

---
 .pre-commit-config.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9d8b368..b65aeed 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
     rev: v1.7.5
     hooks:
       - id: docformatter
-        additional_dependencies: [tomli]
+        additional_dependencies: [tomli, charset-normalizer]
         args: [--in-place, --config, ./pyproject.toml]
   - repo: https://github.com/asottile/pyupgrade
     rev: v3.15.0
@@ -39,6 +39,8 @@ repos:
     rev: 1.16.0
     hooks:
       - id: blacken-docs
+        additional_dependencies:
+          - platformdirs
   - repo: https://github.com/pycqa/pydocstyle
     rev: 6.3.0
     hooks:

From e496f9f156d5d9bc1270cf8aebeae9811f26ce0b Mon Sep 17 00:00:00 2001
From: Alessandro Candido <candido.ale@gmail.com>
Date: Wed, 7 Feb 2024 11:21:27 +0100
Subject: [PATCH 65/85] chore: Run pre-commit on all files

---
 README.md                                |  4 +-
 setup.py                                 |  9 ++-
 src/qibotn/MPSUtils.py                   | 13 +---
 src/qibotn/QiboCircuitConvertor.py       | 16 ++--
 src/qibotn/QiboCircuitToMPS.py           |  4 +-
 src/qibotn/backends/__init__.py          |  2 +-
 src/qibotn/backends/gpu.py               |  4 +-
 src/qibotn/eval.py                       | 93 ++++++++++++++++--------
 src/qibotn/mps_contraction_helper.py     | 17 ++---
 tests/test_cuquantum_cutensor_backend.py |  2 +-
 10 files changed, 92 insertions(+), 72 deletions(-)

diff --git a/README.md b/README.md
index 070ccb3..9f60ad3 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ import qibo
 
 # Below shows how to set the computation_settings
 # Note that for MPS_enabled and expectation_enabled parameters the accepted inputs are boolean or a dictionary with the format shown below.
-# If computation_settings is not specified, the default setting is used in which all booleans will be False. 
+# If computation_settings is not specified, the default setting is used in which all booleans will be False.
 # This will trigger the dense vector computation of the tensornet.
 
 computation_settings = {
@@ -92,4 +92,4 @@ Multi-node is enabled by setting either the MPI or NCCL enabled flag to True in
 
 ```sh
 mpirun -n 4 -hostfile $node_list python test.py
-```
\ No newline at end of file
+```
diff --git a/setup.py b/setup.py
index 13285eb..0f619a5 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,7 @@
-from setuptools import setup, find_packages
-import re
 import pathlib
+import re
+
+from setuptools import find_packages, setup
 
 HERE = pathlib.Path(__file__).parent.absolute()
 PACKAGE = "qibotn"
@@ -8,8 +9,8 @@ PACKAGE = "qibotn"
 
 # Returns the qibotn version
 def version():
-    """Gets the version from the package's __init__ file
-    if there is some problem, let it happily fail"""
+    """Gets the version from the package's __init__ file if there is some
+    problem, let it happily fail."""
     version_file = HERE / "src" / PACKAGE / "__init__.py"
     version_regex = r"^__version__ = ['\"]([^'\"]*)['\"]"
 
diff --git a/src/qibotn/MPSUtils.py b/src/qibotn/MPSUtils.py
index 4f84f67..e8068f7 100644
--- a/src/qibotn/MPSUtils.py
+++ b/src/qibotn/MPSUtils.py
@@ -1,23 +1,19 @@
 import cupy as cp
-from cuquantum.cutensornet.experimental import contract_decompose
 from cuquantum import contract
+from cuquantum.cutensornet.experimental import contract_decompose
 
 # Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
 
 
 def initial(num_qubits, dtype):
-    """
-    Generate the MPS with an initial state of |00...00>
-    """
+    """Generate the MPS with an initial state of |00...00>"""
     state_tensor = cp.asarray([1, 0], dtype=dtype).reshape(1, 2, 1)
     mps_tensors = [state_tensor] * num_qubits
     return mps_tensors
 
 
 def mps_site_right_swap(mps_tensors, i, **kwargs):
-    """
-    Perform the swap operation between the ith and i+1th MPS tensors.
-    """
+    """Perform the swap operation between the ith and i+1th MPS tensors."""
     # contraction followed by QR decomposition
     a, _, b = contract_decompose(
         "ipj,jqk->iqj,jpk",
@@ -30,8 +26,7 @@ def mps_site_right_swap(mps_tensors, i, **kwargs):
 
 
 def apply_gate(mps_tensors, gate, qubits, **kwargs):
-    """
-    Apply the gate operand to the MPS tensors in-place.
+    """Apply the gate operand to the MPS tensors in-place.
 
     Args:
         mps_tensors: A list of rank-3 ndarray-like tensor objects.
diff --git a/src/qibotn/QiboCircuitConvertor.py b/src/qibotn/QiboCircuitConvertor.py
index 5ac2fee..f67fb8e 100644
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/QiboCircuitConvertor.py
@@ -5,9 +5,9 @@ import numpy as np
 
 
 class QiboCircuitToEinsum:
-    """Convert a circuit to a Tensor Network (TN) representation.
-    The circuit is first processed to an intermediate form by grouping each gate
-    matrix with its corresponding qubit it is acting on to a list. It is then
+    """Convert a circuit to a Tensor Network (TN) representation. The circuit
+    is first processed to an intermediate form by grouping each gate matrix
+    with its corresponding qubit it is acting on to a list. It is then
     converted to an equivalent TN expression through the class function
     state_vector_operands() following the Einstein summation convention in the
     interleave format.
@@ -79,9 +79,8 @@ class QiboCircuitToEinsum:
         return mode_labels, operands
 
     def op_shape_from_qubits(self, nqubits):
-        """Modify tensor to cuQuantum shape
-        (qubit_states,input_output) * qubits_involved
-        """
+        """Modify tensor to cuQuantum shape (qubit_states,input_output) *
+        qubits_involved."""
         return (2, 2) * nqubits
 
     def init_intermediate_circuit(self, circuit):
@@ -134,8 +133,7 @@ class QiboCircuitToEinsum:
         self.active_qubits_inverse = np.unique(gates_qubits_inverse)
 
     def get_pauli_gates(self, pauli_map, dtype="complex128", backend=cp):
-        """
-        Populate the gates for all pauli operators.
+        """Populate the gates for all pauli operators.
 
         Args:
             pauli_map: A dictionary mapping qubits to pauli operators.
@@ -161,7 +159,7 @@ class QiboCircuitToEinsum:
         return gates
 
     def expectation_operands(self, pauli_string):
-        input_bitstring = "0" * self.circuit.nqubits 
+        input_bitstring = "0" * self.circuit.nqubits
 
         input_operands = self._get_bitstring_tensors(input_bitstring)
         pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))
diff --git a/src/qibotn/QiboCircuitToMPS.py b/src/qibotn/QiboCircuitToMPS.py
index 816b17c..b1d847f 100644
--- a/src/qibotn/QiboCircuitToMPS.py
+++ b/src/qibotn/QiboCircuitToMPS.py
@@ -1,9 +1,9 @@
 import cupy as cp
 import numpy as np
-
 from cuquantum import cutensornet as cutn
+
+from qibotn.MPSUtils import apply_gate, initial
 from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
-from qibotn.MPSUtils import initial, apply_gate
 
 
 class QiboCircuitToMPS:
diff --git a/src/qibotn/backends/__init__.py b/src/qibotn/backends/__init__.py
index f927932..e5d68de 100644
--- a/src/qibotn/backends/__init__.py
+++ b/src/qibotn/backends/__init__.py
@@ -1,2 +1,2 @@
-from qibotn.backends.gpu import CuTensorNet
 from qibotn.backends.cpu import QuTensorNet
+from qibotn.backends.gpu import CuTensorNet
diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index 5777fe9..2c3f8d4 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -1,8 +1,7 @@
 import numpy as np
-
 from qibo.backends.numpy import NumpyBackend
-from qibo.states import CircuitResult
 from qibo.config import raise_error
+from qibo.states import CircuitResult
 
 
 class CuTensorNet(NumpyBackend):  # pragma: no cover
@@ -107,7 +106,6 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
 
         Returns:
             xxx.
-
         """
 
         import qibotn.eval as eval
diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index 96fd488..5fcb66f 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -1,20 +1,22 @@
-from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
-from cuquantum import contract
-from cupy.cuda.runtime import getDeviceCount
 import cupy as cp
+from cupy.cuda.runtime import getDeviceCount
+from cuquantum import contract
 
-from qibotn.QiboCircuitToMPS import QiboCircuitToMPS
 from qibotn.mps_contraction_helper import MPSContractionHelper
+from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
+from qibotn.QiboCircuitToMPS import QiboCircuitToMPS
 
 
 def dense_vector_tn(qibo_circ, datatype):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to dense vector."""
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    dense vector."""
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(*myconvertor.state_vector_operands())
 
 
 def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string."""
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    expectation of given Pauli string."""
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(
         *myconvertor.expectation_operands(
@@ -24,14 +26,19 @@ def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
 
 
 def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
+    """Convert qibo circuit to tensornet (TN) format and perform contraction
+    using multi node and multi GPU through MPI.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The pathfinder looks
+    at user defined number of samples (n_samples) iteratively to select
+    the least costly contraction path. This is sped up with multi
+    thread. After pathfinding the optimal path is used in the actual
+    contraction to give a dense vector representation of the TN.
     """
 
-    from mpi4py import MPI
     from cuquantum import Network
+    from mpi4py import MPI
 
     root = 0
     comm = MPI.COMM_WORLD
@@ -86,14 +93,19 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
 
 
 def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through NCCL.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
+    """Convert qibo circuit to tensornet (TN) format and perform contraction
+    using multi node and multi GPU through NCCL.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The pathfinder looks
+    at user defined number of samples (n_samples) iteratively to select
+    the least costly contraction path. This is sped up with multi
+    thread. After pathfinding the optimal path is used in the actual
+    contraction to give a dense vector representation of the TN.
     """
-    from mpi4py import MPI
-    from cuquantum import Network
     from cupy.cuda import nccl
+    from cuquantum import Network
+    from mpi4py import MPI
 
     root = 0
     comm_mpi = MPI.COMM_WORLD
@@ -159,15 +171,22 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
 
 
 def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string using multi node and multi GPU through NCCL.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pauli_string_pattern is used to generate the pauli string corresponding to the number of qubits of the system.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give an expectation value.
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    expectation of given Pauli string using multi node and multi GPU through
+    NCCL.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The
+    pauli_string_pattern is used to generate the pauli string
+    corresponding to the number of qubits of the system. The pathfinder
+    looks at user defined number of samples (n_samples) iteratively to
+    select the least costly contraction path. This is sped up with multi
+    thread. After pathfinding the optimal path is used in the actual
+    contraction to give an expectation value.
     """
-    from mpi4py import MPI
-    from cuquantum import Network
     from cupy.cuda import nccl
+    from cuquantum import Network
+    from mpi4py import MPI
 
     root = 0
     comm_mpi = MPI.COMM_WORLD
@@ -235,14 +254,21 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
 
 
 def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string using multi node and multi GPU through MPI.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pauli_string_pattern is used to generate the pauli string corresponding to the number of qubits of the system.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give an expectation value.
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    expectation of given Pauli string using multi node and multi GPU through
+    MPI.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The
+    pauli_string_pattern is used to generate the pauli string
+    corresponding to the number of qubits of the system. The pathfinder
+    looks at user defined number of samples (n_samples) iteratively to
+    select the least costly contraction path. This is sped up with multi
+    thread. After pathfinding the optimal path is used in the actual
+    contraction to give an expectation value.
     """
-    from mpi4py import MPI  # this line initializes MPI
     from cuquantum import Network
+    from mpi4py import MPI  # this line initializes MPI
 
     root = 0
     comm = MPI.COMM_WORLD
@@ -299,7 +325,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
 
 
 def dense_vector_mps(qibo_circ, gate_algo, datatype):
-    """Convert qibo circuit to matrix product state (MPS) format and perform contraction to dense vector."""
+    """Convert qibo circuit to matrix product state (MPS) format and perform
+    contraction to dense vector."""
     myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
     mps_helper = MPSContractionHelper(myconvertor.num_qubits)
 
@@ -309,7 +336,9 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype):
 
 
 def pauli_string_gen(nqubits, pauli_string_pattern):
-    """Used internally to generate the string based on given pattern and number of qubit.
+    """Used internally to generate the string based on given pattern and number
+    of qubit.
+
     Example: pattern: "XZ", number of qubit: 7, output = XZXZXZX
     """
     if nqubits <= 0:
diff --git a/src/qibotn/mps_contraction_helper.py b/src/qibotn/mps_contraction_helper.py
index 29d5e25..1c004de 100644
--- a/src/qibotn/mps_contraction_helper.py
+++ b/src/qibotn/mps_contraction_helper.py
@@ -1,11 +1,10 @@
-from cuquantum import contract, contract_path, CircuitToEinsum, tensor
+from cuquantum import contract, contract_path
 
 # Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
 
 
 class MPSContractionHelper:
-    """
-    A helper class to compute various quantities for a given MPS.
+    """A helper class to compute various quantities for a given MPS.
 
     Interleaved format is used to construct the input args for `cuquantum.contract`.
     A concrete example on how the modes are populated for a 7-site MPS is provided below:
@@ -43,8 +42,8 @@ class MPSContractionHelper:
         ]
 
     def contract_norm(self, mps_tensors, options=None):
-        """
-        Contract the corresponding tensor network to form the norm of the MPS.
+        """Contract the corresponding tensor network to form the norm of the
+        MPS.
 
         Args:
             mps_tensors: A list of rank-3 ndarray-like tensor objects.
@@ -64,8 +63,8 @@ class MPSContractionHelper:
         return self._contract(interleaved_inputs, options=options).real
 
     def contract_state_vector(self, mps_tensors, options=None):
-        """
-        Contract the corresponding tensor network to form the state vector representation of the MPS.
+        """Contract the corresponding tensor network to form the state vector
+        representation of the MPS.
 
         Args:
             mps_tensors: A list of rank-3 ndarray-like tensor objects.
@@ -86,8 +85,8 @@ class MPSContractionHelper:
     def contract_expectation(
         self, mps_tensors, operator, qubits, options=None, normalize=False
     ):
-        """
-        Contract the corresponding tensor network to form the expectation of the MPS.
+        """Contract the corresponding tensor network to form the expectation of
+        the MPS.
 
         Args:
             mps_tensors: A list of rank-3 ndarray-like tensor objects.
diff --git a/tests/test_cuquantum_cutensor_backend.py b/tests/test_cuquantum_cutensor_backend.py
index 57b3f34..c8f1e19 100644
--- a/tests/test_cuquantum_cutensor_backend.py
+++ b/tests/test_cuquantum_cutensor_backend.py
@@ -1,8 +1,8 @@
 from timeit import default_timer as timer
 
 import config
-import numpy as np
 import cupy as cp
+import numpy as np
 import pytest
 import qibo
 from qibo.models import QFT

From 665cec42b2903609e4c6641e611889b6c0407536 Mon Sep 17 00:00:00 2001
From: Alessandro Candido <candido.ale@gmail.com>
Date: Wed, 7 Feb 2024 18:58:01 +0100
Subject: [PATCH 66/85] chore: Revert additional deps in pre-commit hooks,
 apparently not needed

The problem may have been caused by a corrupted cache; in any case, it no longer occurs after a forced clean.
---
 .pre-commit-config.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b65aeed..9d8b368 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
     rev: v1.7.5
     hooks:
       - id: docformatter
-        additional_dependencies: [tomli, charset-normalizer]
+        additional_dependencies: [tomli]
         args: [--in-place, --config, ./pyproject.toml]
   - repo: https://github.com/asottile/pyupgrade
     rev: v3.15.0
@@ -39,8 +39,6 @@ repos:
     rev: 1.16.0
     hooks:
       - id: blacken-docs
-        additional_dependencies:
-          - platformdirs
   - repo: https://github.com/pycqa/pydocstyle
     rev: 6.3.0
     hooks:

From c69fd5f04520f74c7dc84eb882a5d0d2c8e6db0e Mon Sep 17 00:00:00 2001
From: Alessandro Candido <candido.ale@gmail.com>
Date: Thu, 8 Feb 2024 10:17:22 +0100
Subject: [PATCH 67/85] chore: Pre-commit all files once more

---
 README.md                   | 56 ++++++++++++++++++++++---------------
 src/qibotn/backends/cpu.py  |  5 +---
 src/qibotn/eval_qu.py       | 20 +++++++------
 tests/test_quimb_backend.py | 29 ++++++++++---------
 4 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/README.md b/README.md
index 9f60ad3..d22503c 100644
--- a/README.md
+++ b/README.md
@@ -5,26 +5,33 @@ To get started, `python setup.py install` to install the tools and dependencies.
 # Supported Computation
 
 Tensor Network Types:
+
 - Tensornet (TN)
 - Matrix Product States (MPS)
 
 Tensor Network contractions to:
+
 - dense vectors
 - expecation values of given Pauli string
 
 The supported HPC configurations are:
+
 - single-node CPU
 - single-node GPU or GPUs
 - multi-node multi-GPU with Message Passing Interface (MPI)
 - multi-node multi-GPU with NVIDIA Collective Communications Library (NCCL)
 
 Currently, the supported tensor network libraries are:
- - [cuQuantum](https://github.com/NVIDIA/cuQuantum), an NVIDIA SDK of optimized libraries and tools for accelerating quantum computing workflows.
- - [quimb](https://quimb.readthedocs.io/en/latest/), an easy but fast python library for ‘quantum information many-body’ calculations, focusing primarily on tensor networks.
+
+- [cuQuantum](https://github.com/NVIDIA/cuQuantum), an NVIDIA SDK of optimized libraries and tools for accelerating quantum computing workflows.
+- [quimb](https://quimb.readthedocs.io/en/latest/), an easy but fast python library for ‘quantum information many-body’ calculations, focusing primarily on tensor networks.
 
 # Sample Codes
+
 ## Single-Node Example
+
 The code below shows an example of how to activate the Cuquantum TensorNetwork backend of Qibo.
+
 ```py
 import numpy as np
 from qibo import Circuit, gates
@@ -36,20 +43,22 @@ import qibo
 # This will trigger the dense vector computation of the tensornet.
 
 computation_settings = {
-    'MPI_enabled': False,
-    'MPS_enabled': {
-                "qr_method": False,
-                "svd_method": {
-                    "partition": "UV",
-                    "abs_cutoff": 1e-12,
-                },
-            } ,
-    'NCCL_enabled': False,
-    'expectation_enabled': False
+    "MPI_enabled": False,
+    "MPS_enabled": {
+        "qr_method": False,
+        "svd_method": {
+            "partition": "UV",
+            "abs_cutoff": 1e-12,
+        },
+    },
+    "NCCL_enabled": False,
+    "expectation_enabled": False,
 }
 
 
-qibo.set_backend(backend="qibotn", platform="cutensornet", runcard=computation_settings)  #cuQuantum
+qibo.set_backend(
+    backend="qibotn", platform="cutensornet", runcard=computation_settings
+)  # cuQuantum
 # qibo.set_backend(backend="qibotn", platform="qutensornet", runcard=computation_settings) #quimb
 
 
@@ -70,25 +79,26 @@ Other examples of setting the computation_settings
 ```py
 # Expectation computation with specific Pauli String pattern
 computation_settings = {
-   'MPI_enabled': False,
-   'MPS_enabled': False,
-   'NCCL_enabled': False,
-   'expectation_enabled': {
-       'pauli_string_pattern': "IXZ"
+    "MPI_enabled": False,
+    "MPS_enabled": False,
+    "NCCL_enabled": False,
+    "expectation_enabled": {
+        "pauli_string_pattern": "IXZ",
+    },
 }
 
 # Dense vector computation using multi node through MPI
 computation_settings = {
-    'MPI_enabled': True,
-    'MPS_enabled': False,
-    'NCCL_enabled': False,
-    'expectation_enabled': False
+    "MPI_enabled": True,
+    "MPS_enabled": False,
+    "NCCL_enabled": False,
+    "expectation_enabled": False,
 }
 ```
 
 ## Multi-Node Example
-Multi-node is enabled by setting either the MPI or NCCL enabled flag to True in the computation settings. Below shows the script to launch on 2 nodes with 2 GPUs each. $node_list contains the IP of the nodes assigned.
 
+Multi-node is enabled by setting either the MPI or NCCL enabled flag to True in the computation settings. Below shows the script to launch on 2 nodes with 2 GPUs each. $node_list contains the IP of the nodes assigned.
 
 ```sh
 mpirun -n 4 -hostfile $node_list python test.py
diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
index 7115b39..a85dfd9 100644
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -1,8 +1,6 @@
-import numpy as np
-
 from qibo.backends.numpy import NumpyBackend
-from qibo.states import CircuitResult
 from qibo.config import raise_error
+from qibo.states import CircuitResult
 
 
 class QuTensorNet(NumpyBackend):
@@ -60,7 +58,6 @@ class QuTensorNet(NumpyBackend):
 
         Returns:
             xxx.
-
         """
 
         import qibotn.eval_qu as eval
diff --git a/src/qibotn/eval_qu.py b/src/qibotn/eval_qu.py
index 579a42a..7b603b5 100644
--- a/src/qibotn/eval_qu.py
+++ b/src/qibotn/eval_qu.py
@@ -3,9 +3,15 @@ import quimb.tensor as qtn
 from qibo.models import Circuit as QiboCircuit
 
 
-def from_qibo(circuit: QiboCircuit, is_mps: False, psi0=None, method='svd',
-              cutoff=1e-6, cutoff_mode='abs'):
-    """Create a tensornetwork representation of the circuit"""
+def from_qibo(
+    circuit: QiboCircuit,
+    is_mps: False,
+    psi0=None,
+    method="svd",
+    cutoff=1e-6,
+    cutoff_mode="abs",
+):
+    """Create a tensornetwork representation of the circuit."""
 
     nqubits = circuit.nqubits
     gate_opt = {}
@@ -30,19 +36,17 @@ def from_qibo(circuit: QiboCircuit, is_mps: False, psi0=None, method='svd',
 
 
 def init_state_tn(nqubits, init_state_sv):
-
-    """Create a matrixproductstate directly from a dense vector"""
+    """Create a matrixproductstate directly from a dense vector."""
 
     dims = tuple(2 * np.ones(nqubits, dtype=int))
 
     return qtn.tensor_1d.MatrixProductState.from_dense(init_state_sv, dims)
 
 
-def dense_vector_tn_qu(qasm: str, initial_state, is_mps,  backend="numpy"):
-    """Evaluate QASM with Quimb
+def dense_vector_tn_qu(qasm: str, initial_state, is_mps, backend="numpy"):
+    """Evaluate QASM with Quimb.
 
     backend (quimb): numpy, cupy, jax. Passed to ``opt_einsum``.
-
     """
     circuit = QiboCircuit.from_qasm(qasm)
     if initial_state is not None:
diff --git a/tests/test_quimb_backend.py b/tests/test_quimb_backend.py
index 81a0e2b..15ba652 100644
--- a/tests/test_quimb_backend.py
+++ b/tests/test_quimb_backend.py
@@ -1,5 +1,6 @@
 import copy
 import os
+
 import config
 import numpy as np
 import pytest
@@ -8,8 +9,7 @@ from qibo.models import QFT
 
 
 def create_init_state(nqubits):
-    init_state = np.random.random(2**nqubits) + \
-        1j * np.random.random(2**nqubits)
+    init_state = np.random.random(2**nqubits) + 1j * np.random.random(2**nqubits)
     init_state = init_state / np.sqrt((np.abs(init_state) ** 2).sum())
     return init_state
 
@@ -20,10 +20,11 @@ def qibo_qft(nqubits, init_state, swaps):
     return circ_qibo, state_vec
 
 
-@pytest.mark.parametrize("nqubits, tolerance, is_mps",
-                         [(1, 1e-6, True), (2, 1e-6, False), (5, 1e-3, True), (10, 1e-3, False)])
+@pytest.mark.parametrize(
+    "nqubits, tolerance, is_mps",
+    [(1, 1e-6, True), (2, 1e-6, False), (5, 1e-3, True), (10, 1e-3, False)],
+)
 def test_eval(nqubits: int, tolerance: float, is_mps: bool):
-
     """Evaluate circuit with Quimb backend.
 
     Args:
@@ -41,20 +42,18 @@ def test_eval(nqubits: int, tolerance: float, is_mps: bool):
     init_state_tn = copy.deepcopy(init_state)
 
     # Test qibo
-    qibo.set_backend(backend=config.qibo.backend,
-                     platform=config.qibo.platform)
-   
-    qibo_circ, result_sv= qibo_qft(nqubits, init_state, swaps=True)
-    
+    qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform)
+
+    qibo_circ, result_sv = qibo_qft(nqubits, init_state, swaps=True)
 
     # Convert to qasm for other backends
     qasm_circ = qibo_circ.to_qasm()
 
     # Test quimb
     result_tn = qibotn.eval_qu.dense_vector_tn_qu(
-            qasm_circ, init_state_tn, is_mps, backend=config.quimb.backend
-        ).flatten()
-   
+        qasm_circ, init_state_tn, is_mps, backend=config.quimb.backend
+    ).flatten()
 
-    assert np.allclose(result_sv, result_tn,
-                       atol=tolerance), "Resulting dense vectors do not match"
+    assert np.allclose(
+        result_sv, result_tn, atol=tolerance
+    ), "Resulting dense vectors do not match"

From e6a28ce573b926758af191427d764c24ec1aac7e Mon Sep 17 00:00:00 2001
From: yangliwei <yangliwei.uestc@gmail.com>
Date: Thu, 8 Feb 2024 17:18:07 +0800
Subject: [PATCH 68/85] Minor black formatting

---
 src/qibotn/QiboCircuitConvertor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/QiboCircuitConvertor.py b/src/qibotn/QiboCircuitConvertor.py
index 5ac2fee..e1aabea 100644
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/QiboCircuitConvertor.py
@@ -161,7 +161,7 @@ class QiboCircuitToEinsum:
         return gates
 
     def expectation_operands(self, pauli_string):
-        input_bitstring = "0" * self.circuit.nqubits 
+        input_bitstring = "0" * self.circuit.nqubits
 
         input_operands = self._get_bitstring_tensors(input_bitstring)
         pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))

From 89e97b48e8595483ac962a575d7784dd218f9190 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 8 Feb 2024 09:18:18 +0000
Subject: [PATCH 69/85] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 README.md                                |  4 +-
 setup.py                                 |  9 ++-
 src/qibotn/MPSUtils.py                   | 13 +---
 src/qibotn/QiboCircuitConvertor.py       | 14 ++--
 src/qibotn/QiboCircuitToMPS.py           |  4 +-
 src/qibotn/backends/__init__.py          |  2 +-
 src/qibotn/backends/cpu.py               |  5 +-
 src/qibotn/backends/gpu.py               |  4 +-
 src/qibotn/eval.py                       | 93 ++++++++++++++++--------
 src/qibotn/eval_qu.py                    | 20 +++--
 src/qibotn/mps_contraction_helper.py     | 17 ++---
 tests/test_cuquantum_cutensor_backend.py |  2 +-
 tests/test_quimb_backend.py              | 29 ++++----
 13 files changed, 118 insertions(+), 98 deletions(-)

diff --git a/README.md b/README.md
index 070ccb3..9f60ad3 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ import qibo
 
 # Below shows how to set the computation_settings
 # Note that for MPS_enabled and expectation_enabled parameters the accepted inputs are boolean or a dictionary with the format shown below.
-# If computation_settings is not specified, the default setting is used in which all booleans will be False. 
+# If computation_settings is not specified, the default setting is used in which all booleans will be False.
 # This will trigger the dense vector computation of the tensornet.
 
 computation_settings = {
@@ -92,4 +92,4 @@ Multi-node is enabled by setting either the MPI or NCCL enabled flag to True in
 
 ```sh
 mpirun -n 4 -hostfile $node_list python test.py
-```
\ No newline at end of file
+```
diff --git a/setup.py b/setup.py
index 13285eb..0f619a5 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,7 @@
-from setuptools import setup, find_packages
-import re
 import pathlib
+import re
+
+from setuptools import find_packages, setup
 
 HERE = pathlib.Path(__file__).parent.absolute()
 PACKAGE = "qibotn"
@@ -8,8 +9,8 @@ PACKAGE = "qibotn"
 
 # Returns the qibotn version
 def version():
-    """Gets the version from the package's __init__ file
-    if there is some problem, let it happily fail"""
+    """Gets the version from the package's __init__ file if there is some
+    problem, let it happily fail."""
     version_file = HERE / "src" / PACKAGE / "__init__.py"
     version_regex = r"^__version__ = ['\"]([^'\"]*)['\"]"
 
diff --git a/src/qibotn/MPSUtils.py b/src/qibotn/MPSUtils.py
index 4f84f67..e8068f7 100644
--- a/src/qibotn/MPSUtils.py
+++ b/src/qibotn/MPSUtils.py
@@ -1,23 +1,19 @@
 import cupy as cp
-from cuquantum.cutensornet.experimental import contract_decompose
 from cuquantum import contract
+from cuquantum.cutensornet.experimental import contract_decompose
 
 # Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
 
 
 def initial(num_qubits, dtype):
-    """
-    Generate the MPS with an initial state of |00...00>
-    """
+    """Generate the MPS with an initial state of |00...00>"""
     state_tensor = cp.asarray([1, 0], dtype=dtype).reshape(1, 2, 1)
     mps_tensors = [state_tensor] * num_qubits
     return mps_tensors
 
 
 def mps_site_right_swap(mps_tensors, i, **kwargs):
-    """
-    Perform the swap operation between the ith and i+1th MPS tensors.
-    """
+    """Perform the swap operation between the ith and i+1th MPS tensors."""
     # contraction followed by QR decomposition
     a, _, b = contract_decompose(
         "ipj,jqk->iqj,jpk",
@@ -30,8 +26,7 @@ def mps_site_right_swap(mps_tensors, i, **kwargs):
 
 
 def apply_gate(mps_tensors, gate, qubits, **kwargs):
-    """
-    Apply the gate operand to the MPS tensors in-place.
+    """Apply the gate operand to the MPS tensors in-place.
 
     Args:
         mps_tensors: A list of rank-3 ndarray-like tensor objects.
diff --git a/src/qibotn/QiboCircuitConvertor.py b/src/qibotn/QiboCircuitConvertor.py
index e1aabea..f67fb8e 100644
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/QiboCircuitConvertor.py
@@ -5,9 +5,9 @@ import numpy as np
 
 
 class QiboCircuitToEinsum:
-    """Convert a circuit to a Tensor Network (TN) representation.
-    The circuit is first processed to an intermediate form by grouping each gate
-    matrix with its corresponding qubit it is acting on to a list. It is then
+    """Convert a circuit to a Tensor Network (TN) representation. The circuit
+    is first processed to an intermediate form by grouping each gate matrix
+    with its corresponding qubit it is acting on to a list. It is then
     converted to an equivalent TN expression through the class function
     state_vector_operands() following the Einstein summation convention in the
     interleave format.
@@ -79,9 +79,8 @@ class QiboCircuitToEinsum:
         return mode_labels, operands
 
     def op_shape_from_qubits(self, nqubits):
-        """Modify tensor to cuQuantum shape
-        (qubit_states,input_output) * qubits_involved
-        """
+        """Modify tensor to cuQuantum shape (qubit_states,input_output) *
+        qubits_involved."""
         return (2, 2) * nqubits
 
     def init_intermediate_circuit(self, circuit):
@@ -134,8 +133,7 @@ class QiboCircuitToEinsum:
         self.active_qubits_inverse = np.unique(gates_qubits_inverse)
 
     def get_pauli_gates(self, pauli_map, dtype="complex128", backend=cp):
-        """
-        Populate the gates for all pauli operators.
+        """Populate the gates for all pauli operators.
 
         Args:
             pauli_map: A dictionary mapping qubits to pauli operators.
diff --git a/src/qibotn/QiboCircuitToMPS.py b/src/qibotn/QiboCircuitToMPS.py
index 816b17c..b1d847f 100644
--- a/src/qibotn/QiboCircuitToMPS.py
+++ b/src/qibotn/QiboCircuitToMPS.py
@@ -1,9 +1,9 @@
 import cupy as cp
 import numpy as np
-
 from cuquantum import cutensornet as cutn
+
+from qibotn.MPSUtils import apply_gate, initial
 from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
-from qibotn.MPSUtils import initial, apply_gate
 
 
 class QiboCircuitToMPS:
diff --git a/src/qibotn/backends/__init__.py b/src/qibotn/backends/__init__.py
index f927932..e5d68de 100644
--- a/src/qibotn/backends/__init__.py
+++ b/src/qibotn/backends/__init__.py
@@ -1,2 +1,2 @@
-from qibotn.backends.gpu import CuTensorNet
 from qibotn.backends.cpu import QuTensorNet
+from qibotn.backends.gpu import CuTensorNet
diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
index 7115b39..a85dfd9 100644
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -1,8 +1,6 @@
-import numpy as np
-
 from qibo.backends.numpy import NumpyBackend
-from qibo.states import CircuitResult
 from qibo.config import raise_error
+from qibo.states import CircuitResult
 
 
 class QuTensorNet(NumpyBackend):
@@ -60,7 +58,6 @@ class QuTensorNet(NumpyBackend):
 
         Returns:
             xxx.
-
         """
 
         import qibotn.eval_qu as eval
diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index 5777fe9..2c3f8d4 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -1,8 +1,7 @@
 import numpy as np
-
 from qibo.backends.numpy import NumpyBackend
-from qibo.states import CircuitResult
 from qibo.config import raise_error
+from qibo.states import CircuitResult
 
 
 class CuTensorNet(NumpyBackend):  # pragma: no cover
@@ -107,7 +106,6 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
 
         Returns:
             xxx.
-
         """
 
         import qibotn.eval as eval
diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index 96fd488..5fcb66f 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -1,20 +1,22 @@
-from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
-from cuquantum import contract
-from cupy.cuda.runtime import getDeviceCount
 import cupy as cp
+from cupy.cuda.runtime import getDeviceCount
+from cuquantum import contract
 
-from qibotn.QiboCircuitToMPS import QiboCircuitToMPS
 from qibotn.mps_contraction_helper import MPSContractionHelper
+from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
+from qibotn.QiboCircuitToMPS import QiboCircuitToMPS
 
 
 def dense_vector_tn(qibo_circ, datatype):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to dense vector."""
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    dense vector."""
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(*myconvertor.state_vector_operands())
 
 
 def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string."""
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    expectation of given Pauli string."""
     myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
     return contract(
         *myconvertor.expectation_operands(
@@ -24,14 +26,19 @@ def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
 
 
 def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
+    """Convert qibo circuit to tensornet (TN) format and perform contraction
+    using multi node and multi GPU through MPI.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The pathfinder looks
+    at user defined number of samples (n_samples) iteratively to select
+    the least costly contraction path. This is sped up with multi
+    thread. After pathfinding the optimal path is used in the actual
+    contraction to give a dense vector representation of the TN.
     """
 
-    from mpi4py import MPI
     from cuquantum import Network
+    from mpi4py import MPI
 
     root = 0
     comm = MPI.COMM_WORLD
@@ -86,14 +93,19 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
 
 
 def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through NCCL.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
+    """Convert qibo circuit to tensornet (TN) format and perform contraction
+    using multi node and multi GPU through NCCL.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The pathfinder looks
+    at user defined number of samples (n_samples) iteratively to select
+    the least costly contraction path. This is sped up with multi
+    thread. After pathfinding the optimal path is used in the actual
+    contraction to give a dense vector representation of the TN.
     """
-    from mpi4py import MPI
-    from cuquantum import Network
     from cupy.cuda import nccl
+    from cuquantum import Network
+    from mpi4py import MPI
 
     root = 0
     comm_mpi = MPI.COMM_WORLD
@@ -159,15 +171,22 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
 
 
 def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string using multi node and multi GPU through NCCL.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pauli_string_pattern is used to generate the pauli string corresponding to the number of qubits of the system.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give an expectation value.
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    expectation of given Pauli string using multi node and multi GPU through
+    NCCL.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The
+    pauli_string_pattern is used to generate the pauli string
+    corresponding to the number of qubits of the system. The pathfinder
+    looks at user defined number of samples (n_samples) iteratively to
+    select the least costly contraction path. This is sped up with multi
+    thread. After pathfinding the optimal path is used in the actual
+    contraction to give an expectation value.
     """
-    from mpi4py import MPI
-    from cuquantum import Network
     from cupy.cuda import nccl
+    from cuquantum import Network
+    from mpi4py import MPI
 
     root = 0
     comm_mpi = MPI.COMM_WORLD
@@ -235,14 +254,21 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
 
 
 def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string using multi node and multi GPU through MPI.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pauli_string_pattern is used to generate the pauli string corresponding to the number of qubits of the system.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give an expectation value.
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    expectation of given Pauli string using multi node and multi GPU through
+    MPI.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The
+    pauli_string_pattern is used to generate the pauli string
+    corresponding to the number of qubits of the system. The pathfinder
+    looks at user defined number of samples (n_samples) iteratively to
+    select the least costly contraction path. This is sped up with multi
+    thread. After pathfinding the optimal path is used in the actual
+    contraction to give an expectation value.
     """
-    from mpi4py import MPI  # this line initializes MPI
     from cuquantum import Network
+    from mpi4py import MPI  # this line initializes MPI
 
     root = 0
     comm = MPI.COMM_WORLD
@@ -299,7 +325,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
 
 
 def dense_vector_mps(qibo_circ, gate_algo, datatype):
-    """Convert qibo circuit to matrix product state (MPS) format and perform contraction to dense vector."""
+    """Convert qibo circuit to matrix product state (MPS) format and perform
+    contraction to dense vector."""
     myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
     mps_helper = MPSContractionHelper(myconvertor.num_qubits)
 
@@ -309,7 +336,9 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype):
 
 
 def pauli_string_gen(nqubits, pauli_string_pattern):
-    """Used internally to generate the string based on given pattern and number of qubit.
+    """Used internally to generate the string based on given pattern and number
+    of qubit.
+
     Example: pattern: "XZ", number of qubit: 7, output = XZXZXZX
     """
     if nqubits <= 0:
diff --git a/src/qibotn/eval_qu.py b/src/qibotn/eval_qu.py
index 579a42a..7b603b5 100644
--- a/src/qibotn/eval_qu.py
+++ b/src/qibotn/eval_qu.py
@@ -3,9 +3,15 @@ import quimb.tensor as qtn
 from qibo.models import Circuit as QiboCircuit
 
 
-def from_qibo(circuit: QiboCircuit, is_mps: False, psi0=None, method='svd',
-              cutoff=1e-6, cutoff_mode='abs'):
-    """Create a tensornetwork representation of the circuit"""
+def from_qibo(
+    circuit: QiboCircuit,
+    is_mps: False,
+    psi0=None,
+    method="svd",
+    cutoff=1e-6,
+    cutoff_mode="abs",
+):
+    """Create a tensornetwork representation of the circuit."""
 
     nqubits = circuit.nqubits
     gate_opt = {}
@@ -30,19 +36,17 @@ def from_qibo(circuit: QiboCircuit, is_mps: False, psi0=None, method='svd',
 
 
 def init_state_tn(nqubits, init_state_sv):
-
-    """Create a matrixproductstate directly from a dense vector"""
+    """Create a matrixproductstate directly from a dense vector."""
 
     dims = tuple(2 * np.ones(nqubits, dtype=int))
 
     return qtn.tensor_1d.MatrixProductState.from_dense(init_state_sv, dims)
 
 
-def dense_vector_tn_qu(qasm: str, initial_state, is_mps,  backend="numpy"):
-    """Evaluate QASM with Quimb
+def dense_vector_tn_qu(qasm: str, initial_state, is_mps, backend="numpy"):
+    """Evaluate QASM with Quimb.
 
     backend (quimb): numpy, cupy, jax. Passed to ``opt_einsum``.
-
     """
     circuit = QiboCircuit.from_qasm(qasm)
     if initial_state is not None:
diff --git a/src/qibotn/mps_contraction_helper.py b/src/qibotn/mps_contraction_helper.py
index 29d5e25..1c004de 100644
--- a/src/qibotn/mps_contraction_helper.py
+++ b/src/qibotn/mps_contraction_helper.py
@@ -1,11 +1,10 @@
-from cuquantum import contract, contract_path, CircuitToEinsum, tensor
+from cuquantum import contract, contract_path
 
 # Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
 
 
 class MPSContractionHelper:
-    """
-    A helper class to compute various quantities for a given MPS.
+    """A helper class to compute various quantities for a given MPS.
 
     Interleaved format is used to construct the input args for `cuquantum.contract`.
     A concrete example on how the modes are populated for a 7-site MPS is provided below:
@@ -43,8 +42,8 @@ class MPSContractionHelper:
         ]
 
     def contract_norm(self, mps_tensors, options=None):
-        """
-        Contract the corresponding tensor network to form the norm of the MPS.
+        """Contract the corresponding tensor network to form the norm of the
+        MPS.
 
         Args:
             mps_tensors: A list of rank-3 ndarray-like tensor objects.
@@ -64,8 +63,8 @@ class MPSContractionHelper:
         return self._contract(interleaved_inputs, options=options).real
 
     def contract_state_vector(self, mps_tensors, options=None):
-        """
-        Contract the corresponding tensor network to form the state vector representation of the MPS.
+        """Contract the corresponding tensor network to form the state vector
+        representation of the MPS.
 
         Args:
             mps_tensors: A list of rank-3 ndarray-like tensor objects.
@@ -86,8 +85,8 @@ class MPSContractionHelper:
     def contract_expectation(
         self, mps_tensors, operator, qubits, options=None, normalize=False
     ):
-        """
-        Contract the corresponding tensor network to form the expectation of the MPS.
+        """Contract the corresponding tensor network to form the expectation of
+        the MPS.
 
         Args:
             mps_tensors: A list of rank-3 ndarray-like tensor objects.
diff --git a/tests/test_cuquantum_cutensor_backend.py b/tests/test_cuquantum_cutensor_backend.py
index 57b3f34..c8f1e19 100644
--- a/tests/test_cuquantum_cutensor_backend.py
+++ b/tests/test_cuquantum_cutensor_backend.py
@@ -1,8 +1,8 @@
 from timeit import default_timer as timer
 
 import config
-import numpy as np
 import cupy as cp
+import numpy as np
 import pytest
 import qibo
 from qibo.models import QFT
diff --git a/tests/test_quimb_backend.py b/tests/test_quimb_backend.py
index 81a0e2b..15ba652 100644
--- a/tests/test_quimb_backend.py
+++ b/tests/test_quimb_backend.py
@@ -1,5 +1,6 @@
 import copy
 import os
+
 import config
 import numpy as np
 import pytest
@@ -8,8 +9,7 @@ from qibo.models import QFT
 
 
 def create_init_state(nqubits):
-    init_state = np.random.random(2**nqubits) + \
-        1j * np.random.random(2**nqubits)
+    init_state = np.random.random(2**nqubits) + 1j * np.random.random(2**nqubits)
     init_state = init_state / np.sqrt((np.abs(init_state) ** 2).sum())
     return init_state
 
@@ -20,10 +20,11 @@ def qibo_qft(nqubits, init_state, swaps):
     return circ_qibo, state_vec
 
 
-@pytest.mark.parametrize("nqubits, tolerance, is_mps",
-                         [(1, 1e-6, True), (2, 1e-6, False), (5, 1e-3, True), (10, 1e-3, False)])
+@pytest.mark.parametrize(
+    "nqubits, tolerance, is_mps",
+    [(1, 1e-6, True), (2, 1e-6, False), (5, 1e-3, True), (10, 1e-3, False)],
+)
 def test_eval(nqubits: int, tolerance: float, is_mps: bool):
-
     """Evaluate circuit with Quimb backend.
 
     Args:
@@ -41,20 +42,18 @@ def test_eval(nqubits: int, tolerance: float, is_mps: bool):
     init_state_tn = copy.deepcopy(init_state)
 
     # Test qibo
-    qibo.set_backend(backend=config.qibo.backend,
-                     platform=config.qibo.platform)
-   
-    qibo_circ, result_sv= qibo_qft(nqubits, init_state, swaps=True)
-    
+    qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform)
+
+    qibo_circ, result_sv = qibo_qft(nqubits, init_state, swaps=True)
 
     # Convert to qasm for other backends
     qasm_circ = qibo_circ.to_qasm()
 
     # Test quimb
     result_tn = qibotn.eval_qu.dense_vector_tn_qu(
-            qasm_circ, init_state_tn, is_mps, backend=config.quimb.backend
-        ).flatten()
-   
+        qasm_circ, init_state_tn, is_mps, backend=config.quimb.backend
+    ).flatten()
 
-    assert np.allclose(result_sv, result_tn,
-                       atol=tolerance), "Resulting dense vectors do not match"
+    assert np.allclose(
+        result_sv, result_tn, atol=tolerance
+    ), "Resulting dense vectors do not match"

From 890b9d1c93fc981efc2d2fee998b3f8ca2ef071f Mon Sep 17 00:00:00 2001
From: Alessandro Candido <candido.ale@gmail.com>
Date: Thu, 8 Feb 2024 10:30:22 +0100
Subject: [PATCH 70/85] chore: Enforce naming conventions on module files

---
 ...{QiboCircuitConvertor.py => circuit_convertor.py} | 12 ++++++------
 .../{QiboCircuitToMPS.py => circuit_to_mps.py}       |  4 ++--
 src/qibotn/eval.py                                   |  4 ++--
 src/qibotn/{MPSUtils.py => mps_utils.py}             |  0
 4 files changed, 10 insertions(+), 10 deletions(-)
 rename src/qibotn/{QiboCircuitConvertor.py => circuit_convertor.py} (96%)
 rename src/qibotn/{QiboCircuitToMPS.py => circuit_to_mps.py} (89%)
 rename src/qibotn/{MPSUtils.py => mps_utils.py} (100%)

diff --git a/src/qibotn/QiboCircuitConvertor.py b/src/qibotn/circuit_convertor.py
similarity index 96%
rename from src/qibotn/QiboCircuitConvertor.py
rename to src/qibotn/circuit_convertor.py
index f67fb8e..14af79d 100644
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/circuit_convertor.py
@@ -5,12 +5,12 @@ import numpy as np
 
 
 class QiboCircuitToEinsum:
-    """Convert a circuit to a Tensor Network (TN) representation. The circuit
-    is first processed to an intermediate form by grouping each gate matrix
-    with its corresponding qubit it is acting on to a list. It is then
-    converted to an equivalent TN expression through the class function
-    state_vector_operands() following the Einstein summation convention in the
-    interleave format.
+    """Convert a circuit to a Tensor Network (TN) representation.
+
+    The circuit is first processed to an intermediate form by grouping each gate matrix
+    with its corresponding qubit it is acting on to a list. It is then converted to an
+    equivalent TN expression through the class function state_vector_operands()
+    following the Einstein summation convention in the interleave format.
 
     See document for detail of the format: https://docs.nvidia.com/cuda/cuquantum/python/api/generated/cuquantum.contract.html
 
diff --git a/src/qibotn/QiboCircuitToMPS.py b/src/qibotn/circuit_to_mps.py
similarity index 89%
rename from src/qibotn/QiboCircuitToMPS.py
rename to src/qibotn/circuit_to_mps.py
index b1d847f..af8acd5 100644
--- a/src/qibotn/QiboCircuitToMPS.py
+++ b/src/qibotn/circuit_to_mps.py
@@ -2,8 +2,8 @@ import cupy as cp
 import numpy as np
 from cuquantum import cutensornet as cutn
 
-from qibotn.MPSUtils import apply_gate, initial
-from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
+from qibotn.circuit_convertor import QiboCircuitToEinsum
+from qibotn.mps_utils import apply_gate, initial
 
 
 class QiboCircuitToMPS:
diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py
index 5fcb66f..6375aa6 100644
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -2,9 +2,9 @@ import cupy as cp
 from cupy.cuda.runtime import getDeviceCount
 from cuquantum import contract
 
+from qibotn.circuit_convertor import QiboCircuitToEinsum
+from qibotn.circuit_to_mps import QiboCircuitToMPS
 from qibotn.mps_contraction_helper import MPSContractionHelper
-from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
-from qibotn.QiboCircuitToMPS import QiboCircuitToMPS
 
 
 def dense_vector_tn(qibo_circ, datatype):
diff --git a/src/qibotn/MPSUtils.py b/src/qibotn/mps_utils.py
similarity index 100%
rename from src/qibotn/MPSUtils.py
rename to src/qibotn/mps_utils.py

From e58d176de47d1de11083927298f0189d6266e23c Mon Sep 17 00:00:00 2001
From: Vinitha-balachandran
 <127284874+Vinitha-balachandran@users.noreply.github.com>
Date: Thu, 8 Feb 2024 17:56:44 +0800
Subject: [PATCH 71/85] Update src/qibotn/backends/cpu.py

Co-authored-by: Alessandro Candido <candido.ale@gmail.com>
---
 src/qibotn/backends/cpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
index a85dfd9..542599c 100644
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -3,7 +3,7 @@ from qibo.config import raise_error
 from qibo.states import CircuitResult
 
 
-class QuTensorNet(NumpyBackend):
+class QuimbBackend(NumpyBackend):
 
     def __init__(self, runcard):
         super().__init__()

From a2e32c887389f17024947469705135d4d20594ea Mon Sep 17 00:00:00 2001
From: Vinitha-balachandran
 <127284874+Vinitha-balachandran@users.noreply.github.com>
Date: Thu, 8 Feb 2024 17:58:53 +0800
Subject: [PATCH 72/85] Update src/qibotn/backends/cpu.py

Co-authored-by: Alessandro Candido <candido.ale@gmail.com>
---
 src/qibotn/backends/cpu.py | 72 ++++----------------------------------
 1 file changed, 6 insertions(+), 66 deletions(-)

diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
index 542599c..cb03cb8 100644
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -62,80 +62,20 @@ class QuimbBackend(NumpyBackend):
 
         import qibotn.eval_qu as eval
 
-        if (
-            self.MPI_enabled == False
-            and self.MPS_enabled == False
-            and self.NCCL_enabled == False
-            and self.expectation_enabled == False
-        ):
-
-            state = eval.dense_vector_tn_qu(
-                circuit.to_qasm(), initial_state, is_mps=False, backend="numpy"
-            )
-
-        elif (
-            self.MPI_enabled == False
-            and self.MPS_enabled == True
-            and self.NCCL_enabled == False
-            and self.expectation_enabled == False
-        ):
-
-            state = eval.dense_vector_tn_qu(
-                circuit.to_qasm(), initial_state, is_mps=True, backend="numpy"
-            )
-
-        elif (
-            self.MPI_enabled == True
-            and self.MPS_enabled == False
-            and self.NCCL_enabled == False
-            and self.expectation_enabled == False
-        ):
-
+        if self.MPI_enabled == True:
             raise_error(NotImplementedError, "QiboTN quimb backend cannot support MPI.")
-
-        elif (
-            self.MPI_enabled == False
-            and self.MPS_enabled == False
-            and self.NCCL_enabled == True
-            and self.expectation_enabled == False
-        ):
-
+        if self.NCCL_enabled == True:
             raise_error(
                 NotImplementedError, "QiboTN quimb backend cannot support NCCL."
             )
-
-        elif (
-            self.MPI_enabled == False
-            and self.MPS_enabled == False
-            and self.NCCL_enabled == False
-            and self.expectation_enabled == True
-        ):
-
+        if self.expectation_enabled == True:
             raise_error(
                 NotImplementedError, "QiboTN quimb backend cannot support expectation"
             )
 
-        elif (
-            self.MPI_enabled == True
-            and self.MPS_enabled == False
-            and self.NCCL_enabled == False
-            and self.expectation_enabled == True
-        ):
-            raise_error(
-                NotImplementedError, "QiboTN quimb backend cannot support expectation"
-            )
-
-        elif (
-            self.MPI_enabled == False
-            and self.MPS_enabled == False
-            and self.NCCL_enabled == True
-            and self.expectation_enabled == True
-        ):
-            raise_error(
-                NotImplementedError, "QiboTN quimb backend cannot support expectation"
-            )
-        else:
-            raise_error(NotImplementedError, "Compute type not supported.")
+       state = eval.dense_vector_tn_qu(
+                circuit.to_qasm(), initial_state, is_mps=self.MPS_enabled, backend="numpy"
+       )
 
         if return_array:
             return state.flatten()

From 6f7df9d924b56821b72539757665bdc725f8991b Mon Sep 17 00:00:00 2001
From: Vinitha-balachandran
 <127284874+Vinitha-balachandran@users.noreply.github.com>
Date: Thu, 8 Feb 2024 17:59:56 +0800
Subject: [PATCH 73/85] Update src/qibotn/eval_qu.py

Fix docstring wording in from_qibo: "tensornetwork" -> "tensor network".

Co-authored-by: Alessandro Candido <candido.ale@gmail.com>
---
 src/qibotn/eval_qu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/eval_qu.py b/src/qibotn/eval_qu.py
index 7b603b5..024db88 100644
--- a/src/qibotn/eval_qu.py
+++ b/src/qibotn/eval_qu.py
@@ -11,7 +11,7 @@ def from_qibo(
     cutoff=1e-6,
     cutoff_mode="abs",
 ):
-    """Create a tensornetwork representation of the circuit."""
+    """Create a tensor network representation of the circuit."""
 
     nqubits = circuit.nqubits
     gate_opt = {}

From 9ea895bcfdffc4b6bf6c5d25ea8126184055ab83 Mon Sep 17 00:00:00 2001
From: Vinitha-balachandran
 <127284874+Vinitha-balachandran@users.noreply.github.com>
Date: Thu, 8 Feb 2024 18:00:23 +0800
Subject: [PATCH 74/85] Update src/qibotn/eval_qu.py

Fix docstring wording in init_state_tn: "matrixproductstate" -> "matrix product state".

Co-authored-by: Alessandro Candido <candido.ale@gmail.com>
---
 src/qibotn/eval_qu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/eval_qu.py b/src/qibotn/eval_qu.py
index 024db88..74fca1c 100644
--- a/src/qibotn/eval_qu.py
+++ b/src/qibotn/eval_qu.py
@@ -36,7 +36,7 @@ def from_qibo(
 
 
 def init_state_tn(nqubits, init_state_sv):
-    """Create a matrixproductstate directly from a dense vector."""
+    """Create a matrix product state directly from a dense vector."""
 
     dims = tuple(2 * np.ones(nqubits, dtype=int))
 

From 07b4a799be9a5b6daae124687ad171333b9b0ce8 Mon Sep 17 00:00:00 2001
From: yangliwei <yangliwei.uestc@gmail.com>
Date: Thu, 8 Feb 2024 18:13:13 +0800
Subject: [PATCH 75/85] Use CUDA_PATH as the if conditional for running
 workflow jobs

---
 .github/workflows/rules.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/rules.yml b/.github/workflows/rules.yml
index d1c0632..ed8b461 100644
--- a/.github/workflows/rules.yml
+++ b/.github/workflows/rules.yml
@@ -1,6 +1,9 @@
 # A single CI script with github workflow
 name: Tests
 
+env:
+  CUDA_PATH: echo $CUDA_PATH
+
 on:
   push:
   pull_request:
@@ -8,7 +11,7 @@ on:
 
 jobs:
   build:
-    if: contains(github.event.pull_request.labels.*.name, 'run-workflow') || github.event_name == 'push' && {{ $CUDA_PATH != '' }}
+    if: contains(github.event.pull_request.labels.*.name, 'run-workflow') || github.event_name == 'push' && github.env.CUDA_PATH != ''
     strategy:
       matrix:
         os: [ubuntu-latest]

From b232c94fbdc46a90a927c282412de320dc3ea46a Mon Sep 17 00:00:00 2001
From: Alessandro Candido <candido.ale@gmail.com>
Date: Thu, 8 Feb 2024 11:18:22 +0100
Subject: [PATCH 76/85] fix: Fix indentation wrong level from review suggestion

---
 src/qibotn/backends/cpu.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
index cb03cb8..ce4a3c7 100644
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -4,7 +4,6 @@ from qibo.states import CircuitResult
 
 
 class QuimbBackend(NumpyBackend):
-
     def __init__(self, runcard):
         super().__init__()
         import quimb  # pylint: disable=import-error
@@ -73,9 +72,9 @@ class QuimbBackend(NumpyBackend):
                 NotImplementedError, "QiboTN quimb backend cannot support expectation"
             )
 
-       state = eval.dense_vector_tn_qu(
-                circuit.to_qasm(), initial_state, is_mps=self.MPS_enabled, backend="numpy"
-       )
+        state = eval.dense_vector_tn_qu(
+            circuit.to_qasm(), initial_state, is_mps=self.MPS_enabled, backend="numpy"
+        )
 
         if return_array:
             return state.flatten()

From 7199dd6b0e45c30821eff13134a20d96eab5236d Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 18:25:28 +0800
Subject: [PATCH 77/85] renamed cpu to quimb and qutensornet to quimbbackend

---
 src/qibotn/backends/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/backends/__init__.py b/src/qibotn/backends/__init__.py
index e5d68de..36c58fc 100644
--- a/src/qibotn/backends/__init__.py
+++ b/src/qibotn/backends/__init__.py
@@ -1,2 +1,2 @@
-from qibotn.backends.cpu import QuTensorNet
+from qibotn.backends.quimb import QuimbBackend
 from qibotn.backends.gpu import CuTensorNet

From f86627451a5585cc1a8597a4479d170321231df1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 8 Feb 2024 10:26:30 +0000
Subject: [PATCH 78/85] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/qibotn/backends/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/backends/__init__.py b/src/qibotn/backends/__init__.py
index 36c58fc..adab04e 100644
--- a/src/qibotn/backends/__init__.py
+++ b/src/qibotn/backends/__init__.py
@@ -1,2 +1,2 @@
-from qibotn.backends.quimb import QuimbBackend
 from qibotn.backends.gpu import CuTensorNet
+from qibotn.backends.quimb import QuimbBackend

From b3a9abdcbde73f9aa9f85247cd882ea5dce609ce Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 18:28:07 +0800
Subject: [PATCH 79/85] renaming cpu to quimb

---
 src/qibotn/backends/quimb.py | 84 ++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 src/qibotn/backends/quimb.py

diff --git a/src/qibotn/backends/quimb.py b/src/qibotn/backends/quimb.py
new file mode 100644
index 0000000..f8380d0
--- /dev/null
+++ b/src/qibotn/backends/quimb.py
@@ -0,0 +1,84 @@
+from qibo.backends.numpy import NumpyBackend
+from qibo.config import raise_error
+from qibo.states import CircuitResult
+
+
+class QuimbBackend(NumpyBackend):
+    # NumpyBackend subclass that evaluates circuits with quimb via qibotn.eval_qu.
+    def __init__(self, runcard):
+        super().__init__()
+        import quimb  # pylint: disable=import-error
+
+        if runcard is not None:
+            self.MPI_enabled = runcard.get("MPI_enabled", False)
+            self.NCCL_enabled = runcard.get("NCCL_enabled", False)
+            self.expectation_enabled = runcard.get("expectation_enabled", False)
+            # No default for MPS_enabled: only the literal True/False is accepted, anything else raises TypeError.
+            mps_enabled_value = runcard.get("MPS_enabled")
+            if mps_enabled_value is True:
+                self.MPS_enabled = True
+            elif mps_enabled_value is False:
+                self.MPS_enabled = False
+            else:
+                raise TypeError("MPS_enabled has an unexpected type")
+
+        else:
+            self.MPI_enabled = False
+            self.MPS_enabled = False
+            self.NCCL_enabled = False
+            self.expectation_enabled = False
+
+        self.name = "qibotn"
+        self.quimb = quimb
+        self.platform = "QuimbBackend"
+        self.versions["quimb"] = self.quimb.__version__
+
+    def apply_gate(self, gate, state, nqubits):  # pragma: no cover
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def apply_gate_density_matrix(self, gate, state, nqubits):  # pragma: no cover
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def assign_measurements(self, measurement_map, circuit_result):
+        raise_error(NotImplementedError, "Not implemented in QiboTN.")
+
+    def set_precision(self, precision):
+        if precision != self.precision:  # delegate to the parent only when the value differs
+            super().set_precision(precision)
+
+    def execute_circuit(
+        self, circuit, initial_state=None, nshots=None, return_array=False
+    ):  # pragma: no cover
+        """Executes a quantum circuit.
+
+        Args:
+            circuit (:class:`qibo.models.circuit.Circuit`): Circuit to execute.
+            initial_state (:class:`qibo.models.circuit.Circuit`): Circuit to prepare the initial state.
+                If ``None`` the default ``|00...0>`` state is used.
+
+        Returns:
+            The flattened state if ``return_array`` is true, otherwise a ``CircuitResult`` (also stored on ``circuit._final_state``).
+        """
+
+        import qibotn.eval_qu as eval
+        # The MPI, NCCL and expectation options are reported as NotImplementedError before any evaluation.
+        if self.MPI_enabled == True:
+            raise_error(NotImplementedError, "QiboTN quimb backend cannot support MPI.")
+        if self.NCCL_enabled == True:
+            raise_error(
+                NotImplementedError, "QiboTN quimb backend cannot support NCCL."
+            )
+        if self.expectation_enabled == True:
+            raise_error(
+                NotImplementedError, "QiboTN quimb backend cannot support expectation"
+            )
+
+        state = eval.dense_vector_tn_qu(
+                circuit.to_qasm(), initial_state, is_mps=self.MPS_enabled, backend="numpy"
+        )
+
+        if return_array:
+            return state.flatten()
+        else:
+            circuit._final_state = CircuitResult(self, circuit, state.flatten(), nshots)
+            return circuit._final_state

From fa0219e59f9b436307e9a1a81f502f9d8d1f7da2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 8 Feb 2024 10:30:30 +0000
Subject: [PATCH 80/85] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/qibotn/backends/quimb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qibotn/backends/quimb.py b/src/qibotn/backends/quimb.py
index f8380d0..54dae86 100644
--- a/src/qibotn/backends/quimb.py
+++ b/src/qibotn/backends/quimb.py
@@ -74,7 +74,7 @@ class QuimbBackend(NumpyBackend):
             )
 
         state = eval.dense_vector_tn_qu(
-                circuit.to_qasm(), initial_state, is_mps=self.MPS_enabled, backend="numpy"
+            circuit.to_qasm(), initial_state, is_mps=self.MPS_enabled, backend="numpy"
         )
 
         if return_array:

From 9fc2c74a88030e3b440bdcbb7b624adab194f293 Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Thu, 8 Feb 2024 18:31:32 +0800
Subject: [PATCH 81/85] removing cpu.py as quimb.py is added

---
 src/qibotn/backends/cpu.py | 83 --------------------------------------
 1 file changed, 83 deletions(-)
 delete mode 100644 src/qibotn/backends/cpu.py

diff --git a/src/qibotn/backends/cpu.py b/src/qibotn/backends/cpu.py
deleted file mode 100644
index ce4a3c7..0000000
--- a/src/qibotn/backends/cpu.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from qibo.backends.numpy import NumpyBackend
-from qibo.config import raise_error
-from qibo.states import CircuitResult
-
-
-class QuimbBackend(NumpyBackend):
-    def __init__(self, runcard):
-        super().__init__()
-        import quimb  # pylint: disable=import-error
-
-        if runcard is not None:
-            self.MPI_enabled = runcard.get("MPI_enabled", False)
-            self.NCCL_enabled = runcard.get("NCCL_enabled", False)
-            self.expectation_enabled = runcard.get("expectation_enabled", False)
-
-            mps_enabled_value = runcard.get("MPS_enabled")
-            if mps_enabled_value is True:
-                self.MPS_enabled = True
-            elif mps_enabled_value is False:
-                self.MPS_enabled = False
-            else:
-                raise TypeError("MPS_enabled has an unexpected type")
-
-        else:
-            self.MPI_enabled = False
-            self.MPS_enabled = False
-            self.NCCL_enabled = False
-            self.expectation_enabled = False
-
-        self.name = "qibotn"
-        self.quimb = quimb
-        self.platform = "qutensornet"
-        self.versions["quimb"] = self.quimb.__version__
-
-    def apply_gate(self, gate, state, nqubits):  # pragma: no cover
-        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
-
-    def apply_gate_density_matrix(self, gate, state, nqubits):  # pragma: no cover
-        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
-
-    def assign_measurements(self, measurement_map, circuit_result):
-        raise_error(NotImplementedError, "Not implemented in QiboTN.")
-
-    def set_precision(self, precision):
-        if precision != self.precision:
-            super().set_precision(precision)
-
-    def execute_circuit(
-        self, circuit, initial_state=None, nshots=None, return_array=False
-    ):  # pragma: no cover
-        """Executes a quantum circuit.
-
-        Args:
-            circuit (:class:`qibo.models.circuit.Circuit`): Circuit to execute.
-            initial_state (:class:`qibo.models.circuit.Circuit`): Circuit to prepare the initial state.
-                If ``None`` the default ``|00...0>`` state is used.
-
-        Returns:
-            xxx.
-        """
-
-        import qibotn.eval_qu as eval
-
-        if self.MPI_enabled == True:
-            raise_error(NotImplementedError, "QiboTN quimb backend cannot support MPI.")
-        if self.NCCL_enabled == True:
-            raise_error(
-                NotImplementedError, "QiboTN quimb backend cannot support NCCL."
-            )
-        if self.expectation_enabled == True:
-            raise_error(
-                NotImplementedError, "QiboTN quimb backend cannot support expectation"
-            )
-
-        state = eval.dense_vector_tn_qu(
-            circuit.to_qasm(), initial_state, is_mps=self.MPS_enabled, backend="numpy"
-        )
-
-        if return_array:
-            return state.flatten()
-        else:
-            circuit._final_state = CircuitResult(self, circuit, state.flatten(), nshots)
-            return circuit._final_state

From 38764f7833cebbab2a97882e86317f14f19e7f75 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Fri, 9 Feb 2024 10:29:30 +0800
Subject: [PATCH 82/85] Remove repeated code

---
 src/qibotn/backends/gpu.py | 32 +++-----------------------------
 1 file changed, 3 insertions(+), 29 deletions(-)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/gpu.py
index 2c3f8d4..073ce59 100644
--- a/src/qibotn/backends/gpu.py
+++ b/src/qibotn/backends/gpu.py
@@ -110,96 +110,70 @@ class CuTensorNet(NumpyBackend):  # pragma: no cover
 
         import qibotn.eval as eval
 
+        if initial_state is not None:
+            raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+
         if (
             self.MPI_enabled == False
             and self.MPS_enabled == False
             and self.NCCL_enabled == False
             and self.expectation_enabled == False
         ):
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
             state = eval.dense_vector_tn(circuit, self.dtype)
-
         elif (
             self.MPI_enabled == False
             and self.MPS_enabled == True
             and self.NCCL_enabled == False
             and self.expectation_enabled == False
         ):
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
             state = eval.dense_vector_mps(circuit, self.gate_algo, self.dtype)
-
         elif (
             self.MPI_enabled == True
             and self.MPS_enabled == False
             and self.NCCL_enabled == False
             and self.expectation_enabled == False
         ):
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
             state, rank = eval.dense_vector_tn_MPI(circuit, self.dtype, 32)
             if rank > 0:
                 state = np.array(0)
-
         elif (
             self.MPI_enabled == False
             and self.MPS_enabled == False
             and self.NCCL_enabled == True
             and self.expectation_enabled == False
         ):
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
             state, rank = eval.dense_vector_tn_nccl(circuit, self.dtype, 32)
             if rank > 0:
                 state = np.array(0)
-
         elif (
             self.MPI_enabled == False
             and self.MPS_enabled == False
             and self.NCCL_enabled == False
             and self.expectation_enabled == True
         ):
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
             state = eval.expectation_pauli_tn(
                 circuit, self.dtype, self.pauli_string_pattern
             )
-
         elif (
             self.MPI_enabled == True
             and self.MPS_enabled == False
             and self.NCCL_enabled == False
             and self.expectation_enabled == True
         ):
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
             state, rank = eval.expectation_pauli_tn_MPI(
                 circuit, self.dtype, self.pauli_string_pattern, 32
             )
-
             if rank > 0:
                 state = np.array(0)
-
         elif (
             self.MPI_enabled == False
             and self.MPS_enabled == False
             and self.NCCL_enabled == True
             and self.expectation_enabled == True
         ):
-            if initial_state is not None:
-                raise_error(NotImplementedError, "QiboTN cannot support initial state.")
-
             state, rank = eval.expectation_pauli_tn_nccl(
                 circuit, self.dtype, self.pauli_string_pattern, 32
             )
-
             if rank > 0:
                 state = np.array(0)
         else:

From 90b2cd6919150b986ee718a7658d95aa926c13d5 Mon Sep 17 00:00:00 2001
From: tankya2 <tankya2@ihpc.a-star.edu.sg>
Date: Fri, 9 Feb 2024 10:33:00 +0800
Subject: [PATCH 83/85] Update filename

---
 src/qibotn/backends/{gpu.py => cutensornet.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/qibotn/backends/{gpu.py => cutensornet.py} (100%)

diff --git a/src/qibotn/backends/gpu.py b/src/qibotn/backends/cutensornet.py
similarity index 100%
rename from src/qibotn/backends/gpu.py
rename to src/qibotn/backends/cutensornet.py

From a73b9e9a9975609a85a71f71df5012e598b6d21e Mon Sep 17 00:00:00 2001
From: vinitha-balachandran <vinitha-balachandran@ihpc.a-star.edu.sg>
Date: Fri, 9 Feb 2024 10:41:12 +0800
Subject: [PATCH 84/85] updated set_backend for quimb

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d22503c..b15aab3 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@ computation_settings = {
 qibo.set_backend(
     backend="qibotn", platform="cutensornet", runcard=computation_settings
 )  # cuQuantum
-# qibo.set_backend(backend="qibotn", platform="qutensornet", runcard=computation_settings) #quimb
+# qibo.set_backend(backend="qibotn", platform="QuimbBackend", runcard=computation_settings) #quimb
 
 
 # Construct the circuit

From 9b871794c987fa63c20f8f471d35266e365c2dc9 Mon Sep 17 00:00:00 2001
From: yangliwei <yangliwei.uestc@gmail.com>
Date: Wed, 14 Feb 2024 14:50:27 +0800
Subject: [PATCH 85/85] Minor fix to the use of env for CUDA_PATH in the
 workflow

---
 .github/workflows/rules.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/rules.yml b/.github/workflows/rules.yml
index ed8b461..6415be0 100644
--- a/.github/workflows/rules.yml
+++ b/.github/workflows/rules.yml
@@ -2,7 +2,7 @@
 name: Tests
 
 env:
-  CUDA_PATH: echo $CUDA_PATH
+  CUDA_PATH:
 
 on:
   push:
@@ -11,7 +11,7 @@ on:
 
 jobs:
   build:
-    if: contains(github.event.pull_request.labels.*.name, 'run-workflow') || github.event_name == 'push' && github.env.CUDA_PATH != ''
+    if: ${{ contains(github.event.pull_request.labels.*.name, 'run-workflow') || github.event_name == 'push' }} && env.CUDA_PATH != ''
     strategy:
       matrix:
         os: [ubuntu-latest]