Format with black

2024-01-24 11:47:32 +08:00
parent fe36a84e74
commit c2d2c8318f
3 changed files with 275 additions and 214 deletions
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/QiboCircuitConvertor.py
@@ -110,7 +110,6 @@ class QiboCircuitToEinsum:

        self.basis_map = {"0": state_0, "1": state_1}

-
    def init_inverse_circuit(self, circuit):
        self.gate_tensors_inverse = []
        gates_qubits_inverse = []
@@ -132,8 +131,7 @@ class QiboCircuitToEinsum:
        # self.active_qubits is to identify qubits with at least 1 gate acting on it in the whole circuit.
        self.active_qubits_inverse = np.unique(gates_qubits_inverse)

-        
-    def get_pauli_gates(self, pauli_map, dtype='complex128', backend=cp):
+    def get_pauli_gates(self, pauli_map, dtype="complex128", backend=cp):
        """
        Populate the gates for all pauli operators.

@@ -151,15 +149,12 @@ class QiboCircuitToEinsum:
        pauli_y = asarray([[0, -1j], [1j, 0]], dtype=dtype)
        pauli_z = asarray([[1, 0], [0, -1]], dtype=dtype)

-        operand_map = {'I': pauli_i,
-                    'X': pauli_x,
-                    'Y': pauli_y,
-                    'Z': pauli_z}
+        operand_map = {"I": pauli_i, "X": pauli_x, "Y": pauli_y, "Z": pauli_z}
        gates = []
        for qubit, pauli_char in pauli_map.items():
            operand = operand_map.get(pauli_char)
            if operand is None:
-                raise ValueError('pauli string character must be one of I/X/Y/Z')
+                raise ValueError("pauli string character must be one of I/X/Y/Z")
            gates.append((operand, (qubit,)))
        return gates

@@ -188,20 +183,27 @@ class QiboCircuitToEinsum:

        self.init_inverse_circuit(self.circuit.invert())

-        
        next_frontier = max(qubits_frontier.values()) + 1

        # input_mode_labels, input_operands, qubits_frontier, next_frontier, inverse_gates = self._get_forward_inverse_metadata(coned_qubits)

-        pauli_gates = self.get_pauli_gates(pauli_map, dtype=self.dtype, backend=self.backend)
-        
+        pauli_gates = self.get_pauli_gates(
+            pauli_map, dtype=self.dtype, backend=self.backend
+        )

        gates_inverse = pauli_gates + self.gate_tensors_inverse

-        gate_mode_labels_inverse, gate_operands_inverse = self._parse_gates_to_mode_labels_operands(
+        (
+            gate_mode_labels_inverse,
+            gate_operands_inverse,
+        ) = self._parse_gates_to_mode_labels_operands(
            gates_inverse, qubits_frontier, next_frontier
        )
-        mode_labels = mode_labels + gate_mode_labels_inverse + [[qubits_frontier[ix]] for ix in range(self.circuit.nqubits)]
+        mode_labels = (
+            mode_labels
+            + gate_mode_labels_inverse
+            + [[qubits_frontier[ix]] for ix in range(self.circuit.nqubits)]
+        )
        operands = operands + gate_operands_inverse + operands[: self.circuit.nqubits]

        operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
@@ -209,7 +211,7 @@ class QiboCircuitToEinsum:
        # expec = contract(*operand_exp_interleave)
        # print(expec)

-        '''
+        """
        gate_mode_labels, gate_operands = circ_utils.parse_gates_to_mode_labels_operands(gates, 
                                                                                         qubits_frontier, 
                                                                                         next_frontier)
@@ -219,5 +221,5 @@ class QiboCircuitToEinsum:

        output_mode_labels = []
        expression = circ_utils.convert_mode_labels_to_expression(mode_labels, output_mode_labels)
-        '''
+        """
        return operand_exp_interleave
--- a/src/qibotn/backends.py
+++ b/src/qibotn/backends.py
@@ -19,8 +19,6 @@ class QiboTNBackend(NumpyBackend):
            or platform == "cu_tensornet_expectation"
            or platform == "cu_tensornet_nccl"
            or platform == "cu_tensornet_nccl_expectation"
-
-
        ):  # pragma: no cover
            self.platform = platform
        else:
@@ -72,7 +70,6 @@ class QiboTNBackend(NumpyBackend):
            state = cutn.eval_mps(circuit, gate_algo, self.dtype)

        if self.platform == "qu_tensornet":
-            
            # init_state = np.random.random(2**circuit.nqubits) + 1j * np.random.random(2**circuit.nqubits)
            # init_state = init_state / np.sqrt((np.abs(init_state) ** 2).sum())
            init_state = np.zeros(2**circuit.nqubits, dtype=self.dtype)
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -14,9 +14,13 @@ def eval(qibo_circ, datatype):
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    return contract(*myconvertor.state_vector_operands())

+
 def eval_expectation(qibo_circ, datatype):
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    return contract(*myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits)))
+    return contract(
+        *myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
+    )
+

 def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
    from mpi4py import MPI  # this line initializes MPI
@@ -34,7 +38,6 @@ def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
    device_id = rank % getDeviceCount()

-    
    # Perform circuit conversion
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    # mem_avail = cp.cuda.Device().mem_info[0]
@@ -54,10 +57,12 @@ def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
    # print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)

    # Create network object.
-    network = Network(*operands, options={'device_id' : device_id})
+    network = Network(*operands, options={"device_id": device_id})

    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+    path, info = network.contract_path(
+        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+    )
    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")

    # Select the best path from all ranks.
@@ -70,13 +75,17 @@ def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):
    info = comm.bcast(info, sender)

    # Set path and slices.
-    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+    path, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )

    # Calculate this process's share of the slices.
    num_slices = info.num_slices
    chunk, extra = num_slices // size, num_slices % size
    slice_begin = rank * chunk + min(rank, extra)
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
    slices = range(slice_begin, slice_end)

    # print(f"Process {rank} is processing slice range: {slices}.")
@@ -91,6 +100,7 @@ def eval_tn_MPI_2(qibo_circ, datatype, n_samples=8):

    return result, rank

+
 def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
    from mpi4py import MPI  # this line initializes MPI
    import socket
@@ -126,7 +136,9 @@ def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
    network = Network(*operands)

    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+    path, info = network.contract_path(
+        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+    )

    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")

@@ -140,13 +152,17 @@ def eval_tn_nccl(qibo_circ, datatype, n_samples=8):
    info = comm_mpi.bcast(info, sender)

    # Set path and slices.
-    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+    path, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )

    # Calculate this process's share of the slices.
    num_slices = info.num_slices
    chunk, extra = num_slices // size, num_slices % size
    slice_begin = rank * chunk + min(rank, extra)
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
    slices = range(slice_begin, slice_end)

    # print(f"Process {rank} is processing slice range: {slices}.")
@@ -158,10 +174,19 @@ def eval_tn_nccl(qibo_circ, datatype, n_samples=8):

    # Sum the partial contribution from each process on root.
    stream_ptr = cp.cuda.get_current_stream().ptr
-    comm_nccl.reduce(result.data.ptr, result.data.ptr, result.size, nccl.NCCL_FLOAT64, nccl.NCCL_SUM, root, stream_ptr)
+    comm_nccl.reduce(
+        result.data.ptr,
+        result.data.ptr,
+        result.size,
+        nccl.NCCL_FLOAT64,
+        nccl.NCCL_SUM,
+        root,
+        stream_ptr,
+    )

    return result, rank

+
 def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
    from mpi4py import MPI  # this line initializes MPI
    import socket
@@ -198,7 +223,9 @@ def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
    network = Network(*operands)

    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+    path, info = network.contract_path(
+        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+    )

    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")

@@ -212,13 +239,17 @@ def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):
    info = comm_mpi.bcast(info, sender)

    # Set path and slices.
-    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+    path, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )

    # Calculate this process's share of the slices.
    num_slices = info.num_slices
    chunk, extra = num_slices // size, num_slices % size
    slice_begin = rank * chunk + min(rank, extra)
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
    slices = range(slice_begin, slice_end)

    # print(f"Process {rank} is processing slice range: {slices}.")
@@ -230,7 +261,15 @@ def eval_tn_nccl_expectation(qibo_circ, datatype, n_samples=8):

    # Sum the partial contribution from each process on root.
    stream_ptr = cp.cuda.get_current_stream().ptr
-    comm_nccl.reduce(result.data.ptr, result.data.ptr, result.size, nccl.NCCL_FLOAT64, nccl.NCCL_SUM, root, stream_ptr)
+    comm_nccl.reduce(
+        result.data.ptr,
+        result.data.ptr,
+        result.size,
+        nccl.NCCL_FLOAT64,
+        nccl.NCCL_SUM,
+        root,
+        stream_ptr,
+    )

    return result, rank

@@ -251,7 +290,6 @@ def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
    # print("Mem avail: Start",mem_avail, "rank =",rank, "hostname =",hostname)
    device_id = rank % getDeviceCount()

-    
    # Perform circuit conversion
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    # mem_avail = cp.cuda.Device().mem_info[0]
@@ -271,10 +309,12 @@ def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
    # print("Mem free: ",free_mem, "Total mem: ",total_mem, "rank =",rank)

    # Create network object.
-    network = Network(*operands, options={'device_id' : device_id})
+    network = Network(*operands, options={"device_id": device_id})

    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(optimize={'samples': 8, 'slicing': {'min_slices': max(32, size)}})
+    path, info = network.contract_path(
+        optimize={"samples": 8, "slicing": {"min_slices": max(32, size)}}
+    )
    # print(f"Process {rank} has the path with the  FLOP count {info.opt_cost}.")

    # Select the best path from all ranks.
@@ -287,13 +327,17 @@ def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
    info = comm.bcast(info, sender)

    # Set path and slices.
-    path, info = network.contract_path(optimize={'path': info.path, 'slicing': info.slices})
+    path, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )

    # Calculate this process's share of the slices.
    num_slices = info.num_slices
    chunk, extra = num_slices // size, num_slices % size
    slice_begin = rank * chunk + min(rank, extra)
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
    slices = range(slice_begin, slice_end)

    # print(f"Process {rank} is processing slice range: {slices}.")
@@ -312,6 +356,7 @@ def eval_tn_MPI_2_expectation(qibo_circ, datatype, n_samples=8):
 def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
    from mpi4py import MPI  # this line initializes MPI
    import socket
+
    # Get the hostname
    # hostname = socket.gethostname()

@@ -334,7 +379,9 @@ def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
    # print("Mem avail: aft distributed reset config",mem_avail, "rank =",rank)
    # Perform circuit conversion
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    operands_interleave = myconvertor.expectation_operands(PauliStringGen(qibo_circ.nqubits))
+    operands_interleave = myconvertor.expectation_operands(
+        PauliStringGen(qibo_circ.nqubits)
+    )
    # mem_avail = cp.cuda.Device().mem_info[0]
    # print("Mem avail: aft convetor",mem_avail, "rank =",rank)
    # mem_avail = cp.cuda.Device().mem_info[0]
@@ -344,17 +391,24 @@ def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):
    network = cutn.Network(*operands_interleave, options=network_opts)
    # mem_avail = cp.cuda.Device().mem_info[0]
    # print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
-    path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
+    path, opt_info = network.contract_path(
+        optimize={
+            "samples": n_samples,
+            "threads": ncpu_threads,
+            "slicing": {"min_slices": max(16, size)},
+        }
+    )
    # mem_avail = cp.cuda.Device().mem_info[0]
    # print("Mem avail: aft contract path",mem_avail, "rank =",rank)
    # Execution: To execute the contraction using the optimal path found previously
    # print("opt_cost",opt_info.opt_cost, "Process =",rank)

-    
    num_slices = opt_info.num_slices  # Andy
    chunk, extra = num_slices // size, num_slices % size  # Andy
    slice_begin = rank * chunk + min(rank, extra)  # Andy
-    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )  # Andy
    slices = range(slice_begin, slice_end)  # Andy
    result = network.contract(slices=slices)
    # mem_avail = cp.cuda.Device().mem_info[0]
@@ -363,6 +417,7 @@ def eval_tn_MPI_expectation(qibo_circ, datatype, n_samples=8):

    return result, rank

+
 def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
    """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
@@ -372,6 +427,7 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):

    from mpi4py import MPI  # this line initializes MPI
    import socket
+
    # Get the hostname
    # hostname = socket.gethostname()

@@ -404,13 +460,19 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
    network = cutn.Network(*operands_interleave, options=network_opts)
    # mem_avail = cp.cuda.Device().mem_info[0]
    # print("Mem avail: aft cutn.Network(*operands_interleave,",mem_avail, "rank =",rank)
-    network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})
+    network.contract_path(
+        optimize={
+            "samples": n_samples,
+            "threads": ncpu_threads,
+            "slicing": {"min_slices": max(16, size)},
+        }
+    )
    # mem_avail = cp.cuda.Device().mem_info[0]
    # print("Mem avail: aft contract path",mem_avail, "rank =",rank)
    # Execution: To execute the contraction using the optimal path found previously
    # print("opt_cost",opt_info.opt_cost, "Process =",rank)

-    '''
+    """
    path, opt_info = network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads, 'slicing': {'min_slices': max(16, size)}})

    num_slices = opt_info.num_slices#Andy
@@ -419,7 +481,7 @@ def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
    slice_end = num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)#Andy
    slices = range(slice_begin, slice_end)#Andy
    result = network.contract(slices=slices)
-    '''
+    """
    result = network.contract()

    # mem_avail = cp.cuda.Device().mem_info[0]
@@ -437,15 +499,15 @@ def eval_mps(qibo_circ, gate_algo, datatype):
        myconvertor.mps_tensors, {"handle": myconvertor.handle}
    )

-def PauliStringGen(nqubits):

+def PauliStringGen(nqubits):
    if nqubits <= 0:
        return "Invalid input. N should be a positive integer."

    # characters = 'IXYZ'
-    characters = 'XXXZ'
+    characters = "XXXZ"

-    result = ''
+    result = ""

    for i in range(nqubits):
        char_to_add = characters[i % len(characters)]