From ef565eefc4fb97de65f297a0bf442c34f5f5d997 Mon Sep 17 00:00:00 2001 From: tankya2 Date: Thu, 4 Jul 2024 13:41:10 +0800 Subject: [PATCH] Add configuration and free memory explicitly --- src/qibotn/eval.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py index 245aa5e..23624aa 100644 --- a/src/qibotn/eval.py +++ b/src/qibotn/eval.py @@ -325,25 +325,29 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample comm = MPI.COMM_WORLD rank = comm.Get_rank() size = comm.Get_size() - - device_id = rank % getDeviceCount() - - # Perform circuit conversion - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - - operands = myconvertor.expectation_operands( - pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) - ) - + # Assign the device for each process. device_id = rank % getDeviceCount() + cp.cuda.Device(device_id).use() + # Perform circuit conversion + if rank==0: + myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) + + operands = myconvertor.expectation_operands( + pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) + ) + else: + operands = None + + operands = comm.bcast(operands, root) + # Create network object. network = Network(*operands, options={"device_id": device_id}) # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. path, info = network.contract_path( - optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}} + optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size),"memory_model":cutn.MemoryModel.CUTENSOR}} ) # Select the best path from all ranks. @@ -371,6 +375,9 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample # Sum the partial contribution from each process on root. result = comm.reduce(sendobj=result, op=MPI.SUM, root=root) + + del network + mempool.free_all_blocks() return result, rank