From 9eb93cf0a23beb18235927a1cca5201bdb87f5d0 Mon Sep 17 00:00:00 2001 From: tankya2 Date: Wed, 12 Jul 2023 15:09:47 +0800 Subject: [PATCH] Initial commit --- src/qibotn/MpsHelper.py | 270 ++++++++++++++++++++++++++++++++++++++++ src/qibotn/cutn.py | 68 +++++++++- 2 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 src/qibotn/MpsHelper.py diff --git a/src/qibotn/MpsHelper.py b/src/qibotn/MpsHelper.py new file mode 100644 index 0000000..526cd3c --- /dev/null +++ b/src/qibotn/MpsHelper.py @@ -0,0 +1,270 @@ +# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES +# +# SPDX-License-Identifier: BSD-3-Clause + +import itertools + +import cupy as cp +import numpy as np + +import cuquantum +from cuquantum import cutensornet as cutn + +class MPSHelper: + + """ + MPSHelper(num_sites, phys_extent, max_virtual_extent, initial_state, data_type, compute_type) + + Create an MPSHelper object for gate splitting algorithm. + i j + -------A-------B------- i j k + p| |q -------> -------A`-------B`------- + GGGGGGGGG r| |s + r| |s + + Args: + num_sites: The number of sites in the MPS. + phys_extents: The extent for the physical mode where the gate tensors are acted on. + max_virtual_extent: The maximal extent allowed for the virtual mode shared between adjacent MPS tensors. + initial_state: A sequence of :class:`cupy.ndarray` representing the initial state of the MPS. + data_type (cuquantum.cudaDataType): The data type for all tensors and gates. + compute_type (cuquantum.ComputeType): The compute type for all gate splitting. + + """ + + def __init__(self, num_sites, phys_extent, max_virtual_extent, initial_state, data_type, compute_type): + self.num_sites = num_sites + self.phys_extent = phys_extent + self.data_type = data_type + self.compute_type = compute_type + + self.phys_modes = [] + self.virtual_modes = [] + self.new_mode = itertools.count(start=0, step=1) + + for i in range(num_sites+1): + self.virtual_modes.append(next(self.new_mode)) + if i != num_sites: + self.phys_modes.append(next(self.new_mode)) + + untruncated_max_extent = phys_extent ** (num_sites // 2) + if max_virtual_extent == 0: + self.max_virtual_extent = untruncated_max_extent + else: + self.max_virtual_extent = min(max_virtual_extent, untruncated_max_extent) + + self.handle = cutn.create() + self.work_desc = cutn.create_workspace_descriptor(self.handle) + self.svd_config = cutn.create_tensor_svd_config(self.handle) + self.svd_info = cutn.create_tensor_svd_info(self.handle) + self.gate_algo = cutn.GateSplitAlgo.DIRECT + + self.desc_tensors = [] + self.state_tensors = [] + + # create tensor descriptors + for i in range(self.num_sites): + temp = initial_state[i] + self.state_tensors.append(initial_state[i].astype(temp.dtype, order="F")) + extent = self.get_tensor_extent(i) + modes = self.get_tensor_modes(i) + desc_tensor = cutn.create_tensor_descriptor(self.handle, 3, extent, 0, modes, self.data_type) + self.desc_tensors.append(desc_tensor) + + def get_tensor(self, site): + """Get the tensor operands for a specific site.""" + return self.state_tensors[site] + + def get_tensor_extent(self, site): + """Get the extent of the MPS tensor at a specific site.""" + return self.state_tensors[site].shape + + def get_tensor_modes(self, site): + """Get the current modes of the MPS tensor at a specific site.""" + return (self.virtual_modes[site], self.phys_modes[site], self.virtual_modes[site+1]) + + def set_svd_config(self, abs_cutoff, rel_cutoff, renorm, partition): + """Update the SVD truncation setting. + + Args: + abs_cutoff: The cutoff value for absolute singular value truncation. + rel_cutoff: The cutoff value for relative singular value truncation. + renorm (cuquantum.cutensornet.TensorSVDNormalization): The option for renormalization of the truncated singular values. + partition (cuquantum.cutensornet.TensorSVDPartition): The option for partitioning of the singular values. + """ + + if partition != cutn.TensorSVDPartition.UV_EQUAL: + raise NotImplementedError("this basic example expects partition to be cutensornet.TensorSVDPartition.UV_EQUAL") + + svd_config_attributes = [cutn.TensorSVDConfigAttribute.ABS_CUTOFF, + cutn.TensorSVDConfigAttribute.REL_CUTOFF, + cutn.TensorSVDConfigAttribute.S_NORMALIZATION, + cutn.TensorSVDConfigAttribute.S_PARTITION] + + for (attr, value) in zip(svd_config_attributes, [abs_cutoff, rel_cutoff, renorm, partition]): + dtype = cutn.tensor_svd_config_get_attribute_dtype(attr) + value = np.array([value], dtype=dtype) + cutn.tensor_svd_config_set_attribute(self.handle, + self.svd_config, attr, value.ctypes.data, value.dtype.itemsize) + + def set_gate_algorithm(self, gate_algo): + """Set the algorithm to use for all gate split operations. + + Args: + gate_algo (cuquantum.cutensornet.GateSplitAlgo): The gate splitting algorithm to use. + """ + + self.gate_algo = gate_algo + + def compute_max_workspace_sizes(self): + """Compute the maximal workspace needed for MPS gating algorithm.""" + modes_in_A = [ord(c) for c in ('i', 'p', 'j')] + modes_in_B = [ord(c) for c in ('j', 'q', 'k')] + modes_in_G = [ord(c) for c in ('p', 'q', 'r', 's')] + modes_out_A = [ord(c) for c in ('i', 'r', 'j')] + modes_out_B = [ord(c) for c in ('j', 's', 'k')] + + max_extents_AB = (self.max_virtual_extent, self.phys_extent, self.max_virtual_extent) + extents_in_G = (self.phys_extent, self.phys_extent, self.phys_extent, self.phys_extent) + + desc_tensor_in_A = cutn.create_tensor_descriptor(self.handle, 3, max_extents_AB, 0, modes_in_A, self.data_type) + desc_tensor_in_B = cutn.create_tensor_descriptor(self.handle, 3, max_extents_AB, 0, modes_in_B, self.data_type) + desc_tensor_in_G = cutn.create_tensor_descriptor(self.handle, 4, extents_in_G, 0, modes_in_G, self.data_type) + desc_tensor_out_A = cutn.create_tensor_descriptor(self.handle, 3, max_extents_AB, 0, modes_out_A, self.data_type) + desc_tensor_out_B = cutn.create_tensor_descriptor(self.handle, 3, max_extents_AB, 0, modes_out_B, self.data_type) + + cutn.workspace_compute_gate_split_sizes(self.handle, + desc_tensor_in_A, desc_tensor_in_B, desc_tensor_in_G, + desc_tensor_out_A, desc_tensor_out_B, + self.gate_algo, self.svd_config, self.compute_type, self.work_desc) + + workspace_size = cutn.workspace_get_memory_size(self.handle, self.work_desc, cutn.WorksizePref.MIN, cutn.Memspace.DEVICE, cutn.WorkspaceKind.SCRATCH) + + # free resources + cutn.destroy_tensor_descriptor(desc_tensor_in_A) + cutn.destroy_tensor_descriptor(desc_tensor_in_B) + cutn.destroy_tensor_descriptor(desc_tensor_in_G) + cutn.destroy_tensor_descriptor(desc_tensor_out_A) + cutn.destroy_tensor_descriptor(desc_tensor_out_B) + return workspace_size + + def set_workspace(self, work, workspace_size): + """Compute the maximal workspace needed for MPS gating algorithm. + + Args: + work: Pointer to the allocated workspace. + workspace_size: The required workspace size on the device. + """ + cutn.workspace_set_memory(self.handle, self.work_desc, cutn.Memspace.DEVICE, cutn.WorkspaceKind.SCRATCH, work.ptr, workspace_size) + + def apply_gate(self, site_A, site_B, gate, verbose, stream): + """Inplace execution of the apply gate algoritm on site A and site B. + + Args: + site_A: The first site on which the gate is applied to. + site_B: The second site on which the gate is applied to. + gate (cupy.ndarray): The input data for the gate tensor. + verbose: Whether to print out the runtime information during truncation. + stream (cupy.cuda.Stream): The CUDA stream on which the computation is performed. + """ + if site_B - site_A != 1: + raise ValueError("Site B must be the right site of site A") + if site_B >= self.num_sites: + raise ValueError("Site index cannot exceed maximum number of sites") + + desc_tensor_in_A = self.desc_tensors[site_A] + desc_tensor_in_B = self.desc_tensors[site_B] + + phys_mode_in_A = self.phys_modes[site_A] + phys_mode_in_B = self.phys_modes[site_B] + phys_mode_out_A = next(self.new_mode) + phys_mode_out_B = next(self.new_mode) + modes_G = (phys_mode_in_A, phys_mode_in_B, phys_mode_out_A, phys_mode_out_B) + extent_G = (self.phys_extent, self.phys_extent, self.phys_extent, self.phys_extent) + desc_tensor_in_G = cutn.create_tensor_descriptor(self.handle, 4, extent_G, 0, modes_G, self.data_type) + + # construct and initialize the expected output A and B + tensor_in_A = self.state_tensors[site_A] + tensor_in_B = self.state_tensors[site_B] + left_extent_A = tensor_in_A.shape[0] + extent_AB_in = tensor_in_A.shape[2] + right_extent_B = tensor_in_B.shape[2] + combined_extent_left = min(left_extent_A, extent_AB_in * self.phys_extent) * self.phys_extent + combined_extent_right = min(right_extent_B, extent_AB_in * self.phys_extent) * self.phys_extent + extent_Aout_B = min(combined_extent_left, combined_extent_right, self.max_virtual_extent) + + extent_out_A = (left_extent_A, self.phys_extent, extent_Aout_B) + extent_out_B = (extent_Aout_B, self.phys_extent, right_extent_B) + + tensor_out_A = cp.zeros(extent_out_A, dtype=tensor_in_A.dtype, order="F") + tensor_out_B = cp.zeros(extent_out_B, dtype=tensor_in_B.dtype, order="F") + + # create tensor descriptors for output A and B + modes_out_A = (self.virtual_modes[site_A], phys_mode_out_A, self.virtual_modes[site_A+1]) + modes_out_B = (self.virtual_modes[site_B], phys_mode_out_B, self.virtual_modes[site_B+1]) + + desc_tensor_out_A = cutn.create_tensor_descriptor(self.handle, 3, extent_out_A, 0, modes_out_A, self.data_type) + desc_tensor_out_B = cutn.create_tensor_descriptor(self.handle, 3, extent_out_B, 0, modes_out_B, self.data_type) + + cutn.gate_split(self.handle, + desc_tensor_in_A, tensor_in_A.data.ptr, + desc_tensor_in_B, tensor_in_B.data.ptr, + desc_tensor_in_G, gate.data.ptr, + desc_tensor_out_A, tensor_out_A.data.ptr, + 0, # we factorize singular values equally onto output A and B. + desc_tensor_out_B, tensor_out_B.data.ptr, + self.gate_algo, self.svd_config, self.compute_type, + self.svd_info, self.work_desc, stream.ptr) + + if verbose: + full_extent = np.array([0], dtype=cutn.tensor_svd_info_get_attribute_dtype(cutn.TensorSVDInfoAttribute.FULL_EXTENT)) + reduced_extent = np.array([0], dtype=cutn.tensor_svd_info_get_attribute_dtype(cutn.TensorSVDInfoAttribute.REDUCED_EXTENT)) + discarded_weight = np.array([0], dtype=cutn.tensor_svd_info_get_attribute_dtype(cutn.TensorSVDInfoAttribute.DISCARDED_WEIGHT)) + + cutn.tensor_svd_info_get_attribute( + self.handle, self.svd_info, cutn.TensorSVDInfoAttribute.FULL_EXTENT, + full_extent.ctypes.data, full_extent.dtype.itemsize) + cutn.tensor_svd_info_get_attribute( + self.handle, self.svd_info, cutn.TensorSVDInfoAttribute.REDUCED_EXTENT, + reduced_extent.ctypes.data, reduced_extent.dtype.itemsize) + cutn.tensor_svd_info_get_attribute( + self.handle, self.svd_info, cutn.TensorSVDInfoAttribute.DISCARDED_WEIGHT, + discarded_weight.ctypes.data, discarded_weight.dtype.itemsize) + + print("Virtual bond truncated from {0} to {1} with a discarded weight of {2:.6f}".format(full_extent[0], reduced_extent[0], discarded_weight[0])) + + self.phys_modes[site_A] = phys_mode_out_A + self.phys_modes[site_B] = phys_mode_out_B + self.desc_tensors[site_A] = desc_tensor_out_A + self.desc_tensors[site_B] = desc_tensor_out_B + + extent_out_A = np.zeros((3,), dtype=np.int64) + extent_out_B = np.zeros((3,), dtype=np.int64) + extent_out_A, strides_out_A = cutn.get_tensor_details(self.handle, desc_tensor_out_A)[2:] + extent_out_B, strides_out_B = cutn.get_tensor_details(self.handle, desc_tensor_out_B)[2:] + + # Recall that `cutensornet.gate_split` can potentially find reduced extent during SVD truncation when value-based truncation is used. + # Therefore we here update the container for output tensor A and B. + if extent_out_A[2] != extent_Aout_B: + # note strides in cutensornet are in the unit of count and strides in cupy/numpy are in the unit of nbytes + strides_out_A = [i * tensor_out_A.itemsize for i in strides_out_A] + strides_out_B = [i * tensor_out_B.itemsize for i in strides_out_B] + tensor_out_A = cp.ndarray(extent_out_A, dtype=tensor_out_A.dtype, memptr=tensor_out_A.data, strides=strides_out_A) + tensor_out_B = cp.ndarray(extent_out_B, dtype=tensor_out_B.dtype, memptr=tensor_out_B.data, strides=strides_out_B) + + self.state_tensors[site_A] = tensor_out_A + self.state_tensors[site_B] = tensor_out_B + + cutn.destroy_tensor_descriptor(desc_tensor_in_A) + cutn.destroy_tensor_descriptor(desc_tensor_in_B) + cutn.destroy_tensor_descriptor(desc_tensor_in_G) + + def __del__(self): + """Free all resources owned by the object.""" + for desc_tensor in self.desc_tensors: + cutn.destroy_tensor_descriptor(desc_tensor) + cutn.destroy(self.handle) + cutn.destroy_workspace_descriptor(self.work_desc) + cutn.destroy_tensor_svd_config(self.svd_config) + cutn.destroy_tensor_svd_info(self.svd_info) + diff --git a/src/qibotn/cutn.py b/src/qibotn/cutn.py index e6f3e8c..6e4a98b 100644 --- a/src/qibotn/cutn.py +++ b/src/qibotn/cutn.py @@ -1,8 +1,72 @@ -# from qibotn import quimb as qiboquimb from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum from cuquantum import contract - +from MpsHelper import MPSHelper +import cuquantum +from cuquantum import cutensornet as cutn +import cupy as cp +import numpy as np def eval(qibo_circ, datatype): myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) return contract(*myconvertor.state_vector_operands()) + +def eval_mps(qibo_circ, datatype): + #Create MPS + cutensornet.create() + return contract() + + +if __name__ == "__main__": + print("cuTensorNet-vers:", cutn.get_version()) + dev = cp.cuda.Device() # get current device + props = cp.cuda.runtime.getDeviceProperties(dev.id) + print("===== device info ======") + print("GPU-name:", props["name"].decode()) + print("GPU-clock:", props["clockRate"]) + print("GPU-memoryClock:", props["memoryClockRate"]) + print("GPU-nSM:", props["multiProcessorCount"]) + print("GPU-major:", props["major"]) + print("GPU-minor:", props["minor"]) + print("========================") + + data_type = cuquantum.cudaDataType.CUDA_C_64F + compute_type = cuquantum.ComputeType.COMPUTE_64F + num_sites = 16 + phys_extent = 2 + max_virtual_extent = 12 + + ## we initialize the MPS state as a product state |000...000> + initial_state = [] + for i in range(num_sites): + # we create dummpy indices for MPS tensors on the boundary for easier bookkeeping + # we'll use Fortran layout throughout this example + tensor = cp.zeros((1,2,1), dtype=np.complex128, order="F") + tensor[0,0,0] = 1.0 + initial_state.append(tensor) + + mps_helper = MPSHelper(num_sites, phys_extent, max_virtual_extent, initial_state, data_type, compute_type) + + ################################## + # Setup options for gate operation + ################################## + + abs_cutoff = 1e-2 + rel_cutoff = 1e-2 + renorm = cutn.TensorSVDNormalization.L2 + partition = cutn.TensorSVDPartition.UV_EQUAL + mps_helper.set_svd_config(abs_cutoff, rel_cutoff, renorm, partition) + + gate_algo = cutn.GateSplitAlgo.REDUCED + mps_helper.set_gate_algorithm(gate_algo) + + ##################################### + # Workspace estimation and allocation + ##################################### + + free_mem, total_mem = dev.mem_info + worksize = free_mem *.7 + required_workspace_size = mps_helper.compute_max_workspace_sizes() + work = cp.cuda.alloc(worksize) + print(f"Maximal workspace size requried: {required_workspace_size / 1024 ** 3:.3f} GB") + mps_helper.set_workspace(work, required_workspace_size) +