Merge pull request #24 from qiboteam/qibotn_integration

Qibotn integration to Qibo
2024-03-01 18:04:47 +08:00
parent 8e69f4610b 891102f638
commit 371ec8de01
25 changed files with 3259 additions and 353 deletions
--- a/.envrc
+++ b/.envrc
@@ -0,0 +1,9 @@
+if ! has nix_direnv_version || ! nix_direnv_version 2.2.1; then
+  source_url "https://raw.githubusercontent.com/nix-community/nix-direnv/2.2.1/direnvrc" "sha256-zelF0vLbEl5uaqrfIzbgNzJWGmLzCmYAkInj/LNxvKs="
+fi
+
+nix_direnv_watch_file flake.nix
+nix_direnv_watch_file flake.lock
+if ! use flake . --impure; then
+  echo "devenv could not be built. The devenv environment was not loaded. Make the necessary changes to devenv.nix and hit enter to try again." >&2
+fi
--- a/.github/workflows/rules.yml
+++ b/.github/workflows/rules.yml
@@ -1,22 +1,38 @@
 # A single CI script with github workflow
 name: Tests

+env:
+  CUDA_PATH:
+
 on:
+  workflow_dispatch:
  push:
  pull_request:
    types: [labeled]

 jobs:
+  check:
+    # job to check cuda availability
+    runs-on: ubuntu-latest
+    steps:
+      - id: step1
+        run: echo "test=${{ env.CUDA_PATH != ''}}" >> "$GITHUB_OUTPUT"
+      - id: step2
+        run: echo "test=${{ contains(github.event.pull_request.labels.*.name, 'run-workflow') || github.event_name == 'push' }}" >> "$GITHUB_OUTPUT"
+    outputs:
+      cuda_avail: ${{ fromJSON(steps.step1.outputs.test) && fromJSON(steps.step2.outputs.test) }}
+
  build:
-    if: contains(github.event.pull_request.labels.*.name, 'run-workflow') || github.event_name == 'push' && {{ $CUDA_PATH != '' }}
+    # job to build; only runs when the `check` job reports that CUDA is
+    # available and the workflow was triggered by a push or a labelled PR
+    needs: check
+    if: ${{fromJSON(needs.check.outputs.cuda_avail)}}
    strategy:
-      matrix:
-        os: [ubuntu-latest]
-        python-version: [3.8, 3.9, "3.10"]
-    uses: qiboteam/workflows/.github/workflows/rules.yml@main
+      matrix:
+        os: [ubuntu-latest]
+        # keep in sync with `python = "^3.9,<3.12"` in pyproject.toml
+        python-version: [3.9, "3.10", "3.11"]
+    uses: qiboteam/workflows/.github/workflows/rules-poetry.yml@main
    with:
      os: ${{ matrix.os }}
      python-version: ${{ matrix.python-version }}
-      environment: "qibotn"
-      pip-extras: "analysis,tests"
+      poetry-extras: "--with analysis,tests"
    secrets: inherit
--- a/.gitignore
+++ b/.gitignore
@@ -159,3 +159,4 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+.devenv
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,5 @@
-# See https://pre-commit.com for more information
-# See https://pre-commit.com/hooks.html for more hooks
+ci:
+  autofix_prs: true
 repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
@@ -8,10 +8,9 @@ repos:
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-toml
-      - id: check-merge-conflict
      - id: debug-statements
  - repo: https://github.com/psf/black
-    rev: 24.2.0
+    rev: 24.1.1
    hooks:
      - id: black
  - repo: https://github.com/pycqa/isort
@@ -19,7 +18,31 @@ repos:
    hooks:
      - id: isort
        args: ["--profile", "black"]
+  - repo: https://github.com/PyCQA/docformatter
+    rev: v1.7.5
+    hooks:
+      - id: docformatter
+        additional_dependencies: [tomli]
+        args: [--in-place, --config, ./pyproject.toml]
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.15.1
    hooks:
      - id: pyupgrade
+  - repo: https://github.com/hadialqattan/pycln
+    rev: v2.4.0
+    hooks:
+      - id: pycln
+        args:
+          - --config=pyproject.toml
+          - --all
+  - repo: https://github.com/adamchainz/blacken-docs
+    rev: 1.16.0
+    hooks:
+      - id: blacken-docs
+  - repo: https://github.com/pycqa/pydocstyle
+    rev: 6.3.0
+    hooks:
+      - id: pydocstyle
+        args:
+          - --select=D103,D200,D206,D300,D301
+        files: ^src/
--- a/README.md
+++ b/README.md
@@ -1,3 +1,118 @@
-Qibotn is the tensor-network translation module for Qibo to support large-scale simulation of quantum circuits and acceleration.
+# Qibotn

-To get started, `python setup.py install` to install the tools and dependencies.
+The tensor network translation module for Qibo to support large-scale simulation of quantum circuits and acceleration.
+
+## Supported Computation
+
+Tensor Network Types:
+
+- Tensornet (TN)
+- Matrix Product States (MPS)
+
+Tensor Network contractions to:
+
+- dense vectors
+- expectation values of a given Pauli string
+
+The supported HPC configurations are:
+
+- single-node CPU
+- single-node GPU or GPUs
+- multi-node multi-GPU with Message Passing Interface (MPI)
+- multi-node multi-GPU with NVIDIA Collective Communications Library (NCCL)
+
+Currently, the supported tensor network libraries are:
+
+- [cuQuantum](https://github.com/NVIDIA/cuQuantum), an NVIDIA SDK of optimized libraries and tools for accelerating quantum computing workflows.
+- [quimb](https://quimb.readthedocs.io/en/latest/), an easy but fast python library for ‘quantum information many-body’ calculations, focusing primarily on tensor networks.
+
+## Installation
+
+To get started:
+
+```sh
+pip install .
+```
+
+to install the tools and dependencies. Optional extras (e.g. `cuda`) are declared in
+`pyproject.toml`, in case you need them.
+
+<!-- TODO: describe extras, after Poetry adoption and its groups -->
+
+## Sample Codes
+
+### Single-Node Example
+
+The code below shows an example of how to activate the Cuquantum TensorNetwork backend of Qibo.
+
+```py
+import numpy as np
+from qibo import Circuit, gates
+import qibo
+
+# Below shows how to set the computation_settings
+# Note that for MPS_enabled and expectation_enabled parameters the accepted inputs are boolean or a dictionary with the format shown below.
+# If computation_settings is not specified, the default setting is used in which all booleans will be False.
+# This will trigger the dense vector computation of the tensornet.
+
+computation_settings = {
+    "MPI_enabled": False,
+    "MPS_enabled": {
+        "qr_method": False,
+        "svd_method": {
+            "partition": "UV",
+            "abs_cutoff": 1e-12,
+        },
+    },
+    "NCCL_enabled": False,
+    "expectation_enabled": False,
+}
+
+
+qibo.set_backend(
+    backend="qibotn", platform="cutensornet", runcard=computation_settings
+)  # cuQuantum
+# qibo.set_backend(backend="qibotn", platform="QuimbBackend", runcard=computation_settings) #quimb
+
+
+# Construct the circuit
+c = Circuit(2)
+# Add some gates
+c.add(gates.H(0))
+c.add(gates.H(1))
+
+# Execute the circuit and obtain the final state
+result = c()
+
+print(result.state())
+```
+
+Other examples of setting the computation_settings
+
+```py
+# Expectation computation with specific Pauli String pattern
+computation_settings = {
+    "MPI_enabled": False,
+    "MPS_enabled": False,
+    "NCCL_enabled": False,
+    "expectation_enabled": {
+        "pauli_string_pattern": "IXZ",
+    },
+}
+
+# Dense vector computation using multi node through MPI
+computation_settings = {
+    "MPI_enabled": True,
+    "MPS_enabled": False,
+    "NCCL_enabled": False,
+    "expectation_enabled": False,
+}
+```
+
+### Multi-Node Example
+
+Multi-node execution is enabled by setting either `MPI_enabled` or `NCCL_enabled` to `True` in the computation settings. The command below launches the script on 2 nodes with 2 GPUs each; `$node_list` is the hostfile listing the IP addresses of the assigned nodes.
+
+```sh
+mpirun -n 4 -hostfile $node_list python test.py
+```
--- a/flake.lock
+++ b/flake.lock
@@ -0,0 +1,323 @@
+{
+  "nodes": {
+    "devenv": {
+      "inputs": {
+        "flake-compat": "flake-compat",
+        "nix": "nix",
+        "nixpkgs": "nixpkgs",
+        "pre-commit-hooks": "pre-commit-hooks"
+      },
+      "locked": {
+        "lastModified": 1707004164,
+        "narHash": "sha256-9Hr8onWtvLk5A8vCEkaE9kxA0D7PR62povFokM1oL5Q=",
+        "owner": "cachix",
+        "repo": "devenv",
+        "rev": "0e68853bb27981a4ffd7a7225b59ed84f7180fc7",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "repo": "devenv",
+        "type": "github"
+      }
+    },
+    "flake-compat": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1673956053,
+        "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-compat_2": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1696426674,
+        "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1685518550,
+        "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "flake-utils_2": {
+      "inputs": {
+        "systems": "systems_2"
+      },
+      "locked": {
+        "lastModified": 1701680307,
+        "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "4022d587cbbfd70fe950c1e2083a02621806a725",
+        "type": "github"
+      },
+      "original": {
+        "id": "flake-utils",
+        "type": "indirect"
+      }
+    },
+    "gitignore": {
+      "inputs": {
+        "nixpkgs": [
+          "devenv",
+          "pre-commit-hooks",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1660459072,
+        "narHash": "sha256-8DFJjXG8zqoONA1vXtgeKXy68KdJL5UaXR8NtVMUbx8=",
+        "owner": "hercules-ci",
+        "repo": "gitignore.nix",
+        "rev": "a20de23b925fd8264fd7fad6454652e142fd7f73",
+        "type": "github"
+      },
+      "original": {
+        "owner": "hercules-ci",
+        "repo": "gitignore.nix",
+        "type": "github"
+      }
+    },
+    "lowdown-src": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1633514407,
+        "narHash": "sha256-Dw32tiMjdK9t3ETl5fzGrutQTzh2rufgZV4A/BbxuD4=",
+        "owner": "kristapsdz",
+        "repo": "lowdown",
+        "rev": "d2c2b44ff6c27b936ec27358a2653caaef8f73b8",
+        "type": "github"
+      },
+      "original": {
+        "owner": "kristapsdz",
+        "repo": "lowdown",
+        "type": "github"
+      }
+    },
+    "nix": {
+      "inputs": {
+        "lowdown-src": "lowdown-src",
+        "nixpkgs": [
+          "devenv",
+          "nixpkgs"
+        ],
+        "nixpkgs-regression": "nixpkgs-regression"
+      },
+      "locked": {
+        "lastModified": 1676545802,
+        "narHash": "sha256-EK4rZ+Hd5hsvXnzSzk2ikhStJnD63odF7SzsQ8CuSPU=",
+        "owner": "domenkozar",
+        "repo": "nix",
+        "rev": "7c91803598ffbcfe4a55c44ac6d49b2cf07a527f",
+        "type": "github"
+      },
+      "original": {
+        "owner": "domenkozar",
+        "ref": "relaxed-flakes",
+        "repo": "nix",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1678875422,
+        "narHash": "sha256-T3o6NcQPwXjxJMn2shz86Chch4ljXgZn746c2caGxd8=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "126f49a01de5b7e35a43fd43f891ecf6d3a51459",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixpkgs-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "nixpkgs-python": {
+      "inputs": {
+        "flake-compat": "flake-compat_2",
+        "flake-utils": "flake-utils_2",
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1707114737,
+        "narHash": "sha256-ZXqv2epXAjDjfWbYn+yy4VOmW+C7SuUBoiZkkDoSqA4=",
+        "owner": "cachix",
+        "repo": "nixpkgs-python",
+        "rev": "f34ed02276bc08fe1c91c1bf0ef3589d68028878",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "repo": "nixpkgs-python",
+        "type": "github"
+      }
+    },
+    "nixpkgs-regression": {
+      "locked": {
+        "lastModified": 1643052045,
+        "narHash": "sha256-uGJ0VXIhWKGXxkeNnq4TvV3CIOkUJ3PAoLZ3HMzNVMw=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2",
+        "type": "github"
+      }
+    },
+    "nixpkgs-stable": {
+      "locked": {
+        "lastModified": 1685801374,
+        "narHash": "sha256-otaSUoFEMM+LjBI1XL/xGB5ao6IwnZOXc47qhIgJe8U=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "c37ca420157f4abc31e26f436c1145f8951ff373",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-23.05",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "nixpkgs_2": {
+      "locked": {
+        "lastModified": 1707092692,
+        "narHash": "sha256-ZbHsm+mGk/izkWtT4xwwqz38fdlwu7nUUKXTOmm4SyE=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "faf912b086576fd1a15fca610166c98d47bc667e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "pre-commit-hooks": {
+      "inputs": {
+        "flake-compat": [
+          "devenv",
+          "flake-compat"
+        ],
+        "flake-utils": "flake-utils",
+        "gitignore": "gitignore",
+        "nixpkgs": [
+          "devenv",
+          "nixpkgs"
+        ],
+        "nixpkgs-stable": "nixpkgs-stable"
+      },
+      "locked": {
+        "lastModified": 1704725188,
+        "narHash": "sha256-qq8NbkhRZF1vVYQFt1s8Mbgo8knj+83+QlL5LBnYGpI=",
+        "owner": "cachix",
+        "repo": "pre-commit-hooks.nix",
+        "rev": "ea96f0c05924341c551a797aaba8126334c505d2",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "repo": "pre-commit-hooks.nix",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "devenv": "devenv",
+        "nixpkgs": "nixpkgs_2",
+        "nixpkgs-python": "nixpkgs-python",
+        "systems": "systems_3"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_2": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_3": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
--- a/flake.nix
+++ b/flake.nix
@@ -0,0 +1,61 @@
+{
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+    systems.url = "github:nix-systems/default";
+    devenv.url = "github:cachix/devenv";
+    nixpkgs-python = {
+      url = "github:cachix/nixpkgs-python";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+  };
+
+  outputs = {
+    self,
+    nixpkgs,
+    devenv,
+    systems,
+    ...
+  } @ inputs: let
+    forEachSystem = nixpkgs.lib.genAttrs (import systems);
+  in {
+    # packages = forEachSystem (system: {
+    #   default =
+    #     nixpkgs.legacyPackages.${system}.poetry2nix.mkPoetryApplication
+    #     {
+    #       projectDir = self;
+    #       preferWheels = true;
+    #     };
+    # });
+
+    devShells =
+      forEachSystem
+      (system: let
+        pkgs = nixpkgs.legacyPackages.${system};
+      in {
+        default = devenv.lib.mkShell {
+          inherit inputs pkgs;
+
+          modules = [
+            {
+              packages = with pkgs; [pre-commit poethepoet stdenv.cc.cc.lib];
+
+              languages.python = {
+                enable = true;
+                poetry = {
+                  enable = true;
+                  install.enable = true;
+                  install.groups = ["dev" "tests"];
+                };
+                version = "3.11";
+              };
+            }
+          ];
+        };
+      });
+  };
+
+  nixConfig = {
+    extra-trusted-public-keys = "devenv.cachix.org-1:w1cLUi8dv3hnoSPGAuibQv+f9TZLr6cv/Hm9XgU50cw=";
+    extra-substituters = "https://devenv.cachix.org";
+  };
+}
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,59 @@
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry]
+name = "qibotn"
+version = "0.0.1"
+description = "A tensor-network translation module for Qibo"
+authors = ["The Qibo team"]
+license = "Apache License 2.0"
+readme = "README.md"
+homepage = "https://qibo.science/"
+repository = "https://github.com/qiboteam/qibotn/"
+documentation = "https://qibo.science/docs/qibotn/stable"
+keywords = []
+classifiers = [
+  "Programming Language :: Python :: 3",
+  "Topic :: Scientific/Engineering :: Physics",
+]
+packages = [{ include = "qibotn", from = "src" }]
+
+[tool.poetry.dependencies]
+python = "^3.9,<3.12"
+qibo = "^0.2.4"
+quimb = { version = "^1.6.0", extras = ["tensor"] }
+cupy = { version = "^11.6.0", optional = true }
+cuquantum-python-cu11 = { version = "^23.3.0", optional = true }
+
+[tool.poetry.extras]
+cuda = ["cupy", "cuquantum-python-cu11"]
+
+[tool.poetry.group.dev.dependencies]
+ipython = "^7.0.0"
+
+[tool.poetry.group.tests]
+optional = true
+
+[tool.poetry.group.tests.dependencies]
+pytest = "^8.0.0"
+pytest-cov = "^4.1.0"
+pytest-env = "^1.1.3"
+
+[tool.poetry.group.analysis]
+optional = true
+
+[tool.poetry.group.analysis.dependencies]
+pylint = "^3.0.3"
+
+[tool.poe.tasks]
+test = "pytest"
+lint = "pylint src --errors-only"
+lint-warnings = "pylint src --exit-zero"
+
+[tool.pylint.main]
+ignored-modules = ["cupy", "cuquantum", "mpi4py"]
+
 [tool.pylint.reports]
 output-format = "colorized"

--- a/setup.py
+++ b/setup.py
@@ -1,65 +0,0 @@
-import pathlib
-import re
-
-from setuptools import find_packages, setup
-
-HERE = pathlib.Path(__file__).parent.absolute()
-PACKAGE = "qibotn"
-
-
-# Returns the qibotn version
-def version():
-    """Gets the version from the package's __init__ file
-    if there is some problem, let it happily fail"""
-    version_file = HERE / "src" / PACKAGE / "__init__.py"
-    version_regex = r"^__version__ = ['\"]([^'\"]*)['\"]"
-
-    initfile = version_file.read_text(encoding="utf-8")
-    matched = re.search(version_regex, initfile, re.M)
-
-    if matched is not None:
-        return matched.group(1)
-    return "0.0.0"
-
-
-# load long description from README
-setup(
-    name="qibotn",
-    version=version(),
-    description="A tensor-network translation module for quantum computing",
-    author="The Qibo team",
-    author_email="",
-    url="https://github.com/qiboteam/qibotn",
-    packages=find_packages("src"),
-    package_dir={"": "src"},
-    package_data={"": ["*.out", "*.yml"]},
-    include_package_data=True,
-    zip_safe=False,
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "Topic :: Scientific/Engineering :: Physics",
-    ],
-    install_requires=[
-        "qibo>=0.1.10",
-        "qibojit>=0.0.7",
-        "quimb[tensor]>=1.6.0",
-    ],
-    extras_require={
-        "docs": [],
-        "tests": [
-            "pytest>=7.2.0",
-            "pytest-cov>=4.0.0",
-            "pytest-env>=0.8.1",
-        ],
-        "analysis": [
-            "pylint>=2.16.0",
-        ],
-        "cuda": [
-            "cupy>=11.6.0",
-            "cuquantum-python-cu11>=23.3.0",
-        ],
-    },
-    python_requires=">=3.8.0",
-    long_description=(HERE / "README.md").read_text(encoding="utf-8"),
-    long_description_content_type="text/markdown",
-)
--- a/src/qibotn/QiboCircuitConvertor.py
+++ b/src/qibotn/QiboCircuitConvertor.py
@@ -1,110 +0,0 @@
-import cupy as cp
-import numpy as np
-
-
-class QiboCircuitToEinsum:
-    """Convert a circuit to a Tensor Network (TN) representation.
-    The circuit is first processed to an intermediate form by grouping each gate
-    matrix with its corresponding qubit it is acting on to a list. It is then
-    converted to an equivalent TN expression through the class function
-    state_vector_operands() following the Einstein summation convention in the
-    interleave format.
-
-    See document for detail of the format: https://docs.nvidia.com/cuda/cuquantum/python/api/generated/cuquantum.contract.html
-
-    The output is to be used by cuQuantum's contract() for computation of the
-    state vectors of the circuit.
-    """
-
-    def __init__(self, circuit, dtype="complex128"):
-        self.backend = cp
-        self.dtype = getattr(self.backend, dtype)
-        self.init_basis_map(self.backend, dtype)
-        self.init_intermediate_circuit(circuit)
-
-    def state_vector_operands(self):
-        input_bitstring = "0" * len(self.active_qubits)
-
-        input_operands = self._get_bitstring_tensors(input_bitstring)
-
-        (
-            mode_labels,
-            qubits_frontier,
-            next_frontier,
-        ) = self._init_mode_labels_from_qubits(self.active_qubits)
-
-        gate_mode_labels, gate_operands = self._parse_gates_to_mode_labels_operands(
-            self.gate_tensors, qubits_frontier, next_frontier
-        )
-
-        operands = input_operands + gate_operands
-        mode_labels += gate_mode_labels
-
-        out_list = []
-        for key in qubits_frontier:
-            out_list.append(qubits_frontier[key])
-
-        operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
-        operand_exp_interleave.append(out_list)
-        return operand_exp_interleave
-
-    def _init_mode_labels_from_qubits(self, qubits):
-        n = len(qubits)
-        frontier_dict = {q: i for i, q in enumerate(qubits)}
-        mode_labels = [[i] for i in range(n)]
-        return mode_labels, frontier_dict, n
-
-    def _get_bitstring_tensors(self, bitstring):
-        return [self.basis_map[ibit] for ibit in bitstring]
-
-    def _parse_gates_to_mode_labels_operands(
-        self, gates, qubits_frontier, next_frontier
-    ):
-        mode_labels = []
-        operands = []
-
-        for tensor, gate_qubits in gates:
-            operands.append(tensor)
-            input_mode_labels = []
-            output_mode_labels = []
-            for q in gate_qubits:
-                input_mode_labels.append(qubits_frontier[q])
-                output_mode_labels.append(next_frontier)
-                qubits_frontier[q] = next_frontier
-                next_frontier += 1
-            mode_labels.append(output_mode_labels + input_mode_labels)
-        return mode_labels, operands
-
-    def op_shape_from_qubits(self, nqubits):
-        """Modify tensor to cuQuantum shape
-        (qubit_states,input_output) * qubits_involved
-        """
-        return (2, 2) * nqubits
-
-    def init_intermediate_circuit(self, circuit):
-        self.gate_tensors = []
-        gates_qubits = []
-
-        for gate in circuit.queue:
-            gate_qubits = gate.control_qubits + gate.target_qubits
-            gates_qubits.extend(gate_qubits)
-
-            # self.gate_tensors is to extract into a list the gate matrix together with the qubit id that it is acting on
-            # https://github.com/NVIDIA/cuQuantum/blob/6b6339358f859ea930907b79854b90b2db71ab92/python/cuquantum/cutensornet/_internal/circuit_parser_utils_cirq.py#L32
-            required_shape = self.op_shape_from_qubits(len(gate_qubits))
-            self.gate_tensors.append(
-                (
-                    cp.asarray(gate.matrix(), dtype=self.dtype).reshape(required_shape),
-                    gate_qubits,
-                )
-            )
-
-        # self.active_qubits is to identify qubits with at least 1 gate acting on it in the whole circuit.
-        self.active_qubits = np.unique(gates_qubits)
-
-    def init_basis_map(self, backend, dtype):
-        asarray = backend.asarray
-        state_0 = asarray([1, 0], dtype=dtype)
-        state_1 = asarray([0, 1], dtype=dtype)
-
-        self.basis_map = {"0": state_0, "1": state_1}
--- a/src/qibotn/main.py
+++ b/src/qibotn/main.py
@@ -1,20 +0,0 @@
-import argparse
-
-import qibotn.quimb
-
-
-def parser():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--nqubits", default=10, type=int, help="Number of quibits in the circuits."
-    )
-    return parser.parse_args()
-
-
-def main(args: argparse.Namespace):
-    print("Testing for %d nqubits" % (args.nqubits))
-    qibotn.quimb.eval(args.nqubits, args.qasm_circ, args.init_state)
-
-
-if __name__ == "__main__":
-    main(parser())
--- a/src/qibotn/backends/__init__.py
+++ b/src/qibotn/backends/__init__.py
@@ -0,0 +1,2 @@
+from qibotn.backends.cutensornet import CuTensorNet
+from qibotn.backends.quimb import QuimbBackend
--- a/src/qibotn/backends/cutensornet.py
+++ b/src/qibotn/backends/cutensornet.py
@@ -0,0 +1,188 @@
+import cuquantum  # pylint: disable=import-error
+import numpy as np
+from qibo.backends.numpy import NumpyBackend
+from qibo.config import raise_error
+from qibo.result import QuantumState
+
+# Maps a complex dtype name to the pair
+# (cuquantum.cudaDataType member, cuquantum.ComputeType member)
+# returned by ``CuTensorNet.cuda_type``.
+CUDA_TYPES = {
+    "complex64": (
+        cuquantum.cudaDataType.CUDA_C_32F,
+        cuquantum.ComputeType.COMPUTE_32F,
+    ),
+    "complex128": (
+        cuquantum.cudaDataType.CUDA_C_64F,
+        cuquantum.ComputeType.COMPUTE_64F,
+    ),
+}
+
+
+class CuTensorNet(NumpyBackend):  # pragma: no cover
+    """Qibo backend that contracts circuits as tensor networks with cuQuantum.
+
+    The computation mode is selected by an optional ``runcard`` dictionary with
+    the keys ``MPI_enabled``, ``NCCL_enabled``, ``MPS_enabled`` and
+    ``expectation_enabled``. Any key that is missing defaults to ``False``,
+    which selects the single-node dense-vector contraction.
+    """
+
+    # CI does not test for GPU
+
+    def __init__(self, runcard):
+        """Read the runcard and create the cuTensorNet library handle.
+
+        Args:
+            runcard (dict or None): Computation settings. ``MPI_enabled`` and
+                ``NCCL_enabled`` are stored as given. ``MPS_enabled`` and
+                ``expectation_enabled`` accept either a boolean or a
+                dictionary of options. ``None`` disables everything.
+
+        Raises:
+            TypeError: If ``MPS_enabled`` or ``expectation_enabled`` is present
+                but is neither a boolean nor a dictionary.
+        """
+        super().__init__()
+        from cuquantum import cutensornet as cutn  # pylint: disable=import-error
+
+        # Defaults used when no runcard is given or a key is omitted.
+        self.MPI_enabled = False
+        self.MPS_enabled = False
+        self.NCCL_enabled = False
+        self.expectation_enabled = False
+
+        if runcard is not None:
+            self.MPI_enabled = runcard.get("MPI_enabled", False)
+            self.NCCL_enabled = runcard.get("NCCL_enabled", False)
+
+            # A missing key means "disabled" rather than an invalid type.
+            expectation_enabled_value = runcard.get("expectation_enabled", False)
+            if expectation_enabled_value is True:
+                self.expectation_enabled = True
+                # Pattern used when the caller enables expectation values
+                # without specifying one.
+                self.pauli_string_pattern = "XXXZ"
+            elif expectation_enabled_value is False:
+                self.expectation_enabled = False
+            elif isinstance(expectation_enabled_value, dict):
+                self.expectation_enabled = True
+                self.pauli_string_pattern = expectation_enabled_value.get(
+                    "pauli_string_pattern", None
+                )
+            else:
+                raise TypeError("expectation_enabled has an unexpected type")
+
+            mps_enabled_value = runcard.get("MPS_enabled", False)
+            if mps_enabled_value is True:
+                self.MPS_enabled = True
+                # Gate-splitting options used when MPS is enabled without
+                # explicit options.
+                self.gate_algo = {
+                    "qr_method": False,
+                    "svd_method": {
+                        "partition": "UV",
+                        "abs_cutoff": 1e-12,
+                    },
+                }
+            elif mps_enabled_value is False:
+                self.MPS_enabled = False
+            elif isinstance(mps_enabled_value, dict):
+                self.MPS_enabled = True
+                self.gate_algo = mps_enabled_value
+            else:
+                raise TypeError("MPS_enabled has an unexpected type")
+
+        self.name = "qibotn"
+        self.cuquantum = cuquantum
+        self.cutn = cutn
+        self.platform = "cutensornet"
+        self.versions["cuquantum"] = self.cuquantum.__version__
+        self.supports_multigpu = True
+        self.handle = self.cutn.create()
+
+    def apply_gate(self, gate, state, nqubits):  # pragma: no cover
+        """Not supported: gates are never applied one by one in QiboTN."""
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def apply_gate_density_matrix(self, gate, state, nqubits):  # pragma: no cover
+        """Not supported: gates are never applied one by one in QiboTN."""
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def assign_measurements(self, measurement_map, circuit_result):
+        """Not supported by this backend."""
+        raise_error(NotImplementedError, "Not implemented in QiboTN.")
+
+    def __del__(self):
+        """Destroy the cuTensorNet handle if one was created."""
+        # ``handle`` is the last attribute set in ``__init__``; it is missing
+        # when construction failed part-way, in which case there is nothing
+        # to release.
+        handle = getattr(self, "handle", None)
+        if handle is not None:
+            self.cutn.destroy(handle)
+
+    def set_precision(self, precision):
+        """Change the precision, skipping the update when it is unchanged."""
+        if precision != self.precision:
+            super().set_precision(precision)
+
+    def cuda_type(self, dtype="complex64"):
+        """Return the ``(cudaDataType, ComputeType)`` pair for ``dtype``.
+
+        Args:
+            dtype (str): Either ``"complex64"`` or ``"complex128"``.
+
+        Raises:
+            TypeError: If ``dtype`` is not one of the supported names.
+        """
+        if dtype in CUDA_TYPES:
+            return CUDA_TYPES[dtype]
+        raise TypeError("Type can be either complex64 or complex128")
+
+    def execute_circuit(
+        self, circuit, initial_state=None, nshots=None, return_array=False
+    ):  # pragma: no cover
+        """Executes a quantum circuit.
+
+        Args:
+            circuit (:class:`qibo.models.circuit.Circuit`): Circuit to execute.
+            initial_state: Must be ``None``; a custom initial state is not
+                supported and is reported as ``NotImplementedError``.
+            nshots: Accepted for interface compatibility; not used here.
+            return_array (bool): If ``True`` return the flattened result array,
+                otherwise wrap it in a :class:`qibo.result.QuantumState`.
+
+        Returns:
+            The flattened result of the selected computation, either as an
+            array or wrapped in a ``QuantumState``. In the MPI/NCCL modes,
+            ranks other than 0 return a zero placeholder instead.
+        """
+
+        # Aliased so the builtin ``eval`` is not shadowed.
+        import qibotn.eval as tn_eval
+
+        if initial_state is not None:
+            raise_error(NotImplementedError, "QiboTN cannot support initial state.")
+
+        # (MPI, MPS, NCCL, expectation); tuple equality compares element-wise
+        # with ``==``, exactly like the individual flag comparisons would.
+        flags = (
+            self.MPI_enabled,
+            self.MPS_enabled,
+            self.NCCL_enabled,
+            self.expectation_enabled,
+        )
+        # Single-process modes behave like rank 0.
+        rank = 0
+
+        # NOTE(review): the trailing ``32`` is passed through to the
+        # multi-node helpers in ``qibotn.eval``; its meaning is defined there.
+        if flags == (False, False, False, False):
+            state = tn_eval.dense_vector_tn(circuit, self.dtype)
+        elif flags == (False, True, False, False):
+            state = tn_eval.dense_vector_mps(circuit, self.gate_algo, self.dtype)
+        elif flags == (True, False, False, False):
+            state, rank = tn_eval.dense_vector_tn_MPI(circuit, self.dtype, 32)
+        elif flags == (False, False, True, False):
+            state, rank = tn_eval.dense_vector_tn_nccl(circuit, self.dtype, 32)
+        elif flags == (False, False, False, True):
+            state = tn_eval.expectation_pauli_tn(
+                circuit, self.dtype, self.pauli_string_pattern
+            )
+        elif flags == (True, False, False, True):
+            state, rank = tn_eval.expectation_pauli_tn_MPI(
+                circuit, self.dtype, self.pauli_string_pattern, 32
+            )
+        elif flags == (False, False, True, True):
+            state, rank = tn_eval.expectation_pauli_tn_nccl(
+                circuit, self.dtype, self.pauli_string_pattern, 32
+            )
+        else:
+            raise_error(NotImplementedError, "Compute type not supported.")
+
+        if rank > 0:
+            # Only rank 0 keeps the result; other ranks get a placeholder.
+            state = np.array(0)
+
+        if return_array:
+            return state.flatten()
+        return QuantumState(state.flatten())
--- a/src/qibotn/backends/quimb.py
+++ b/src/qibotn/backends/quimb.py
@@ -0,0 +1,85 @@
+from qibo.backends.numpy import NumpyBackend
+from qibo.config import raise_error
+from qibo.result import QuantumState
+
+
+class QuimbBackend(NumpyBackend):
+    """Qibo backend that evaluates whole circuits with quimb tensor networks.
+
+    Circuits are executed through ``qibotn.eval_qu``; applying individual
+    gates is not supported. MPI, NCCL and expectation-value modes are
+    rejected by :meth:`execute_circuit`.
+    """
+
+    def __init__(self, runcard):
+        """Configure the backend from an optional runcard.
+
+        Args:
+            runcard (dict, optional): Execution settings. ``MPI_enabled``,
+                ``NCCL_enabled`` and ``expectation_enabled`` default to
+                ``False`` when absent. ``MPS_enabled`` must be ``True`` (use
+                the default SVD truncation options), ``False`` (no MPS) or
+                a dict of options that is stored as-is. If ``runcard`` is
+                ``None`` every mode is disabled.
+
+        Raises:
+            TypeError: If ``MPS_enabled`` is missing from the runcard or is
+                neither a bool nor a dict.
+        """
+        super().__init__()
+        import quimb  # pylint: disable=import-error
+
+        if runcard is not None:
+            self.MPI_enabled = runcard.get("MPI_enabled", False)
+            self.NCCL_enabled = runcard.get("NCCL_enabled", False)
+            self.expectation_enabled = runcard.get("expectation_enabled", False)
+
+            mps_enabled_value = runcard.get("MPS_enabled")
+            if mps_enabled_value is True:
+                # Default truncation options used when MPS is simply switched on.
+                self.mps_opts = {
+                    "method": "svd",
+                    "cutoff": 1e-6,
+                    "cutoff_mode": "abs",
+                }
+            elif mps_enabled_value is False:
+                self.mps_opts = False
+            elif isinstance(mps_enabled_value, dict):
+                self.mps_opts = mps_enabled_value
+            else:
+                raise TypeError("MPS_enabled has an unexpected type")
+
+        else:
+            self.MPI_enabled = False
+            self.MPS_enabled = False
+            self.NCCL_enabled = False
+            self.expectation_enabled = False
+            # execute_circuit always reads mps_opts, so it has to exist even
+            # when no runcard is supplied.
+            self.mps_opts = False
+
+        self.name = "qibotn"
+        self.quimb = quimb
+        self.platform = "QuimbBackend"
+        self.versions["quimb"] = self.quimb.__version__
+
+    def apply_gate(self, gate, state, nqubits):  # pragma: no cover
+        """Not supported: always raises ``NotImplementedError``."""
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def apply_gate_density_matrix(self, gate, state, nqubits):  # pragma: no cover
+        """Not supported: always raises ``NotImplementedError``."""
+        raise_error(NotImplementedError, "QiboTN cannot apply gates directly.")
+
+    def assign_measurements(self, measurement_map, circuit_result):
+        """Not supported: always raises ``NotImplementedError``."""
+        raise_error(NotImplementedError, "Not implemented in QiboTN.")
+
+    def set_precision(self, precision):
+        """Change the precision only when it differs from the current one."""
+        if precision != self.precision:
+            super().set_precision(precision)
+
+    def execute_circuit(
+        self, circuit, initial_state=None, nshots=None, return_array=False
+    ):  # pragma: no cover
+        """Executes a quantum circuit.
+
+        The circuit is exported to OpenQASM and evaluated by
+        ``qibotn.eval_qu.dense_vector_tn_qu`` with the numpy backend.
+
+        Args:
+            circuit (:class:`qibo.models.circuit.Circuit`): Circuit to execute.
+            initial_state: Initial state forwarded to ``dense_vector_tn_qu``.
+                If ``None`` the default ``|00...0>`` state is used.
+            nshots: Accepted for interface compatibility; not used here.
+            return_array (bool): If ``True`` return the flattened amplitudes
+                instead of wrapping them in a ``QuantumState``.
+
+        Returns:
+            The flattened dense state vector, either as an array
+            (``return_array=True``) or wrapped in
+            :class:`qibo.result.QuantumState`.
+
+        Raises:
+            NotImplementedError: If MPI, NCCL or expectation mode is enabled.
+        """
+
+        import qibotn.eval_qu as eval_qu
+
+        if self.MPI_enabled:
+            raise_error(NotImplementedError, "QiboTN quimb backend cannot support MPI.")
+        if self.NCCL_enabled:
+            raise_error(
+                NotImplementedError, "QiboTN quimb backend cannot support NCCL."
+            )
+        if self.expectation_enabled:
+            raise_error(
+                NotImplementedError, "QiboTN quimb backend cannot support expectation"
+            )
+
+        state = eval_qu.dense_vector_tn_qu(
+            circuit.to_qasm(), initial_state, self.mps_opts, backend="numpy"
+        )
+
+        if return_array:
+            return state.flatten()
+        return QuantumState(state.flatten())
--- a/src/qibotn/circuit_convertor.py
+++ b/src/qibotn/circuit_convertor.py
@@ -0,0 +1,206 @@
+import cupy as cp
+import numpy as np
+
+# Reference: https://github.com/NVIDIA/cuQuantum/tree/main/python/samples/cutensornet/circuit_converter
+
+
+class QiboCircuitToEinsum:
+    """Convert a circuit to a Tensor Network (TN) representation.
+
+    The circuit is first processed to an intermediate form by pairing each
+    gate matrix with the qubits it acts on. It is then converted to an
+    equivalent TN expression following the Einstein summation convention in
+    the interleaved format: ``state_vector_operands()`` builds the network
+    for the state vector and ``expectation_operands()`` the one for the
+    expectation value of a Pauli string.
+
+    See document for detail of the format: https://docs.nvidia.com/cuda/cuquantum/python/api/generated/cuquantum.contract.html
+
+    The output is to be used by cuQuantum's contract() for computation of the
+    state vectors of the circuit.
+    """
+
+    def __init__(self, circuit, dtype="complex128"):
+        """Build the basis tensors and the gate tensors of ``circuit``.
+
+        Args:
+            circuit: Circuit whose ``queue`` of gates is converted.
+            dtype (str): Name of the CuPy dtype used for the tensors.
+        """
+        self.backend = cp
+        self.dtype = getattr(self.backend, dtype)
+        self.init_basis_map(self.backend, dtype)
+        self.init_intermediate_circuit(circuit)
+        self.circuit = circuit
+
+    def state_vector_operands(self):
+        """Return the interleaved operands of the state-vector network.
+
+        Returns:
+            list: ``[operand_0, modes_0, operand_1, modes_1, ..., out_modes]``
+            where the operands are the ``|0>`` tensors of the active qubits
+            followed by the gate tensors, and ``out_modes`` are the final
+            mode labels of the active qubits.
+        """
+        input_bitstring = "0" * len(self.active_qubits)
+
+        input_operands = self._get_bitstring_tensors(input_bitstring)
+
+        (
+            mode_labels,
+            qubits_frontier,
+            next_frontier,
+        ) = self._init_mode_labels_from_qubits(self.active_qubits)
+
+        gate_mode_labels, gate_operands = self._parse_gates_to_mode_labels_operands(
+            self.gate_tensors, qubits_frontier, next_frontier
+        )
+
+        operands = input_operands + gate_operands
+        mode_labels += gate_mode_labels
+
+        # After parsing, the frontier holds the open (output) mode of each qubit.
+        out_list = list(qubits_frontier.values())
+
+        operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
+        operand_exp_interleave.append(out_list)
+        return operand_exp_interleave
+
+    def _init_mode_labels_from_qubits(self, qubits):
+        """Assign one initial mode label per qubit.
+
+        Returns:
+            tuple: ``(mode_labels, frontier_dict, n)`` where ``mode_labels``
+            is ``[[0], [1], ...]``, ``frontier_dict`` maps each qubit to its
+            current mode label and ``n`` is the next unused label.
+        """
+        n = len(qubits)
+        frontier_dict = {q: i for i, q in enumerate(qubits)}
+        mode_labels = [[i] for i in range(n)]
+        return mode_labels, frontier_dict, n
+
+    def _get_bitstring_tensors(self, bitstring):
+        """Return the basis tensor for every character of ``bitstring``."""
+        return [self.basis_map[ibit] for ibit in bitstring]
+
+    def _parse_gates_to_mode_labels_operands(
+        self, gates, qubits_frontier, next_frontier
+    ):
+        """Assign mode labels to a sequence of ``(tensor, qubits)`` gates.
+
+        Each gate gets a fresh output label per qubit; its input labels are
+        the current frontier labels of those qubits. ``qubits_frontier`` is
+        updated in place, while ``next_frontier`` is only a local counter.
+
+        Returns:
+            tuple: ``(mode_labels, operands)`` with one entry per gate; the
+            labels of a gate are its output labels followed by its input
+            labels.
+        """
+        mode_labels = []
+        operands = []
+
+        for tensor, gate_qubits in gates:
+            operands.append(tensor)
+            input_mode_labels = []
+            output_mode_labels = []
+            for q in gate_qubits:
+                input_mode_labels.append(qubits_frontier[q])
+                output_mode_labels.append(next_frontier)
+                qubits_frontier[q] = next_frontier
+                next_frontier += 1
+            mode_labels.append(output_mode_labels + input_mode_labels)
+        return mode_labels, operands
+
+    def op_shape_from_qubits(self, nqubits):
+        """Return the gate tensor shape ``(2, 2) * nqubits`` used for
+        cuQuantum, i.e. two axes of size 2 per qubit involved."""
+        return (2, 2) * nqubits
+
+    def _gate_tensors_from_circuit(self, circuit):
+        """Extract ``(tensor, qubits)`` pairs and the acted-on qubits.
+
+        Every gate matrix is converted to a CuPy array of ``self.dtype`` so
+        that all operands of a network share one dtype.
+        """
+        gate_tensors = []
+        gates_qubits = []
+
+        for gate in circuit.queue:
+            gate_qubits = gate.control_qubits + gate.target_qubits
+            gates_qubits.extend(gate_qubits)
+
+            # Pair the gate matrix with the qubit ids it is acting on, see
+            # https://github.com/NVIDIA/cuQuantum/blob/6b6339358f859ea930907b79854b90b2db71ab92/python/cuquantum/cutensornet/_internal/circuit_parser_utils_cirq.py#L32
+            required_shape = self.op_shape_from_qubits(len(gate_qubits))
+            gate_tensors.append(
+                (
+                    cp.asarray(gate.matrix(), dtype=self.dtype).reshape(required_shape),
+                    gate_qubits,
+                )
+            )
+
+        # Qubits with at least 1 gate acting on them in the whole circuit.
+        return gate_tensors, np.unique(gates_qubits)
+
+    def init_intermediate_circuit(self, circuit):
+        """Populate ``self.gate_tensors`` and ``self.active_qubits`` from
+        ``circuit``."""
+        self.gate_tensors, self.active_qubits = self._gate_tensors_from_circuit(
+            circuit
+        )
+
+    def init_basis_map(self, backend, dtype):
+        """Create the ``|0>`` and ``|1>`` basis tensors, keyed by ``"0"`` and
+        ``"1"`` in ``self.basis_map``."""
+        asarray = backend.asarray
+        state_0 = asarray([1, 0], dtype=dtype)
+        state_1 = asarray([0, 1], dtype=dtype)
+
+        self.basis_map = {"0": state_0, "1": state_1}
+
+    def init_inverse_circuit(self, circuit):
+        """Populate ``self.gate_tensors_inverse`` and
+        ``self.active_qubits_inverse`` from ``circuit`` (the caller passes
+        the inverted circuit)."""
+        (
+            self.gate_tensors_inverse,
+            self.active_qubits_inverse,
+        ) = self._gate_tensors_from_circuit(circuit)
+
+    def get_pauli_gates(self, pauli_map, dtype="complex128", backend=cp):
+        """Populate the gates for all pauli operators.
+
+        Args:
+            pauli_map: A dictionary mapping qubits to pauli operators.
+            dtype: Data type for the tensor operands.
+            backend: The package the tensor operands belong to.
+
+        Returns:
+            A sequence of pauli gates, each as ``(operand, (qubit,))``.
+
+        Raises:
+            ValueError: If a character is not one of I/X/Y/Z.
+        """
+        asarray = backend.asarray
+        pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
+        pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
+        pauli_y = asarray([[0, -1j], [1j, 0]], dtype=dtype)
+        pauli_z = asarray([[1, 0], [0, -1]], dtype=dtype)
+
+        operand_map = {"I": pauli_i, "X": pauli_x, "Y": pauli_y, "Z": pauli_z}
+        gates = []
+        for qubit, pauli_char in pauli_map.items():
+            operand = operand_map.get(pauli_char)
+            if operand is None:
+                raise ValueError("pauli string character must be one of I/X/Y/Z")
+            gates.append((operand, (qubit,)))
+        return gates
+
+    def expectation_operands(self, pauli_string):
+        """Return the interleaved operands of the expectation-value network.
+
+        The network chains the ``|0>`` tensors of all qubits, the circuit
+        gates, one Pauli gate per qubit, the gates of the inverted circuit
+        and finally the same ``|0>`` tensors closing the open modes, so no
+        output modes are appended.
+
+        Args:
+            pauli_string: One of I/X/Y/Z per qubit, in qubit order.
+
+        Returns:
+            list: ``[operand_0, modes_0, operand_1, modes_1, ...]``.
+        """
+        nqubits = self.circuit.nqubits
+        input_bitstring = "0" * nqubits
+
+        input_operands = self._get_bitstring_tensors(input_bitstring)
+        pauli_map = dict(zip(range(nqubits), pauli_string))
+
+        (
+            mode_labels,
+            qubits_frontier,
+            next_frontier,
+        ) = self._init_mode_labels_from_qubits(range(nqubits))
+
+        gate_mode_labels, gate_operands = self._parse_gates_to_mode_labels_operands(
+            self.gate_tensors, qubits_frontier, next_frontier
+        )
+
+        operands = input_operands + gate_operands
+        mode_labels += gate_mode_labels
+
+        self.init_inverse_circuit(self.circuit.invert())
+
+        # The parser only advanced its local counter, so recompute the next
+        # free label from the updated frontier.
+        next_frontier = max(qubits_frontier.values()) + 1
+
+        pauli_gates = self.get_pauli_gates(
+            pauli_map, dtype=self.dtype, backend=self.backend
+        )
+
+        gates_inverse = pauli_gates + self.gate_tensors_inverse
+
+        (
+            gate_mode_labels_inverse,
+            gate_operands_inverse,
+        ) = self._parse_gates_to_mode_labels_operands(
+            gates_inverse, qubits_frontier, next_frontier
+        )
+        mode_labels = (
+            mode_labels
+            + gate_mode_labels_inverse
+            + [[qubits_frontier[ix]] for ix in range(nqubits)]
+        )
+        # Close the network with the same basis tensors used at the input.
+        operands = operands + gate_operands_inverse + input_operands
+
+        operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
+
+        return operand_exp_interleave
--- a/src/qibotn/QiboCircuitToMPS.py
+++ b/src/qibotn/QiboCircuitToMPS.py
@@ -2,8 +2,8 @@ import cupy as cp
 import numpy as np
 from cuquantum import cutensornet as cutn

-from qibotn.MPSUtils import apply_gate, initial
-from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
+from qibotn.circuit_convertor import QiboCircuitToEinsum
+from qibotn.mps_utils import apply_gate, initial


 class QiboCircuitToMPS:
@@ -21,7 +21,7 @@ class QiboCircuitToMPS:
        self.handle = cutn.create()
        self.dtype = dtype
        self.mps_tensors = initial(self.num_qubits, dtype=dtype)
-        circuitconvertor = QiboCircuitToEinsum(circ_qibo)
+        circuitconvertor = QiboCircuitToEinsum(circ_qibo, dtype=dtype)

        for gate, qubits in circuitconvertor.gate_tensors:
            # mapping from qubits to qubit indices
--- a/src/qibotn/cutn.py
+++ b/src/qibotn/cutn.py
@@ -1,60 +0,0 @@
-import multiprocessing
-
-import cupy as cp
-from cupy.cuda.runtime import getDeviceCount
-from cuquantum import contract
-from cuquantum import cutensornet as cutn
-
-from qibotn.mps_contraction_helper import MPSContractionHelper
-from qibotn.QiboCircuitConvertor import QiboCircuitToEinsum
-from qibotn.QiboCircuitToMPS import QiboCircuitToMPS
-
-
-def eval(qibo_circ, datatype):
-    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    return contract(*myconvertor.state_vector_operands())
-
-
-def eval_tn_MPI(qibo_circ, datatype, n_samples=8):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction using multi node and multi GPU through MPI.
-    The conversion is performed by QiboCircuitToEinsum(), after which it goes through 2 steps: pathfinder and execution.
-    The pathfinder looks at user defined number of samples (n_samples) iteratively to select the least costly contraction path. This is sped up with multi thread.
-    After pathfinding the optimal path is used in the actual contraction to give a dense vector representation of the TN.
-    """
-
-    from mpi4py import MPI  # this line initializes MPI
-
-    ncpu_threads = multiprocessing.cpu_count() // 2
-
-    comm = MPI.COMM_WORLD
-    rank = comm.Get_rank()
-    device_id = rank % getDeviceCount()
-    cp.cuda.Device(device_id).use()
-
-    handle = cutn.create()
-    cutn.distributed_reset_configuration(handle, *cutn.get_mpi_comm_pointer(comm))
-    network_opts = cutn.NetworkOptions(handle=handle, blocking="auto")
-
-    # Perform circuit conversion
-    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    operands_interleave = myconvertor.state_vector_operands()
-
-    # Pathfinder: To search for the optimal path. Optimal path are assigned to path and info attribute of the network object.
-    network = cutn.Network(*operands_interleave, options=network_opts)
-    network.contract_path(optimize={"samples": n_samples, "threads": ncpu_threads})
-
-    # Execution: To execute the contraction using the optimal path found previously
-    result = network.contract()
-
-    cutn.destroy(handle)
-
-    return result, rank
-
-
-def eval_mps(qibo_circ, gate_algo, datatype):
-    myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
-    mps_helper = MPSContractionHelper(myconvertor.num_qubits)
-
-    return mps_helper.contract_state_vector(
-        myconvertor.mps_tensors, {"handle": myconvertor.handle}
-    )
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -0,0 +1,352 @@
+import cupy as cp
+from cupy.cuda.runtime import getDeviceCount
+from cuquantum import contract
+
+from qibotn.circuit_convertor import QiboCircuitToEinsum
+from qibotn.circuit_to_mps import QiboCircuitToMPS
+from qibotn.mps_contraction_helper import MPSContractionHelper
+
+
+def dense_vector_tn(qibo_circ, datatype):
+    """Contract the tensor network (TN) of a qibo circuit to a dense vector.
+
+    The circuit is converted by QiboCircuitToEinsum() and its state-vector
+    operands are handed to cuQuantum's contract().
+    """
+    converter = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    operands = converter.state_vector_operands()
+    return contract(*operands)
+
+
+def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
+    """Contract the tensor network (TN) of a qibo circuit to the expectation
+    of a Pauli string.
+
+    The Pauli string is generated from ``pauli_string_pattern`` for the
+    number of qubits of the circuit.
+    """
+    converter = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    pauli_string = pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
+    operands = converter.expectation_operands(pauli_string)
+    return contract(*operands)
+
+
+def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction
+    using multi node and multi GPU through MPI.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. Every rank runs the
+    pathfinder with a user defined number of samples (n_samples) and
+    slicing forced to at least ``max(32, size)`` slices; the least costly
+    path among all ranks is broadcast and used in the actual contraction.
+    Each rank contracts its own share of the slices and the partial
+    results are summed on rank 0 to give a dense vector representation of
+    the TN.
+
+    Args:
+        qibo_circ: Circuit passed to QiboCircuitToEinsum().
+        datatype: dtype name passed to QiboCircuitToEinsum().
+        n_samples (int): Number of pathfinder samples.
+
+    Returns:
+        tuple: ``(result, rank)``. The summed result is delivered to rank 0
+        only; ``rank`` is the MPI rank of the calling process.
+    """
+
+    from cuquantum import Network
+    from mpi4py import MPI
+
+    root = 0
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+
+    # Perform circuit conversion
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+
+    operands = myconvertor.state_vector_operands()
+
+    # Assign the device for each process.
+    device_id = rank % getDeviceCount()
+
+    # Create network object.
+    network = Network(*operands, options={"device_id": device_id})
+
+    # Compute the path on all ranks with n_samples samples for hyperoptimization. Force slicing to enable parallel contraction.
+    _, info = network.contract_path(
+        optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
+    )
+
+    # Select the best path from all ranks.
+    _, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
+
+    # Broadcast info from the sender to all other ranks.
+    info = comm.bcast(info, sender)
+
+    # Set path and slices.
+    _, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )
+
+    # Calculate this process's share of the slices.
+    num_slices = info.num_slices
+    chunk, extra = num_slices // size, num_slices % size
+    slice_begin = rank * chunk + min(rank, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
+    slices = range(slice_begin, slice_end)
+
+    # Contract the group of slices the process is responsible for.
+    result = network.contract(slices=slices)
+
+    # Sum the partial contribution from each process on root.
+    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
+
+    return result, rank
+
+
+def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction
+    using multi node and multi GPU through NCCL.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. Every rank runs the
+    pathfinder with a user defined number of samples (n_samples) and
+    slicing forced to at least ``max(32, size)`` slices; the least costly
+    path among all ranks is broadcast and used in the actual contraction.
+    Each rank contracts its own share of the slices and the partial
+    results are summed in place on rank 0 with an NCCL reduce to give a
+    dense vector representation of the TN.
+
+    Args:
+        qibo_circ: Circuit passed to QiboCircuitToEinsum().
+        datatype: dtype name passed to QiboCircuitToEinsum().
+        n_samples (int): Number of pathfinder samples.
+
+    Returns:
+        tuple: ``(result, rank)``. Rank 0 is the root of the reduction, so
+        its ``result`` receives the sum; ``rank`` is the MPI rank of the
+        calling process.
+    """
+    from cupy.cuda import nccl
+    from cuquantum import Network
+    from mpi4py import MPI
+
+    root = 0
+    comm_mpi = MPI.COMM_WORLD
+    rank = comm_mpi.Get_rank()
+    size = comm_mpi.Get_size()
+
+    device_id = rank % getDeviceCount()
+
+    cp.cuda.Device(device_id).use()
+
+    # Set up the NCCL communicator.
+    nccl_id = nccl.get_unique_id() if rank == root else None
+    nccl_id = comm_mpi.bcast(nccl_id, root)
+    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
+
+    # Perform circuit conversion
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    operands = myconvertor.state_vector_operands()
+
+    network = Network(*operands)
+
+    # Compute the path on all ranks with n_samples samples for hyperoptimization. Force slicing to enable parallel contraction.
+    _, info = network.contract_path(
+        optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
+    )
+
+    # Select the best path from all ranks.
+    _, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
+
+    # Broadcast info from the sender to all other ranks.
+    info = comm_mpi.bcast(info, sender)
+
+    # Set path and slices.
+    _, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )
+
+    # Calculate this process's share of the slices.
+    num_slices = info.num_slices
+    chunk, extra = num_slices // size, num_slices % size
+    slice_begin = rank * chunk + min(rank, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
+    slices = range(slice_begin, slice_end)
+
+    # Contract the group of slices the process is responsible for.
+    result = network.contract(slices=slices)
+
+    # Sum the partial contribution from each process on root. NCCL has no
+    # complex data types, so a complex buffer is reduced as twice as many
+    # real values of the matching precision.
+    components = 2 if result.dtype.kind == "c" else 1
+    single_precision = result.dtype.itemsize // components == 4
+    nccl_dtype = nccl.NCCL_FLOAT32 if single_precision else nccl.NCCL_FLOAT64
+    stream_ptr = cp.cuda.get_current_stream().ptr
+    comm_nccl.reduce(
+        result.data.ptr,
+        result.data.ptr,
+        result.size * components,
+        nccl_dtype,
+        nccl.NCCL_SUM,
+        root,
+        stream_ptr,
+    )
+
+    return result, rank
+
+
+def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    expectation of given Pauli string using multi node and multi GPU through
+    NCCL.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The
+    pauli_string_pattern is used to generate the pauli string
+    corresponding to the number of qubits of the system. Every rank runs
+    the pathfinder with a user defined number of samples (n_samples) and
+    slicing forced to at least ``max(32, size)`` slices; the least costly
+    path among all ranks is broadcast and used in the actual contraction.
+    Each rank contracts its own share of the slices and the partial
+    results are summed in place on rank 0 with an NCCL reduce to give an
+    expectation value.
+
+    Args:
+        qibo_circ: Circuit passed to QiboCircuitToEinsum().
+        datatype: dtype name passed to QiboCircuitToEinsum().
+        pauli_string_pattern: Pattern repeated by pauli_string_gen().
+        n_samples (int): Number of pathfinder samples.
+
+    Returns:
+        tuple: ``(result, rank)``. Rank 0 is the root of the reduction, so
+        its ``result`` receives the sum; ``rank`` is the MPI rank of the
+        calling process.
+    """
+    from cupy.cuda import nccl
+    from cuquantum import Network
+    from mpi4py import MPI
+
+    root = 0
+    comm_mpi = MPI.COMM_WORLD
+    rank = comm_mpi.Get_rank()
+    size = comm_mpi.Get_size()
+
+    device_id = rank % getDeviceCount()
+
+    cp.cuda.Device(device_id).use()
+
+    # Set up the NCCL communicator.
+    nccl_id = nccl.get_unique_id() if rank == root else None
+    nccl_id = comm_mpi.bcast(nccl_id, root)
+    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
+
+    # Perform circuit conversion
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    operands = myconvertor.expectation_operands(
+        pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
+    )
+
+    network = Network(*operands)
+
+    # Compute the path on all ranks with n_samples samples for hyperoptimization. Force slicing to enable parallel contraction.
+    _, info = network.contract_path(
+        optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
+    )
+
+    # Select the best path from all ranks.
+    _, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
+
+    # Broadcast info from the sender to all other ranks.
+    info = comm_mpi.bcast(info, sender)
+
+    # Set path and slices.
+    _, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )
+
+    # Calculate this process's share of the slices.
+    num_slices = info.num_slices
+    chunk, extra = num_slices // size, num_slices % size
+    slice_begin = rank * chunk + min(rank, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
+    slices = range(slice_begin, slice_end)
+
+    # Contract the group of slices the process is responsible for.
+    result = network.contract(slices=slices)
+
+    # Sum the partial contribution from each process on root. NCCL has no
+    # complex data types, so a complex buffer is reduced as twice as many
+    # real values of the matching precision.
+    components = 2 if result.dtype.kind == "c" else 1
+    single_precision = result.dtype.itemsize // components == 4
+    nccl_dtype = nccl.NCCL_FLOAT32 if single_precision else nccl.NCCL_FLOAT64
+    stream_ptr = cp.cuda.get_current_stream().ptr
+    comm_nccl.reduce(
+        result.data.ptr,
+        result.data.ptr,
+        result.size * components,
+        nccl_dtype,
+        nccl.NCCL_SUM,
+        root,
+        stream_ptr,
+    )
+
+    return result, rank
+
+
+def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    expectation of given Pauli string using multi node and multi GPU through
+    MPI.
+
+    The conversion is performed by QiboCircuitToEinsum(), after which it
+    goes through 2 steps: pathfinder and execution. The
+    pauli_string_pattern is used to generate the pauli string
+    corresponding to the number of qubits of the system. Every rank runs
+    the pathfinder with a user defined number of samples (n_samples) and
+    slicing forced to at least ``max(32, size)`` slices; the least costly
+    path among all ranks is broadcast and used in the actual contraction.
+    Each rank contracts its own share of the slices and the partial
+    results are summed on rank 0 to give an expectation value.
+
+    Args:
+        qibo_circ: Circuit passed to QiboCircuitToEinsum().
+        datatype: dtype name passed to QiboCircuitToEinsum().
+        pauli_string_pattern: Pattern repeated by pauli_string_gen().
+        n_samples (int): Number of pathfinder samples.
+
+    Returns:
+        tuple: ``(result, rank)``. The summed result is delivered to rank 0
+        only; ``rank`` is the MPI rank of the calling process.
+    """
+    from cuquantum import Network
+    from mpi4py import MPI  # this line initializes MPI
+
+    root = 0
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+
+    # Perform circuit conversion
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+
+    operands = myconvertor.expectation_operands(
+        pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
+    )
+
+    # Assign the device for each process.
+    device_id = rank % getDeviceCount()
+
+    # Create network object.
+    network = Network(*operands, options={"device_id": device_id})
+
+    # Compute the path on all ranks with n_samples samples for hyperoptimization. Force slicing to enable parallel contraction.
+    _, info = network.contract_path(
+        optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
+    )
+
+    # Select the best path from all ranks.
+    _, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
+
+    # Broadcast info from the sender to all other ranks.
+    info = comm.bcast(info, sender)
+
+    # Set path and slices.
+    _, info = network.contract_path(
+        optimize={"path": info.path, "slicing": info.slices}
+    )
+
+    # Calculate this process's share of the slices.
+    num_slices = info.num_slices
+    chunk, extra = num_slices // size, num_slices % size
+    slice_begin = rank * chunk + min(rank, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
+    slices = range(slice_begin, slice_end)
+
+    # Contract the group of slices the process is responsible for.
+    result = network.contract(slices=slices)
+
+    # Sum the partial contribution from each process on root.
+    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
+
+    return result, rank
+
+
+def dense_vector_mps(qibo_circ, gate_algo, datatype):
+    """Contract the matrix product state (MPS) of a qibo circuit to a dense
+    vector.
+
+    The MPS is built by QiboCircuitToMPS() and contracted by
+    MPSContractionHelper using the converter's handle.
+    """
+    converter = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
+    helper = MPSContractionHelper(converter.num_qubits)
+    tensors = converter.mps_tensors
+    contract_options = {"handle": converter.handle}
+    return helper.contract_state_vector(tensors, contract_options)
+
+
+def pauli_string_gen(nqubits, pauli_string_pattern):
+    """Used internally to generate the string based on given pattern and number
+    of qubit.
+
+    The pattern is repeated cyclically and cut to ``nqubits`` characters.
+
+    Example: pattern: "XZ", number of qubit: 7, output = XZXZXZX
+
+    Args:
+        nqubits (int): Length of the generated string.
+        pauli_string_pattern: Non-empty pattern to repeat.
+
+    Returns:
+        str: The generated string. For ``nqubits <= 0`` the message
+        ``"Invalid input. N should be a positive integer."`` is returned
+        instead of raising.
+
+    Raises:
+        ValueError: If ``pauli_string_pattern`` is empty.
+    """
+    if nqubits <= 0:
+        return "Invalid input. N should be a positive integer."
+
+    pattern_length = len(pauli_string_pattern)
+    if pattern_length == 0:
+        # Report the real problem instead of a ZeroDivisionError from the modulo.
+        raise ValueError("pauli_string_pattern must not be empty.")
+
+    return "".join(
+        pauli_string_pattern[i % pattern_length] for i in range(nqubits)
+    )
--- a/src/qibotn/eval_qu.py
+++ b/src/qibotn/eval_qu.py
@@ -0,0 +1,31 @@
+import numpy as np
+import quimb.tensor as qtn
+
+
+def init_state_tn(nqubits, init_state_sv):
+    """Build a quimb matrix product state from the dense vector
+    ``init_state_sv``, using one site of dimension 2 per qubit."""
+    site_dims = tuple(np.full(nqubits, 2, dtype=int))
+    mps_cls = qtn.tensor_1d.MatrixProductState
+    return mps_cls.from_dense(init_state_sv, site_dims)
+
+
+def dense_vector_tn_qu(qasm: str, initial_state, mps_opts, backend="numpy"):
+    """Evaluate QASM with Quimb.
+
+    Args:
+        qasm (str): OpenQASM 2 source of the circuit.
+        initial_state: Dense initial state vector, or ``None`` to let quimb
+            use its default. Its length is taken as ``2**nqubits``.
+        mps_opts: Gate options for the MPS simulation. A truthy value
+            selects ``CircuitMPS`` and is forwarded as ``gate_opts``; a
+            falsy value (``False``, ``None``, ``{}``) selects the plain
+            ``Circuit`` with no gate options.
+        backend (quimb): numpy, cupy, jax. Passed to ``opt_einsum``.
+
+    Returns:
+        The dense amplitudes produced by ``to_dense``.
+    """
+
+    if initial_state is not None:
+        nqubits = int(np.log2(len(initial_state)))
+        initial_state = init_state_tn(nqubits, initial_state)
+
+    use_mps = bool(mps_opts)
+    circ_cls = qtn.circuit.CircuitMPS if use_mps else qtn.circuit.Circuit
+    # Forward None rather than a falsy flag such as False: gate_opts is meant
+    # to be a mapping of options or None.
+    circ_quimb = circ_cls.from_openqasm2_str(
+        qasm, psi0=initial_state, gate_opts=mps_opts if use_mps else None
+    )
+
+    interim = circ_quimb.psi.full_simplify(seq="DRC")
+    amplitudes = interim.to_dense(backend=backend)
+
+    return amplitudes
--- a/src/qibotn/mps_contraction_helper.py
+++ b/src/qibotn/mps_contraction_helper.py
@@ -1,9 +1,10 @@
-from cuquantum import CircuitToEinsum, contract, contract_path, tensor
+from cuquantum import contract, contract_path
+
+# Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb


 class MPSContractionHelper:
-    """
-    A helper class to compute various quantities for a given MPS.
+    """A helper class to compute various quantities for a given MPS.

    Interleaved format is used to construct the input args for `cuquantum.contract`.
    A concrete example on how the modes are populated for a 7-site MPS is provided below:
@@ -41,8 +42,8 @@ class MPSContractionHelper:
        ]

    def contract_norm(self, mps_tensors, options=None):
-        """
-        Contract the corresponding tensor network to form the norm of the MPS.
+        """Contract the corresponding tensor network to form the norm of the
+        MPS.

        Args:
            mps_tensors: A list of rank-3 ndarray-like tensor objects.
@@ -62,8 +63,8 @@ class MPSContractionHelper:
        return self._contract(interleaved_inputs, options=options).real

    def contract_state_vector(self, mps_tensors, options=None):
-        """
-        Contract the corresponding tensor network to form the state vector representation of the MPS.
+        """Contract the corresponding tensor network to form the state vector
+        representation of the MPS.

        Args:
            mps_tensors: A list of rank-3 ndarray-like tensor objects.
@@ -84,8 +85,8 @@ class MPSContractionHelper:
    def contract_expectation(
        self, mps_tensors, operator, qubits, options=None, normalize=False
    ):
-        """
-        Contract the corresponding tensor network to form the state vector representation of the MPS.
+        """Contract the corresponding tensor network to form the expectation of
+        the MPS.

        Args:
            mps_tensors: A list of rank-3 ndarray-like tensor objects.
--- a/src/qibotn/mps_utils.py
+++ b/src/qibotn/mps_utils.py
@@ -2,20 +2,18 @@ import cupy as cp
 from cuquantum import contract
 from cuquantum.cutensornet.experimental import contract_decompose

+# Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
+

 def initial(num_qubits, dtype):
-    """
-    Generate the MPS with an initial state of |00...00>
-    """
+    """Generate the MPS with an initial state of |00...00>"""
    state_tensor = cp.asarray([1, 0], dtype=dtype).reshape(1, 2, 1)
    mps_tensors = [state_tensor] * num_qubits
    return mps_tensors


 def mps_site_right_swap(mps_tensors, i, **kwargs):
-    """
-    Perform the swap operation between the ith and i+1th MPS tensors.
-    """
+    """Perform the swap operation between the ith and i+1th MPS tensors."""
    # contraction followed by QR decomposition
    a, _, b = contract_decompose(
        "ipj,jqk->iqj,jpk",
@@ -28,8 +26,7 @@ def mps_site_right_swap(mps_tensors, i, **kwargs):


 def apply_gate(mps_tensors, gate, qubits, **kwargs):
-    """
-    Apply the gate operand to the MPS tensors in-place.
+    """Apply the gate operand to the MPS tensors in-place.

    Args:
        mps_tensors: A list of rank-3 ndarray-like tensor objects.
--- a/src/qibotn/quimb.py
+++ b/src/qibotn/quimb.py
@@ -1,54 +0,0 @@
-import numpy as np
-import quimb.tensor as qtn
-from qibo.models import Circuit as QiboCircuit
-
-
-def from_qibo(
-    circuit: QiboCircuit,
-    is_mps: False,
-    psi0=None,
-    method="svd",
-    cutoff=1e-6,
-    cutoff_mode="abs",
-):
-    nqubits = circuit.nqubits
-    gate_opt = {}
-    if is_mps:
-        tncirc = qtn.CircuitMPS(nqubits, psi0=psi0)
-        gate_opt["method"] = method
-        gate_opt["cutoff"] = cutoff
-        gate_opt["cutoff_mode"] = cutoff_mode
-    else:
-        tncirc = qtn.Circuit(nqubits, psi0=psi0)
-
-    for gate in circuit.queue:
-        tncirc.apply_gate(
-            gate.name,
-            *gate.parameters,
-            *gate.qubits,
-            parametrize=False if is_mps else (len(gate.parameters) > 0),
-            **gate_opt
-        )
-
-    return tncirc
-
-
-def init_state_tn(nqubits, init_state_sv):
-    dims = tuple(2 * np.ones(nqubits, dtype=int))
-
-    return qtn.tensor_1d.MatrixProductState.from_dense(init_state_sv, dims)
-
-
-def eval(qasm: str, init_state, is_mps, backend="numpy"):
-    """Evaluate QASM with Quimb
-
-    backend (quimb): numpy, cupy, jax. Passed to ``opt_einsum``.
-
-    """
-    circuit = QiboCircuit.from_qasm(qasm)
-    init_state_mps = init_state_tn(circuit.nqubits, init_state)
-    circ_quimb = from_qibo(circuit, is_mps, psi0=init_state_mps)
-    interim = circ_quimb.psi.full_simplify(seq="DRC")
-    amplitudes = interim.to_dense(backend=backend).flatten()
-
-    return amplitudes
--- a/tests/test_cuquantum_cutensor_backend.py
+++ b/tests/test_cuquantum_cutensor_backend.py
@@ -32,14 +32,16 @@ def test_eval(nqubits: int, dtype="complex128"):
        dtype (str): The data type for precision, 'complex64' for single,
            'complex128' for double.
    """
-    import qibotn.cutn
+    import qibotn.eval

    # Test qibo
    qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform)
    qibo_time, (qibo_circ, result_sv) = time(lambda: qibo_qft(nqubits, swaps=True))

    # Test Cuquantum
-    cutn_time, result_tn = time(lambda: qibotn.cutn.eval(qibo_circ, dtype).flatten())
+    cutn_time, result_tn = time(
+        lambda: qibotn.eval.dense_vector_tn(qibo_circ, dtype).flatten()
+    )

    assert 1e-2 * qibo_time < cutn_time < 1e2 * qibo_time
    assert np.allclose(result_sv, result_tn), "Resulting dense vectors do not match"
@@ -55,7 +57,7 @@ def test_mps(nqubits: int, dtype="complex128"):
        dtype (str): The data type for precision, 'complex64' for single,
            'complex128' for double.
    """
-    import qibotn.cutn
+    import qibotn.eval

    # Test qibo
    qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform)
@@ -74,7 +76,7 @@ def test_mps(nqubits: int, dtype="complex128"):
    }

    cutn_time, result_tn = time(
-        lambda: qibotn.cutn.eval_mps(circ_qibo, gate_algo, dtype).flatten()
+        lambda: qibotn.eval.dense_vector_mps(circ_qibo, gate_algo, dtype).flatten()
    )

    print(f"State vector difference: {abs(result_tn - result_sv_cp).max():0.3e}")
--- a/tests/test_qasm_quimb_backend.py
+++ b/tests/test_qasm_quimb_backend.py
@@ -25,29 +25,41 @@ def qibo_qft(nqubits, init_state, swaps):
    [(1, 1e-6, True), (2, 1e-6, False), (5, 1e-3, True), (10, 1e-3, False)],
 )
 def test_eval(nqubits: int, tolerance: float, is_mps: bool):
+    """Evaluate circuit with Quimb backend.
+
+    Args:
+        nqubits (int): Total number of qubits in the system.
+        tolerance (float): Maximum limit allowed for difference in results.
+        is_mps (bool): True if state is MPS and False for tensor network structure.
+    """
    # hack quimb to use the correct number of processes
    # TODO: remove completely, or at least delegate to the backend
    # implementation
    os.environ["QUIMB_NUM_PROCS"] = str(os.cpu_count())
-    import qibotn.quimb
+    import qibotn.eval_qu

    init_state = create_init_state(nqubits=nqubits)
    init_state_tn = copy.deepcopy(init_state)

    # Test qibo
    qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform)
-    # qibo_time, (qibo_circ, result_sv) = time(
-    # lambda: qibo_qft(nqubits, init_state, swaps=True)
-    # )
+
    qibo_circ, result_sv = qibo_qft(nqubits, init_state, swaps=True)

    # Convert to qasm for other backends
    qasm_circ = qibo_circ.to_qasm()

    # Test quimb
-    result_tn = qibotn.quimb.eval(
-        qasm_circ, init_state_tn, is_mps, backend=config.quimb.backend
-    )
+    if is_mps:
+        gate_opt = {}
+        gate_opt["method"] = "svd"
+        gate_opt["cutoff"] = 1e-6
+        gate_opt["cutoff_mode"] = "abs"
+    else:
+        gate_opt = None
+    result_tn = qibotn.eval_qu.dense_vector_tn_qu(
+        qasm_circ, init_state_tn, gate_opt, backend=config.quimb.backend
+    ).flatten()

    assert np.allclose(
        result_sv, result_tn, atol=tolerance