Merge pull request #74 from qiboteam/update_tn

Update tn
This commit is contained in:
Andy Tan
2025-09-08 10:13:43 +08:00
committed by GitHub
6 changed files with 854 additions and 371 deletions

View File

@@ -1,21 +1,28 @@
import numpy as np
from qibo import hamiltonians
from qibo.backends import NumpyBackend
from qibo.config import raise_error
from qibo.result import QuantumState
from qibotn.backends.abstract import QibotnBackend
CUDA_TYPES = {}
from qibotn.result import TensorNetworkResult
class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
# CI does not test for GPU
"""Creates CuQuantum backend for QiboTN."""
def __init__(self, runcard):
def __init__(self, runcard=None):
super().__init__()
from cuquantum import cutensornet as cutn # pylint: disable=import-error
from cuquantum import __version__ # pylint: disable=import-error
self.name = "qibotn"
self.platform = "cutensornet"
self.versions["cuquantum"] = __version__
self.supports_multigpu = True
self.configure_tn_simulation(runcard)
def configure_tn_simulation(self, runcard):
self.rank = None
if runcard is not None:
self.MPI_enabled = runcard.get("MPI_enabled", False)
self.NCCL_enabled = runcard.get("NCCL_enabled", False)
@@ -23,15 +30,17 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
expectation_enabled_value = runcard.get("expectation_enabled")
if expectation_enabled_value is True:
self.expectation_enabled = True
self.pauli_string_pattern = "XXXZ"
self.observable = None
elif expectation_enabled_value is False:
self.expectation_enabled = False
elif isinstance(expectation_enabled_value, dict):
self.expectation_enabled = True
expectation_enabled_dict = runcard.get("expectation_enabled", {})
self.pauli_string_pattern = expectation_enabled_dict.get(
"pauli_string_pattern", None
)
self.observable = runcard.get("expectation_enabled", {})
elif isinstance(
expectation_enabled_value, hamiltonians.SymbolicHamiltonian
):
self.expectation_enabled = True
self.observable = expectation_enabled_value
else:
raise TypeError("expectation_enabled has an unexpected type")
@@ -59,44 +68,6 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
self.NCCL_enabled = False
self.expectation_enabled = False
self.name = "qibotn"
self.cuquantum = cuquantum
self.cutn = cutn
self.platform = "cutensornet"
self.versions["cuquantum"] = self.cuquantum.__version__
self.supports_multigpu = True
self.handle = self.cutn.create()
global CUDA_TYPES
CUDA_TYPES = {
"complex64": (
self.cuquantum.cudaDataType.CUDA_C_32F,
self.cuquantum.ComputeType.COMPUTE_32F,
),
"complex128": (
self.cuquantum.cudaDataType.CUDA_C_64F,
self.cuquantum.ComputeType.COMPUTE_64F,
),
}
def __del__(self):
if hasattr(self, "cutn"):
self.cutn.destroy(self.handle)
def cuda_type(self, dtype="complex64"):
"""Get CUDA Type.
Parameters:
dtype (str, optional): Either single ("complex64") or double (complex128) precision. Defaults to "complex64".
Returns:
CUDA Type: tuple of cuquantum.cudaDataType and cuquantum.ComputeType
"""
if dtype in CUDA_TYPES:
return CUDA_TYPES[dtype]
else:
raise TypeError("Type can be either complex64 or complex128")
def execute_circuit(
self, circuit, initial_state=None, nshots=None, return_array=False
): # pragma: no cover
@@ -136,8 +107,8 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
and self.NCCL_enabled == False
and self.expectation_enabled == False
):
state, rank = eval.dense_vector_tn_MPI(circuit, self.dtype, 32)
if rank > 0:
state, self.rank = eval.dense_vector_tn_MPI(circuit, self.dtype, 32)
if self.rank > 0:
state = np.array(0)
elif (
self.MPI_enabled == False
@@ -145,8 +116,8 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
and self.NCCL_enabled == True
and self.expectation_enabled == False
):
state, rank = eval.dense_vector_tn_nccl(circuit, self.dtype, 32)
if rank > 0:
state, self.rank = eval.dense_vector_tn_nccl(circuit, self.dtype, 32)
if self.rank > 0:
state = np.array(0)
elif (
self.MPI_enabled == False
@@ -154,19 +125,17 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
and self.NCCL_enabled == False
and self.expectation_enabled == True
):
state = eval.expectation_pauli_tn(
circuit, self.dtype, self.pauli_string_pattern
)
state = eval.expectation_tn(circuit, self.dtype, self.observable)
elif (
self.MPI_enabled == True
and self.MPS_enabled == False
and self.NCCL_enabled == False
and self.expectation_enabled == True
):
state, rank = eval.expectation_pauli_tn_MPI(
circuit, self.dtype, self.pauli_string_pattern, 32
state, self.rank = eval.expectation_tn_MPI(
circuit, self.dtype, self.observable, 32
)
if rank > 0:
if self.rank > 0:
state = np.array(0)
elif (
self.MPI_enabled == False
@@ -174,15 +143,27 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover
and self.NCCL_enabled == True
and self.expectation_enabled == True
):
state, rank = eval.expectation_pauli_tn_nccl(
circuit, self.dtype, self.pauli_string_pattern, 32
state, self.rank = eval.expectation_tn_nccl(
circuit, self.dtype, self.observable, 32
)
if rank > 0:
if self.rank > 0:
state = np.array(0)
else:
raise_error(NotImplementedError, "Compute type not supported.")
if return_array:
return state.flatten()
if self.expectation_enabled:
return state.flatten().real
else:
return QuantumState(state.flatten())
if return_array:
statevector = state.flatten()
else:
statevector = state
return TensorNetworkResult(
nqubits=circuit.nqubits,
backend=self,
measures=None,
measured_probabilities=None,
prob_type=None,
statevector=statevector,
)

View File

@@ -195,12 +195,12 @@ class QiboCircuitToEinsum:
gates.append((operand, (qubit,)))
return gates
def expectation_operands(self, pauli_string):
def expectation_operands(self, ham_gates):
"""Create the operands for pauli string expectation computation in the
interleave format.
Parameters:
pauli_string: A string representating the list of pauli gates.
ham_gates: A list of gates derived from Qibo hamiltonian object.
Returns:
Operands for the contraction in the interleave format.
@@ -208,8 +208,6 @@ class QiboCircuitToEinsum:
input_bitstring = "0" * self.circuit.nqubits
input_operands = self._get_bitstring_tensors(input_bitstring)
pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string))
pauli_map = pauli_string
(
mode_labels,
@@ -228,11 +226,7 @@ class QiboCircuitToEinsum:
next_frontier = max(qubits_frontier.values()) + 1
pauli_gates = self.get_pauli_gates(
pauli_map, dtype=self.dtype, backend=self.backend
)
gates_inverse = pauli_gates + self.gate_tensors_inverse
gates_inverse = ham_gates + self.gate_tensors_inverse
(
gate_mode_labels_inverse,

View File

@@ -1,45 +1,238 @@
import cupy as cp
import cuquantum.cutensornet as cutn
from cupy.cuda import nccl
from cupy.cuda.runtime import getDeviceCount
from cuquantum import contract
from cuquantum import Network, contract
from mpi4py import MPI
from qibo import hamiltonians
from qibo.symbols import I, X, Y, Z
from qibotn.circuit_convertor import QiboCircuitToEinsum
from qibotn.circuit_to_mps import QiboCircuitToMPS
from qibotn.mps_contraction_helper import MPSContractionHelper
def dense_vector_tn(qibo_circ, datatype):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
dense vector.
def check_observable(observable, circuit_nqubit):
"""Checks the type of observable and returns the appropriate Hamiltonian."""
if observable is None:
return build_observable(circuit_nqubit)
elif isinstance(observable, dict):
return create_hamiltonian_from_dict(observable, circuit_nqubit)
elif isinstance(observable, hamiltonians.SymbolicHamiltonian):
# TODO: check if the observable is compatible with the circuit
return observable
else:
raise TypeError("Invalid observable type.")
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
def build_observable(circuit_nqubit):
"""Helper function to construct a target observable."""
hamiltonian_form = 0
for i in range(circuit_nqubit):
hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit)
hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
return hamiltonian
def create_hamiltonian_from_dict(data, circuit_nqubit):
"""Create a Qibo SymbolicHamiltonian from a dictionary representation.
Ensures that each Hamiltonian term explicitly acts on all circuit qubits
by adding identity (`I`) gates where needed.
Args:
data (dict): Dictionary containing Hamiltonian terms.
circuit_nqubit (int): Total number of qubits in the quantum circuit.
Returns:
Dense vector of quantum circuit.
hamiltonians.SymbolicHamiltonian: The constructed Hamiltonian.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(*myconvertor.state_vector_operands())
PAULI_GATES = {"X": X, "Y": Y, "Z": Z}
terms = []
for term in data["terms"]:
coeff = term["coefficient"]
operators = term["operators"] # List of tuples like [("Z", 0), ("X", 1)]
# Convert the operator list into a dictionary {qubit_index: gate}
operator_dict = {q: PAULI_GATES[g] for g, q in operators}
# Build the full term ensuring all qubits are covered
full_term_expr = [
operator_dict[q](q) if q in operator_dict else I(q)
for q in range(circuit_nqubit)
]
# Multiply all operators together to form a single term
term_expr = full_term_expr[0]
for op in full_term_expr[1:]:
term_expr *= op
# Scale by the coefficient
final_term = coeff * term_expr
terms.append(final_term)
if not terms:
raise ValueError("No valid Hamiltonian terms were added.")
# Combine all terms
hamiltonian_form = sum(terms)
return hamiltonians.SymbolicHamiltonian(hamiltonian_form)
def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
expectation of given Pauli string.
def get_ham_gates(pauli_map, dtype="complex128", backend=cp):
"""Populate the gates for all pauli operators.
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
pauli_string_pattern(str): pauli string pattern.
pauli_map: A dictionary mapping qubits to pauli operators.
dtype: Data type for the tensor operands.
backend: The package the tensor operands belong to.
Returns:
Expectation of quantum circuit due to pauli string.
A sequence of pauli gates.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(
*myconvertor.expectation_operands(
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
asarray = backend.asarray
pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
pauli_y = asarray([[0, -1j], [1j, 0]], dtype=dtype)
pauli_z = asarray([[1, 0], [0, -1]], dtype=dtype)
operand_map = {"I": pauli_i, "X": pauli_x, "Y": pauli_y, "Z": pauli_z}
gates = []
for qubit, pauli_char, coeff in pauli_map:
operand = operand_map.get(pauli_char)
if operand is None:
raise ValueError("pauli string character must be one of I/X/Y/Z")
operand = coeff * operand
gates.append((operand, (qubit,)))
return gates
def extract_gates_and_qubits(hamiltonian):
"""
Extracts the gates and their corresponding qubits from a Qibo Hamiltonian.
Parameters:
hamiltonian (qibo.hamiltonians.Hamiltonian or qibo.hamiltonians.SymbolicHamiltonian):
A Qibo Hamiltonian object.
Returns:
list of tuples: [(coefficient, [(gate, qubit), ...]), ...]
- coefficient: The prefactor of the term.
- list of (gate, qubit): Each term's gates and the qubits they act on.
"""
extracted_terms = []
if isinstance(hamiltonian, hamiltonians.SymbolicHamiltonian):
for term in hamiltonian.terms:
coeff = term.coefficient # Extract coefficient
gate_qubit_list = []
# Extract gate and qubit information
for factor in term.factors:
gate_name = str(factor)[
0
] # Extract the gate type (X, Y, Z) from 'X0', 'Z1'
qubit = int(str(factor)[1:]) # Extract the qubit index
gate_qubit_list.append((qubit, gate_name, coeff))
coeff = 1.0
extracted_terms.append(gate_qubit_list)
else:
raise ValueError(
"Unsupported Hamiltonian type. Must be SymbolicHamiltonian or Hamiltonian."
)
return extracted_terms
def initialize_mpi():
"""Initialize MPI communication and device selection."""
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
return comm, rank, size, device_id
def initialize_nccl(comm_mpi, rank, size):
"""Initialize NCCL communication."""
nccl_id = nccl.get_unique_id() if rank == 0 else None
nccl_id = comm_mpi.bcast(nccl_id, root=0)
return nccl.NcclCommunicator(size, nccl_id, rank)
def get_operands(qibo_circ, datatype, rank, comm):
"""Perform circuit conversion and broadcast operands."""
if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
else:
operands = None
return comm.bcast(operands, root=0)
def compute_optimal_path(network, n_samples, size, comm):
"""Compute contraction path and broadcast optimal selection."""
path, info = network.contract_path(
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
)
opt_cost, sender = comm.allreduce(
sendobj=(info.opt_cost, comm.Get_rank()), op=MPI.MINLOC
)
return comm.bcast(info, sender)
def compute_slices(info, rank, size):
"""Determine the slice range each process should compute."""
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
return range(slice_begin, slice_end)
def reduce_result(result, comm, method="MPI", root=0):
"""Reduce results across processes."""
if method == "MPI":
return comm.reduce(sendobj=result, op=MPI.SUM, root=root)
elif method == "NCCL":
stream_ptr = cp.cuda.get_current_stream().ptr
if result.dtype == cp.complex128:
count = result.size * 2 # complex128 has 2 float64 numbers
nccl_type = nccl.NCCL_FLOAT64
elif result.dtype == cp.complex64:
count = result.size * 2 # complex64 has 2 float32 numbers
nccl_type = nccl.NCCL_FLOAT32
else:
raise TypeError(f"Unsupported dtype for NCCL reduce: {result.dtype}")
comm.reduce(
result.data.ptr,
result.data.ptr,
count,
nccl_type,
nccl.NCCL_SUM,
root,
stream_ptr,
)
return result
else:
raise ValueError(f"Unknown reduce method: {method}")
def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
@@ -61,60 +254,16 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
Returns:
Dense vector of quantum circuit.
"""
from cuquantum import Network
from mpi4py import MPI
root = 0
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
device_id = rank % getDeviceCount()
# Perform circuit conversion
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
# Assign the device for each process.
device_id = rank % getDeviceCount()
# Create network object.
comm, rank, size, device_id = initialize_mpi()
operands = get_operands(qibo_circ, datatype, rank, comm)
network = Network(*operands, options={"device_id": device_id})
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
)
# Select the best path from all ranks.
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm.bcast(info, sender)
# Set path and slices.
info = compute_optimal_path(network, n_samples, size, comm)
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for.
slices = compute_slices(info, rank, size)
result = network.contract(slices=slices)
# Sum the partial contribution from each process on root.
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
return result, rank
return reduce_result(result, comm, method="MPI"), rank
def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
@@ -136,74 +285,35 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
Returns:
Dense vector of quantum circuit.
"""
from cupy.cuda import nccl
from cuquantum import Network
from mpi4py import MPI
root = 0
comm_mpi = MPI.COMM_WORLD
rank = comm_mpi.Get_rank()
size = comm_mpi.Get_size()
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
# Set up the NCCL communicator.
nccl_id = nccl.get_unique_id() if rank == root else None
nccl_id = comm_mpi.bcast(nccl_id, root)
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
# Perform circuit conversion
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
comm_mpi, rank, size, device_id = initialize_mpi()
comm_nccl = initialize_nccl(comm_mpi, rank, size)
operands = get_operands(qibo_circ, datatype, rank, comm_mpi)
network = Network(*operands)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
)
# Select the best path from all ranks.
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm_mpi.bcast(info, sender)
# Set path and slices.
info = compute_optimal_path(network, n_samples, size, comm_mpi)
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for.
slices = compute_slices(info, rank, size)
result = network.contract(slices=slices)
# Sum the partial contribution from each process on root.
stream_ptr = cp.cuda.get_current_stream().ptr
comm_nccl.reduce(
result.data.ptr,
result.data.ptr,
result.size,
nccl.NCCL_FLOAT64,
nccl.NCCL_SUM,
root,
stream_ptr,
)
return result, rank
return reduce_result(result, comm_nccl, method="NCCL"), rank
def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
def dense_vector_tn(qibo_circ, datatype):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
dense vector.
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
Returns:
Dense vector of quantum circuit.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(*myconvertor.state_vector_operands())
def expectation_tn_nccl(qibo_circ, datatype, observable, n_samples=8):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
expectation of given Pauli string using multi node and multi GPU through
NCCL.
@@ -226,76 +336,53 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
Returns:
Expectation of quantum circuit due to pauli string.
"""
from cupy.cuda import nccl
from cuquantum import Network
from mpi4py import MPI
root = 0
comm_mpi = MPI.COMM_WORLD
rank = comm_mpi.Get_rank()
size = comm_mpi.Get_size()
comm_mpi, rank, size, device_id = initialize_mpi()
device_id = rank % getDeviceCount()
comm_nccl = initialize_nccl(comm_mpi, rank, size)
cp.cuda.Device(device_id).use()
observable = check_observable(observable, qibo_circ.nqubits)
# Set up the NCCL communicator.
nccl_id = nccl.get_unique_id() if rank == root else None
nccl_id = comm_mpi.bcast(nccl_id, root)
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
ham_gate_map = extract_gates_and_qubits(observable)
# Perform circuit conversion
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.expectation_operands(
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
)
if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
network = Network(*operands)
exp = 0
for each_ham in ham_gate_map:
ham_gates = get_ham_gates(each_ham)
# Perform circuit conversion
if rank == 0:
operands = myconvertor.expectation_operands(ham_gates)
else:
operands = None
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
)
operands = comm_mpi.bcast(operands, root=0)
# Select the best path from all ranks.
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
network = Network(*operands)
# Broadcast info from the sender to all other ranks.
info = comm_mpi.bcast(info, sender)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
info = compute_optimal_path(network, n_samples, size, comm_mpi)
# Set path and slices.
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Recompute path with the selected optimal settings
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
slices = compute_slices(info, rank, size)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices)
# Sum the partial contribution from each process on root.
stream_ptr = cp.cuda.get_current_stream().ptr
comm_nccl.reduce(
result.data.ptr,
result.data.ptr,
result.size,
nccl.NCCL_FLOAT64,
nccl.NCCL_SUM,
root,
stream_ptr,
)
# Sum the partial contribution from each process on root.
result = reduce_result(result, comm_nccl, method="NCCL", root=0)
return result, rank
exp += result
return exp, rank
def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
def expectation_tn_MPI(qibo_circ, datatype, observable, n_samples=8):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
expectation of given Pauli string using multi node and multi GPU through
MPI.
@@ -318,61 +405,76 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
Returns:
Expectation of quantum circuit due to pauli string.
"""
from cuquantum import Network
from mpi4py import MPI # this line initializes MPI
# Initialize MPI and device
comm, rank, size, device_id = initialize_mpi()
root = 0
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
observable = check_observable(observable, qibo_circ.nqubits)
device_id = rank % getDeviceCount()
ham_gate_map = extract_gates_and_qubits(observable)
# Perform circuit conversion
if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
exp = 0
for each_ham in ham_gate_map:
ham_gates = get_ham_gates(each_ham)
# Perform circuit conversion
# Perform circuit conversion
if rank == 0:
operands = myconvertor.expectation_operands(ham_gates)
else:
operands = None
operands = comm.bcast(operands, root=0)
# Create network object.
network = Network(*operands, options={"device_id": device_id})
# Compute optimal contraction path
info = compute_optimal_path(network, n_samples, size, comm)
# Set path and slices.
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Compute slice range for each rank
slices = compute_slices(info, rank, size)
# Perform contraction
result = network.contract(slices=slices)
# Sum the partial contribution from each process on root.
result = reduce_result(result, comm, method="MPI", root=0)
if rank == 0:
exp += result
return exp, rank
def expectation_tn(qibo_circ, datatype, observable):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
expectation of given Pauli string.
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
pauli_string_pattern(str): pauli string pattern.
Returns:
Expectation of quantum circuit due to pauli string.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.expectation_operands(
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
)
observable = check_observable(observable, qibo_circ.nqubits)
# Assign the device for each process.
device_id = rank % getDeviceCount()
# Create network object.
network = Network(*operands, options={"device_id": device_id})
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}}
)
# Select the best path from all ranks.
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm.bcast(info, sender)
# Set path and slices.
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices)
# Sum the partial contribution from each process on root.
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
return result, rank
ham_gate_map = extract_gates_and_qubits(observable)
exp = 0
for each_ham in ham_gate_map:
ham_gates = get_ham_gates(each_ham)
expectation_operands = myconvertor.expectation_operands(ham_gates)
exp += contract(*expectation_operands)
return exp
def dense_vector_mps(qibo_circ, gate_algo, datatype):
@@ -393,27 +495,3 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype):
return mps_helper.contract_state_vector(
myconvertor.mps_tensors, {"handle": myconvertor.handle}
)
def pauli_string_gen(nqubits, pauli_string_pattern):
"""Used internally to generate the string based on given pattern and number
of qubit.
Parameters:
nqubits(int): Number of qubits of Quantum Circuit
pauli_string_pattern(str): Strings representing sequence of pauli gates.
Returns:
String representation of the actual pauli string from the pattern.
Example: pattern: "XZ", number of qubit: 7, output = XZXZXZX
"""
if nqubits <= 0:
return "Invalid input. N should be a positive integer."
result = ""
for i in range(nqubits):
char_to_add = pauli_string_pattern[i % len(pauli_string_pattern)]
result += char_to_add
return result

View File

@@ -9,16 +9,16 @@ import pytest
# backends to be tested
# TODO: add cutensornet and quimb here as well
BACKENDS = ["qmatchatea"]
BACKENDS = ["cutensornet"]
# BACKENDS = ["qmatchatea"]
def get_backend(backend_name):
from qibotn.backends.cutensornet import CuTensorNet
from qibotn.backends.qmatchatea import QMatchaTeaBackend
NAME2BACKEND = {
"qmatchatea": QMatchaTeaBackend,
}
NAME2BACKEND = {"qmatchatea": QMatchaTeaBackend, "cutensornet": CuTensorNet}
return NAME2BACKEND[backend_name]()

View File

@@ -1,11 +1,13 @@
from timeit import default_timer as timer
import math
import config
import cupy as cp
import numpy as np
import pytest
import qibo
from qibo import construct_backend, hamiltonians
from qibo.models import QFT
from qibo.symbols import X, Z
ABS_TOL = 1e-7
def qibo_qft(nqubits, swaps):
@@ -14,37 +16,73 @@ def qibo_qft(nqubits, swaps):
return circ_qibo, state_vec
def time(func):
start = timer()
res = func()
end = timer()
time = end - start
return time, res
def build_observable(nqubits):
"""Helper function to construct a target observable."""
hamiltonian_form = 0
for i in range(nqubits):
hamiltonian_form += 0.5 * X(i % nqubits) * Z((i + 1) % nqubits)
hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
return hamiltonian, hamiltonian_form
def build_observable_dict(nqubits):
"""Construct a target observable as a dictionary representation.
Returns a dictionary suitable for `create_hamiltonian_from_dict`.
"""
terms = []
for i in range(nqubits):
term = {
"coefficient": 0.5,
"operators": [("X", i % nqubits), ("Z", (i + 1) % nqubits)],
}
terms.append(term)
return {"terms": terms}
@pytest.mark.gpu
@pytest.mark.parametrize("nqubits", [1, 2, 5, 10])
def test_eval(nqubits: int, dtype="complex128"):
"""Evaluate QASM with cuQuantum.
"""
Args:
nqubits (int): Total number of qubits in the system.
dtype (str): The data type for precision, 'complex64' for single,
'complex128' for double.
"""
import qibotn.eval
# Test qibo
qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform)
qibo_time, (qibo_circ, result_sv) = time(lambda: qibo_qft(nqubits, swaps=True))
qibo.set_backend(backend="numpy")
qibo_circ, result_sv = qibo_qft(nqubits, swaps=True)
result_sv_cp = cp.asarray(result_sv)
# Test Cuquantum
cutn_time, result_tn = time(
lambda: qibotn.eval.dense_vector_tn(qibo_circ, dtype).flatten()
# Test cutensornet
backend = construct_backend(backend="qibotn", platform="cutensornet")
# Test with no settings specified. Default is dense vector calculation without MPI or NCCL.
result_tn = backend.execute_circuit(circuit=qibo_circ)
print(
f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}"
)
assert cp.allclose(
result_sv_cp, result_tn.statevector.flatten()
), "Resulting dense vectors do not match"
assert 1e-2 * qibo_time < cutn_time < 1e2 * qibo_time
assert np.allclose(result_sv, result_tn), "Resulting dense vectors do not match"
# Test with explicit settings specified.
comp_set_w_bool = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": False,
}
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
print(
f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}"
)
assert cp.allclose(
result_sv_cp, result_tn.statevector.flatten()
), "Resulting dense vectors do not match"
@pytest.mark.gpu
@@ -57,28 +95,105 @@ def test_mps(nqubits: int, dtype="complex128"):
dtype (str): The data type for precision, 'complex64' for single,
'complex128' for double.
"""
import qibotn.eval
# Test qibo
qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform)
qibo_time, (circ_qibo, result_sv) = time(lambda: qibo_qft(nqubits, swaps=True))
qibo.set_backend(backend="numpy")
qibo_circ, result_sv = qibo_qft(nqubits, swaps=True)
result_sv_cp = cp.asarray(result_sv)
# Test of MPS
gate_algo = {
"qr_method": False,
"svd_method": {
"partition": "UV",
"abs_cutoff": 1e-12,
},
# Test cutensornet
backend = construct_backend(backend="qibotn", platform="cutensornet")
# Test with simple MPS settings specified using bool. Uses the default MPS parameters.
comp_set_w_bool = {
"MPI_enabled": False,
"MPS_enabled": True,
"NCCL_enabled": False,
"expectation_enabled": False,
}
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
print(
f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}"
)
assert cp.allclose(
result_tn.statevector.flatten(), result_sv_cp
), "Resulting dense vectors do not match"
cutn_time, result_tn = time(
lambda: qibotn.eval.dense_vector_mps(circ_qibo, gate_algo, dtype).flatten()
# Test with explicit MPS computation settings specified using Dict. Users able to specify parameters like qr_method etc.
comp_set_w_MPS_config_para = {
"MPI_enabled": False,
"MPS_enabled": {
"qr_method": False,
"svd_method": {
"partition": "UV",
"abs_cutoff": 1e-12,
},
},
"NCCL_enabled": False,
"expectation_enabled": False,
}
backend.configure_tn_simulation(comp_set_w_MPS_config_para)
result_tn = backend.execute_circuit(circuit=qibo_circ)
print(
f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}"
)
assert cp.allclose(
result_tn.statevector.flatten(), result_sv_cp
), "Resulting dense vectors do not match"
@pytest.mark.parametrize("nqubits", [2, 5, 10])
def test_expectation(nqubits: int, dtype="complex128"):
# Test qibo
qibo_circ, state_vec_qibo = qibo_qft(nqubits, swaps=True)
ham, ham_form = build_observable(nqubits)
numpy_backend = construct_backend("numpy")
exact_expval = numpy_backend.calculate_expectation_state(
hamiltonian=ham,
state=state_vec_qibo,
normalize=False,
)
print(f"State vector difference: {abs(result_tn - result_sv_cp).max():0.3e}")
# Test cutensornet
backend = construct_backend(backend="qibotn", platform="cutensornet")
assert cp.allclose(result_tn, result_sv_cp)
# Test with simple settings using bool. Uses default Hamilitonian for expectation calculation.
comp_set_w_bool = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": True,
}
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
assert math.isclose(
exact_expval.item(), result_tn.real.get().item(), abs_tol=ABS_TOL
)
# Test with user defined hamiltonian using "hamiltonians.SymbolicHamiltonian" object.
comp_set_w_hamiltonian_obj = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": ham,
}
backend.configure_tn_simulation(comp_set_w_hamiltonian_obj)
result_tn = backend.execute_circuit(circuit=qibo_circ)
assert math.isclose(
exact_expval.item(), result_tn.real.get().item(), abs_tol=ABS_TOL
)
# Test with user defined hamiltonian using Dictionary object form of hamiltonian.
ham_dict = build_observable_dict(nqubits)
comp_set_w_hamiltonian_dict = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": ham_dict,
}
backend.configure_tn_simulation(comp_set_w_hamiltonian_dict)
result_tn = backend.execute_circuit(circuit=qibo_circ)
assert math.isclose(
exact_expval.item(), result_tn.real.get().item(), abs_tol=ABS_TOL
)

View File

@@ -0,0 +1,315 @@
# mpirun --allow-run-as-root -np 2 python -m pytest --with-mpi test_cuquantum_cutensor_mpi_backend.py
import math
import cupy as cp
import numpy as np
import pytest
import qibo
from qibo import construct_backend, hamiltonians
from qibo.models import QFT
from qibo.symbols import X, Z
ABS_TOL = 1e-7
def qibo_qft(nqubits, swaps):
circ_qibo = QFT(nqubits, swaps)
state_vec = circ_qibo().state(numpy=True)
return circ_qibo, state_vec
def build_observable(nqubits):
"""Helper function to construct a target observable."""
hamiltonian_form = 0
for i in range(nqubits):
hamiltonian_form += 0.5 * X(i % nqubits) * Z((i + 1) % nqubits)
hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
return hamiltonian, hamiltonian_form
def build_observable_dict(nqubits):
"""Construct a target observable as a dictionary representation.
Returns a dictionary suitable for `create_hamiltonian_from_dict`.
"""
terms = []
for i in range(nqubits):
term = {
"coefficient": 0.5,
"operators": [("X", i % nqubits), ("Z", (i + 1) % nqubits)],
}
terms.append(term)
return {"terms": terms}
@pytest.mark.gpu
@pytest.mark.mpi
@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10])
def test_eval_mpi(nqubits: int, dtype="complex128"):
"""
Args:
nqubits (int): Total number of qubits in the system.
dtype (str): The data type for precision, 'complex64' for single,
'complex128' for double.
"""
# Test qibo
qibo.set_backend(backend="numpy")
qibo_circ, result_sv = qibo_qft(nqubits, swaps=True)
result_sv_cp = cp.asarray(result_sv)
# Test cutensornet
backend = construct_backend(backend="qibotn", platform="cutensornet")
# Test with explicit settings specified.
comp_set_w_bool = {
"MPI_enabled": True,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": False,
}
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
result_tn_cp = cp.asarray(result_tn.statevector.flatten())
print(f"State vector difference: {abs(result_tn_cp - result_sv_cp).max():0.3e}")
if backend.rank == 0:
assert cp.allclose(
result_sv_cp, result_tn_cp
), "Resulting dense vectors do not match"
else:
assert (
isinstance(result_tn_cp, cp.ndarray)
and result_tn_cp.size == 1
and result_tn_cp.item() == 0
), f"Rank {backend.rank}: result_tn_cp should be scalar/array with 0, got {result_tn_cp}"
@pytest.mark.gpu
@pytest.mark.mpi
@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10])
def test_expectation_mpi(nqubits: int, dtype="complex128"):
# Test qibo
qibo_circ, state_vec_qibo = qibo_qft(nqubits, swaps=True)
ham, ham_form = build_observable(nqubits)
numpy_backend = construct_backend("numpy")
exact_expval = numpy_backend.calculate_expectation_state(
hamiltonian=ham,
state=state_vec_qibo,
normalize=False,
)
# Test cutensornet
backend = construct_backend(backend="qibotn", platform="cutensornet")
# Test with simple settings using bool. Uses default Hamilitonian for expectation calculation.
comp_set_w_bool = {
"MPI_enabled": True,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": True,
}
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
if backend.rank == 0:
# Compare numerical values
assert math.isclose(
exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL
), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}"
else:
# Rank > 0: must be hardcoded [0] (int)
assert (
isinstance(result_tn, (np.ndarray, cp.ndarray))
and result_tn.size == 1
and np.issubdtype(result_tn.dtype, np.integer)
and result_tn.item() == 0
), f"Rank {backend.rank}: expected int array [0], got {result_tn}"
# Test with user defined hamiltonian using "hamiltonians.SymbolicHamiltonian" object.
comp_set_w_hamiltonian_obj = {
"MPI_enabled": True,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": ham,
}
backend.configure_tn_simulation(comp_set_w_hamiltonian_obj)
result_tn = backend.execute_circuit(circuit=qibo_circ)
if backend.rank == 0:
# Compare numerical values
assert math.isclose(
exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL
), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}"
else:
# Rank > 0: must be hardcoded [0] (int)
assert (
isinstance(result_tn, (np.ndarray, cp.ndarray))
and result_tn.size == 1
and np.issubdtype(result_tn.dtype, np.integer)
and result_tn.item() == 0
), f"Rank {backend.rank}: expected int array [0], got {result_tn}"
# Test with user defined hamiltonian using Dictionary object form of hamiltonian.
ham_dict = build_observable_dict(nqubits)
comp_set_w_hamiltonian_dict = {
"MPI_enabled": True,
"MPS_enabled": False,
"NCCL_enabled": False,
"expectation_enabled": ham_dict,
}
backend.configure_tn_simulation(comp_set_w_hamiltonian_dict)
result_tn = backend.execute_circuit(circuit=qibo_circ)
if backend.rank == 0:
# Compare numerical values
assert math.isclose(
exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL
), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}"
else:
# Rank > 0: must be hardcoded [0] (int)
assert (
isinstance(result_tn, (np.ndarray, cp.ndarray))
and result_tn.size == 1
and np.issubdtype(result_tn.dtype, np.integer)
and result_tn.item() == 0
), f"Rank {backend.rank}: expected int array [0], got {result_tn}"
@pytest.mark.gpu
@pytest.mark.mpi
@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10])
def test_eval_nccl(nqubits: int, dtype="complex128"):
"""
Args:
nqubits (int): Total number of qubits in the system.
dtype (str): The data type for precision, 'complex64' for single,
'complex128' for double.
"""
# Test qibo
qibo.set_backend(backend="numpy")
qibo_circ, result_sv = qibo_qft(nqubits, swaps=True)
result_sv_cp = cp.asarray(result_sv)
# Test cutensornet
backend = construct_backend(backend="qibotn", platform="cutensornet")
# Test with explicit settings specified.
comp_set_w_bool = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": True,
"expectation_enabled": False,
}
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
result_tn_cp = cp.asarray(result_tn.statevector.flatten())
if backend.rank == 0:
assert cp.allclose(
result_sv_cp, result_tn_cp
), "Resulting dense vectors do not match"
else:
assert (
isinstance(result_tn_cp, cp.ndarray)
and result_tn_cp.size == 1
and result_tn_cp.item() == 0
), f"Rank {backend.rank}: result_tn_cp should be scalar/array with 0, got {result_tn_cp}"
@pytest.mark.gpu
@pytest.mark.mpi
@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10])
def test_expectation_NCCL(nqubits: int, dtype="complex128"):
# Test qibo
qibo_circ, state_vec_qibo = qibo_qft(nqubits, swaps=True)
ham, ham_form = build_observable(nqubits)
numpy_backend = construct_backend("numpy")
exact_expval = numpy_backend.calculate_expectation_state(
hamiltonian=ham,
state=state_vec_qibo,
normalize=False,
)
# Test cutensornet
backend = construct_backend(backend="qibotn", platform="cutensornet")
# Test with simple settings using bool. Uses default Hamilitonian for expectation calculation.
comp_set_w_bool = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": True,
"expectation_enabled": True,
}
backend.configure_tn_simulation(comp_set_w_bool)
result_tn = backend.execute_circuit(circuit=qibo_circ)
if backend.rank == 0:
# Compare numerical values
assert math.isclose(
exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL
), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}"
else:
# Rank > 0: must be hardcoded [0] (int)
assert (
isinstance(result_tn, (np.ndarray, cp.ndarray))
and result_tn.size == 1
and np.issubdtype(result_tn.dtype, np.integer)
and result_tn.item() == 0
), f"Rank {backend.rank}: expected int array [0], got {result_tn}"
# Test with user defined hamiltonian using "hamiltonians.SymbolicHamiltonian" object.
comp_set_w_hamiltonian_obj = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": True,
"expectation_enabled": ham,
}
backend.configure_tn_simulation(comp_set_w_hamiltonian_obj)
result_tn = backend.execute_circuit(circuit=qibo_circ)
if backend.rank == 0:
# Compare numerical values
assert math.isclose(
exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL
), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}"
else:
# Rank > 0: must be hardcoded [0] (int)
assert (
isinstance(result_tn, (np.ndarray, cp.ndarray))
and result_tn.size == 1
and np.issubdtype(result_tn.dtype, np.integer)
and result_tn.item() == 0
), f"Rank {backend.rank}: expected int array [0], got {result_tn}"
# Test with user defined hamiltonian using Dictionary object form of hamiltonian.
ham_dict = build_observable_dict(nqubits)
comp_set_w_hamiltonian_dict = {
"MPI_enabled": False,
"MPS_enabled": False,
"NCCL_enabled": True,
"expectation_enabled": ham_dict,
}
backend.configure_tn_simulation(comp_set_w_hamiltonian_dict)
result_tn = backend.execute_circuit(circuit=qibo_circ)
if backend.rank == 0:
# Compare numerical values
assert math.isclose(
exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL
), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}"
else:
# Rank > 0: must be hardcoded [0] (int)
assert (
isinstance(result_tn, (np.ndarray, cp.ndarray))
and result_tn.size == 1
and np.issubdtype(result_tn.dtype, np.integer)
and result_tn.item() == 0
), f"Rank {backend.rank}: expected int array [0], got {result_tn}"