diff --git a/src/qibotn/backends/cutensornet.py b/src/qibotn/backends/cutensornet.py index 553fc51..616cda9 100644 --- a/src/qibotn/backends/cutensornet.py +++ b/src/qibotn/backends/cutensornet.py @@ -1,21 +1,28 @@ import numpy as np +from qibo import hamiltonians from qibo.backends import NumpyBackend from qibo.config import raise_error -from qibo.result import QuantumState from qibotn.backends.abstract import QibotnBackend - -CUDA_TYPES = {} +from qibotn.result import TensorNetworkResult class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover # CI does not test for GPU """Creates CuQuantum backend for QiboTN.""" - def __init__(self, runcard): + def __init__(self, runcard=None): super().__init__() - from cuquantum import cutensornet as cutn # pylint: disable=import-error + from cuquantum import __version__ # pylint: disable=import-error + self.name = "qibotn" + self.platform = "cutensornet" + self.versions["cuquantum"] = __version__ + self.supports_multigpu = True + self.configure_tn_simulation(runcard) + + def configure_tn_simulation(self, runcard): + self.rank = None if runcard is not None: self.MPI_enabled = runcard.get("MPI_enabled", False) self.NCCL_enabled = runcard.get("NCCL_enabled", False) @@ -23,15 +30,17 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover expectation_enabled_value = runcard.get("expectation_enabled") if expectation_enabled_value is True: self.expectation_enabled = True - self.pauli_string_pattern = "XXXZ" + self.observable = None elif expectation_enabled_value is False: self.expectation_enabled = False elif isinstance(expectation_enabled_value, dict): self.expectation_enabled = True - expectation_enabled_dict = runcard.get("expectation_enabled", {}) - self.pauli_string_pattern = expectation_enabled_dict.get( - "pauli_string_pattern", None - ) + self.observable = runcard.get("expectation_enabled", {}) + elif isinstance( + expectation_enabled_value, hamiltonians.SymbolicHamiltonian + ): + self.expectation_enabled = True + self.observable = expectation_enabled_value else: raise TypeError("expectation_enabled has an unexpected type") @@ -59,44 +68,6 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover self.NCCL_enabled = False self.expectation_enabled = False - self.name = "qibotn" - self.cuquantum = cuquantum - self.cutn = cutn - self.platform = "cutensornet" - self.versions["cuquantum"] = self.cuquantum.__version__ - self.supports_multigpu = True - self.handle = self.cutn.create() - - global CUDA_TYPES - CUDA_TYPES = { - "complex64": ( - self.cuquantum.cudaDataType.CUDA_C_32F, - self.cuquantum.ComputeType.COMPUTE_32F, - ), - "complex128": ( - self.cuquantum.cudaDataType.CUDA_C_64F, - self.cuquantum.ComputeType.COMPUTE_64F, - ), - } - - def __del__(self): - if hasattr(self, "cutn"): - self.cutn.destroy(self.handle) - - def cuda_type(self, dtype="complex64"): - """Get CUDA Type. - - Parameters: - dtype (str, optional): Either single ("complex64") or double (complex128) precision. Defaults to "complex64". - - Returns: - CUDA Type: tuple of cuquantum.cudaDataType and cuquantum.ComputeType - """ - if dtype in CUDA_TYPES: - return CUDA_TYPES[dtype] - else: - raise TypeError("Type can be either complex64 or complex128") - def execute_circuit( self, circuit, initial_state=None, nshots=None, return_array=False ): # pragma: no cover @@ -136,8 +107,8 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover and self.NCCL_enabled == False and self.expectation_enabled == False ): - state, rank = eval.dense_vector_tn_MPI(circuit, self.dtype, 32) - if rank > 0: + state, self.rank = eval.dense_vector_tn_MPI(circuit, self.dtype, 32) + if self.rank > 0: state = np.array(0) elif ( self.MPI_enabled == False @@ -145,8 +116,8 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover and self.NCCL_enabled == True and self.expectation_enabled == False ): - state, rank = eval.dense_vector_tn_nccl(circuit, self.dtype, 32) - if rank > 0: + state, self.rank = eval.dense_vector_tn_nccl(circuit, self.dtype, 32) + if self.rank > 0: state = np.array(0) elif ( self.MPI_enabled == False @@ -154,19 +125,17 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover and self.NCCL_enabled == False and self.expectation_enabled == True ): - state = eval.expectation_pauli_tn( - circuit, self.dtype, self.pauli_string_pattern - ) + state = eval.expectation_tn(circuit, self.dtype, self.observable) elif ( self.MPI_enabled == True and self.MPS_enabled == False and self.NCCL_enabled == False and self.expectation_enabled == True ): - state, rank = eval.expectation_pauli_tn_MPI( - circuit, self.dtype, self.pauli_string_pattern, 32 + state, self.rank = eval.expectation_tn_MPI( + circuit, self.dtype, self.observable, 32 ) - if rank > 0: + if self.rank > 0: state = np.array(0) elif ( self.MPI_enabled == False @@ -174,15 +143,27 @@ class CuTensorNet(QibotnBackend, NumpyBackend): # pragma: no cover and self.NCCL_enabled == True and self.expectation_enabled == True ): - state, rank = eval.expectation_pauli_tn_nccl( - circuit, self.dtype, self.pauli_string_pattern, 32 + state, self.rank = eval.expectation_tn_nccl( + circuit, self.dtype, self.observable, 32 ) - if rank > 0: + if self.rank > 0: state = np.array(0) else: raise_error(NotImplementedError, "Compute type not supported.") - if return_array: - return state.flatten() + if self.expectation_enabled: + return state.flatten().real else: - return QuantumState(state.flatten()) + if return_array: + statevector = state.flatten() + else: + statevector = state + + return TensorNetworkResult( + nqubits=circuit.nqubits, + backend=self, + measures=None, + measured_probabilities=None, + prob_type=None, + statevector=statevector, + ) diff --git a/src/qibotn/circuit_convertor.py b/src/qibotn/circuit_convertor.py index 03e96fa..1c8b3ee 100644 --- a/src/qibotn/circuit_convertor.py +++ b/src/qibotn/circuit_convertor.py @@ -195,12 +195,12 @@ class QiboCircuitToEinsum: gates.append((operand, (qubit,))) return gates - def expectation_operands(self, pauli_string): + def expectation_operands(self, ham_gates): """Create the operands for pauli string expectation computation in the interleave format. Parameters: - pauli_string: A string representating the list of pauli gates. + ham_gates: A list of gates derived from Qibo hamiltonian object. Returns: Operands for the contraction in the interleave format. @@ -208,8 +208,6 @@ class QiboCircuitToEinsum: input_bitstring = "0" * self.circuit.nqubits input_operands = self._get_bitstring_tensors(input_bitstring) - pauli_string = dict(zip(range(self.circuit.nqubits), pauli_string)) - pauli_map = pauli_string ( mode_labels, @@ -228,11 +226,7 @@ class QiboCircuitToEinsum: next_frontier = max(qubits_frontier.values()) + 1 - pauli_gates = self.get_pauli_gates( - pauli_map, dtype=self.dtype, backend=self.backend - ) - - gates_inverse = pauli_gates + self.gate_tensors_inverse + gates_inverse = ham_gates + self.gate_tensors_inverse ( gate_mode_labels_inverse, diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py index 245aa5e..afba962 100644 --- a/src/qibotn/eval.py +++ b/src/qibotn/eval.py @@ -1,45 +1,238 @@ import cupy as cp +import cuquantum.cutensornet as cutn +from cupy.cuda import nccl from cupy.cuda.runtime import getDeviceCount -from cuquantum import contract +from cuquantum import Network, contract +from mpi4py import MPI +from qibo import hamiltonians +from qibo.symbols import I, X, Y, Z from qibotn.circuit_convertor import QiboCircuitToEinsum from qibotn.circuit_to_mps import QiboCircuitToMPS from qibotn.mps_contraction_helper import MPSContractionHelper -def dense_vector_tn(qibo_circ, datatype): - """Convert qibo circuit to tensornet (TN) format and perform contraction to - dense vector. +def check_observable(observable, circuit_nqubit): + """Checks the type of observable and returns the appropriate Hamiltonian.""" + if observable is None: + return build_observable(circuit_nqubit) + elif isinstance(observable, dict): + return create_hamiltonian_from_dict(observable, circuit_nqubit) + elif isinstance(observable, hamiltonians.SymbolicHamiltonian): + # TODO: check if the observable is compatible with the circuit + return observable + else: + raise TypeError("Invalid observable type.") - Parameters: - qibo_circ: The quantum circuit object. - datatype (str): Either single ("complex64") or double (complex128) precision. + +def build_observable(circuit_nqubit): + """Helper function to construct a target observable.""" + hamiltonian_form = 0 + for i in range(circuit_nqubit): + hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit) + + hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form) + return hamiltonian + + +def create_hamiltonian_from_dict(data, circuit_nqubit): + """Create a Qibo SymbolicHamiltonian from a dictionary representation. + + Ensures that each Hamiltonian term explicitly acts on all circuit qubits + by adding identity (`I`) gates where needed. + + Args: + data (dict): Dictionary containing Hamiltonian terms. + circuit_nqubit (int): Total number of qubits in the quantum circuit. Returns: - Dense vector of quantum circuit. + hamiltonians.SymbolicHamiltonian: The constructed Hamiltonian. """ - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - return contract(*myconvertor.state_vector_operands()) + PAULI_GATES = {"X": X, "Y": Y, "Z": Z} + + terms = [] + + for term in data["terms"]: + coeff = term["coefficient"] + operators = term["operators"] # List of tuples like [("Z", 0), ("X", 1)] + + # Convert the operator list into a dictionary {qubit_index: gate} + operator_dict = {q: PAULI_GATES[g] for g, q in operators} + + # Build the full term ensuring all qubits are covered + full_term_expr = [ + operator_dict[q](q) if q in operator_dict else I(q) + for q in range(circuit_nqubit) + ] + + # Multiply all operators together to form a single term + term_expr = full_term_expr[0] + for op in full_term_expr[1:]: + term_expr *= op + + # Scale by the coefficient + final_term = coeff * term_expr + terms.append(final_term) + + if not terms: + raise ValueError("No valid Hamiltonian terms were added.") + + # Combine all terms + hamiltonian_form = sum(terms) + + return hamiltonians.SymbolicHamiltonian(hamiltonian_form) -def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern): - """Convert qibo circuit to tensornet (TN) format and perform contraction to - expectation of given Pauli string. +def get_ham_gates(pauli_map, dtype="complex128", backend=cp): + """Populate the gates for all pauli operators. Parameters: - qibo_circ: The quantum circuit object. - datatype (str): Either single ("complex64") or double (complex128) precision. - pauli_string_pattern(str): pauli string pattern. + pauli_map: A dictionary mapping qubits to pauli operators. + dtype: Data type for the tensor operands. + backend: The package the tensor operands belong to. Returns: - Expectation of quantum circuit due to pauli string. + A sequence of pauli gates. """ - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - return contract( - *myconvertor.expectation_operands( - pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) + asarray = backend.asarray + pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype) + pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype) + pauli_y = asarray([[0, -1j], [1j, 0]], dtype=dtype) + pauli_z = asarray([[1, 0], [0, -1]], dtype=dtype) + + operand_map = {"I": pauli_i, "X": pauli_x, "Y": pauli_y, "Z": pauli_z} + gates = [] + for qubit, pauli_char, coeff in pauli_map: + operand = operand_map.get(pauli_char) + if operand is None: + raise ValueError("pauli string character must be one of I/X/Y/Z") + operand = coeff * operand + gates.append((operand, (qubit,))) + return gates + + +def extract_gates_and_qubits(hamiltonian): + """ + Extracts the gates and their corresponding qubits from a Qibo Hamiltonian. + + Parameters: + hamiltonian (qibo.hamiltonians.Hamiltonian or qibo.hamiltonians.SymbolicHamiltonian): + A Qibo Hamiltonian object. + + Returns: + list of tuples: [(coefficient, [(gate, qubit), ...]), ...] + - coefficient: The prefactor of the term. + - list of (gate, qubit): Each term's gates and the qubits they act on. + """ + extracted_terms = [] + + if isinstance(hamiltonian, hamiltonians.SymbolicHamiltonian): + for term in hamiltonian.terms: + coeff = term.coefficient # Extract coefficient + gate_qubit_list = [] + + # Extract gate and qubit information + for factor in term.factors: + gate_name = str(factor)[ + 0 + ] # Extract the gate type (X, Y, Z) from 'X0', 'Z1' + qubit = int(str(factor)[1:]) # Extract the qubit index + gate_qubit_list.append((qubit, gate_name, coeff)) + coeff = 1.0 + + extracted_terms.append(gate_qubit_list) + + else: + raise ValueError( + "Unsupported Hamiltonian type. Must be SymbolicHamiltonian or Hamiltonian." ) + + return extracted_terms + + +def initialize_mpi(): + """Initialize MPI communication and device selection.""" + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + size = comm.Get_size() + device_id = rank % getDeviceCount() + cp.cuda.Device(device_id).use() + return comm, rank, size, device_id + + +def initialize_nccl(comm_mpi, rank, size): + """Initialize NCCL communication.""" + nccl_id = nccl.get_unique_id() if rank == 0 else None + nccl_id = comm_mpi.bcast(nccl_id, root=0) + return nccl.NcclCommunicator(size, nccl_id, rank) + + +def get_operands(qibo_circ, datatype, rank, comm): + """Perform circuit conversion and broadcast operands.""" + if rank == 0: + myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) + operands = myconvertor.state_vector_operands() + else: + operands = None + return comm.bcast(operands, root=0) + + +def compute_optimal_path(network, n_samples, size, comm): + """Compute contraction path and broadcast optimal selection.""" + path, info = network.contract_path( + optimize={ + "samples": n_samples, + "slicing": { + "min_slices": max(32, size), + "memory_model": cutn.MemoryModel.CUTENSOR, + }, + } ) + opt_cost, sender = comm.allreduce( + sendobj=(info.opt_cost, comm.Get_rank()), op=MPI.MINLOC + ) + return comm.bcast(info, sender) + + +def compute_slices(info, rank, size): + """Determine the slice range each process should compute.""" + num_slices = info.num_slices + chunk, extra = num_slices // size, num_slices % size + slice_begin = rank * chunk + min(rank, extra) + slice_end = ( + num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) + ) + return range(slice_begin, slice_end) + + +def reduce_result(result, comm, method="MPI", root=0): + """Reduce results across processes.""" + if method == "MPI": + return comm.reduce(sendobj=result, op=MPI.SUM, root=root) + + elif method == "NCCL": + stream_ptr = cp.cuda.get_current_stream().ptr + if result.dtype == cp.complex128: + count = result.size * 2 # complex128 has 2 float64 numbers + nccl_type = nccl.NCCL_FLOAT64 + elif result.dtype == cp.complex64: + count = result.size * 2 # complex64 has 2 float32 numbers + nccl_type = nccl.NCCL_FLOAT32 + else: + raise TypeError(f"Unsupported dtype for NCCL reduce: {result.dtype}") + + comm.reduce( + result.data.ptr, + result.data.ptr, + count, + nccl_type, + nccl.NCCL_SUM, + root, + stream_ptr, + ) + return result + else: + raise ValueError(f"Unknown reduce method: {method}") def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8): @@ -61,60 +254,16 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8): Returns: Dense vector of quantum circuit. """ - - from cuquantum import Network - from mpi4py import MPI - - root = 0 - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - size = comm.Get_size() - - device_id = rank % getDeviceCount() - - # Perform circuit conversion - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - - operands = myconvertor.state_vector_operands() - - # Assign the device for each process. - device_id = rank % getDeviceCount() - - # Create network object. + comm, rank, size, device_id = initialize_mpi() + operands = get_operands(qibo_circ, datatype, rank, comm) network = Network(*operands, options={"device_id": device_id}) - - # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. - path, info = network.contract_path( - optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}} - ) - - # Select the best path from all ranks. - opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC) - - # Broadcast info from the sender to all other ranks. - info = comm.bcast(info, sender) - - # Set path and slices. + info = compute_optimal_path(network, n_samples, size, comm) path, info = network.contract_path( optimize={"path": info.path, "slicing": info.slices} ) - - # Calculate this process's share of the slices. - num_slices = info.num_slices - chunk, extra = num_slices // size, num_slices % size - slice_begin = rank * chunk + min(rank, extra) - slice_end = ( - num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) - ) - slices = range(slice_begin, slice_end) - - # Contract the group of slices the process is responsible for. + slices = compute_slices(info, rank, size) result = network.contract(slices=slices) - - # Sum the partial contribution from each process on root. - result = comm.reduce(sendobj=result, op=MPI.SUM, root=root) - - return result, rank + return reduce_result(result, comm, method="MPI"), rank def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8): @@ -136,74 +285,35 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8): Returns: Dense vector of quantum circuit. """ - from cupy.cuda import nccl - from cuquantum import Network - from mpi4py import MPI - - root = 0 - comm_mpi = MPI.COMM_WORLD - rank = comm_mpi.Get_rank() - size = comm_mpi.Get_size() - - device_id = rank % getDeviceCount() - - cp.cuda.Device(device_id).use() - - # Set up the NCCL communicator. - nccl_id = nccl.get_unique_id() if rank == root else None - nccl_id = comm_mpi.bcast(nccl_id, root) - comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank) - - # Perform circuit conversion - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - operands = myconvertor.state_vector_operands() - + comm_mpi, rank, size, device_id = initialize_mpi() + comm_nccl = initialize_nccl(comm_mpi, rank, size) + operands = get_operands(qibo_circ, datatype, rank, comm_mpi) network = Network(*operands) - - # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. - path, info = network.contract_path( - optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}} - ) - - # Select the best path from all ranks. - opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC) - - # Broadcast info from the sender to all other ranks. - info = comm_mpi.bcast(info, sender) - - # Set path and slices. + info = compute_optimal_path(network, n_samples, size, comm_mpi) path, info = network.contract_path( optimize={"path": info.path, "slicing": info.slices} ) - - # Calculate this process's share of the slices. - num_slices = info.num_slices - chunk, extra = num_slices // size, num_slices % size - slice_begin = rank * chunk + min(rank, extra) - slice_end = ( - num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) - ) - slices = range(slice_begin, slice_end) - - # Contract the group of slices the process is responsible for. + slices = compute_slices(info, rank, size) result = network.contract(slices=slices) - - # Sum the partial contribution from each process on root. - stream_ptr = cp.cuda.get_current_stream().ptr - comm_nccl.reduce( - result.data.ptr, - result.data.ptr, - result.size, - nccl.NCCL_FLOAT64, - nccl.NCCL_SUM, - root, - stream_ptr, - ) - - return result, rank + return reduce_result(result, comm_nccl, method="NCCL"), rank -def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8): +def dense_vector_tn(qibo_circ, datatype): + """Convert qibo circuit to tensornet (TN) format and perform contraction to + dense vector. + + Parameters: + qibo_circ: The quantum circuit object. + datatype (str): Either single ("complex64") or double (complex128) precision. + + Returns: + Dense vector of quantum circuit. + """ + myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) + return contract(*myconvertor.state_vector_operands()) + + +def expectation_tn_nccl(qibo_circ, datatype, observable, n_samples=8): """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string using multi node and multi GPU through NCCL. @@ -226,76 +336,53 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl Returns: Expectation of quantum circuit due to pauli string. """ - from cupy.cuda import nccl - from cuquantum import Network - from mpi4py import MPI - root = 0 - comm_mpi = MPI.COMM_WORLD - rank = comm_mpi.Get_rank() - size = comm_mpi.Get_size() + comm_mpi, rank, size, device_id = initialize_mpi() - device_id = rank % getDeviceCount() + comm_nccl = initialize_nccl(comm_mpi, rank, size) - cp.cuda.Device(device_id).use() + observable = check_observable(observable, qibo_circ.nqubits) - # Set up the NCCL communicator. - nccl_id = nccl.get_unique_id() if rank == root else None - nccl_id = comm_mpi.bcast(nccl_id, root) - comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank) + ham_gate_map = extract_gates_and_qubits(observable) - # Perform circuit conversion - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - operands = myconvertor.expectation_operands( - pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) - ) + if rank == 0: + myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - network = Network(*operands) + exp = 0 + for each_ham in ham_gate_map: + ham_gates = get_ham_gates(each_ham) + # Perform circuit conversion + if rank == 0: + operands = myconvertor.expectation_operands(ham_gates) + else: + operands = None - # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. - path, info = network.contract_path( - optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}} - ) + operands = comm_mpi.bcast(operands, root=0) - # Select the best path from all ranks. - opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC) + network = Network(*operands) - # Broadcast info from the sender to all other ranks. - info = comm_mpi.bcast(info, sender) + # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. + info = compute_optimal_path(network, n_samples, size, comm_mpi) - # Set path and slices. - path, info = network.contract_path( - optimize={"path": info.path, "slicing": info.slices} - ) + # Recompute path with the selected optimal settings + path, info = network.contract_path( + optimize={"path": info.path, "slicing": info.slices} + ) - # Calculate this process's share of the slices. - num_slices = info.num_slices - chunk, extra = num_slices // size, num_slices % size - slice_begin = rank * chunk + min(rank, extra) - slice_end = ( - num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) - ) - slices = range(slice_begin, slice_end) + slices = compute_slices(info, rank, size) - # Contract the group of slices the process is responsible for. - result = network.contract(slices=slices) + # Contract the group of slices the process is responsible for. + result = network.contract(slices=slices) - # Sum the partial contribution from each process on root. - stream_ptr = cp.cuda.get_current_stream().ptr - comm_nccl.reduce( - result.data.ptr, - result.data.ptr, - result.size, - nccl.NCCL_FLOAT64, - nccl.NCCL_SUM, - root, - stream_ptr, - ) + # Sum the partial contribution from each process on root. + result = reduce_result(result, comm_nccl, method="NCCL", root=0) - return result, rank + exp += result + + return exp, rank -def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_samples=8): +def expectation_tn_MPI(qibo_circ, datatype, observable, n_samples=8): """Convert qibo circuit to tensornet (TN) format and perform contraction to expectation of given Pauli string using multi node and multi GPU through MPI. @@ -318,61 +405,76 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample Returns: Expectation of quantum circuit due to pauli string. """ - from cuquantum import Network - from mpi4py import MPI # this line initializes MPI + # Initialize MPI and device + comm, rank, size, device_id = initialize_mpi() - root = 0 - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - size = comm.Get_size() + observable = check_observable(observable, qibo_circ.nqubits) - device_id = rank % getDeviceCount() + ham_gate_map = extract_gates_and_qubits(observable) - # Perform circuit conversion + if rank == 0: + myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) + exp = 0 + for each_ham in ham_gate_map: + ham_gates = get_ham_gates(each_ham) + # Perform circuit conversion + # Perform circuit conversion + if rank == 0: + operands = myconvertor.expectation_operands(ham_gates) + else: + operands = None + + operands = comm.bcast(operands, root=0) + + # Create network object. + network = Network(*operands, options={"device_id": device_id}) + + # Compute optimal contraction path + info = compute_optimal_path(network, n_samples, size, comm) + + # Set path and slices. + path, info = network.contract_path( + optimize={"path": info.path, "slicing": info.slices} + ) + + # Compute slice range for each rank + slices = compute_slices(info, rank, size) + + # Perform contraction + result = network.contract(slices=slices) + + # Sum the partial contribution from each process on root. + result = reduce_result(result, comm, method="MPI", root=0) + + if rank == 0: + exp += result + + return exp, rank + + +def expectation_tn(qibo_circ, datatype, observable): + """Convert qibo circuit to tensornet (TN) format and perform contraction to + expectation of given Pauli string. + + Parameters: + qibo_circ: The quantum circuit object. + datatype (str): Either single ("complex64") or double (complex128) precision. + pauli_string_pattern(str): pauli string pattern. + + Returns: + Expectation of quantum circuit due to pauli string. + """ myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - operands = myconvertor.expectation_operands( - pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) - ) + observable = check_observable(observable, qibo_circ.nqubits) - # Assign the device for each process. - device_id = rank % getDeviceCount() - - # Create network object. - network = Network(*operands, options={"device_id": device_id}) - - # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. - path, info = network.contract_path( - optimize={"samples": n_samples, "slicing": {"min_slices": max(32, size)}} - ) - - # Select the best path from all ranks. - opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC) - - # Broadcast info from the sender to all other ranks. - info = comm.bcast(info, sender) - - # Set path and slices. - path, info = network.contract_path( - optimize={"path": info.path, "slicing": info.slices} - ) - - # Calculate this process's share of the slices. - num_slices = info.num_slices - chunk, extra = num_slices // size, num_slices % size - slice_begin = rank * chunk + min(rank, extra) - slice_end = ( - num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) - ) - slices = range(slice_begin, slice_end) - - # Contract the group of slices the process is responsible for. - result = network.contract(slices=slices) - - # Sum the partial contribution from each process on root. - result = comm.reduce(sendobj=result, op=MPI.SUM, root=root) - - return result, rank + ham_gate_map = extract_gates_and_qubits(observable) + exp = 0 + for each_ham in ham_gate_map: + ham_gates = get_ham_gates(each_ham) + expectation_operands = myconvertor.expectation_operands(ham_gates) + exp += contract(*expectation_operands) + return exp def dense_vector_mps(qibo_circ, gate_algo, datatype): @@ -393,27 +495,3 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype): return mps_helper.contract_state_vector( myconvertor.mps_tensors, {"handle": myconvertor.handle} ) - - -def pauli_string_gen(nqubits, pauli_string_pattern): - """Used internally to generate the string based on given pattern and number - of qubit. - - Parameters: - nqubits(int): Number of qubits of Quantum Circuit - pauli_string_pattern(str): Strings representing sequence of pauli gates. - - Returns: - String representation of the actual pauli string from the pattern. - - Example: pattern: "XZ", number of qubit: 7, output = XZXZXZX - """ - if nqubits <= 0: - return "Invalid input. N should be a positive integer." - - result = "" - - for i in range(nqubits): - char_to_add = pauli_string_pattern[i % len(pauli_string_pattern)] - result += char_to_add - return result diff --git a/tests/conftest.py b/tests/conftest.py index 0a18bfa..c5e9ed4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,16 +9,16 @@ import pytest # backends to be tested # TODO: add cutensornet and quimb here as well -BACKENDS = ["qmatchatea"] +BACKENDS = ["cutensornet"] +# BACKENDS = ["qmatchatea"] def get_backend(backend_name): + from qibotn.backends.cutensornet import CuTensorNet from qibotn.backends.qmatchatea import QMatchaTeaBackend - NAME2BACKEND = { - "qmatchatea": QMatchaTeaBackend, - } + NAME2BACKEND = {"qmatchatea": QMatchaTeaBackend, "cutensornet": CuTensorNet} return NAME2BACKEND[backend_name]() diff --git a/tests/test_cuquantum_cutensor_backend.py b/tests/test_cuquantum_cutensor_backend.py index c8f1e19..2bd4c26 100644 --- a/tests/test_cuquantum_cutensor_backend.py +++ b/tests/test_cuquantum_cutensor_backend.py @@ -1,11 +1,13 @@ -from timeit import default_timer as timer +import math -import config import cupy as cp -import numpy as np import pytest import qibo +from qibo import construct_backend, hamiltonians from qibo.models import QFT +from qibo.symbols import X, Z + +ABS_TOL = 1e-7 def qibo_qft(nqubits, swaps): @@ -14,37 +16,73 @@ def qibo_qft(nqubits, swaps): return circ_qibo, state_vec -def time(func): - start = timer() - res = func() - end = timer() - time = end - start - return time, res +def build_observable(nqubits): + """Helper function to construct a target observable.""" + hamiltonian_form = 0 + for i in range(nqubits): + hamiltonian_form += 0.5 * X(i % nqubits) * Z((i + 1) % nqubits) + + hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form) + return hamiltonian, hamiltonian_form + + +def build_observable_dict(nqubits): + """Construct a target observable as a dictionary representation. + + Returns a dictionary suitable for `create_hamiltonian_from_dict`. + """ + terms = [] + + for i in range(nqubits): + term = { + "coefficient": 0.5, + "operators": [("X", i % nqubits), ("Z", (i + 1) % nqubits)], + } + terms.append(term) + + return {"terms": terms} @pytest.mark.gpu @pytest.mark.parametrize("nqubits", [1, 2, 5, 10]) def test_eval(nqubits: int, dtype="complex128"): - """Evaluate QASM with cuQuantum. - + """ Args: nqubits (int): Total number of qubits in the system. dtype (str): The data type for precision, 'complex64' for single, 'complex128' for double. """ - import qibotn.eval - # Test qibo - qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform) - qibo_time, (qibo_circ, result_sv) = time(lambda: qibo_qft(nqubits, swaps=True)) + qibo.set_backend(backend="numpy") + qibo_circ, result_sv = qibo_qft(nqubits, swaps=True) + result_sv_cp = cp.asarray(result_sv) - # Test Cuquantum - cutn_time, result_tn = time( - lambda: qibotn.eval.dense_vector_tn(qibo_circ, dtype).flatten() + # Test cutensornet + backend = construct_backend(backend="qibotn", platform="cutensornet") + # Test with no settings specified. Default is dense vector calculation without MPI or NCCL. + result_tn = backend.execute_circuit(circuit=qibo_circ) + print( + f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}" ) + assert cp.allclose( + result_sv_cp, result_tn.statevector.flatten() + ), "Resulting dense vectors do not match" - assert 1e-2 * qibo_time < cutn_time < 1e2 * qibo_time - assert np.allclose(result_sv, result_tn), "Resulting dense vectors do not match" + # Test with explicit settings specified. + comp_set_w_bool = { + "MPI_enabled": False, + "MPS_enabled": False, + "NCCL_enabled": False, + "expectation_enabled": False, + } + backend.configure_tn_simulation(comp_set_w_bool) + result_tn = backend.execute_circuit(circuit=qibo_circ) + print( + f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}" + ) + assert cp.allclose( + result_sv_cp, result_tn.statevector.flatten() + ), "Resulting dense vectors do not match" @pytest.mark.gpu @@ -57,28 +95,105 @@ def test_mps(nqubits: int, dtype="complex128"): dtype (str): The data type for precision, 'complex64' for single, 'complex128' for double. """ - import qibotn.eval # Test qibo - qibo.set_backend(backend=config.qibo.backend, platform=config.qibo.platform) - - qibo_time, (circ_qibo, result_sv) = time(lambda: qibo_qft(nqubits, swaps=True)) - + qibo.set_backend(backend="numpy") + qibo_circ, result_sv = qibo_qft(nqubits, swaps=True) result_sv_cp = cp.asarray(result_sv) - # Test of MPS - gate_algo = { - "qr_method": False, - "svd_method": { - "partition": "UV", - "abs_cutoff": 1e-12, - }, + # Test cutensornet + backend = construct_backend(backend="qibotn", platform="cutensornet") + # Test with simple MPS settings specified using bool. Uses the default MPS parameters. + comp_set_w_bool = { + "MPI_enabled": False, + "MPS_enabled": True, + "NCCL_enabled": False, + "expectation_enabled": False, } + backend.configure_tn_simulation(comp_set_w_bool) + result_tn = backend.execute_circuit(circuit=qibo_circ) + print( + f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}" + ) + assert cp.allclose( + result_tn.statevector.flatten(), result_sv_cp + ), "Resulting dense vectors do not match" - cutn_time, result_tn = time( - lambda: qibotn.eval.dense_vector_mps(circ_qibo, gate_algo, dtype).flatten() + # Test with explicit MPS computation settings specified using Dict. Users able to specify parameters like qr_method etc. + comp_set_w_MPS_config_para = { + "MPI_enabled": False, + "MPS_enabled": { + "qr_method": False, + "svd_method": { + "partition": "UV", + "abs_cutoff": 1e-12, + }, + }, + "NCCL_enabled": False, + "expectation_enabled": False, + } + backend.configure_tn_simulation(comp_set_w_MPS_config_para) + result_tn = backend.execute_circuit(circuit=qibo_circ) + print( + f"State vector difference: {abs(result_tn.statevector.flatten() - result_sv_cp).max():0.3e}" + ) + assert cp.allclose( + result_tn.statevector.flatten(), result_sv_cp + ), "Resulting dense vectors do not match" + + +@pytest.mark.parametrize("nqubits", [2, 5, 10]) +def test_expectation(nqubits: int, dtype="complex128"): + + # Test qibo + qibo_circ, state_vec_qibo = qibo_qft(nqubits, swaps=True) + ham, ham_form = build_observable(nqubits) + numpy_backend = construct_backend("numpy") + exact_expval = numpy_backend.calculate_expectation_state( + hamiltonian=ham, + state=state_vec_qibo, + normalize=False, ) - print(f"State vector difference: {abs(result_tn - result_sv_cp).max():0.3e}") + # Test cutensornet + backend = construct_backend(backend="qibotn", platform="cutensornet") - assert cp.allclose(result_tn, result_sv_cp) + # Test with simple settings using bool. Uses default Hamilitonian for expectation calculation. + comp_set_w_bool = { + "MPI_enabled": False, + "MPS_enabled": False, + "NCCL_enabled": False, + "expectation_enabled": True, + } + backend.configure_tn_simulation(comp_set_w_bool) + result_tn = backend.execute_circuit(circuit=qibo_circ) + assert math.isclose( + exact_expval.item(), result_tn.real.get().item(), abs_tol=ABS_TOL + ) + + # Test with user defined hamiltonian using "hamiltonians.SymbolicHamiltonian" object. + comp_set_w_hamiltonian_obj = { + "MPI_enabled": False, + "MPS_enabled": False, + "NCCL_enabled": False, + "expectation_enabled": ham, + } + backend.configure_tn_simulation(comp_set_w_hamiltonian_obj) + result_tn = backend.execute_circuit(circuit=qibo_circ) + assert math.isclose( + exact_expval.item(), result_tn.real.get().item(), abs_tol=ABS_TOL + ) + + # Test with user defined hamiltonian using Dictionary object form of hamiltonian. + ham_dict = build_observable_dict(nqubits) + comp_set_w_hamiltonian_dict = { + "MPI_enabled": False, + "MPS_enabled": False, + "NCCL_enabled": False, + "expectation_enabled": ham_dict, + } + backend.configure_tn_simulation(comp_set_w_hamiltonian_dict) + result_tn = backend.execute_circuit(circuit=qibo_circ) + assert math.isclose( + exact_expval.item(), result_tn.real.get().item(), abs_tol=ABS_TOL + ) diff --git a/tests/test_cuquantum_cutensor_mpi_backend.py b/tests/test_cuquantum_cutensor_mpi_backend.py new file mode 100644 index 0000000..0645800 --- /dev/null +++ b/tests/test_cuquantum_cutensor_mpi_backend.py @@ -0,0 +1,315 @@ +# mpirun --allow-run-as-root -np 2 python -m pytest --with-mpi test_cuquantum_cutensor_mpi_backend.py + +import math + +import cupy as cp +import numpy as np +import pytest +import qibo +from qibo import construct_backend, hamiltonians +from qibo.models import QFT +from qibo.symbols import X, Z + +ABS_TOL = 1e-7 + + +def qibo_qft(nqubits, swaps): + circ_qibo = QFT(nqubits, swaps) + state_vec = circ_qibo().state(numpy=True) + return circ_qibo, state_vec + + +def build_observable(nqubits): + """Helper function to construct a target observable.""" + hamiltonian_form = 0 + for i in range(nqubits): + hamiltonian_form += 0.5 * X(i % nqubits) * Z((i + 1) % nqubits) + + hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form) + return hamiltonian, hamiltonian_form + + +def build_observable_dict(nqubits): + """Construct a target observable as a dictionary representation. + + Returns a dictionary suitable for `create_hamiltonian_from_dict`. + """ + terms = [] + + for i in range(nqubits): + term = { + "coefficient": 0.5, + "operators": [("X", i % nqubits), ("Z", (i + 1) % nqubits)], + } + terms.append(term) + + return {"terms": terms} + + +@pytest.mark.gpu +@pytest.mark.mpi +@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10]) +def test_eval_mpi(nqubits: int, dtype="complex128"): + """ + Args: + nqubits (int): Total number of qubits in the system. + dtype (str): The data type for precision, 'complex64' for single, + 'complex128' for double. + """ + # Test qibo + qibo.set_backend(backend="numpy") + qibo_circ, result_sv = qibo_qft(nqubits, swaps=True) + result_sv_cp = cp.asarray(result_sv) + + # Test cutensornet + backend = construct_backend(backend="qibotn", platform="cutensornet") + + # Test with explicit settings specified. + comp_set_w_bool = { + "MPI_enabled": True, + "MPS_enabled": False, + "NCCL_enabled": False, + "expectation_enabled": False, + } + backend.configure_tn_simulation(comp_set_w_bool) + result_tn = backend.execute_circuit(circuit=qibo_circ) + result_tn_cp = cp.asarray(result_tn.statevector.flatten()) + + print(f"State vector difference: {abs(result_tn_cp - result_sv_cp).max():0.3e}") + + if backend.rank == 0: + + assert cp.allclose( + result_sv_cp, result_tn_cp + ), "Resulting dense vectors do not match" + else: + assert ( + isinstance(result_tn_cp, cp.ndarray) + and result_tn_cp.size == 1 + and result_tn_cp.item() == 0 + ), f"Rank {backend.rank}: result_tn_cp should be scalar/array with 0, got {result_tn_cp}" + + +@pytest.mark.gpu +@pytest.mark.mpi +@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10]) +def test_expectation_mpi(nqubits: int, dtype="complex128"): + + # Test qibo + qibo_circ, state_vec_qibo = qibo_qft(nqubits, swaps=True) + ham, ham_form = build_observable(nqubits) + numpy_backend = construct_backend("numpy") + exact_expval = numpy_backend.calculate_expectation_state( + hamiltonian=ham, + state=state_vec_qibo, + normalize=False, + ) + + # Test cutensornet + backend = construct_backend(backend="qibotn", platform="cutensornet") + + # Test with simple settings using bool. Uses default Hamilitonian for expectation calculation. + comp_set_w_bool = { + "MPI_enabled": True, + "MPS_enabled": False, + "NCCL_enabled": False, + "expectation_enabled": True, + } + backend.configure_tn_simulation(comp_set_w_bool) + result_tn = backend.execute_circuit(circuit=qibo_circ) + if backend.rank == 0: + # Compare numerical values + assert math.isclose( + exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL + ), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}" + + else: + # Rank > 0: must be hardcoded [0] (int) + assert ( + isinstance(result_tn, (np.ndarray, cp.ndarray)) + and result_tn.size == 1 + and np.issubdtype(result_tn.dtype, np.integer) + and result_tn.item() == 0 + ), f"Rank {backend.rank}: expected int array [0], got {result_tn}" + + # Test with user defined hamiltonian using "hamiltonians.SymbolicHamiltonian" object. + comp_set_w_hamiltonian_obj = { + "MPI_enabled": True, + "MPS_enabled": False, + "NCCL_enabled": False, + "expectation_enabled": ham, + } + backend.configure_tn_simulation(comp_set_w_hamiltonian_obj) + result_tn = backend.execute_circuit(circuit=qibo_circ) + if backend.rank == 0: + # Compare numerical values + assert math.isclose( + exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL + ), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}" + + else: + # Rank > 0: must be hardcoded [0] (int) + assert ( + isinstance(result_tn, (np.ndarray, cp.ndarray)) + and result_tn.size == 1 + and np.issubdtype(result_tn.dtype, np.integer) + and result_tn.item() == 0 + ), f"Rank {backend.rank}: expected int array [0], got {result_tn}" + + # Test with user defined hamiltonian using Dictionary object form of hamiltonian. + ham_dict = build_observable_dict(nqubits) + comp_set_w_hamiltonian_dict = { + "MPI_enabled": True, + "MPS_enabled": False, + "NCCL_enabled": False, + "expectation_enabled": ham_dict, + } + backend.configure_tn_simulation(comp_set_w_hamiltonian_dict) + result_tn = backend.execute_circuit(circuit=qibo_circ) + if backend.rank == 0: + # Compare numerical values + assert math.isclose( + exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL + ), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}" + + else: + # Rank > 0: must be hardcoded [0] (int) + assert ( + isinstance(result_tn, (np.ndarray, cp.ndarray)) + and result_tn.size == 1 + and np.issubdtype(result_tn.dtype, np.integer) + and result_tn.item() == 0 + ), f"Rank {backend.rank}: expected int array [0], got {result_tn}" + + +@pytest.mark.gpu +@pytest.mark.mpi +@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10]) +def test_eval_nccl(nqubits: int, dtype="complex128"): + """ + Args: + nqubits (int): Total number of qubits in the system. + dtype (str): The data type for precision, 'complex64' for single, + 'complex128' for double. + """ + # Test qibo + qibo.set_backend(backend="numpy") + qibo_circ, result_sv = qibo_qft(nqubits, swaps=True) + result_sv_cp = cp.asarray(result_sv) + + # Test cutensornet + backend = construct_backend(backend="qibotn", platform="cutensornet") + + # Test with explicit settings specified. + comp_set_w_bool = { + "MPI_enabled": False, + "MPS_enabled": False, + "NCCL_enabled": True, + "expectation_enabled": False, + } + backend.configure_tn_simulation(comp_set_w_bool) + result_tn = backend.execute_circuit(circuit=qibo_circ) + result_tn_cp = cp.asarray(result_tn.statevector.flatten()) + + if backend.rank == 0: + assert cp.allclose( + result_sv_cp, result_tn_cp + ), "Resulting dense vectors do not match" + else: + assert ( + isinstance(result_tn_cp, cp.ndarray) + and result_tn_cp.size == 1 + and result_tn_cp.item() == 0 + ), f"Rank {backend.rank}: result_tn_cp should be scalar/array with 0, got {result_tn_cp}" + + +@pytest.mark.gpu +@pytest.mark.mpi +@pytest.mark.parametrize("nqubits", [1, 2, 5, 7, 10]) +def test_expectation_NCCL(nqubits: int, dtype="complex128"): + + # Test qibo + qibo_circ, state_vec_qibo = qibo_qft(nqubits, swaps=True) + ham, ham_form = build_observable(nqubits) + numpy_backend = construct_backend("numpy") + exact_expval = numpy_backend.calculate_expectation_state( + hamiltonian=ham, + state=state_vec_qibo, + normalize=False, + ) + + # Test cutensornet + backend = construct_backend(backend="qibotn", platform="cutensornet") + + # Test with simple settings using bool. Uses default Hamilitonian for expectation calculation. + comp_set_w_bool = { + "MPI_enabled": False, + "MPS_enabled": False, + "NCCL_enabled": True, + "expectation_enabled": True, + } + backend.configure_tn_simulation(comp_set_w_bool) + result_tn = backend.execute_circuit(circuit=qibo_circ) + if backend.rank == 0: + # Compare numerical values + assert math.isclose( + exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL + ), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}" + + else: + # Rank > 0: must be hardcoded [0] (int) + assert ( + isinstance(result_tn, (np.ndarray, cp.ndarray)) + and result_tn.size == 1 + and np.issubdtype(result_tn.dtype, np.integer) + and result_tn.item() == 0 + ), f"Rank {backend.rank}: expected int array [0], got {result_tn}" + + # Test with user defined hamiltonian using "hamiltonians.SymbolicHamiltonian" object. + comp_set_w_hamiltonian_obj = { + "MPI_enabled": False, + "MPS_enabled": False, + "NCCL_enabled": True, + "expectation_enabled": ham, + } + backend.configure_tn_simulation(comp_set_w_hamiltonian_obj) + result_tn = backend.execute_circuit(circuit=qibo_circ) + if backend.rank == 0: + # Compare numerical values + assert math.isclose( + exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL + ), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}" + + else: + # Rank > 0: must be hardcoded [0] (int) + assert ( + isinstance(result_tn, (np.ndarray, cp.ndarray)) + and result_tn.size == 1 + and np.issubdtype(result_tn.dtype, np.integer) + and result_tn.item() == 0 + ), f"Rank {backend.rank}: expected int array [0], got {result_tn}" + + # Test with user defined hamiltonian using Dictionary object form of hamiltonian. + ham_dict = build_observable_dict(nqubits) + comp_set_w_hamiltonian_dict = { + "MPI_enabled": False, + "MPS_enabled": False, + "NCCL_enabled": True, + "expectation_enabled": ham_dict, + } + backend.configure_tn_simulation(comp_set_w_hamiltonian_dict) + result_tn = backend.execute_circuit(circuit=qibo_circ) + if backend.rank == 0: + # Compare numerical values + assert math.isclose( + exact_expval.item(), float(result_tn[0]), abs_tol=ABS_TOL + ), f"Rank {backend.rank}: mismatch, expected {exact_expval}, got {result_tn}" + + else: + # Rank > 0: must be hardcoded [0] (int) + assert ( + isinstance(result_tn, (np.ndarray, cp.ndarray)) + and result_tn.size == 1 + and np.issubdtype(result_tn.dtype, np.integer) + and result_tn.item() == 0 + ), f"Rank {backend.rank}: expected int array [0], got {result_tn}"