Refactor to reduce repeated code

2025-02-18 11:34:55 +08:00
parent 13d4c9c04f
commit ac396a35db
1 changed files with 139 additions and 251 deletions
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -6,40 +6,88 @@ from qibotn.circuit_convertor import QiboCircuitToEinsum
 from qibotn.circuit_to_mps import QiboCircuitToMPS
 from qibotn.mps_contraction_helper import MPSContractionHelper

-
-def dense_vector_tn(qibo_circ, datatype):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to
-    dense vector.
-
-    Parameters:
-        qibo_circ: The quantum circuit object.
-        datatype (str): Either single ("complex64") or double (complex128) precision.
-
-    Returns:
-        Dense vector of quantum circuit.
-    """
-    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    return contract(*myconvertor.state_vector_operands())
+import cuquantum.cutensornet as cutn
+from cuquantum import Network
+from mpi4py import MPI
+from cupy.cuda import nccl


-def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to
-    expectation of given Pauli string.
+def initialize_mpi():
+    """Initialize MPI communication and device selection."""
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+    device_id = rank % getDeviceCount()
+    cp.cuda.Device(device_id).use()
+    return comm, rank, size, device_id

-    Parameters:
-        qibo_circ: The quantum circuit object.
-        datatype (str): Either single ("complex64") or double (complex128) precision.
-        pauli_string_pattern(str): pauli string pattern.

-    Returns:
-        Expectation of quantum circuit due to pauli string.
-    """
-    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-    return contract(
-        *myconvertor.expectation_operands(
-            pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
-        )
+def initialize_nccl(comm_mpi, rank, size):
+    """Initialize NCCL communication."""
+    nccl_id = nccl.get_unique_id() if rank == 0 else None
+    nccl_id = comm_mpi.bcast(nccl_id, root=0)
+    return nccl.NcclCommunicator(size, nccl_id, rank)
+
+
+def get_operands(qibo_circ, datatype, rank, comm):
+    """Perform circuit conversion and broadcast operands."""
+    if rank == 0:
+        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+        operands = myconvertor.state_vector_operands()
+    else:
+        operands = None
+    return comm.bcast(operands, root=0)
+
+
+def compute_optimal_path(network, n_samples, size, comm):
+    """Compute contraction path and broadcast optimal selection."""
+    path, info = network.contract_path(
+        optimize={
+            "samples": n_samples,
+            "slicing": {
+                "min_slices": max(32, size),
+                "memory_model": cutn.MemoryModel.CUTENSOR,
+            },
+        }
    )
+    opt_cost, sender = comm.allreduce(
+        sendobj=(info.opt_cost, comm.Get_rank()), op=MPI.MINLOC
+    )
+    return comm.bcast(info, sender)
+
+
+def compute_contraction(network, slices):
+    """Perform tensor contraction."""
+    return network.contract(slices=slices)
+
+
+def compute_slices(info, rank, size):
+    """Determine the slice range each process should compute."""
+    num_slices = info.num_slices
+    chunk, extra = num_slices // size, num_slices % size
+    slice_begin = rank * chunk + min(rank, extra)
+    slice_end = (
+        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
+    )
+    return range(slice_begin, slice_end)
+
+
+def reduce_result(result, comm, method="MPI", root=0):
+    """Reduce results across processes."""
+    if method == "MPI":
+        return comm.reduce(sendobj=result, op=MPI.SUM, root=root)
+    elif method == "NCCL":
+        stream_ptr = cp.cuda.get_current_stream().ptr
+        comm.reduce(
+            result.data.ptr,
+            result.data.ptr,
+            result.size,
+            nccl.NCCL_FLOAT64,
+            nccl.NCCL_SUM,
+            root,
+            stream_ptr,
+        )
+        return result


 def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
@@ -61,70 +109,16 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
    Returns:
        Dense vector of quantum circuit.
    """
-
-    import cuquantum.cutensornet as cutn
-    from cuquantum import Network
-    from mpi4py import MPI
-
-    root = 0
-    comm = MPI.COMM_WORLD
-    rank = comm.Get_rank()
-    size = comm.Get_size()
-
-    device_id = rank % getDeviceCount()
-    cp.cuda.Device(device_id).use()
-
-    # Perform circuit conversion
-    if rank == 0:
-        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-
-        operands = myconvertor.state_vector_operands()
-    else:
-        operands = None
-
-    operands = comm.bcast(operands, root)
-
-    # Create network object.
+    comm, rank, size, device_id = initialize_mpi()
+    operands = get_operands(qibo_circ, datatype, rank, comm)
    network = Network(*operands, options={"device_id": device_id})
-
-    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(
-        optimize={
-            "samples": n_samples,
-            "slicing": {
-                "min_slices": max(32, size),
-                "memory_model": cutn.MemoryModel.CUTENSOR,
-            },
-        }
-    )
-
-    # Select the best path from all ranks.
-    opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
-
-    # Broadcast info from the sender to all other ranks.
-    info = comm.bcast(info, sender)
-
-    # Set path and slices.
+    info = compute_optimal_path(network, n_samples, size, comm)
    path, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )
-
-    # Calculate this process's share of the slices.
-    num_slices = info.num_slices
-    chunk, extra = num_slices // size, num_slices % size
-    slice_begin = rank * chunk + min(rank, extra)
-    slice_end = (
-        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
-    )
-    slices = range(slice_begin, slice_end)
-
-    # Contract the group of slices the process is responsible for.
-    result = network.contract(slices=slices)
-
-    # Sum the partial contribution from each process on root.
-    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
-
-    return result, rank
+    slices = compute_slices(info, rank, size)
+    result = compute_contraction(network, slices)
+    return reduce_result(result, comm, method="MPI"), rank


 def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
@@ -146,83 +140,32 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
    Returns:
        Dense vector of quantum circuit.
    """
-    import cuquantum.cutensornet as cutn
-    from cupy.cuda import nccl
-    from cuquantum import Network
-    from mpi4py import MPI
-
-    root = 0
-    comm_mpi = MPI.COMM_WORLD
-    rank = comm_mpi.Get_rank()
-    size = comm_mpi.Get_size()
-
-    device_id = rank % getDeviceCount()
-
-    cp.cuda.Device(device_id).use()
-
-    # Set up the NCCL communicator.
-    nccl_id = nccl.get_unique_id() if rank == root else None
-    nccl_id = comm_mpi.bcast(nccl_id, root)
-    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
-
-    # Perform circuit conversion
-    if rank == 0:
-        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
-        operands = myconvertor.state_vector_operands()
-    else:
-        operands = None
-
-    operands = comm_mpi.bcast(operands, root)
-
+    comm_mpi, rank, size, device_id = initialize_mpi()
+    comm_nccl = initialize_nccl(comm_mpi, rank, size)
+    operands = get_operands(qibo_circ, datatype, rank, comm_mpi)
    network = Network(*operands)
-
-    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(
-        optimize={
-            "samples": n_samples,
-            "slicing": {
-                "min_slices": max(32, size),
-                "memory_model": cutn.MemoryModel.CUTENSOR,
-            },
-        }
-    )
-
-    # Select the best path from all ranks.
-    opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
-
-    # Broadcast info from the sender to all other ranks.
-    info = comm_mpi.bcast(info, sender)
-
-    # Set path and slices.
+    info = compute_optimal_path(network, n_samples, size, comm_mpi)
    path, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )
+    slices = compute_slices(info, rank, size)
+    result = compute_contraction(network, slices)
+    return reduce_result(result, comm_nccl, method="NCCL"), rank

-    # Calculate this process's share of the slices.
-    num_slices = info.num_slices
-    chunk, extra = num_slices // size, num_slices % size
-    slice_begin = rank * chunk + min(rank, extra)
-    slice_end = (
-        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
-    )
-    slices = range(slice_begin, slice_end)

-    # Contract the group of slices the process is responsible for.
-    result = network.contract(slices=slices)
+def dense_vector_tn(qibo_circ, datatype):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    dense vector.

-    # Sum the partial contribution from each process on root.
-    stream_ptr = cp.cuda.get_current_stream().ptr
-    comm_nccl.reduce(
-        result.data.ptr,
-        result.data.ptr,
-        result.size,
-        nccl.NCCL_FLOAT64,
-        nccl.NCCL_SUM,
-        root,
-        stream_ptr,
-    )
+    Parameters:
+        qibo_circ: The quantum circuit object.
+        datatype (str): Either single ("complex64") or double (complex128) precision.

-    return result, rank
+    Returns:
+        Dense vector of quantum circuit.
+    """
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    return contract(*myconvertor.state_vector_operands())


 def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
@@ -248,28 +191,13 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
    Returns:
        Expectation of quantum circuit due to pauli string.
    """
-    import cuquantum.cutensornet as cutn
-    from cupy.cuda import nccl
-    from cuquantum import Network
-    from mpi4py import MPI

-    root = 0
-    comm_mpi = MPI.COMM_WORLD
-    rank = comm_mpi.Get_rank()
-    size = comm_mpi.Get_size()
+    comm_mpi, rank, size, device_id = initialize_mpi()

-    device_id = rank % getDeviceCount()
-
-    cp.cuda.Device(device_id).use()
-
-    # Set up the NCCL communicator.
-    nccl_id = nccl.get_unique_id() if rank == root else None
-    nccl_id = comm_mpi.bcast(nccl_id, root)
-    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
+    comm_nccl = initialize_nccl(comm_mpi, rank, size)

    # Perform circuit conversion
    if rank == 0:
-
        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        operands = myconvertor.expectation_operands(
            pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
@@ -277,55 +205,25 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
    else:
        operands = None

-    operands = comm_mpi.bcast(operands, root)
+    operands = comm_mpi.bcast(operands, root=0)

    network = Network(*operands)

    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(
-        optimize={
-            "samples": n_samples,
-            "slicing": {
-                "min_slices": max(32, size),
-                "memory_model": cutn.MemoryModel.CUTENSOR,
-            },
-        }
-    )
+    info = compute_optimal_path(network, n_samples, size, comm_mpi)

-    # Select the best path from all ranks.
-    opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
-
-    # Broadcast info from the sender to all other ranks.
-    info = comm_mpi.bcast(info, sender)
-
-    # Set path and slices.
+    # Recompute path with the selected optimal settings
    path, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )

-    # Calculate this process's share of the slices.
-    num_slices = info.num_slices
-    chunk, extra = num_slices // size, num_slices % size
-    slice_begin = rank * chunk + min(rank, extra)
-    slice_end = (
-        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
-    )
-    slices = range(slice_begin, slice_end)
+    slices = compute_slices(info, rank, size)

    # Contract the group of slices the process is responsible for.
-    result = network.contract(slices=slices)
+    result = compute_contraction(network, slices)

    # Sum the partial contribution from each process on root.
-    stream_ptr = cp.cuda.get_current_stream().ptr
-    comm_nccl.reduce(
-        result.data.ptr,
-        result.data.ptr,
-        result.size,
-        nccl.NCCL_FLOAT64,
-        nccl.NCCL_SUM,
-        root,
-        stream_ptr,
-    )
+    result = reduce_result(result, comm_nccl, method="NCCL", root=0)

    return result, rank

@@ -353,18 +251,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
    Returns:
        Expectation of quantum circuit due to pauli string.
    """
-    import cuquantum.cutensornet as cutn
-    from cuquantum import Network
-    from mpi4py import MPI  # this line initializes MPI
-
-    root = 0
-    comm = MPI.COMM_WORLD
-    rank = comm.Get_rank()
-    size = comm.Get_size()
-
-    # Assign the device for each process.
-    device_id = rank % getDeviceCount()
-    cp.cuda.Device(device_id).use()
+    # Initialize MPI and device
+    comm, rank, size, device_id = initialize_mpi()

    # Perform circuit conversion
    if rank == 0:
@@ -376,51 +264,51 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
    else:
        operands = None

-    operands = comm.bcast(operands, root)
+    operands = comm.bcast(operands, root=0)

    # Create network object.
    network = Network(*operands, options={"device_id": device_id})

-    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(
-        optimize={
-            "samples": n_samples,
-            "slicing": {
-                "min_slices": max(32, size),
-                "memory_model": cutn.MemoryModel.CUTENSOR,
-            },
-        }
-    )
-
-    # Select the best path from all ranks.
-    opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
-
-    # Broadcast info from the sender to all other ranks.
-    info = comm.bcast(info, sender)
+    # Compute optimal contraction path
+    info = compute_optimal_path(network, n_samples, size, comm)

    # Set path and slices.
    path, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )

-    # Calculate this process's share of the slices.
-    num_slices = info.num_slices
-    chunk, extra = num_slices // size, num_slices % size
-    slice_begin = rank * chunk + min(rank, extra)
-    slice_end = (
-        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
-    )
-    slices = range(slice_begin, slice_end)
+    # Compute slice range for each rank
+    slices = compute_slices(info, rank, size)

-    # Contract the group of slices the process is responsible for.
-    result = network.contract(slices=slices)
+    # Perform contraction
+    result = compute_contraction(network, slices)

    # Sum the partial contribution from each process on root.
-    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
+    result = reduce_result(result, comm, method="MPI", root=0)

    return result, rank


+def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    expectation of given Pauli string.
+
+    Parameters:
+        qibo_circ: The quantum circuit object.
+        datatype (str): Either single ("complex64") or double (complex128) precision.
+        pauli_string_pattern(str): pauli string pattern.
+
+    Returns:
+        Expectation of quantum circuit due to pauli string.
+    """
+    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    return contract(
+        *myconvertor.expectation_operands(
+            pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
+        )
+    )
+
+
 def dense_vector_mps(qibo_circ, gate_algo, datatype):
    """Convert qibo circuit to matrix product state (MPS) format and perform
    contraction to dense vector.