Refactor to reduce repeated code

2025-02-18 11:34:55 +08:00
parent 13d4c9c04f
commit ac396a35db
1 changed files with 139 additions and 251 deletions
--- a/src/qibotn/eval.py
+++ b/src/qibotn/eval.py
@@ -6,40 +6,88 @@ from qibotn.circuit_convertor import QiboCircuitToEinsum
 from qibotn.circuit_to_mps import QiboCircuitToMPS
 from qibotn.mps_contraction_helper import MPSContractionHelper
-
+import cuquantum.cutensornet as cutn
-def dense_vector_tn(qibo_circ, datatype):
+from cuquantum import Network
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+from mpi4py import MPI
-    dense vector.
+from cupy.cuda import nccl
    Parameters:
        qibo_circ: The quantum circuit object.
        datatype (str): Either single ("complex64") or double (complex128) precision.
    Returns:
        Dense vector of quantum circuit.
    """
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    return contract(*myconvertor.state_vector_operands())
-def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
+def initialize_mpi():
-    """Convert qibo circuit to tensornet (TN) format and perform contraction to
+    """Initialize MPI communication and device selection."""
-    expectation of given Pauli string.
+    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    device_id = rank % getDeviceCount()
    cp.cuda.Device(device_id).use()
    return comm, rank, size, device_id
    Parameters:
        qibo_circ: The quantum circuit object.
        datatype (str): Either single ("complex64") or double (complex128) precision.
        pauli_string_pattern(str): pauli string pattern.
-    Returns:
+def initialize_nccl(comm_mpi, rank, size):
-        Expectation of quantum circuit due to pauli string.
+    """Initialize NCCL communication."""
-    """
+    nccl_id = nccl.get_unique_id() if rank == 0 else None
-    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
+    nccl_id = comm_mpi.bcast(nccl_id, root=0)
-    return contract(
+    return nccl.NcclCommunicator(size, nccl_id, rank)
-        *myconvertor.expectation_operands(
+
-            pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
+
-        )
+def get_operands(qibo_circ, datatype, rank, comm):
    """Build the state-vector operands on rank 0 and broadcast them.

    Parameters:
        qibo_circ: The quantum circuit object.
        datatype (str): Passed to ``QiboCircuitToEinsum`` as ``dtype``.
        rank: Rank of the calling process; only rank 0 converts the circuit.
        comm: Communicator whose ``bcast`` distributes the operands from root 0.

    Returns:
        The value returned by ``comm.bcast(operands, root=0)``.
    """
    if rank == 0:
        # Only the root converts the circuit; every other rank contributes
        # ``None`` and obtains the operands through the broadcast below.
        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        operands = myconvertor.state_vector_operands()
    else:
        operands = None
    return comm.bcast(operands, root=0)
 def compute_optimal_path(network, n_samples, size, comm):
    """Search for a contraction path locally and return the cheapest one found.

    Parameters:
        network: Object exposing ``contract_path(optimize=...)``.
        n_samples: Value forwarded as the ``"samples"`` optimizer option.
        size: Number of processes; slicing is requested with at least
            ``max(32, size)`` slices.
        comm: Communicator used for the ``allreduce`` and ``bcast`` calls.

    Returns:
        The optimizer info object broadcast from the rank that reported the
        lowest ``opt_cost``.
    """
    slicing_options = {
        "min_slices": max(32, size),
        "memory_model": cutn.MemoryModel.CUTENSOR,
    }
    # Only the info object is needed here; the path itself is discarded.
    _, info = network.contract_path(
        optimize={"samples": n_samples, "slicing": slicing_options}
    )
    # Reducing (cost, rank) pairs with MINLOC identifies which rank holds the
    # cheapest path so that it can act as the broadcast root.
    local_candidate = (info.opt_cost, comm.Get_rank())
    _, best_rank = comm.allreduce(sendobj=local_candidate, op=MPI.MINLOC)
    return comm.bcast(info, best_rank)
 def compute_contraction(network, slices):
    """Contract the requested slices of the network.

    Parameters:
        network: Object exposing ``contract(slices=...)``.
        slices: Slice selection forwarded unchanged to ``network.contract``.

    Returns:
        Whatever ``network.contract`` returns for those slices.
    """
    contracted = network.contract(slices=slices)
    return contracted
 def compute_slices(info, rank, size):
    """Return the contiguous range of slice indices assigned to ``rank``.

    The ``info.num_slices`` slices are split as evenly as possible over
    ``size`` processes; the first ``num_slices % size`` ranks each take one
    extra slice.

    Parameters:
        info: Object exposing ``num_slices``.
        rank: Index of the calling process.
        size: Total number of processes.

    Returns:
        A ``range`` covering this rank's share of the slices.
    """
    total = info.num_slices
    base, remainder = divmod(total, size)
    first = rank * base + min(rank, remainder)
    if rank == size - 1:
        # The last rank always runs to the end of the slice list.
        last = total
    else:
        last = (rank + 1) * base + min(rank + 1, remainder)
    return range(first, last)
 def reduce_result(result, comm, method="MPI", root=0):
    """Sum the per-process partial results onto the root process.

    Parameters:
        result: Partial result held by this process. For ``"NCCL"`` it must
            expose ``data.ptr`` and ``size``.
        comm: Communicator matching ``method``; its ``reduce`` is called.
        method (str): ``"MPI"`` or ``"NCCL"``.
        root (int): Rank that receives the reduced result.

    Returns:
        For ``"MPI"``, the return value of ``comm.reduce``. For ``"NCCL"``,
        ``result`` itself, which is used as both send and receive buffer.

    Raises:
        ValueError: If ``method`` is neither ``"MPI"`` nor ``"NCCL"``.
    """
    if method == "MPI":
        return comm.reduce(sendobj=result, op=MPI.SUM, root=root)
    if method == "NCCL":
        stream_ptr = cp.cuda.get_current_stream().ptr
        # NOTE(review): the element type is hard-coded to NCCL_FLOAT64 with a
        # count of result.size; confirm this matches result's actual dtype
        # (the callers' docstrings mention complex64/complex128 data).
        comm.reduce(
            result.data.ptr,
            result.data.ptr,
            result.size,
            nccl.NCCL_FLOAT64,
            nccl.NCCL_SUM,
            root,
            stream_ptr,
        )
        return result
    # Previously an unknown method fell through and silently returned None.
    raise ValueError(
        f"Unsupported reduction method {method!r}; expected 'MPI' or 'NCCL'."
    )
 def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
@@ -61,70 +109,16 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
    Returns:
        Dense vector of quantum circuit.
    """
-
+    comm, rank, size, device_id = initialize_mpi()
-    import cuquantum.cutensornet as cutn
+    operands = get_operands(qibo_circ, datatype, rank, comm)
    from cuquantum import Network
    from mpi4py import MPI
    root = 0
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    device_id = rank % getDeviceCount()
    cp.cuda.Device(device_id).use()
    # Perform circuit conversion
    if rank == 0:
        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        operands = myconvertor.state_vector_operands()
    else:
        operands = None
    operands = comm.bcast(operands, root)
    # Create network object.
    network = Network(*operands, options={"device_id": device_id})
-
+    info = compute_optimal_path(network, n_samples, size, comm)
    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
    path, info = network.contract_path(
        optimize={
            "samples": n_samples,
            "slicing": {
                "min_slices": max(32, size),
                "memory_model": cutn.MemoryModel.CUTENSOR,
            },
        }
    )
    # Select the best path from all ranks.
    opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
    # Broadcast info from the sender to all other ranks.
    info = comm.bcast(info, sender)
    # Set path and slices.
    path, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )
-
+    slices = compute_slices(info, rank, size)
-    # Calculate this process's share of the slices.
+    result = compute_contraction(network, slices)
-    num_slices = info.num_slices
+    return reduce_result(result, comm, method="MPI"), rank
    chunk, extra = num_slices // size, num_slices % size
    slice_begin = rank * chunk + min(rank, extra)
    slice_end = (
        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
    )
    slices = range(slice_begin, slice_end)
    # Contract the group of slices the process is responsible for.
    result = network.contract(slices=slices)
    # Sum the partial contribution from each process on root.
    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
    return result, rank
 def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
@@ -146,83 +140,32 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
    Returns:
        Dense vector of quantum circuit.
    """
-    import cuquantum.cutensornet as cutn
+    comm_mpi, rank, size, device_id = initialize_mpi()
-    from cupy.cuda import nccl
+    comm_nccl = initialize_nccl(comm_mpi, rank, size)
-    from cuquantum import Network
+    operands = get_operands(qibo_circ, datatype, rank, comm_mpi)
    from mpi4py import MPI
    root = 0
    comm_mpi = MPI.COMM_WORLD
    rank = comm_mpi.Get_rank()
    size = comm_mpi.Get_size()
    device_id = rank % getDeviceCount()
    cp.cuda.Device(device_id).use()
    # Set up the NCCL communicator.
    nccl_id = nccl.get_unique_id() if rank == root else None
    nccl_id = comm_mpi.bcast(nccl_id, root)
    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
    # Perform circuit conversion
    if rank == 0:
        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        operands = myconvertor.state_vector_operands()
    else:
        operands = None
    operands = comm_mpi.bcast(operands, root)
    network = Network(*operands)
-
+    info = compute_optimal_path(network, n_samples, size, comm_mpi)
    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
    path, info = network.contract_path(
        optimize={
            "samples": n_samples,
            "slicing": {
                "min_slices": max(32, size),
                "memory_model": cutn.MemoryModel.CUTENSOR,
            },
        }
    )
    # Select the best path from all ranks.
    opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
    # Broadcast info from the sender to all other ranks.
    info = comm_mpi.bcast(info, sender)
    # Set path and slices.
    path, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )
    slices = compute_slices(info, rank, size)
    result = compute_contraction(network, slices)
    return reduce_result(result, comm_nccl, method="NCCL"), rank
    # Calculate this process's share of the slices.
    num_slices = info.num_slices
    chunk, extra = num_slices // size, num_slices % size
    slice_begin = rank * chunk + min(rank, extra)
    slice_end = (
        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
    )
    slices = range(slice_begin, slice_end)
-    # Contract the group of slices the process is responsible for.
+def dense_vector_tn(qibo_circ, datatype):
-    result = network.contract(slices=slices)
+    """Convert qibo circuit to tensornet (TN) format and perform contraction to
    dense vector.
-    # Sum the partial contribution from each process on root.
+    Parameters:
-    stream_ptr = cp.cuda.get_current_stream().ptr
+        qibo_circ: The quantum circuit object.
-    comm_nccl.reduce(
+        datatype (str): Either single ("complex64") or double (complex128) precision.
        result.data.ptr,
        result.data.ptr,
        result.size,
        nccl.NCCL_FLOAT64,
        nccl.NCCL_SUM,
        root,
        stream_ptr,
    )
-    return result, rank
+    Returns:
        Dense vector of quantum circuit.
    """
    myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    return contract(*myconvertor.state_vector_operands())
 def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
@@ -248,28 +191,13 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
    Returns:
        Expectation of quantum circuit due to pauli string.
    """
    import cuquantum.cutensornet as cutn
    from cupy.cuda import nccl
    from cuquantum import Network
    from mpi4py import MPI
-    root = 0
+    comm_mpi, rank, size, device_id = initialize_mpi()
    comm_mpi = MPI.COMM_WORLD
    rank = comm_mpi.Get_rank()
    size = comm_mpi.Get_size()
-    device_id = rank % getDeviceCount()
+    comm_nccl = initialize_nccl(comm_mpi, rank, size)
    cp.cuda.Device(device_id).use()
    # Set up the NCCL communicator.
    nccl_id = nccl.get_unique_id() if rank == root else None
    nccl_id = comm_mpi.bcast(nccl_id, root)
    comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
    # Perform circuit conversion
    if rank == 0:
        myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        operands = myconvertor.expectation_operands(
            pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
@@ -277,55 +205,25 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
    else:
        operands = None
-    operands = comm_mpi.bcast(operands, root)
+    operands = comm_mpi.bcast(operands, root=0)
    network = Network(*operands)
    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
-    path, info = network.contract_path(
+    info = compute_optimal_path(network, n_samples, size, comm_mpi)
        optimize={
            "samples": n_samples,
            "slicing": {
                "min_slices": max(32, size),
                "memory_model": cutn.MemoryModel.CUTENSOR,
            },
        }
    )
-    # Select the best path from all ranks.
+    # Recompute path with the selected optimal settings
    opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
    # Broadcast info from the sender to all other ranks.
    info = comm_mpi.bcast(info, sender)
    # Set path and slices.
    path, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )
-    # Calculate this process's share of the slices.
+    slices = compute_slices(info, rank, size)
    num_slices = info.num_slices
    chunk, extra = num_slices // size, num_slices % size
    slice_begin = rank * chunk + min(rank, extra)
    slice_end = (
        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
    )
    slices = range(slice_begin, slice_end)
    # Contract the group of slices the process is responsible for.
-    result = network.contract(slices=slices)
+    result = compute_contraction(network, slices)
    # Sum the partial contribution from each process on root.
-    stream_ptr = cp.cuda.get_current_stream().ptr
+    result = reduce_result(result, comm_nccl, method="NCCL", root=0)
    comm_nccl.reduce(
        result.data.ptr,
        result.data.ptr,
        result.size,
        nccl.NCCL_FLOAT64,
        nccl.NCCL_SUM,
        root,
        stream_ptr,
    )
    return result, rank
@@ -353,18 +251,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
    Returns:
        Expectation of quantum circuit due to pauli string.
    """
-    import cuquantum.cutensornet as cutn
+    # Initialize MPI and device
-    from cuquantum import Network
+    comm, rank, size, device_id = initialize_mpi()
    from mpi4py import MPI  # this line initializes MPI
    root = 0
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    # Assign the device for each process.
    device_id = rank % getDeviceCount()
    cp.cuda.Device(device_id).use()
    # Perform circuit conversion
    if rank == 0:
@@ -376,51 +264,51 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
    else:
        operands = None
-    operands = comm.bcast(operands, root)
+    operands = comm.bcast(operands, root=0)
    # Create network object.
    network = Network(*operands, options={"device_id": device_id})
-    # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
+    # Compute optimal contraction path
-    path, info = network.contract_path(
+    info = compute_optimal_path(network, n_samples, size, comm)
        optimize={
            "samples": n_samples,
            "slicing": {
                "min_slices": max(32, size),
                "memory_model": cutn.MemoryModel.CUTENSOR,
            },
        }
    )
    # Select the best path from all ranks.
    opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
    # Broadcast info from the sender to all other ranks.
    info = comm.bcast(info, sender)
    # Set path and slices.
    path, info = network.contract_path(
        optimize={"path": info.path, "slicing": info.slices}
    )
-    # Calculate this process's share of the slices.
+    # Compute slice range for each rank
-    num_slices = info.num_slices
+    slices = compute_slices(info, rank, size)
    chunk, extra = num_slices // size, num_slices % size
    slice_begin = rank * chunk + min(rank, extra)
    slice_end = (
        num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
    )
    slices = range(slice_begin, slice_end)
-    # Contract the group of slices the process is responsible for.
+    # Perform contraction
-    result = network.contract(slices=slices)
+    result = compute_contraction(network, slices)
    # Sum the partial contribution from each process on root.
-    result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
+    result = reduce_result(result, comm, method="MPI", root=0)
    return result, rank
 def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
    """Contract the circuit's tensor network (TN) into the expectation of a
    Pauli string.

    Parameters:
        qibo_circ: The quantum circuit object.
        datatype (str): Either single ("complex64") or double (complex128) precision.
        pauli_string_pattern (str): Pauli string pattern, passed to
            ``pauli_string_gen`` together with ``qibo_circ.nqubits``.

    Returns:
        Expectation of quantum circuit due to pauli string.
    """
    convertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    pauli_string = pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
    operands = convertor.expectation_operands(pauli_string)
    return contract(*operands)
 def dense_vector_mps(qibo_circ, gate_algo, datatype):
    """Convert qibo circuit to matrix product state (MPS) format and perform
    contraction to dense vector.