From ac396a35dbf2f1b31a0f8eef64ebf6a07e4e5dcd Mon Sep 17 00:00:00 2001 From: tankya2 Date: Tue, 18 Feb 2025 11:34:55 +0800 Subject: [PATCH] Refactor to reduce repeating codes --- src/qibotn/eval.py | 390 ++++++++++++++++----------------------------- 1 file changed, 139 insertions(+), 251 deletions(-) diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py index ebef103..618bdf7 100644 --- a/src/qibotn/eval.py +++ b/src/qibotn/eval.py @@ -6,40 +6,88 @@ from qibotn.circuit_convertor import QiboCircuitToEinsum from qibotn.circuit_to_mps import QiboCircuitToMPS from qibotn.mps_contraction_helper import MPSContractionHelper - -def dense_vector_tn(qibo_circ, datatype): - """Convert qibo circuit to tensornet (TN) format and perform contraction to - dense vector. - - Parameters: - qibo_circ: The quantum circuit object. - datatype (str): Either single ("complex64") or double (complex128) precision. - - Returns: - Dense vector of quantum circuit. - """ - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - return contract(*myconvertor.state_vector_operands()) +import cuquantum.cutensornet as cutn +from cuquantum import Network +from mpi4py import MPI +from cupy.cuda import nccl -def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern): - """Convert qibo circuit to tensornet (TN) format and perform contraction to - expectation of given Pauli string. +def initialize_mpi(): + """Initialize MPI communication and device selection.""" + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + size = comm.Get_size() + device_id = rank % getDeviceCount() + cp.cuda.Device(device_id).use() + return comm, rank, size, device_id - Parameters: - qibo_circ: The quantum circuit object. - datatype (str): Either single ("complex64") or double (complex128) precision. - pauli_string_pattern(str): pauli string pattern. - Returns: - Expectation of quantum circuit due to pauli string. - """ - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - return contract( - *myconvertor.expectation_operands( - pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) - ) +def initialize_nccl(comm_mpi, rank, size): + """Initialize NCCL communication.""" + nccl_id = nccl.get_unique_id() if rank == 0 else None + nccl_id = comm_mpi.bcast(nccl_id, root=0) + return nccl.NcclCommunicator(size, nccl_id, rank) + + +def get_operands(qibo_circ, datatype, rank, comm): + """Perform circuit conversion and broadcast operands.""" + if rank == 0: + myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) + operands = myconvertor.state_vector_operands() + else: + operands = None + return comm.bcast(operands, root=0) + + +def compute_optimal_path(network, n_samples, size, comm): + """Compute contraction path and broadcast optimal selection.""" + path, info = network.contract_path( + optimize={ + "samples": n_samples, + "slicing": { + "min_slices": max(32, size), + "memory_model": cutn.MemoryModel.CUTENSOR, + }, + } ) + opt_cost, sender = comm.allreduce( + sendobj=(info.opt_cost, comm.Get_rank()), op=MPI.MINLOC + ) + return comm.bcast(info, sender) + + +def compute_contraction(network, slices): + """Perform tensor contraction.""" + return network.contract(slices=slices) + + +def compute_slices(info, rank, size): + """Determine the slice range each process should compute.""" + num_slices = info.num_slices + chunk, extra = num_slices // size, num_slices % size + slice_begin = rank * chunk + min(rank, extra) + slice_end = ( + num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) + ) + return range(slice_begin, slice_end) + + +def reduce_result(result, comm, method="MPI", root=0): + """Reduce results across processes.""" + if method == "MPI": + return comm.reduce(sendobj=result, op=MPI.SUM, root=root) + elif method == "NCCL": + stream_ptr = cp.cuda.get_current_stream().ptr + comm.reduce( + result.data.ptr, + result.data.ptr, + result.size, + nccl.NCCL_FLOAT64, + nccl.NCCL_SUM, + root, + stream_ptr, + ) + return result def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8): @@ -61,70 +109,16 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8): Returns: Dense vector of quantum circuit. """ - - import cuquantum.cutensornet as cutn - from cuquantum import Network - from mpi4py import MPI - - root = 0 - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - size = comm.Get_size() - - device_id = rank % getDeviceCount() - cp.cuda.Device(device_id).use() - - # Perform circuit conversion - if rank == 0: - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - - operands = myconvertor.state_vector_operands() - else: - operands = None - - operands = comm.bcast(operands, root) - - # Create network object. + comm, rank, size, device_id = initialize_mpi() + operands = get_operands(qibo_circ, datatype, rank, comm) network = Network(*operands, options={"device_id": device_id}) - - # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. - path, info = network.contract_path( - optimize={ - "samples": n_samples, - "slicing": { - "min_slices": max(32, size), - "memory_model": cutn.MemoryModel.CUTENSOR, - }, - } - ) - - # Select the best path from all ranks. - opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC) - - # Broadcast info from the sender to all other ranks. - info = comm.bcast(info, sender) - - # Set path and slices. + info = compute_optimal_path(network, n_samples, size, comm) path, info = network.contract_path( optimize={"path": info.path, "slicing": info.slices} ) - - # Calculate this process's share of the slices. - num_slices = info.num_slices - chunk, extra = num_slices // size, num_slices % size - slice_begin = rank * chunk + min(rank, extra) - slice_end = ( - num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) - ) - slices = range(slice_begin, slice_end) - - # Contract the group of slices the process is responsible for. - result = network.contract(slices=slices) - - # Sum the partial contribution from each process on root. - result = comm.reduce(sendobj=result, op=MPI.SUM, root=root) - - return result, rank + slices = compute_slices(info, rank, size) + result = compute_contraction(network, slices) + return reduce_result(result, comm, method="MPI"), rank def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8): @@ -146,83 +140,32 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8): Returns: Dense vector of quantum circuit. """ - import cuquantum.cutensornet as cutn - from cupy.cuda import nccl - from cuquantum import Network - from mpi4py import MPI - - root = 0 - comm_mpi = MPI.COMM_WORLD - rank = comm_mpi.Get_rank() - size = comm_mpi.Get_size() - - device_id = rank % getDeviceCount() - - cp.cuda.Device(device_id).use() - - # Set up the NCCL communicator. - nccl_id = nccl.get_unique_id() if rank == root else None - nccl_id = comm_mpi.bcast(nccl_id, root) - comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank) - - # Perform circuit conversion - if rank == 0: - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) - operands = myconvertor.state_vector_operands() - else: - operands = None - - operands = comm_mpi.bcast(operands, root) - + comm_mpi, rank, size, device_id = initialize_mpi() + comm_nccl = initialize_nccl(comm_mpi, rank, size) + operands = get_operands(qibo_circ, datatype, rank, comm_mpi) network = Network(*operands) - - # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. - path, info = network.contract_path( - optimize={ - "samples": n_samples, - "slicing": { - "min_slices": max(32, size), - "memory_model": cutn.MemoryModel.CUTENSOR, - }, - } - ) - - # Select the best path from all ranks. - opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC) - - # Broadcast info from the sender to all other ranks. - info = comm_mpi.bcast(info, sender) - - # Set path and slices. + info = compute_optimal_path(network, n_samples, size, comm_mpi) path, info = network.contract_path( optimize={"path": info.path, "slicing": info.slices} ) + slices = compute_slices(info, rank, size) + result = compute_contraction(network, slices) + return reduce_result(result, comm_nccl, method="NCCL"), rank - # Calculate this process's share of the slices. - num_slices = info.num_slices - chunk, extra = num_slices // size, num_slices % size - slice_begin = rank * chunk + min(rank, extra) - slice_end = ( - num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) - ) - slices = range(slice_begin, slice_end) - # Contract the group of slices the process is responsible for. - result = network.contract(slices=slices) +def dense_vector_tn(qibo_circ, datatype): + """Convert qibo circuit to tensornet (TN) format and perform contraction to + dense vector. - # Sum the partial contribution from each process on root. - stream_ptr = cp.cuda.get_current_stream().ptr - comm_nccl.reduce( - result.data.ptr, - result.data.ptr, - result.size, - nccl.NCCL_FLOAT64, - nccl.NCCL_SUM, - root, - stream_ptr, - ) + Parameters: + qibo_circ: The quantum circuit object. + datatype (str): Either single ("complex64") or double (complex128) precision. - return result, rank + Returns: + Dense vector of quantum circuit. + """ + myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) + return contract(*myconvertor.state_vector_operands()) def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8): @@ -248,28 +191,13 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl Returns: Expectation of quantum circuit due to pauli string. """ - import cuquantum.cutensornet as cutn - from cupy.cuda import nccl - from cuquantum import Network - from mpi4py import MPI - root = 0 - comm_mpi = MPI.COMM_WORLD - rank = comm_mpi.Get_rank() - size = comm_mpi.Get_size() + comm_mpi, rank, size, device_id = initialize_mpi() - device_id = rank % getDeviceCount() - - cp.cuda.Device(device_id).use() - - # Set up the NCCL communicator. - nccl_id = nccl.get_unique_id() if rank == root else None - nccl_id = comm_mpi.bcast(nccl_id, root) - comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank) + comm_nccl = initialize_nccl(comm_mpi, rank, size) # Perform circuit conversion if rank == 0: - myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) operands = myconvertor.expectation_operands( pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) @@ -277,55 +205,25 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl else: operands = None - operands = comm_mpi.bcast(operands, root) + operands = comm_mpi.bcast(operands, root=0) network = Network(*operands) # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. - path, info = network.contract_path( - optimize={ - "samples": n_samples, - "slicing": { - "min_slices": max(32, size), - "memory_model": cutn.MemoryModel.CUTENSOR, - }, - } - ) + info = compute_optimal_path(network, n_samples, size, comm_mpi) - # Select the best path from all ranks. - opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC) - - # Broadcast info from the sender to all other ranks. - info = comm_mpi.bcast(info, sender) - - # Set path and slices. + # Recompute path with the selected optimal settings path, info = network.contract_path( optimize={"path": info.path, "slicing": info.slices} ) - # Calculate this process's share of the slices. - num_slices = info.num_slices - chunk, extra = num_slices // size, num_slices % size - slice_begin = rank * chunk + min(rank, extra) - slice_end = ( - num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) - ) - slices = range(slice_begin, slice_end) + slices = compute_slices(info, rank, size) # Contract the group of slices the process is responsible for. - result = network.contract(slices=slices) + result = compute_contraction(network, slices) # Sum the partial contribution from each process on root. - stream_ptr = cp.cuda.get_current_stream().ptr - comm_nccl.reduce( - result.data.ptr, - result.data.ptr, - result.size, - nccl.NCCL_FLOAT64, - nccl.NCCL_SUM, - root, - stream_ptr, - ) + result = reduce_result(result, comm_nccl, method="NCCL", root=0) return result, rank @@ -353,18 +251,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample Returns: Expectation of quantum circuit due to pauli string. """ - import cuquantum.cutensornet as cutn - from cuquantum import Network - from mpi4py import MPI # this line initializes MPI - - root = 0 - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - size = comm.Get_size() - - # Assign the device for each process. - device_id = rank % getDeviceCount() - cp.cuda.Device(device_id).use() + # Initialize MPI and device + comm, rank, size, device_id = initialize_mpi() # Perform circuit conversion if rank == 0: @@ -376,51 +264,51 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample else: operands = None - operands = comm.bcast(operands, root) + operands = comm.bcast(operands, root=0) # Create network object. network = Network(*operands, options={"device_id": device_id}) - # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. - path, info = network.contract_path( - optimize={ - "samples": n_samples, - "slicing": { - "min_slices": max(32, size), - "memory_model": cutn.MemoryModel.CUTENSOR, - }, - } - ) - - # Select the best path from all ranks. - opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC) - - # Broadcast info from the sender to all other ranks. - info = comm.bcast(info, sender) + # Compute optimal contraction path + info = compute_optimal_path(network, n_samples, size, comm) # Set path and slices. path, info = network.contract_path( optimize={"path": info.path, "slicing": info.slices} ) - # Calculate this process's share of the slices. - num_slices = info.num_slices - chunk, extra = num_slices // size, num_slices % size - slice_begin = rank * chunk + min(rank, extra) - slice_end = ( - num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra) - ) - slices = range(slice_begin, slice_end) + # Compute slice range for each rank + slices = compute_slices(info, rank, size) - # Contract the group of slices the process is responsible for. - result = network.contract(slices=slices) + # Perform contraction + result = compute_contraction(network, slices) # Sum the partial contribution from each process on root. - result = comm.reduce(sendobj=result, op=MPI.SUM, root=root) + result = reduce_result(result, comm, method="MPI", root=0) return result, rank +def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern): + """Convert qibo circuit to tensornet (TN) format and perform contraction to + expectation of given Pauli string. + + Parameters: + qibo_circ: The quantum circuit object. + datatype (str): Either single ("complex64") or double (complex128) precision. + pauli_string_pattern(str): pauli string pattern. + + Returns: + Expectation of quantum circuit due to pauli string. + """ + myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) + return contract( + *myconvertor.expectation_operands( + pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) + ) + ) + + def dense_vector_mps(qibo_circ, gate_algo, datatype): """Convert qibo circuit to matrix product state (MPS) format and perform contraction to dense vector.