Refactor to reduce repeating codes

This commit is contained in:
tankya2
2025-02-18 11:34:55 +08:00
parent 13d4c9c04f
commit ac396a35db

View File

@@ -6,40 +6,88 @@ from qibotn.circuit_convertor import QiboCircuitToEinsum
from qibotn.circuit_to_mps import QiboCircuitToMPS from qibotn.circuit_to_mps import QiboCircuitToMPS
from qibotn.mps_contraction_helper import MPSContractionHelper from qibotn.mps_contraction_helper import MPSContractionHelper
import cuquantum.cutensornet as cutn
def dense_vector_tn(qibo_circ, datatype): from cuquantum import Network
"""Convert qibo circuit to tensornet (TN) format and perform contraction to from mpi4py import MPI
dense vector. from cupy.cuda import nccl
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
Returns:
Dense vector of quantum circuit.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(*myconvertor.state_vector_operands())
def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern): def initialize_mpi():
"""Convert qibo circuit to tensornet (TN) format and perform contraction to """Initialize MPI communication and device selection."""
expectation of given Pauli string. comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
return comm, rank, size, device_id
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
pauli_string_pattern(str): pauli string pattern.
Returns: def initialize_nccl(comm_mpi, rank, size):
Expectation of quantum circuit due to pauli string. """Initialize NCCL communication."""
""" nccl_id = nccl.get_unique_id() if rank == 0 else None
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) nccl_id = comm_mpi.bcast(nccl_id, root=0)
return contract( return nccl.NcclCommunicator(size, nccl_id, rank)
*myconvertor.expectation_operands(
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
) def get_operands(qibo_circ, datatype, rank, comm):
"""Perform circuit conversion and broadcast operands."""
if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
else:
operands = None
return comm.bcast(operands, root=0)
def compute_optimal_path(network, n_samples, size, comm):
"""Compute contraction path and broadcast optimal selection."""
path, info = network.contract_path(
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
) )
opt_cost, sender = comm.allreduce(
sendobj=(info.opt_cost, comm.Get_rank()), op=MPI.MINLOC
)
return comm.bcast(info, sender)
def compute_contraction(network, slices):
"""Perform tensor contraction."""
return network.contract(slices=slices)
def compute_slices(info, rank, size):
"""Determine the slice range each process should compute."""
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
return range(slice_begin, slice_end)
def reduce_result(result, comm, method="MPI", root=0):
"""Reduce results across processes."""
if method == "MPI":
return comm.reduce(sendobj=result, op=MPI.SUM, root=root)
elif method == "NCCL":
stream_ptr = cp.cuda.get_current_stream().ptr
comm.reduce(
result.data.ptr,
result.data.ptr,
result.size,
nccl.NCCL_FLOAT64,
nccl.NCCL_SUM,
root,
stream_ptr,
)
return result
def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8): def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
@@ -61,70 +109,16 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
Returns: Returns:
Dense vector of quantum circuit. Dense vector of quantum circuit.
""" """
comm, rank, size, device_id = initialize_mpi()
import cuquantum.cutensornet as cutn operands = get_operands(qibo_circ, datatype, rank, comm)
from cuquantum import Network
from mpi4py import MPI
root = 0
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
# Perform circuit conversion
if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
else:
operands = None
operands = comm.bcast(operands, root)
# Create network object.
network = Network(*operands, options={"device_id": device_id}) network = Network(*operands, options={"device_id": device_id})
info = compute_optimal_path(network, n_samples, size, comm)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
)
# Select the best path from all ranks.
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm.bcast(info, sender)
# Set path and slices.
path, info = network.contract_path( path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices} optimize={"path": info.path, "slicing": info.slices}
) )
slices = compute_slices(info, rank, size)
# Calculate this process's share of the slices. result = compute_contraction(network, slices)
num_slices = info.num_slices return reduce_result(result, comm, method="MPI"), rank
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices)
# Sum the partial contribution from each process on root.
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
return result, rank
def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8): def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
@@ -146,83 +140,32 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
Returns: Returns:
Dense vector of quantum circuit. Dense vector of quantum circuit.
""" """
import cuquantum.cutensornet as cutn comm_mpi, rank, size, device_id = initialize_mpi()
from cupy.cuda import nccl comm_nccl = initialize_nccl(comm_mpi, rank, size)
from cuquantum import Network operands = get_operands(qibo_circ, datatype, rank, comm_mpi)
from mpi4py import MPI
root = 0
comm_mpi = MPI.COMM_WORLD
rank = comm_mpi.Get_rank()
size = comm_mpi.Get_size()
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
# Set up the NCCL communicator.
nccl_id = nccl.get_unique_id() if rank == root else None
nccl_id = comm_mpi.bcast(nccl_id, root)
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
# Perform circuit conversion
if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
else:
operands = None
operands = comm_mpi.bcast(operands, root)
network = Network(*operands) network = Network(*operands)
info = compute_optimal_path(network, n_samples, size, comm_mpi)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
)
# Select the best path from all ranks.
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm_mpi.bcast(info, sender)
# Set path and slices.
path, info = network.contract_path( path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices} optimize={"path": info.path, "slicing": info.slices}
) )
slices = compute_slices(info, rank, size)
result = compute_contraction(network, slices)
return reduce_result(result, comm_nccl, method="NCCL"), rank
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for. def dense_vector_tn(qibo_circ, datatype):
result = network.contract(slices=slices) """Convert qibo circuit to tensornet (TN) format and perform contraction to
dense vector.
# Sum the partial contribution from each process on root. Parameters:
stream_ptr = cp.cuda.get_current_stream().ptr qibo_circ: The quantum circuit object.
comm_nccl.reduce( datatype (str): Either single ("complex64") or double (complex128) precision.
result.data.ptr,
result.data.ptr,
result.size,
nccl.NCCL_FLOAT64,
nccl.NCCL_SUM,
root,
stream_ptr,
)
return result, rank Returns:
Dense vector of quantum circuit.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(*myconvertor.state_vector_operands())
def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8): def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
@@ -248,28 +191,13 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
Returns: Returns:
Expectation of quantum circuit due to pauli string. Expectation of quantum circuit due to pauli string.
""" """
import cuquantum.cutensornet as cutn
from cupy.cuda import nccl
from cuquantum import Network
from mpi4py import MPI
root = 0 comm_mpi, rank, size, device_id = initialize_mpi()
comm_mpi = MPI.COMM_WORLD
rank = comm_mpi.Get_rank()
size = comm_mpi.Get_size()
device_id = rank % getDeviceCount() comm_nccl = initialize_nccl(comm_mpi, rank, size)
cp.cuda.Device(device_id).use()
# Set up the NCCL communicator.
nccl_id = nccl.get_unique_id() if rank == root else None
nccl_id = comm_mpi.bcast(nccl_id, root)
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
# Perform circuit conversion # Perform circuit conversion
if rank == 0: if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype) myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.expectation_operands( operands = myconvertor.expectation_operands(
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern) pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
@@ -277,55 +205,25 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
else: else:
operands = None operands = None
operands = comm_mpi.bcast(operands, root) operands = comm_mpi.bcast(operands, root=0)
network = Network(*operands) network = Network(*operands)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. # Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path( info = compute_optimal_path(network, n_samples, size, comm_mpi)
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
)
# Select the best path from all ranks. # Recompute path with the selected optimal settings
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm_mpi.bcast(info, sender)
# Set path and slices.
path, info = network.contract_path( path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices} optimize={"path": info.path, "slicing": info.slices}
) )
# Calculate this process's share of the slices. slices = compute_slices(info, rank, size)
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for. # Contract the group of slices the process is responsible for.
result = network.contract(slices=slices) result = compute_contraction(network, slices)
# Sum the partial contribution from each process on root. # Sum the partial contribution from each process on root.
stream_ptr = cp.cuda.get_current_stream().ptr result = reduce_result(result, comm_nccl, method="NCCL", root=0)
comm_nccl.reduce(
result.data.ptr,
result.data.ptr,
result.size,
nccl.NCCL_FLOAT64,
nccl.NCCL_SUM,
root,
stream_ptr,
)
return result, rank return result, rank
@@ -353,18 +251,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
Returns: Returns:
Expectation of quantum circuit due to pauli string. Expectation of quantum circuit due to pauli string.
""" """
import cuquantum.cutensornet as cutn # Initialize MPI and device
from cuquantum import Network comm, rank, size, device_id = initialize_mpi()
from mpi4py import MPI # this line initializes MPI
root = 0
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
# Assign the device for each process.
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
# Perform circuit conversion # Perform circuit conversion
if rank == 0: if rank == 0:
@@ -376,51 +264,51 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
else: else:
operands = None operands = None
operands = comm.bcast(operands, root) operands = comm.bcast(operands, root=0)
# Create network object. # Create network object.
network = Network(*operands, options={"device_id": device_id}) network = Network(*operands, options={"device_id": device_id})
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction. # Compute optimal contraction path
path, info = network.contract_path( info = compute_optimal_path(network, n_samples, size, comm)
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
)
# Select the best path from all ranks.
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm.bcast(info, sender)
# Set path and slices. # Set path and slices.
path, info = network.contract_path( path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices} optimize={"path": info.path, "slicing": info.slices}
) )
# Calculate this process's share of the slices. # Compute slice range for each rank
num_slices = info.num_slices slices = compute_slices(info, rank, size)
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for. # Perform contraction
result = network.contract(slices=slices) result = compute_contraction(network, slices)
# Sum the partial contribution from each process on root. # Sum the partial contribution from each process on root.
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root) result = reduce_result(result, comm, method="MPI", root=0)
return result, rank return result, rank
def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
expectation of given Pauli string.
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
pauli_string_pattern(str): pauli string pattern.
Returns:
Expectation of quantum circuit due to pauli string.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(
*myconvertor.expectation_operands(
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
)
)
def dense_vector_mps(qibo_circ, gate_algo, datatype): def dense_vector_mps(qibo_circ, gate_algo, datatype):
"""Convert qibo circuit to matrix product state (MPS) format and perform """Convert qibo circuit to matrix product state (MPS) format and perform
contraction to dense vector. contraction to dense vector.