Refactor to reduce repeated code

This commit is contained in:
tankya2
2025-02-18 11:34:55 +08:00
parent 13d4c9c04f
commit ac396a35db

View File

@@ -6,40 +6,88 @@ from qibotn.circuit_convertor import QiboCircuitToEinsum
from qibotn.circuit_to_mps import QiboCircuitToMPS
from qibotn.mps_contraction_helper import MPSContractionHelper
def dense_vector_tn(qibo_circ, datatype):
    """Contract a qibo circuit, expressed as a tensor network (TN), to a dense
    vector.

    Parameters:
        qibo_circ: The quantum circuit object.
        datatype (str): Either single ("complex64") or double ("complex128") precision.

    Returns:
        Dense vector of quantum circuit.
    """
    # Build the einsum operands for the circuit's state vector, then contract them.
    convertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    operands = convertor.state_vector_operands()
    return contract(*operands)
import cuquantum.cutensornet as cutn
from cuquantum import Network
from mpi4py import MPI
from cupy.cuda import nccl
def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
expectation of given Pauli string.
def initialize_mpi():
    """Look up this process in MPI.COMM_WORLD and select a device for it.

    The device index is the rank modulo the value returned by
    getDeviceCount(); that device is made current via cp.cuda.Device(...).use().

    Returns:
        tuple: (comm, rank, size, device_id) in that order.
    """
    world = MPI.COMM_WORLD
    my_rank, world_size = world.Get_rank(), world.Get_size()
    # Wrap around the device count so more ranks than devices still map to a valid index.
    gpu_index = my_rank % getDeviceCount()
    cp.cuda.Device(gpu_index).use()
    return world, my_rank, world_size, gpu_index
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
pauli_string_pattern(str): pauli string pattern.
Returns:
Expectation of quantum circuit due to pauli string.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(
*myconvertor.expectation_operands(
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
)
def initialize_nccl(comm_mpi, rank, size):
    """Create the NCCL communicator for this rank.

    Rank 0 generates the NCCL unique id, which is then broadcast over
    ``comm_mpi`` so every rank builds its communicator from the same id.

    Parameters:
        comm_mpi: MPI communicator used to broadcast the unique id.
        rank (int): Rank of the calling process.
        size (int): Total number of participating processes.

    Returns:
        nccl.NcclCommunicator for the calling rank.
    """
    if rank == 0:
        unique_id = nccl.get_unique_id()
    else:
        unique_id = None
    shared_id = comm_mpi.bcast(unique_id, root=0)
    return nccl.NcclCommunicator(size, shared_id, rank)
def get_operands(qibo_circ, datatype, rank, comm):
    """Convert the circuit on rank 0 and share the operands with every rank.

    Parameters:
        qibo_circ: The quantum circuit object.
        datatype (str): Data type passed to QiboCircuitToEinsum as ``dtype``.
        rank (int): Rank of the calling process.
        comm: MPI communicator used for the broadcast.

    Returns:
        The state-vector operands broadcast from rank 0.
    """
    # Only rank 0 performs the conversion; every other rank contributes None
    # and receives the converted operands through the broadcast.
    local_operands = None
    if rank == 0:
        convertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
        local_operands = convertor.state_vector_operands()
    return comm.bcast(local_operands, root=0)
def compute_optimal_path(network, n_samples, size, comm):
    """Search for a contraction path on every rank and share the cheapest one.

    Each rank runs ``network.contract_path`` with ``n_samples`` samples and a
    slicing request of at least ``max(32, size)`` slices. The rank reporting
    the lowest ``opt_cost`` is selected with an MPI.MINLOC all-reduce, and
    that rank's optimizer info is broadcast to everyone.

    Parameters:
        network: Network object exposing ``contract_path``.
        n_samples (int): Number of samples passed to the path optimizer.
        size (int): Number of participating processes.
        comm: MPI communicator.

    Returns:
        The optimizer info object of the selected rank.
    """
    slicing_options = {
        "min_slices": max(32, size),
        "memory_model": cutn.MemoryModel.CUTENSOR,
    }
    _, local_info = network.contract_path(
        optimize={"samples": n_samples, "slicing": slicing_options}
    )
    # Pair the local cost with the local rank so MINLOC reports who owns the minimum.
    candidate = (local_info.opt_cost, comm.Get_rank())
    _, best_rank = comm.allreduce(sendobj=candidate, op=MPI.MINLOC)
    return comm.bcast(local_info, best_rank)
def compute_contraction(network, slices):
    """Contract ``network`` over the given ``slices`` and return the result."""
    contracted = network.contract(slices=slices)
    return contracted
def compute_slices(info, rank, size):
    """Determine the contiguous slice range a given process should compute.

    ``info.num_slices`` slices are split as evenly as possible over ``size``
    processes: every rank gets ``num_slices // size`` slices and the first
    ``num_slices % size`` ranks get one extra.

    Parameters:
        info: Object exposing a ``num_slices`` attribute.
        rank (int): Rank of the calling process.
        size (int): Total number of processes.

    Returns:
        range: Slice indices assigned to ``rank`` (empty when there are fewer
        slices than ranks and this rank received none).
    """
    chunk, extra = divmod(info.num_slices, size)
    slice_begin = rank * chunk + min(rank, extra)
    # No special case is needed for the last rank: with rank == size - 1 this
    # evaluates to size * chunk + extra, which is exactly num_slices.
    slice_end = (rank + 1) * chunk + min(rank + 1, extra)
    return range(slice_begin, slice_end)
def reduce_result(result, comm, method="MPI", root=0):
    """Sum the partial results of all processes onto ``root``.

    Parameters:
        result: Partial contraction result of the calling process. For the
            "NCCL" method it must expose ``dtype``, ``size`` and ``data.ptr``.
        comm: Communicator matching ``method``: an MPI communicator for
            "MPI", an NCCL communicator for "NCCL".
        method (str): Either "MPI" or "NCCL".
        root (int): Rank that receives the reduced result.

    Returns:
        For "MPI", whatever ``comm.reduce`` returns. For "NCCL", ``result``
        itself, reduced in place (send and receive buffers are the same).

    Raises:
        ValueError: If ``method`` is neither "MPI" nor "NCCL".
        TypeError: If ``method`` is "NCCL" and ``result.dtype`` is not a
            supported floating-point or complex type.
    """
    if method == "MPI":
        return comm.reduce(sendobj=result, op=MPI.SUM, root=root)

    if method == "NCCL":
        # NCCL has no complex element types. A complex buffer is reduced as
        # its interleaved real components, i.e. twice the element count of
        # the matching real type. Always using NCCL_FLOAT64 with result.size
        # would sum only half of a complex128 buffer and misread complex64.
        layouts = {
            "float32": (nccl.NCCL_FLOAT32, 1),
            "float64": (nccl.NCCL_FLOAT64, 1),
            "complex64": (nccl.NCCL_FLOAT32, 2),
            "complex128": (nccl.NCCL_FLOAT64, 2),
        }
        dtype_name = result.dtype.name
        if dtype_name not in layouts:
            raise TypeError(
                f"Unsupported dtype for NCCL reduction: {dtype_name!r}"
            )
        nccl_dtype, components = layouts[dtype_name]

        stream_ptr = cp.cuda.get_current_stream().ptr
        comm.reduce(
            result.data.ptr,
            result.data.ptr,
            result.size * components,
            nccl_dtype,
            nccl.NCCL_SUM,
            root,
            stream_ptr,
        )
        return result

    raise ValueError(
        f"Unknown reduction method {method!r}; expected 'MPI' or 'NCCL'."
    )
def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
@@ -61,70 +109,16 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
Returns:
Dense vector of quantum circuit.
"""
import cuquantum.cutensornet as cutn
from cuquantum import Network
from mpi4py import MPI
root = 0
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
# Perform circuit conversion
if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
else:
operands = None
operands = comm.bcast(operands, root)
# Create network object.
comm, rank, size, device_id = initialize_mpi()
operands = get_operands(qibo_circ, datatype, rank, comm)
network = Network(*operands, options={"device_id": device_id})
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
)
# Select the best path from all ranks.
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm.bcast(info, sender)
# Set path and slices.
info = compute_optimal_path(network, n_samples, size, comm)
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices)
# Sum the partial contribution from each process on root.
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
return result, rank
slices = compute_slices(info, rank, size)
result = compute_contraction(network, slices)
return reduce_result(result, comm, method="MPI"), rank
def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
@@ -146,83 +140,32 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
Returns:
Dense vector of quantum circuit.
"""
import cuquantum.cutensornet as cutn
from cupy.cuda import nccl
from cuquantum import Network
from mpi4py import MPI
root = 0
comm_mpi = MPI.COMM_WORLD
rank = comm_mpi.Get_rank()
size = comm_mpi.Get_size()
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
# Set up the NCCL communicator.
nccl_id = nccl.get_unique_id() if rank == root else None
nccl_id = comm_mpi.bcast(nccl_id, root)
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
# Perform circuit conversion
if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.state_vector_operands()
else:
operands = None
operands = comm_mpi.bcast(operands, root)
comm_mpi, rank, size, device_id = initialize_mpi()
comm_nccl = initialize_nccl(comm_mpi, rank, size)
operands = get_operands(qibo_circ, datatype, rank, comm_mpi)
network = Network(*operands)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
)
# Select the best path from all ranks.
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm_mpi.bcast(info, sender)
# Set path and slices.
info = compute_optimal_path(network, n_samples, size, comm_mpi)
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
slices = compute_slices(info, rank, size)
result = compute_contraction(network, slices)
return reduce_result(result, comm_nccl, method="NCCL"), rank
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices)
def dense_vector_tn(qibo_circ, datatype):
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
dense vector.
# Sum the partial contribution from each process on root.
stream_ptr = cp.cuda.get_current_stream().ptr
comm_nccl.reduce(
result.data.ptr,
result.data.ptr,
result.size,
nccl.NCCL_FLOAT64,
nccl.NCCL_SUM,
root,
stream_ptr,
)
Parameters:
qibo_circ: The quantum circuit object.
datatype (str): Either single ("complex64") or double (complex128) precision.
return result, rank
Returns:
Dense vector of quantum circuit.
"""
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
return contract(*myconvertor.state_vector_operands())
def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
@@ -248,28 +191,13 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
Returns:
Expectation of quantum circuit due to pauli string.
"""
import cuquantum.cutensornet as cutn
from cupy.cuda import nccl
from cuquantum import Network
from mpi4py import MPI
root = 0
comm_mpi = MPI.COMM_WORLD
rank = comm_mpi.Get_rank()
size = comm_mpi.Get_size()
comm_mpi, rank, size, device_id = initialize_mpi()
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
# Set up the NCCL communicator.
nccl_id = nccl.get_unique_id() if rank == root else None
nccl_id = comm_mpi.bcast(nccl_id, root)
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
comm_nccl = initialize_nccl(comm_mpi, rank, size)
# Perform circuit conversion
if rank == 0:
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
operands = myconvertor.expectation_operands(
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
@@ -277,55 +205,25 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
else:
operands = None
operands = comm_mpi.bcast(operands, root)
operands = comm_mpi.bcast(operands, root=0)
network = Network(*operands)
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
)
info = compute_optimal_path(network, n_samples, size, comm_mpi)
# Select the best path from all ranks.
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm_mpi.bcast(info, sender)
# Set path and slices.
# Recompute path with the selected optimal settings
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
slices = compute_slices(info, rank, size)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices)
result = compute_contraction(network, slices)
# Sum the partial contribution from each process on root.
stream_ptr = cp.cuda.get_current_stream().ptr
comm_nccl.reduce(
result.data.ptr,
result.data.ptr,
result.size,
nccl.NCCL_FLOAT64,
nccl.NCCL_SUM,
root,
stream_ptr,
)
result = reduce_result(result, comm_nccl, method="NCCL", root=0)
return result, rank
@@ -353,18 +251,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
Returns:
Expectation of quantum circuit due to pauli string.
"""
import cuquantum.cutensornet as cutn
from cuquantum import Network
from mpi4py import MPI # this line initializes MPI
root = 0
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
# Assign the device for each process.
device_id = rank % getDeviceCount()
cp.cuda.Device(device_id).use()
# Initialize MPI and device
comm, rank, size, device_id = initialize_mpi()
# Perform circuit conversion
if rank == 0:
@@ -376,51 +264,51 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
else:
operands = None
operands = comm.bcast(operands, root)
operands = comm.bcast(operands, root=0)
# Create network object.
network = Network(*operands, options={"device_id": device_id})
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
path, info = network.contract_path(
optimize={
"samples": n_samples,
"slicing": {
"min_slices": max(32, size),
"memory_model": cutn.MemoryModel.CUTENSOR,
},
}
)
# Select the best path from all ranks.
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
# Broadcast info from the sender to all other ranks.
info = comm.bcast(info, sender)
# Compute optimal contraction path
info = compute_optimal_path(network, n_samples, size, comm)
# Set path and slices.
path, info = network.contract_path(
optimize={"path": info.path, "slicing": info.slices}
)
# Calculate this process's share of the slices.
num_slices = info.num_slices
chunk, extra = num_slices // size, num_slices % size
slice_begin = rank * chunk + min(rank, extra)
slice_end = (
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
)
slices = range(slice_begin, slice_end)
# Compute slice range for each rank
slices = compute_slices(info, rank, size)
# Contract the group of slices the process is responsible for.
result = network.contract(slices=slices)
# Perform contraction
result = compute_contraction(network, slices)
# Sum the partial contribution from each process on root.
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
result = reduce_result(result, comm, method="MPI", root=0)
return result, rank
def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
    """Contract a qibo circuit, expressed as a tensor network (TN), to the
    expectation of a given Pauli string.

    Parameters:
        qibo_circ: The quantum circuit object.
        datatype (str): Either single ("complex64") or double ("complex128") precision.
        pauli_string_pattern (str): Pauli string pattern.

    Returns:
        Expectation of quantum circuit due to pauli string.
    """
    convertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
    # Expand the pattern into a Pauli string over the circuit's qubits.
    pauli_string = pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
    operands = convertor.expectation_operands(pauli_string)
    return contract(*operands)
def dense_vector_mps(qibo_circ, gate_algo, datatype):
"""Convert qibo circuit to matrix product state (MPS) format and perform
contraction to dense vector.