Refactor to reduce repeating codes
This commit is contained in:
@@ -6,40 +6,88 @@ from qibotn.circuit_convertor import QiboCircuitToEinsum
|
||||
from qibotn.circuit_to_mps import QiboCircuitToMPS
|
||||
from qibotn.mps_contraction_helper import MPSContractionHelper
|
||||
|
||||
|
||||
def dense_vector_tn(qibo_circ, datatype):
|
||||
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
|
||||
dense vector.
|
||||
|
||||
Parameters:
|
||||
qibo_circ: The quantum circuit object.
|
||||
datatype (str): Either single ("complex64") or double (complex128) precision.
|
||||
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
return contract(*myconvertor.state_vector_operands())
|
||||
import cuquantum.cutensornet as cutn
|
||||
from cuquantum import Network
|
||||
from mpi4py import MPI
|
||||
from cupy.cuda import nccl
|
||||
|
||||
|
||||
def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
|
||||
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
|
||||
expectation of given Pauli string.
|
||||
def initialize_mpi():
|
||||
"""Initialize MPI communication and device selection."""
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
device_id = rank % getDeviceCount()
|
||||
cp.cuda.Device(device_id).use()
|
||||
return comm, rank, size, device_id
|
||||
|
||||
Parameters:
|
||||
qibo_circ: The quantum circuit object.
|
||||
datatype (str): Either single ("complex64") or double (complex128) precision.
|
||||
pauli_string_pattern(str): pauli string pattern.
|
||||
|
||||
Returns:
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
return contract(
|
||||
*myconvertor.expectation_operands(
|
||||
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
|
||||
)
|
||||
def initialize_nccl(comm_mpi, rank, size):
|
||||
"""Initialize NCCL communication."""
|
||||
nccl_id = nccl.get_unique_id() if rank == 0 else None
|
||||
nccl_id = comm_mpi.bcast(nccl_id, root=0)
|
||||
return nccl.NcclCommunicator(size, nccl_id, rank)
|
||||
|
||||
|
||||
def get_operands(qibo_circ, datatype, rank, comm):
|
||||
"""Perform circuit conversion and broadcast operands."""
|
||||
if rank == 0:
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
operands = myconvertor.state_vector_operands()
|
||||
else:
|
||||
operands = None
|
||||
return comm.bcast(operands, root=0)
|
||||
|
||||
|
||||
def compute_optimal_path(network, n_samples, size, comm):
|
||||
"""Compute contraction path and broadcast optimal selection."""
|
||||
path, info = network.contract_path(
|
||||
optimize={
|
||||
"samples": n_samples,
|
||||
"slicing": {
|
||||
"min_slices": max(32, size),
|
||||
"memory_model": cutn.MemoryModel.CUTENSOR,
|
||||
},
|
||||
}
|
||||
)
|
||||
opt_cost, sender = comm.allreduce(
|
||||
sendobj=(info.opt_cost, comm.Get_rank()), op=MPI.MINLOC
|
||||
)
|
||||
return comm.bcast(info, sender)
|
||||
|
||||
|
||||
def compute_contraction(network, slices):
|
||||
"""Perform tensor contraction."""
|
||||
return network.contract(slices=slices)
|
||||
|
||||
|
||||
def compute_slices(info, rank, size):
|
||||
"""Determine the slice range each process should compute."""
|
||||
num_slices = info.num_slices
|
||||
chunk, extra = num_slices // size, num_slices % size
|
||||
slice_begin = rank * chunk + min(rank, extra)
|
||||
slice_end = (
|
||||
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||
)
|
||||
return range(slice_begin, slice_end)
|
||||
|
||||
|
||||
def reduce_result(result, comm, method="MPI", root=0):
|
||||
"""Reduce results across processes."""
|
||||
if method == "MPI":
|
||||
return comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
||||
elif method == "NCCL":
|
||||
stream_ptr = cp.cuda.get_current_stream().ptr
|
||||
comm.reduce(
|
||||
result.data.ptr,
|
||||
result.data.ptr,
|
||||
result.size,
|
||||
nccl.NCCL_FLOAT64,
|
||||
nccl.NCCL_SUM,
|
||||
root,
|
||||
stream_ptr,
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
|
||||
@@ -61,70 +109,16 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
|
||||
import cuquantum.cutensornet as cutn
|
||||
from cuquantum import Network
|
||||
from mpi4py import MPI
|
||||
|
||||
root = 0
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
device_id = rank % getDeviceCount()
|
||||
cp.cuda.Device(device_id).use()
|
||||
|
||||
# Perform circuit conversion
|
||||
if rank == 0:
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
|
||||
operands = myconvertor.state_vector_operands()
|
||||
else:
|
||||
operands = None
|
||||
|
||||
operands = comm.bcast(operands, root)
|
||||
|
||||
# Create network object.
|
||||
comm, rank, size, device_id = initialize_mpi()
|
||||
operands = get_operands(qibo_circ, datatype, rank, comm)
|
||||
network = Network(*operands, options={"device_id": device_id})
|
||||
|
||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||
path, info = network.contract_path(
|
||||
optimize={
|
||||
"samples": n_samples,
|
||||
"slicing": {
|
||||
"min_slices": max(32, size),
|
||||
"memory_model": cutn.MemoryModel.CUTENSOR,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Select the best path from all ranks.
|
||||
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
||||
|
||||
# Broadcast info from the sender to all other ranks.
|
||||
info = comm.bcast(info, sender)
|
||||
|
||||
# Set path and slices.
|
||||
info = compute_optimal_path(network, n_samples, size, comm)
|
||||
path, info = network.contract_path(
|
||||
optimize={"path": info.path, "slicing": info.slices}
|
||||
)
|
||||
|
||||
# Calculate this process's share of the slices.
|
||||
num_slices = info.num_slices
|
||||
chunk, extra = num_slices // size, num_slices % size
|
||||
slice_begin = rank * chunk + min(rank, extra)
|
||||
slice_end = (
|
||||
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||
)
|
||||
slices = range(slice_begin, slice_end)
|
||||
|
||||
# Contract the group of slices the process is responsible for.
|
||||
result = network.contract(slices=slices)
|
||||
|
||||
# Sum the partial contribution from each process on root.
|
||||
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
||||
|
||||
return result, rank
|
||||
slices = compute_slices(info, rank, size)
|
||||
result = compute_contraction(network, slices)
|
||||
return reduce_result(result, comm, method="MPI"), rank
|
||||
|
||||
|
||||
def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
|
||||
@@ -146,83 +140,32 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
import cuquantum.cutensornet as cutn
|
||||
from cupy.cuda import nccl
|
||||
from cuquantum import Network
|
||||
from mpi4py import MPI
|
||||
|
||||
root = 0
|
||||
comm_mpi = MPI.COMM_WORLD
|
||||
rank = comm_mpi.Get_rank()
|
||||
size = comm_mpi.Get_size()
|
||||
|
||||
device_id = rank % getDeviceCount()
|
||||
|
||||
cp.cuda.Device(device_id).use()
|
||||
|
||||
# Set up the NCCL communicator.
|
||||
nccl_id = nccl.get_unique_id() if rank == root else None
|
||||
nccl_id = comm_mpi.bcast(nccl_id, root)
|
||||
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
|
||||
|
||||
# Perform circuit conversion
|
||||
if rank == 0:
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
operands = myconvertor.state_vector_operands()
|
||||
else:
|
||||
operands = None
|
||||
|
||||
operands = comm_mpi.bcast(operands, root)
|
||||
|
||||
comm_mpi, rank, size, device_id = initialize_mpi()
|
||||
comm_nccl = initialize_nccl(comm_mpi, rank, size)
|
||||
operands = get_operands(qibo_circ, datatype, rank, comm_mpi)
|
||||
network = Network(*operands)
|
||||
|
||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||
path, info = network.contract_path(
|
||||
optimize={
|
||||
"samples": n_samples,
|
||||
"slicing": {
|
||||
"min_slices": max(32, size),
|
||||
"memory_model": cutn.MemoryModel.CUTENSOR,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Select the best path from all ranks.
|
||||
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
||||
|
||||
# Broadcast info from the sender to all other ranks.
|
||||
info = comm_mpi.bcast(info, sender)
|
||||
|
||||
# Set path and slices.
|
||||
info = compute_optimal_path(network, n_samples, size, comm_mpi)
|
||||
path, info = network.contract_path(
|
||||
optimize={"path": info.path, "slicing": info.slices}
|
||||
)
|
||||
slices = compute_slices(info, rank, size)
|
||||
result = compute_contraction(network, slices)
|
||||
return reduce_result(result, comm_nccl, method="NCCL"), rank
|
||||
|
||||
# Calculate this process's share of the slices.
|
||||
num_slices = info.num_slices
|
||||
chunk, extra = num_slices // size, num_slices % size
|
||||
slice_begin = rank * chunk + min(rank, extra)
|
||||
slice_end = (
|
||||
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||
)
|
||||
slices = range(slice_begin, slice_end)
|
||||
|
||||
# Contract the group of slices the process is responsible for.
|
||||
result = network.contract(slices=slices)
|
||||
def dense_vector_tn(qibo_circ, datatype):
|
||||
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
|
||||
dense vector.
|
||||
|
||||
# Sum the partial contribution from each process on root.
|
||||
stream_ptr = cp.cuda.get_current_stream().ptr
|
||||
comm_nccl.reduce(
|
||||
result.data.ptr,
|
||||
result.data.ptr,
|
||||
result.size,
|
||||
nccl.NCCL_FLOAT64,
|
||||
nccl.NCCL_SUM,
|
||||
root,
|
||||
stream_ptr,
|
||||
)
|
||||
Parameters:
|
||||
qibo_circ: The quantum circuit object.
|
||||
datatype (str): Either single ("complex64") or double (complex128) precision.
|
||||
|
||||
return result, rank
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
return contract(*myconvertor.state_vector_operands())
|
||||
|
||||
|
||||
def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_samples=8):
|
||||
@@ -248,28 +191,13 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
|
||||
Returns:
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
import cuquantum.cutensornet as cutn
|
||||
from cupy.cuda import nccl
|
||||
from cuquantum import Network
|
||||
from mpi4py import MPI
|
||||
|
||||
root = 0
|
||||
comm_mpi = MPI.COMM_WORLD
|
||||
rank = comm_mpi.Get_rank()
|
||||
size = comm_mpi.Get_size()
|
||||
comm_mpi, rank, size, device_id = initialize_mpi()
|
||||
|
||||
device_id = rank % getDeviceCount()
|
||||
|
||||
cp.cuda.Device(device_id).use()
|
||||
|
||||
# Set up the NCCL communicator.
|
||||
nccl_id = nccl.get_unique_id() if rank == root else None
|
||||
nccl_id = comm_mpi.bcast(nccl_id, root)
|
||||
comm_nccl = nccl.NcclCommunicator(size, nccl_id, rank)
|
||||
comm_nccl = initialize_nccl(comm_mpi, rank, size)
|
||||
|
||||
# Perform circuit conversion
|
||||
if rank == 0:
|
||||
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
operands = myconvertor.expectation_operands(
|
||||
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
|
||||
@@ -277,55 +205,25 @@ def expectation_pauli_tn_nccl(qibo_circ, datatype, pauli_string_pattern, n_sampl
|
||||
else:
|
||||
operands = None
|
||||
|
||||
operands = comm_mpi.bcast(operands, root)
|
||||
operands = comm_mpi.bcast(operands, root=0)
|
||||
|
||||
network = Network(*operands)
|
||||
|
||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||
path, info = network.contract_path(
|
||||
optimize={
|
||||
"samples": n_samples,
|
||||
"slicing": {
|
||||
"min_slices": max(32, size),
|
||||
"memory_model": cutn.MemoryModel.CUTENSOR,
|
||||
},
|
||||
}
|
||||
)
|
||||
info = compute_optimal_path(network, n_samples, size, comm_mpi)
|
||||
|
||||
# Select the best path from all ranks.
|
||||
opt_cost, sender = comm_mpi.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
||||
|
||||
# Broadcast info from the sender to all other ranks.
|
||||
info = comm_mpi.bcast(info, sender)
|
||||
|
||||
# Set path and slices.
|
||||
# Recompute path with the selected optimal settings
|
||||
path, info = network.contract_path(
|
||||
optimize={"path": info.path, "slicing": info.slices}
|
||||
)
|
||||
|
||||
# Calculate this process's share of the slices.
|
||||
num_slices = info.num_slices
|
||||
chunk, extra = num_slices // size, num_slices % size
|
||||
slice_begin = rank * chunk + min(rank, extra)
|
||||
slice_end = (
|
||||
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||
)
|
||||
slices = range(slice_begin, slice_end)
|
||||
slices = compute_slices(info, rank, size)
|
||||
|
||||
# Contract the group of slices the process is responsible for.
|
||||
result = network.contract(slices=slices)
|
||||
result = compute_contraction(network, slices)
|
||||
|
||||
# Sum the partial contribution from each process on root.
|
||||
stream_ptr = cp.cuda.get_current_stream().ptr
|
||||
comm_nccl.reduce(
|
||||
result.data.ptr,
|
||||
result.data.ptr,
|
||||
result.size,
|
||||
nccl.NCCL_FLOAT64,
|
||||
nccl.NCCL_SUM,
|
||||
root,
|
||||
stream_ptr,
|
||||
)
|
||||
result = reduce_result(result, comm_nccl, method="NCCL", root=0)
|
||||
|
||||
return result, rank
|
||||
|
||||
@@ -353,18 +251,8 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
|
||||
Returns:
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
import cuquantum.cutensornet as cutn
|
||||
from cuquantum import Network
|
||||
from mpi4py import MPI # this line initializes MPI
|
||||
|
||||
root = 0
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
# Assign the device for each process.
|
||||
device_id = rank % getDeviceCount()
|
||||
cp.cuda.Device(device_id).use()
|
||||
# Initialize MPI and device
|
||||
comm, rank, size, device_id = initialize_mpi()
|
||||
|
||||
# Perform circuit conversion
|
||||
if rank == 0:
|
||||
@@ -376,51 +264,51 @@ def expectation_pauli_tn_MPI(qibo_circ, datatype, pauli_string_pattern, n_sample
|
||||
else:
|
||||
operands = None
|
||||
|
||||
operands = comm.bcast(operands, root)
|
||||
operands = comm.bcast(operands, root=0)
|
||||
|
||||
# Create network object.
|
||||
network = Network(*operands, options={"device_id": device_id})
|
||||
|
||||
# Compute the path on all ranks with 8 samples for hyperoptimization. Force slicing to enable parallel contraction.
|
||||
path, info = network.contract_path(
|
||||
optimize={
|
||||
"samples": n_samples,
|
||||
"slicing": {
|
||||
"min_slices": max(32, size),
|
||||
"memory_model": cutn.MemoryModel.CUTENSOR,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Select the best path from all ranks.
|
||||
opt_cost, sender = comm.allreduce(sendobj=(info.opt_cost, rank), op=MPI.MINLOC)
|
||||
|
||||
# Broadcast info from the sender to all other ranks.
|
||||
info = comm.bcast(info, sender)
|
||||
# Compute optimal contraction path
|
||||
info = compute_optimal_path(network, n_samples, size, comm)
|
||||
|
||||
# Set path and slices.
|
||||
path, info = network.contract_path(
|
||||
optimize={"path": info.path, "slicing": info.slices}
|
||||
)
|
||||
|
||||
# Calculate this process's share of the slices.
|
||||
num_slices = info.num_slices
|
||||
chunk, extra = num_slices // size, num_slices % size
|
||||
slice_begin = rank * chunk + min(rank, extra)
|
||||
slice_end = (
|
||||
num_slices if rank == size - 1 else (rank + 1) * chunk + min(rank + 1, extra)
|
||||
)
|
||||
slices = range(slice_begin, slice_end)
|
||||
# Compute slice range for each rank
|
||||
slices = compute_slices(info, rank, size)
|
||||
|
||||
# Contract the group of slices the process is responsible for.
|
||||
result = network.contract(slices=slices)
|
||||
# Perform contraction
|
||||
result = compute_contraction(network, slices)
|
||||
|
||||
# Sum the partial contribution from each process on root.
|
||||
result = comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
||||
result = reduce_result(result, comm, method="MPI", root=0)
|
||||
|
||||
return result, rank
|
||||
|
||||
|
||||
def expectation_pauli_tn(qibo_circ, datatype, pauli_string_pattern):
|
||||
"""Convert qibo circuit to tensornet (TN) format and perform contraction to
|
||||
expectation of given Pauli string.
|
||||
|
||||
Parameters:
|
||||
qibo_circ: The quantum circuit object.
|
||||
datatype (str): Either single ("complex64") or double (complex128) precision.
|
||||
pauli_string_pattern(str): pauli string pattern.
|
||||
|
||||
Returns:
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
return contract(
|
||||
*myconvertor.expectation_operands(
|
||||
pauli_string_gen(qibo_circ.nqubits, pauli_string_pattern)
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def dense_vector_mps(qibo_circ, gate_algo, datatype):
|
||||
"""Convert qibo circuit to matrix product state (MPS) format and perform
|
||||
contraction to dense vector.
|
||||
|
||||
Reference in New Issue
Block a user