diff --git a/benchmark_contract_sliced.py b/benchmark_contract_sliced.py index 5da0cd4..a089546 100644 --- a/benchmark_contract_sliced.py +++ b/benchmark_contract_sliced.py @@ -31,13 +31,13 @@ else: tree = None tree = comm.bcast(tree, root=0) -arrays = [torch.from_numpy(np.ascontiguousarray(t._data, dtype=np.complex128)) for t in tn.tensors] +arrays = [torch.from_numpy(np.asarray(t._data)) for t in tn.tensors] n_slices = tree.multiplicity if rank == 0: print(f"Slices: {n_slices}, Ranks: {size}, " f"Peak: {tree.max_size() * 16 / 1e9:.2f} GB, " - f"Threads/rank: {max(1, NCORES // size)}, Backend: torch") + f"Threads/rank: {NCORES}, Backend: torch") t0 = time.time() result = None diff --git a/benchmark_slice.py b/benchmark_slice.py index 8e7daae..b398857 100644 --- a/benchmark_slice.py +++ b/benchmark_slice.py @@ -8,7 +8,7 @@ with open(f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl", 'rb') as f: print(f"Original peak: {tree.max_size() * 16 / 1e9:.2f} GB") -tree_sliced = tree.slice_and_reconfigure(target_size=2**30) # 2^29 = 8 GB +tree_sliced = tree.slice_and_reconfigure(target_size=2**28) with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'wb') as f: pickle.dump(tree_sliced, f) diff --git a/check_tree.py b/check_tree.py index dfd8d40..935f952 100644 --- a/check_tree.py +++ b/check_tree.py @@ -5,9 +5,21 @@ path = sys.argv[1] if len(sys.argv) > 1 else "data/tree_q25_l10.pkl" with open(path, 'rb') as f: tree = pickle.load(f) +# Intel 8558P: 96 cores, 2.1GHz, AVX-512 (16 FP64/cycle), FMA x2 +# complex128 multiply-add = 6 real FLOPs +CORES = 96 +FREQ = 2.1e9 +AVX512_FP64 = 16 +TFLOPS = CORES * FREQ * AVX512_FP64 * 2 / 1e12 # ~6.45 TFLOPS real FP64 +COMPLEX_FLOPS = TFLOPS / 6 # complex128 effective + +flops = tree.total_flops() +slices = tree.multiplicity +est_seconds = flops * slices / (COMPLEX_FLOPS * 1e12) + print(f"File: {path}") -print(f"Peak memory elements: {tree.max_size():.2e}") -print(f"Peak memory (GB): {tree.max_size() * 16 / 1e9:.2f}") # complex128 = 16 bytes -print(f"Total FLOPs: {tree.total_flops():.2e}") +print(f"Peak memory (GB): {tree.max_size() * 16 / 1e9:.2f}") +print(f"Total FLOPs: {flops:.2e} x{slices} slices = {flops*slices:.2e}") print(f"Contraction width: {tree.contraction_width()}") -print(f"Multiplicity (slices): {tree.multiplicity}") +print(f"Multiplicity (slices): {slices}") +print(f"Estimated time (96 cores): {est_seconds:.1f}s ({est_seconds/3600:.2f}h)") diff --git a/src/qibotn/backends/quimb.py b/src/qibotn/backends/quimb.py index 3e532be..6b28500 100644 --- a/src/qibotn/backends/quimb.py +++ b/src/qibotn/backends/quimb.py @@ -439,6 +439,7 @@ def _expectation_parallel(self, circuit, observable, method, opts): mpi_contract = opts.get('mpi_contract', False) torch_threads = opts.get('torch_threads', None) slicing_opts = opts.get('slicing_opts', None) + trial_timeout = opts.get('trial_timeout', None) qc = self._qibo_circuit_to_quimb( circuit, @@ -472,6 +473,7 @@ def _expectation_parallel(self, circuit, observable, method, opts): max_time=max_time, n_workers=search_workers, slicing_opts=slicing_opts, + trial_timeout=trial_timeout, ) if tree is None: