diff --git a/benchmark_contract_sliced.py b/benchmark_contract_sliced.py
index 5da0cd4..a089546 100644
--- a/benchmark_contract_sliced.py
+++ b/benchmark_contract_sliced.py
@@ -31,13 +31,13 @@ else:
     tree = None
 tree = comm.bcast(tree, root=0)
 
-arrays = [torch.from_numpy(np.ascontiguousarray(t._data, dtype=np.complex128)) for t in tn.tensors]
+arrays = [torch.from_numpy(np.asarray(t._data)) for t in tn.tensors]
 n_slices = tree.multiplicity
 
 if rank == 0:
     print(f"Slices: {n_slices}, Ranks: {size}, "
           f"Peak: {tree.max_size() * 16 / 1e9:.2f} GB, "
-          f"Threads/rank: {max(1, NCORES // size)}, Backend: torch")
+          f"Threads/rank: {NCORES}, Backend: torch")
 
 t0 = time.time()
 result = None
diff --git a/benchmark_slice.py b/benchmark_slice.py
index 8e7daae..b398857 100644
--- a/benchmark_slice.py
+++ b/benchmark_slice.py
@@ -8,7 +8,7 @@ with open(f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl", 'rb') as f:
 
 print(f"Original peak: {tree.max_size() * 16 / 1e9:.2f} GB")
 
-tree_sliced = tree.slice_and_reconfigure(target_size=2**30)  # 2^29 = 8 GB
+tree_sliced = tree.slice_and_reconfigure(target_size=2**28)
 
 with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'wb') as f:
     pickle.dump(tree_sliced, f)
diff --git a/check_tree.py b/check_tree.py
index dfd8d40..935f952 100644
--- a/check_tree.py
+++ b/check_tree.py
@@ -5,9 +5,21 @@ path = sys.argv[1] if len(sys.argv) > 1 else "data/tree_q25_l10.pkl"
 with open(path, 'rb') as f:
     tree = pickle.load(f)
 
+# Intel 8558P: 96 cores, 2.1GHz, AVX-512 (16 FP64/cycle), FMA x2
+# complex128 multiply = 6 real FLOPs (4 mul + 2 add); NOTE(review): a full complex multiply-add is 8 — confirm the /6 factor
+CORES = 96
+FREQ = 2.1e9
+AVX512_FP64 = 16
+TFLOPS = CORES * FREQ * AVX512_FP64 * 2 / 1e12  # ~6.45 TFLOPS real FP64
+COMPLEX_FLOPS = TFLOPS / 6  # complex128 effective
+
+flops = tree.total_flops()
+slices = tree.multiplicity
+est_seconds = flops * slices / (COMPLEX_FLOPS * 1e12) 
+
 print(f"File: {path}")
-print(f"Peak memory elements: {tree.max_size():.2e}")
-print(f"Peak memory (GB): {tree.max_size() * 16 / 1e9:.2f}")  # complex128 = 16 bytes
-print(f"Total FLOPs: {tree.total_flops():.2e}")
+print(f"Peak memory (GB): {tree.max_size() * 16 / 1e9:.2f}")
+print(f"Total FLOPs: {flops:.2e}  x{slices} slices = {flops*slices:.2e}")
 print(f"Contraction width: {tree.contraction_width()}")
-print(f"Multiplicity (slices): {tree.multiplicity}")
+print(f"Multiplicity (slices): {slices}")
+print(f"Estimated time (96 cores): {est_seconds:.1f}s  ({est_seconds/3600:.2f}h)")
diff --git a/src/qibotn/backends/quimb.py b/src/qibotn/backends/quimb.py
index 3e532be..6b28500 100644
--- a/src/qibotn/backends/quimb.py
+++ b/src/qibotn/backends/quimb.py
@@ -439,6 +439,7 @@ def _expectation_parallel(self, circuit, observable, method, opts):
     mpi_contract = opts.get('mpi_contract', False)
     torch_threads = opts.get('torch_threads', None)
     slicing_opts = opts.get('slicing_opts', None)
+    trial_timeout = opts.get('trial_timeout', None)
 
     qc = self._qibo_circuit_to_quimb(
         circuit,
@@ -472,6 +473,7 @@ def _expectation_parallel(self, circuit, observable, method, opts):
             max_time=max_time,
             n_workers=search_workers,
             slicing_opts=slicing_opts,
+            trial_timeout=trial_timeout,
         )
 
         if tree is None: