完善时间剪枝功能
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled

This commit is contained in:
2026-05-08 00:12:32 +08:00
parent c818ac7a6e
commit 49b27a5840
701 changed files with 1426 additions and 72 deletions

View File

@@ -3,19 +3,19 @@ import time, pickle, os
import numpy as np
from mpi4py import MPI
NQUBITS, NLAYERS, NCORES = 25, 10, 96
NQUBITS, NLAYERS, NCORES = 25, 10, 48
comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
os.environ['OMP_NUM_THREADS'] = str(max(1, NCORES // size))
os.environ['MKL_NUM_THREADS'] = str(max(1, NCORES // size))
os.environ['OMP_NUM_THREADS'] = str(NCORES)
os.environ['MKL_NUM_THREADS'] = str(NCORES)
import torch
import qibo, quimb as qu
from qibotn.observables import build_random_circuit
torch.set_num_threads(max(1, NCORES // size))
torch.set_num_threads(NCORES)
circuit = build_random_circuit(NQUBITS, NLAYERS)
qibo.set_backend("qibotn", platform="quimb")

View File

@@ -6,7 +6,7 @@ import qibo, quimb as qu
from mpi4py import MPI
NQUBITS, NLAYERS, WORKERS = 30, 10, 96
NQUBITS, NLAYERS, WORKERS = 20, 10, 96
comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
@@ -23,7 +23,7 @@ if rank == 0:
print(f"Searching {NQUBITS}q {NLAYERS}l, method={method}, ranks={size}, workers/rank={WORKERS}...")
t0 = time.time()
tree = parallel_path_search(tn, tn.outer_inds(), method=method,
total_repeats=1024, max_time=300, n_workers=WORKERS)
total_repeats=1024, max_time=300, n_workers=WORKERS,trial_timeout=60)
t_search = time.time() - t0
if rank == 0:

View File

@@ -1,2 +1,2 @@
10.20.6.74:1
10.20.6.102:1
10.20.6.74
#10.20.6.102

View File

@@ -13,18 +13,15 @@ except ImportError:
MPI = None
def _serial_search(tn_bytes, output_inds, repeats, seed, max_time, slicing_opts=None):
"""Single-process path search with cotengra."""
import random
import cotengra as ctg
def _run_single_trial(tn_bytes, output_inds, seed, slicing_opts):
import random, cotengra as ctg
random.seed(seed)
tn = pickle.loads(tn_bytes)
opt = ctg.HyperOptimizer(
methods=["kahypar", "kahypar-agglom", "spinglass"],
max_repeats=repeats,
max_repeats=1,
parallel=False,
minimize="combo-256",
max_time=max_time,
optlib="random",
slicing_opts=slicing_opts,
progbar=False,
@@ -33,13 +30,62 @@ def _serial_search(tn_bytes, output_inds, repeats, seed, max_time, slicing_opts=
return tree.combo_cost(factor=256), tree
def _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, slicing_opts=None):
"""ProcessPool-based parallel search."""
def _kill_pool(pool):
for pid in list(pool._processes.keys()):
try:
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
pass
pool.shutdown(wait=False)
def _serial_search(
    tn_bytes,
    output_inds,
    repeats,
    seed,
    max_time,
    slicing_opts=None,
    trial_timeout=None,
):
    """Search for a contraction tree in the calling process.

    Without ``trial_timeout`` a single cotengra ``HyperOptimizer`` performs
    all ``repeats`` itself. With ``trial_timeout`` every repeat runs as its
    own trial in a fresh single-worker process pool that is killed afterwards,
    so a trial that hangs is abandoned instead of stalling the whole search.

    Args:
        tn_bytes: Pickled tensor network (unpickled here or in the trial).
        output_inds: Output indices handed to ``contraction_tree``.
        repeats: Number of optimizer repeats / trials to attempt.
        seed: Base random seed; trial ``i`` uses ``seed * 10000 + i``.
        max_time: Overall time budget in seconds. In the per-trial path
            ``None`` means no overall deadline.
        slicing_opts: Forwarded unchanged as the optimizer's ``slicing_opts``.
        trial_timeout: Per-trial limit in seconds, or ``None`` to run all
            repeats in one optimizer without per-trial isolation.

    Returns:
        ``(cost, tree)`` where ``cost`` is ``tree.combo_cost(factor=256)``.
        In the per-trial path the result is ``(inf, None)`` when no trial
        finished successfully.
    """
    import time

    if trial_timeout is None:
        import random
        import cotengra as ctg

        random.seed(seed)
        tn = pickle.loads(tn_bytes)
        opt = ctg.HyperOptimizer(
            methods=["kahypar", "kahypar-agglom", "spinglass"],
            max_repeats=repeats,
            parallel=False,
            minimize="combo-256",
            max_time=max_time,
            optlib="random",
            slicing_opts=slicing_opts,
            progbar=False,
        )
        tree = tn.contraction_tree(optimize=opt, output_inds=output_inds)
        return tree.combo_cost(factor=256), tree

    import logging
    from concurrent.futures import TimeoutError as _FutureTimeout

    log = logging.getLogger(__name__)
    deadline = float("inf") if max_time is None else time.time() + max_time
    best_cost, best_tree = float("inf"), None
    for i in range(repeats):
        # Compute the remaining budget once so a trial is never started with
        # a timeout that has already dropped to zero or below.
        remaining = deadline - time.time()
        if remaining <= 0:
            break
        pool = ProcessPoolExecutor(max_workers=1)
        try:
            # Submitting inside the try guarantees the pool is torn down even
            # if the submission itself fails.
            fut = pool.submit(
                _run_single_trial, tn_bytes, output_inds, seed * 10000 + i, slicing_opts
            )
            cost, tree = fut.result(timeout=min(trial_timeout, remaining))
        except _FutureTimeout:
            # Expected outcome for a hung trial: skip it, the worker is
            # killed in ``finally``.
            log.debug("path-search trial %d (seed %d) timed out", i, seed)
        except Exception:
            # Stay best-effort, but leave a trace so a systematically failing
            # setup is not mistaken for "no tree found".
            log.warning(
                "path-search trial %d (seed %d) failed", i, seed, exc_info=True
            )
        else:
            if cost < best_cost:
                best_cost, best_tree = cost, tree
        finally:
            _kill_pool(pool)
    return best_cost, best_tree
def _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, slicing_opts=None, trial_timeout=None):
tn_bytes = pickle.dumps(tn)
repeats_per = max(1, total_repeats // n_workers)
pool = ProcessPoolExecutor(max_workers=n_workers)
futures = [
pool.submit(_serial_search, tn_bytes, output_inds, repeats_per, seed, max_time, slicing_opts)
pool.submit(_serial_search, tn_bytes, output_inds, repeats_per, seed, max_time, slicing_opts, trial_timeout)
for seed in range(n_workers)
]
best_cost, best_tree = float("inf"), None
@@ -56,109 +102,78 @@ def _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, sli
finally:
for fut in futures:
fut.cancel()
for pid in list(pool._processes.keys()):
try:
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
pass
pool.shutdown(wait=False)
_kill_pool(pool)
return best_tree
def _mpi_search(tn, output_inds, total_repeats, max_time, n_workers=None, slicing_opts=None):
"""MPI+ProcessPool hybrid search.
Each MPI rank uses a local ProcessPool for parallel search,
then the best tree is gathered and broadcast.
"""
def _mpi_search(tn, output_inds, total_repeats, max_time, n_workers=None, slicing_opts=None, trial_timeout=None):
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
rank, size = comm.Get_rank(), comm.Get_size()
tn_bytes = pickle.dumps(tn)
repeats_per = max(1, total_repeats // size)
if n_workers and n_workers > 1:
# Hybrid: each MPI rank uses ProcessPool
local_tree = _processpool_search(
tn, output_inds, repeats_per, n_workers, max_time, slicing_opts
tn, output_inds, repeats_per, n_workers, max_time, slicing_opts, trial_timeout
)
local_cost = local_tree.combo_cost(factor=256) if local_tree else float("inf")
else:
# Pure MPI: each rank runs serial
local_cost, local_tree = _serial_search(
tn_bytes, output_inds, repeats_per, rank, max_time, slicing_opts
tn_bytes, output_inds, repeats_per, rank, max_time, slicing_opts, trial_timeout
)
all_results = comm.gather((local_tree.combo_cost(factor=256), local_tree), root=0)
all_results = comm.gather((local_cost, local_tree), root=0)
best_tree = None
if rank == 0:
best_cost = float("inf")
for cost, tree in all_results:
if tree is not None and cost < best_cost:
best_cost, best_tree = cost, tree
best_tree = comm.bcast(best_tree, root=0)
return best_tree
return comm.bcast(best_tree, root=0)
def parallel_path_search(tn, output_inds, method='processpool', total_repeats=1024,
max_time=300, n_workers=48, slicing_opts=None):
max_time=300, n_workers=48, slicing_opts=None, trial_timeout=None):
"""Parallel contraction path search.
Args:
tn: Tensor network (quimb TensorNetwork)
output_inds: Output indices
method: 'processpool' | 'mpi' | 'serial'
total_repeats: Total optimization repeats
max_time: Timeout per worker (seconds)
n_workers: Number of workers (processpool only, or per-MPI-rank if MPI)
slicing_opts: dict for cotengra slicing_opts (memory control)
Returns:
Best contraction tree
total_repeats: Total optimization repeats across all workers
max_time: Global timeout per worker (seconds)
n_workers: Workers per MPI rank (or total for processpool)
slicing_opts: cotengra slicing options for memory control
trial_timeout: Per-trial timeout (seconds); kills and skips hung trials
"""
if method == 'serial':
tn_bytes = pickle.dumps(tn)
_, tree = _serial_search(tn_bytes, output_inds, total_repeats, 0, max_time, slicing_opts)
_, tree = _serial_search(tn_bytes, output_inds, total_repeats, 0, max_time, slicing_opts, trial_timeout)
return tree
elif method == 'mpi':
if not _HAVE_MPI:
raise ImportError("mpi4py not available")
return _mpi_search(tn, output_inds, total_repeats, max_time, n_workers, slicing_opts)
return _mpi_search(tn, output_inds, total_repeats, max_time, n_workers, slicing_opts, trial_timeout)
elif method == 'processpool':
return _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, slicing_opts)
return _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, slicing_opts, trial_timeout)
else:
raise ValueError(f"Unknown method: {method}")
def parallel_contract(tree, arrays, method='mpi', comm=None):
"""Parallel sliced contraction.
Args:
tree: Contraction tree
arrays: List of tensor arrays
method: 'mpi' (only MPI supported for now)
comm: MPI communicator
Returns:
Contracted result (on root rank for MPI, otherwise for all)
"""
if method == 'mpi':
if not _HAVE_MPI or comm is None:
raise ValueError("MPI method requires mpi4py and comm")
return _contract_mpi(tree, arrays, comm)
else:
raise ValueError(f"Unknown method: {method}")
raise ValueError(f"Unknown method: {method}")
def _contract_mpi(tree, arrays, comm, root=0):
"""Distribute contraction slices across MPI ranks with Reduce."""
rank = comm.Get_rank()
size = comm.Get_size()
rank, size = comm.Get_rank(), comm.Get_size()
is_torch = type(arrays[0]).__module__.startswith("torch")
result_np = None
for i in range(rank, tree.multiplicity, size):
x = tree.contract_slice(arrays, i)
x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x))
x_np = np.asarray(x.detach().cpu().numpy() if is_torch else x).reshape(-1)
result_np = x_np if result_np is None else result_np + x_np
if result_np is None:
@@ -166,8 +181,4 @@ def _contract_mpi(tree, arrays, comm, root=0):
result = np.zeros_like(result_np) if rank == root else None
comm.Reduce(result_np, result, root=root)
if rank == root and is_torch:
import torch
return torch.from_numpy(np.asarray(result))
return result

0
vtune_contract/.norun Normal file
View File

View File

@@ -0,0 +1,268 @@
<?xml version="1.0" encoding="UTF-8"?>
<bag xmlns:boolean="http://www.w3.org/2001/XMLSchema#boolean" xmlns:double="http://www.intel.com/2001/XMLSchema#double" xmlns:exsl="http://exslt.org/common" xmlns:int="http://www.w3.org/2001/XMLSchema#int" xmlns:str="http://exslt.org/strings" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<internal>
<name>%HotspotsAtypeName</name>
<shortName>%HotspotsAtypeShortName</shortName>
<description>%HotspotsAtypeDescription</description>
<shortDescription>%HotspotsAtypeShortDescription</shortDescription>
<abbreviation>hs</abbreviation>
<alias>hotspots</alias>
<int:schemaVersion>1</int:schemaVersion>
<int:contentVersion>1</int:contentVersion>
<iconClass>fire solid</iconClass>
<property name="helpId">configs.analysis_type-hotspots_f1101</property>
</internal>
<prerequisites xsl:version="1.0" exsl:keep_exsl_namespace="" syntax="norules">
<xsl:copy-of select="document('config://analysis_type/include/require_connection.xsl?connections=group_generic,tcp')"/>
</prerequisites>
<knobs xsl:version="1.0" exsl:keep_exsl_namespace="" syntax="norules">
<xsl:variable name="pmuConditionsExtended" select="document('config://include/pmu_variables_extended.xsl')"/>
<xsl:variable name="isExtendedIsaSupport" select="$pmuConditionsExtended//variables/isExtendedIsaSupport"/>
<stringKnob id="initialViewpoint" displayName="%InitialViewpoint" boolean:visible="false">
<defaultValue>%HotspotsByCPUUsageViewpointName</defaultValue>
</stringKnob>
<stringKnob id="allowedViewpoints" boolean:visible="false">
<defaultValue>%HotspotsByCPUUsageViewpointName,%TasksOverviewViewpointName</defaultValue>
</stringKnob>
<xsl:variable name="knobsParams">
<params samplingIntervalApplyKnob="samplingMode" samplingIntervalApplyKnobValue="hw" pmuSamplingIntervalDescription="SamplingIntervalDescriptionOnHotspots"/>
</xsl:variable>
<xsl:variable name="knobsParamsName">
<xsl:text>config://analysis_type/include/knobs.xsl?</xsl:text>
<xsl:for-each select="exsl:node-set($knobsParams)//@*">
<xsl:value-of select="concat(name(), '=', .)"/>
<xsl:text>&amp;</xsl:text>
</xsl:for-each>
</xsl:variable>
<xsl:variable name="commonKnobs" select="document($knobsParamsName)"/>
<xsl:choose>
<xsl:when test="exsl:ctx('targetOS', '') = 'QNX' or exsl:ctx('targetOS', '') = 'MacOSX'">
<enumKnob id="samplingMode" displayName="%SamplingMode" cliName="sampling-mode" boolean:visible="false">
<description>%SamplingModeDescription</description>
<values>
<value displayName="%HardwareSampling" cliName="hw">hw</value>
<defaultValue>hw</defaultValue>
</values>
</enumKnob>
<xsl:copy-of select="$commonKnobs//knobs/doubleKnob[@id='samplingInterval']"/>
<booleanKnob id="enableStackCollect" displayName="%EnableStackCollection" cliName="enable-stack-collection">
<xsl:if test="not(exsl:is_experimental('sep-lbr'))">
<xsl:attribute name="boolean:visible">false</xsl:attribute>
</xsl:if>
<description>%EnableStackCollectionDescription</description>
<boolean:defaultValue>false</boolean:defaultValue>
</booleanKnob>
</xsl:when>
<xsl:when test="$isExtendedIsaSupport='true'">
<enumKnob id="samplingMode" cliName="sampling-mode" boolean:visible="true">
<description>%HardwareSamplingDescription</description>
<values>
<value displayName="%HardwareSampling" cliName="hw">hw</value>
<defaultValue>hw</defaultValue>
</values>
</enumKnob>
<xsl:copy-of select="$commonKnobs//knobs/doubleKnob[@id='samplingInterval']"/>
<booleanKnob id="enableStackCollect" displayName="%EnableStackCollection" cliName="enable-stack-collection">
<xsl:attribute name="boolean:visible">false</xsl:attribute>
<description>%EnableStackCollectionDescription</description>
<boolean:defaultValue>true</boolean:defaultValue>
</booleanKnob>
</xsl:when>
<xsl:otherwise>
<groupKnob id="groupForCustomControl">
<knobProperty name="knob_control_id">hotspotsGroup</knobProperty>
<knobs>
<enumKnob id="samplingMode" displayName="%SamplingMode" cliName="sampling-mode" boolean:visible="true">
<description>%SamplingModeDescription</description>
<values>
<value displayName="%SoftwareSampling" cliName="sw">sw</value>
<value displayName="%HardwareSampling" cliName="hw">hw</value>
<defaultValue>sw</defaultValue>
</values>
</enumKnob>
<xsl:copy-of select="$commonKnobs//knobs/doubleKnob[@id='samplingInterval']"/>
<booleanKnob id="enableStackCollect" displayName="%EnableStackCollection" cliName="enable-stack-collection">
<xsl:if test="not(exsl:is_experimental('sep-lbr')) and exsl:ctx('targetOS', '') = 'MacOSX'">
<xsl:attribute name="boolean:visible">false</xsl:attribute>
</xsl:if>
<description>%EnableStackCollectionDescription</description>
<boolean:defaultValue>false</boolean:defaultValue>
</booleanKnob>
<xsl:copy-of select="$commonKnobs//knobs/enumKnob[@id='stackSizeCollect']"/>
</knobs>
</groupKnob>
</xsl:otherwise>
</xsl:choose>
<doubleKnob id="slowGoodFrameThreshold" displayName="%SlowGoodFrameThreshold" cliName="slow-frames-threshold" boolean:visible="false">
<description>%SlowGoodFrameThresholdDescription</description>
<double:min>0.01</double:min>
<double:max>1024000</double:max>
<double:defaultValue>40</double:defaultValue>
</doubleKnob>
<doubleKnob id="goodFastFrameThreshold" displayName="%GoodFastFrameThreshold" cliName="fast-frames-threshold" boolean:visible="false">
<description>%GoodFastFrameThresholdDescription</description>
<double:min>0.01</double:min>
<double:max>1024000</double:max>
<double:defaultValue>100</double:defaultValue>
</doubleKnob>
<xsl:if test="exsl:ctx('targetOS', '') != 'Android'">
<booleanKnob id="enableCharacterizationInsights" displayName="%EnableCharacterizationInsights" cliName="enable-characterization-insights">
<boolean:defaultValue>true</boolean:defaultValue>
<description>%EnableCharacterizationInsightsDescription</description>
</booleanKnob>
</xsl:if>
</knobs>
<analysis xsl:version="1.0" str:keep_str_namespace="" exsl:keep_exsl_namespace="" syntax="norules">
<xsl:variable name="minMajorVersion" select="number(5)"/>
<xsl:variable name="minMinorVersion" select="number(10)"/>
<xsl:variable name="sepVersion" select="string(exsl:ctx('SEPDriverVersion', ''))"/>
<xsl:variable name="currentSepVersions" select="str:tokenize($sepVersion, '.')"/>
<xsl:variable name="pmuConditions" select="document('config://include/pmu_variables.xsl')"/>
<xsl:variable name="isHybridPMU" select="$pmuConditions//variables/isHybridPMU"/>
<xsl:variable name="useEventBasedCounts" select="(exsl:ctx('samplingMode', 'sw')='sw' or (number($currentSepVersions[1]) &gt; $minMajorVersion) or (number($currentSepVersions[1]) = $minMajorVersion and number($currentSepVersions[2]) &gt; $minMinorVersion)) and ($isHybridPMU != 'true')"/>
<xsl:variable name="events" select="document(concat('config://analysis_type/include/common_events.xsl?', 'useEventBasedCounts=', $useEventBasedCounts, '&amp;isHotspots=true'))"/>
<xsl:variable name="mainEvents" select="$events//events/cpi"/>
<xsl:variable name="fpuEvents" select="$events//events/fpu"/>
<xsl:variable name="retiredEvents" select="$events//events/retired"/>
<xsl:variable name="retiredEventsPM" select="$events//events/retired_perf_metrics"/>
<xsl:variable name="pmuCommon" select="document('config://include/pmu_common.xsl')"/>
<xsl:variable name="perfMetricsPossible" select="$pmuCommon//variables/perfMetricsPossible"/>
<collector id="runss">
<xsl:choose>
<xsl:when test="exsl:ctx('samplingMode', 'sw')='sw'">
<xsl:if test="not(exsl:ctx('isTPSSAvailable', 0)) and not(exsl:ctx('isPtraceAvailable', 0)) and not(exsl:ctx('targetOS', '') = 'QNX')">
<xsl:value-of select="exsl:error('%RunssHotspotsNotSupported')"/>
</xsl:if>
<xsl:if test="exsl:ctx('isPtraceScopeLimited', 0)">
<xsl:value-of select="exsl:error('%RunssPtraceScopeLimited')"/>
</xsl:if>
<xsl:if test="exsl:ctx('targetOS', '') = 'MacOSX'">
<xsl:value-of select="exsl:error('%ThisAnalysisTypeIsNotApplicable')"/>
</xsl:if>
<collectorKnob knob="collectSamplesMode">stack</collectorKnob>
<collectorKnob knob="samplingInterval">10</collectorKnob>
<collectorKnob knob="collectUserTasksEventsCountersMode">true</collectorKnob>
<xsl:if test="exsl:ctx('isPytraceAvailable', 0)">
<xsl:choose>
<xsl:when test="exsl:ctx('targetOS', '') = 'Windows'">
<collectorKnob knob="mrteType">java,dotnet,python</collectorKnob>
</xsl:when>
<xsl:when test="exsl:ctx('targetOS', '') = 'Linux'">
<collectorKnob knob="mrteType">java,python</collectorKnob>
</xsl:when>
</xsl:choose>
</xsl:if>
<collectorKnob knob="collectOpenMPRegions">
<xsl:value-of select="exsl:ctx('analyzeOpenMPRegions', 0)"/>
</collectorKnob>
<xsl:if test="exsl:ctx('enableCharacterizationInsights', 0)">
<xsl:variable name="isSEPFlow" select="exsl:ctx('isSEPDriverAvailable', 0) and (not(exsl:ctx('usePerf', 0)) or exsl:ctx('targetOS', '') = 'Windows' or exsl:ctx('targetOS', '') = 'MacOSX' or exsl:ctx('targetOS', '') = 'FreeBSD')"/>
<xsl:variable name="isPerfFlow" select="(exsl:ctx('targetOS', '') = 'Linux' or exsl:ctx('targetOS', '') = 'Android') and exsl:ctx('LinuxPerfCredentials', 'NotAvailable')!='NotAvailable' and contains(exsl:ctx('LinuxPerfCapabilities', ''), 'format')"/>
<xsl:variable name="isPMUAvailable" select="exsl:ctx('PerfmonVersion', '4') &gt; '1' and ($isSEPFlow or $isPerfFlow)"/>
<xsl:variable name="isPermissionInsufficient" select="exsl:ctx('targetOS')='Windows' and not(exsl:ctx('AdministratorPrivileges', 'false'))"/>
<xsl:choose>
<xsl:when test="not($isPMUAvailable) or $isPermissionInsufficient">
<xsl:if test="not($isPMUAvailable)">
<xsl:value-of select="exsl:warning('%HWInsightsNotAvailableWarningTpss')"/>
</xsl:if>
<xsl:if test="$isPermissionInsufficient">
<xsl:value-of select="exsl:warning('%HWInsightsNotAvailablePermissionWarningTpss')"/>
</xsl:if>
</xsl:when>
<xsl:otherwise>
<xsl:variable name="fullEventList">
<xsl:value-of select="$mainEvents"/>
<xsl:if test="$fpuEvents and $fpuEvents != ''">
<xsl:text>,</xsl:text>
<xsl:value-of select="$fpuEvents"/>
</xsl:if>
<xsl:choose>
<xsl:when test="$perfMetricsPossible = 'true'">
<xsl:if test="$retiredEventsPM and $retiredEventsPM != ''">
<xsl:text>,</xsl:text>
<xsl:value-of select="$retiredEventsPM"/>
</xsl:if>
</xsl:when>
<xsl:otherwise>
<xsl:if test="$retiredEvents and $retiredEvents != ''">
<xsl:text>,</xsl:text>
<xsl:value-of select="$retiredEvents"/>
</xsl:if>
</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<collectorKnob knob="pmuEventConfig">
<xsl:value-of select="exsl:merge_pmu_events($fullEventList)"/>
</collectorKnob>
<xsl:choose>
<xsl:when test="$perfMetricsPossible = 'true'">
<boolean:collectorKnob knob="useAggregatedCounting">true</boolean:collectorKnob>
<boolean:collectorKnob knob="usePerfMetrics">true</boolean:collectorKnob>
</xsl:when>
<xsl:otherwise>
<boolean:collectorKnob knob="useCountingMode">true</boolean:collectorKnob>
</xsl:otherwise>
</xsl:choose>
</xsl:otherwise>
</xsl:choose>
</xsl:if>
<boolean:collectorKnob knob="collectSWHotspots">true</boolean:collectorKnob>
</xsl:when>
<xsl:otherwise>
<xsl:attribute name="id">runsa</xsl:attribute>
<xsl:if test="exsl:ctx('PMU', '') = ''">
<xsl:value-of select="exsl:error('%UnknownPMUForAT')"/>
</xsl:if>
<collectorKnob knob="useEventBasedCounts">
<xsl:value-of select="$useEventBasedCounts"/>
</collectorKnob>
<collectorKnob knob="collectUserTasksEventsCountersMode">true</collectorKnob>
<collectorKnob knob="eventMode">all</collectorKnob>
<collectorKnob knob="enableStackCollection">
<xsl:value-of select="exsl:ctx('enableStackCollect', 0)"/>
</collectorKnob>
<xsl:copy-of select="document('config://analysis_type/include/knobs.xsl')//knobs/stackSizeKnobLogic/*"/>
<boolean:collectorKnob knob="enableCSwitch">false</boolean:collectorKnob>
<boolean:collectorKnob knob="cpuGpuUsageData">true</boolean:collectorKnob>
<xsl:variable name="usePerfMetrics" select="$perfMetricsPossible = 'true' and not(exsl:ctx('enableStackCollect', 0))"/>
<xsl:variable name="fullEventList">
<xsl:value-of select="$mainEvents"/>
<xsl:choose>
<xsl:when test="$usePerfMetrics">
<xsl:if test="$retiredEventsPM and $retiredEventsPM != ''">
<xsl:text>,</xsl:text>
<xsl:value-of select="$retiredEventsPM"/>
</xsl:if>
</xsl:when>
<xsl:otherwise>
<xsl:if test="$retiredEvents and $retiredEvents != ''">
<xsl:text>,</xsl:text>
<xsl:value-of select="$retiredEvents"/>
</xsl:if>
</xsl:otherwise>
</xsl:choose>
<xsl:if test="exsl:ctx('enableCharacterizationInsights', 0)">
<xsl:if test="$fpuEvents and $fpuEvents != ''">
<xsl:text>,</xsl:text>
<xsl:value-of select="$fpuEvents"/>
</xsl:if>
</xsl:if>
</xsl:variable>
<collectorKnob knob="pmuEventConfig">
<xsl:value-of select="exsl:merge_pmu_events($fullEventList)"/>
</collectorKnob>
<collectorKnob knob="pmuSamplingInterval">
<xsl:value-of select="format-number(exsl:ctx('samplingInterval', 1), '#.####')"/>
</collectorKnob>
<boolean:collectorKnob knob="isUArchUsageAvailable">true</boolean:collectorKnob>
<collectorKnob knob="usePerfMetrics">
<xsl:value-of select="$usePerfMetrics"/>
</collectorKnob>
<xsl:if test="exsl:is_experimental('ehfi')">
<collectorKnob knob="analyzeEHFIClasses">true</collectorKnob>
</xsl:if>
</xsl:otherwise>
</xsl:choose>
</collector>
</analysis>
</bag>

View File

@@ -0,0 +1,34 @@
<?xml version='1.0' encoding='UTF-8'?>
<bag xmlns:boolean="http://www.w3.org/2001/XMLSchema#boolean" xmlns:double="http://www.intel.com/2001/XMLSchema#double" xmlns:int="http://www.w3.org/2001/XMLSchema#int" xmlns:null="http://www.intel.com/2009/BagSchema#null">
<pointer:workload pointer:type_id="dasID_cctrl2::ApplicationWorkload" pointer:ptr_type="dasID_cctrl2::IWorkload">
<workload targetName="launch" launch_app.app_to_launch="mpirun" launch_app.app_parameters="&quot;-np&quot; &quot;2&quot; &quot;--hostfile&quot; &quot;/home/yx/qibotn/hostfile&quot; &quot;-perhost&quot; &quot;2&quot; &quot;--map-by&quot; &quot;numa&quot; &quot;python&quot; &quot;benchmark_contract_sliced.py&quot; " result_directory.path="/home/yx/qibotn/vtune_contract">
<search_directories/>
<context>
<contextValue id="allowMultipleRuns" boolean:value="false"/>
<contextValue id="analyzeKvmGuest" boolean:value="false"/>
<contextValue id="analyzeSystemWide" boolean:value="false"/>
<contextValue id="cpuMask" value=""/>
<contextValue id="customCollector" value=""/>
<contextValue id="dataLimit" int:value="1000"/>
<contextValue id="enableRing" boolean:value="false"/>
<contextValue id="finalizationMode" value="fast"/>
<contextValue id="followChild" boolean:value="true"/>
<contextValue id="followChildGroup" null:value=""/>
<contextValue id="followChildStrategy" value=""/>
<contextValue id="groupForFinalizationControl" null:value=""/>
<contextValue id="kvmGuestKallsyms" value=""/>
<contextValue id="kvmGuestModules" value=""/>
<contextValue id="kvmProfileGuest" null:value=""/>
<contextValue id="mrteMode" value="auto"/>
<contextValue id="targetDurationType" value="short"/>
<contextValue id="targetRingBuffer" double:value="0"/>
<contextValue id="targetType" value="launch"/>
<contextValue id="traceMpi" boolean:value="false"/>
<contextValue id="tracingMode" null:value=""/>
<contextValue id="wrapperScriptContent" value=""/>
<contextValue id="wrapperScriptPath" value=""/>
</context>
</workload>
</pointer:workload>
</bag>

View File

@@ -0,0 +1,369 @@
<?xml version='1.0' encoding='UTF-8'?>
<bag xmlns:boolean="http://www.w3.org/2001/XMLSchema#boolean" xmlns:double="http://www.intel.com/2001/XMLSchema#double" xmlns:int="http://www.w3.org/2001/XMLSchema#int" xmlns:long="http://www.w3.org/2001/XMLSchema#long" xmlns:null="http://www.intel.com/2009/BagSchema#null" xmlns:unsignedByte="http://www.w3.org/2001/XMLSchema#unsignedByte" xmlns:unsignedInt="http://www.w3.org/2001/XMLSchema#unsignedInt" xmlns:unsignedLong="http://www.w3.org/2001/XMLSchema#unsignedLong" xmlns:unsignedShort="http://www.w3.org/2001/XMLSchema#unsignedShort">
<contextValue id="CLIENT_ID" value="CLI"/>
<contextValue id="CPU_NAME" value="Intel(R) Xeon(R) Processor code named Emeraldrapids"/>
<contextValue id="GPUUserGroups" value=""/>
<contextValue id="Hypervisor" value="None"/>
<contextValue id="HypervisorType" value="None"/>
<contextValue id="IsNUMANodeWithoutCPUsPresent" boolean:value="false"/>
<contextValue id="L2CATDetails" value="COS=8;ways=16"/>
<contextValue id="L3CATDetails" value="COS=15;ways=20"/>
<contextValue id="LLCSize" unsignedLong:value="272629760"/>
<contextValue id="LinuxPerfCapabilities" value="breakpoint:raw;cpu:raw,format,events,ldlat,frontend,offcore_rsp;cstate_core:raw,format,events;cstate_pkg:raw,format,events;dsa0:raw,format;dsa2:raw,format;iax1:raw,format;iax3:raw,format;intel_bts:raw;intel_pt:raw,format;kprobe:raw,format;msr:raw,format,events;power:raw,format,events;software:raw;tracepoint:raw;uncore_cha:52,raw,format;uncore_iio:10,raw,format;uncore_iio_free_running:10,raw,format,events;uncore_imc:8,raw,format,events;uncore_imc_free_running:4,raw,format,events;uncore_irp:10,raw,format;uncore_m2m:4,raw,format;uncore_m2pcie:10,raw,format;uncore_m3upi:4,raw,format;uncore_mdf:14,raw,format;uncore_pcu:raw,format;uncore_upi:3,raw,format;uprobe:raw,format"/>
<contextValue id="LinuxPerfCredentials" value="User"/>
<contextValue id="LinuxPerfMuxIntervalMs" int:value="1"/>
<contextValue id="LinuxPerfStackCapabilities" value="fp,dwarf,lbr"/>
<contextValue id="LinuxPerfVersion" value="6.6.99-vtune"/>
<contextValue id="LinuxRelease" value="6.12.0-124.52.1.el10_1.x86_64"/>
<contextValue id="OS" value="Linux"/>
<contextValue id="OS@version" int:value="1"/>
<contextValue id="OSBitness" value="64"/>
<contextValue id="OSBuildNumber" unsignedShort:value="0"/>
<contextValue id="PMU" value="emeraldrapids_server"/>
<contextValue id="PerfmonVersion" unsignedByte:value="5"/>
<contextValue id="RootPrivileges" boolean:value="false"/>
<contextValue id="accurateCPUTimeDetection" boolean:value="false"/>
<contextValue id="adjustCollectionBoundsByOMPApps" boolean:value="false"/>
<contextValue id="advancedLoopAnalysis" boolean:value="false"/>
<contextValue id="allowMultipleRuns" boolean:value="false"/>
<contextValue id="allowedViewpoints" value="%HotspotsByCPUUsageViewpointName,%TasksOverviewViewpointName"/>
<contextValue id="analyzeActivePowerConsumption" boolean:value="false"/>
<contextValue id="analyzeDgfxBandwidth" boolean:value="false"/>
<contextValue id="analyzeEHFIClasses" boolean:value="false"/>
<contextValue id="analyzeEnergyConsumption" boolean:value="false"/>
<contextValue id="analyzeFPU" boolean:value="false"/>
<contextValue id="analyzeFullProcTrace" boolean:value="false"/>
<contextValue id="analyzeIdlePowerConsumption" boolean:value="false"/>
<contextValue id="analyzeKvmGuest" boolean:value="false"/>
<contextValue id="analyzeMemoryConsumption" boolean:value="false"/>
<contextValue id="analyzePersistentMemory" boolean:value="false"/>
<contextValue id="analyzeSystemWide" boolean:value="false"/>
<contextValue id="appRunsCount" unsignedInt:value="1"/>
<contextValue id="apsMode" boolean:value="false"/>
<contextValue id="areKernelPtrsRestricted" value="yes"/>
<contextValue id="atraceEventConfig" value=""/>
<contextValue id="basicBlockAnalysis" boolean:value="true"/>
<contextValue id="cacheMonitoringUpscalingFactor" unsignedLong:value="106496"/>
<contextValue id="calleeAttributionMode" value="undefined"/>
<contextValue id="chipsetEventConfig" value=""/>
<contextValue id="collectCAT" boolean:value="false"/>
<contextValue id="collectCPUGPUBandwidth" boolean:value="false"/>
<contextValue id="collectCacheOccupancy" boolean:value="false"/>
<contextValue id="collectCallCounts" boolean:value="false"/>
<contextValue id="collectFPGAOpenCl" boolean:value="false"/>
<contextValue id="collectFpgaBlueStreamEvents" boolean:value="false"/>
<contextValue id="collectFramesMode" boolean:value="true"/>
<contextValue id="collectFullProcTrace" boolean:value="false"/>
<contextValue id="collectGpuCm" boolean:value="false"/>
<contextValue id="collectGpuMetal" boolean:value="false"/>
<contextValue id="collectGpuOpenCl" value=""/>
<contextValue id="collectGpuOpenClArg" value=""/>
<contextValue id="collectHwTrace" boolean:value="false"/>
<contextValue id="collectIOMMU" boolean:value="false"/>
<contextValue id="collectIoMode" value="off"/>
<contextValue id="collectIoWaits" boolean:value="false"/>
<contextValue id="collectL3ExternalBW" boolean:value="false"/>
<contextValue id="collectMemBandwidth" boolean:value="false"/>
<contextValue id="collectMemObjects" boolean:value="false"/>
<contextValue id="collectMmioAccess" boolean:value="false"/>
<contextValue id="collectOpenMPRegions" boolean:value="false"/>
<contextValue id="collectOsCounters" boolean:value="false"/>
<contextValue id="collectPCIeBandwidth" boolean:value="false"/>
<contextValue id="collectPCIeP2PBandwidth" boolean:value="false"/>
<contextValue id="collectPStateData" boolean:value="true"/>
<contextValue id="collectPTforTSX" boolean:value="false"/>
<contextValue id="collectPreciseClockticks" boolean:value="false"/>
<contextValue id="collectSWHotspots" boolean:value="true"/>
<contextValue id="collectSamplesMode" value="stack"/>
<contextValue id="collectSignalsMode" value="off"/>
<contextValue id="collectTSXCycles" boolean:value="false"/>
<contextValue id="collectThrottlingReasons" boolean:value="false"/>
<contextValue id="collectTopology" boolean:value="false"/>
<contextValue id="collectTripCounts" boolean:value="false"/>
<contextValue id="collectUserDataAllMode" boolean:value="false"/>
<contextValue id="collectUserSyncMode" boolean:value="false"/>
<contextValue id="collectUserTasksEventsCountersMode" boolean:value="true"/>
<contextValue id="collectWaitsMode" value="off"/>
<contextValue id="collectXpuCompute" value="true"/>
<contextValue id="collectionStart" value="15:57:39 07/05/2026 UTC"/>
<contextValue id="collectionStart@version" int:value="2"/>
<contextValue id="collectionStartTsc" unsignedLong:value="5790238599043820950"/>
<contextValue id="collectionStop" value="15:58:09 07/05/2026 UTC"/>
<contextValue id="collectionStop@version" int:value="2"/>
<contextValue id="collectionStopTsc" unsignedLong:value="5790238892163798430"/>
<contextValue id="collectorTypeDecription" value="Driverless Perf per-process counting,User-mode sampling and tracing"/>
<contextValue id="collectorTypeDecription@version" int:value="1"/>
<contextValue id="commandLine" value="mpirun &quot;-np&quot; &quot;2&quot; &quot;--hostfile&quot; &quot;/home/yx/qibotn/hostfile&quot; &quot;-perhost&quot; &quot;2&quot; &quot;--map-by&quot; &quot;numa&quot; &quot;python&quot; &quot;benchmark_contract_sliced.py&quot; "/>
<contextValue id="commandLine@version" int:value="1"/>
<contextValue id="computerName" value="node-3"/>
<contextValue id="computerName@version" int:value="1"/>
<contextValue id="connectionType" value="localhost"/>
<contextValue id="coreFrequencies" value="2699999873"/>
<contextValue id="coreFrequencies@version" int:value="1"/>
<contextValue id="cpuByIoWaits" boolean:value="false"/>
<contextValue id="cpuFamily" unsignedInt:value="6"/>
<contextValue id="cpuFamily@version" int:value="1"/>
<contextValue id="cpuGpuUsageData" boolean:value="false"/>
<contextValue id="cpuMask" value=""/>
<contextValue id="cpuModel" unsignedInt:value="207"/>
<contextValue id="cpuModel@version" int:value="1"/>
<contextValue id="createGPUQueueFrames" boolean:value="false"/>
<contextValue id="criticalRankOpenmpThreadCount" unsignedInt:value="0"/>
<contextValue id="criticalRankOpenmpThreadCount@version" int:value="1"/>
<contextValue id="cswitchMode" value="inactive"/>
<contextValue id="customCollector" value=""/>
<contextValue id="dataLimit" int:value="1000"/>
<contextValue id="disableGPUSysinfo" boolean:value="false"/>
<contextValue id="disableRetCompression" boolean:value="false"/>
<contextValue id="dramBandwidthLimits" boolean:value="false"/>
<contextValue id="dumpComputeTaskBinaries" boolean:value="false"/>
<contextValue id="emonCSVTraceFormat" boolean:value="false"/>
<contextValue id="emonMachineReadableVersionFile" boolean:value="false"/>
<contextValue id="emonPreset" value=""/>
<contextValue id="enableCStateCollection" boolean:value="false"/>
<contextValue id="enableCSwitch" boolean:value="false"/>
<contextValue id="enableCharacterizationInsights" boolean:value="true"/>
<contextValue id="enableCycleAccurateMode" boolean:value="false"/>
<contextValue id="enableDramBandwidthLimitsWarning" boolean:value="false"/>
<contextValue id="enableHWBasedCSCollection" boolean:value="false"/>
<contextValue id="enableInterruptsCollection" boolean:value="false"/>
<contextValue id="enableLBRCollection" boolean:value="false"/>
<contextValue id="enableMemoryObjectCorrelation" boolean:value="false"/>
<contextValue id="enableMpiTracing" boolean:value="false"/>
<contextValue id="enableOpenglesInstrumentation" boolean:value="false"/>
<contextValue id="enablePEBSCollection" boolean:value="false"/>
<contextValue id="enableParallelFsCollection" boolean:value="false"/>
<contextValue id="enableRing" boolean:value="false"/>
<contextValue id="enableRing0ProfilingMode" boolean:value="false"/>
<contextValue id="enableStackCollect" boolean:value="false"/>
<contextValue id="enableStackCollection" boolean:value="false"/>
<contextValue id="enableThreadAffinity" boolean:value="false"/>
<contextValue id="enableTimedPEBSCollection" boolean:value="false"/>
<contextValue id="enableVTSSCollection" boolean:value="false"/>
<contextValue id="energyProfilingMode" value="none"/>
<contextValue id="environmentVars" null:value=""/>
<contextValue id="environmentVars@version" int:value="1"/>
<contextValue id="errorsAsWarnings" boolean:value="false"/>
<contextValue id="eventInfo" boolean:value="false"/>
<contextValue id="eventMode" value="all"/>
<contextValue id="eventMuxFactor" int:value="5"/>
<contextValue id="explicitUncoreEventsConfig" boolean:value="false"/>
<contextValue id="fileRequiestLogic" value="local"/>
<contextValue id="finalizationMode" value="fast"/>
<contextValue id="followChild" boolean:value="true"/>
<contextValue id="followChildGroup" null:value=""/>
<contextValue id="followChildStrategy" value=""/>
<contextValue id="forceMuxOff" boolean:value="false"/>
<contextValue id="forceShowInlines" boolean:value="false"/>
<contextValue id="forceSystemWide" boolean:value="false"/>
<contextValue id="fpgaAocxOrBinaryFile" value=""/>
<contextValue id="fpgaNoMemTransfers" boolean:value="false"/>
<contextValue id="fpgaNoTemporal" boolean:value="false"/>
<contextValue id="fpgaOnBoard" value="None"/>
<contextValue id="fpgaPeriod" int:value="0"/>
<contextValue id="fpgaSourceFile" value=""/>
<contextValue id="fpuVersion" value="1_0"/>
<contextValue id="ftraceEventConfig" value=""/>
<contextValue id="genArchOnBoard" int:value="0"/>
<contextValue id="goodFastFrameThreshold" double:value="100"/>
<contextValue id="gpuAdapterNames" value=""/>
<contextValue id="gpuCounters" value=""/>
<contextValue id="gpuDisplayPrefix" value="GPU"/>
<contextValue id="gpuFixedMetricsSelected" value=""/>
<contextValue id="gpuHWProfiling" value=""/>
<contextValue id="gpuLLCSize" unsignedLong:value="272629760"/>
<contextValue id="gpuMetricsSelected" value=""/>
<contextValue id="gpuPerformanceMode" null:value=""/>
<contextValue id="gpuPlatform" null:value=""/>
<contextValue id="gpuPlatformIndex" null:value=""/>
<contextValue id="gpuPlatformIndex@version" int:value="1"/>
<contextValue id="gpuProfilingMode" value=""/>
<contextValue id="gpuSamplingInterval" value=""/>
<contextValue id="gpuUsage" boolean:value="false"/>
<contextValue id="groupForCustomControl" null:value=""/>
<contextValue id="groupForFinalizationControl" null:value=""/>
<contextValue id="groupForGPUCustomCollection" null:value=""/>
<contextValue id="groupForTraceEventConfig" null:value=""/>
<contextValue id="handleLostEvents" boolean:value="false"/>
<contextValue id="hideSystemByDefault" boolean:value="false"/>
<contextValue id="hideWarningInPerfsnapshot" boolean:value="false"/>
<contextValue id="hostName" value="node-3"/>
<contextValue id="hostOS" value="Linux"/>
<contextValue id="hostsCount" int:value="1"/>
<contextValue id="hostsCount@version" int:value="1"/>
<contextValue id="i915Status" value="MissingDriver"/>
<contextValue id="ignorePowerData" boolean:value="false"/>
<contextValue id="inKernelProfilingAnalysis" boolean:value="false"/>
<contextValue id="initialViewpoint" value="%HotspotsByCPUUsageViewpointName"/>
<contextValue id="iptCollectEvents" boolean:value="false"/>
<contextValue id="iptRegionsToLoad" int:value="0"/>
<contextValue id="is3DXP2LMMode" boolean:value="false"/>
<contextValue id="is3DXPAppDirectMode" boolean:value="false"/>
<contextValue id="is3DXPPresent" boolean:value="false"/>
<contextValue id="isAOCLAvailable" boolean:value="false"/>
<contextValue id="isCATSupportedByCPU" boolean:value="true"/>
<contextValue id="isCPUSupportedBySocwatch" boolean:value="true"/>
<contextValue id="isCSwitchAvailable" value="no"/>
<contextValue id="isCpuThrottlingAvailable" boolean:value="false"/>
<contextValue id="isDeviceOrCredentialGuardEnabled" boolean:value="false"/>
<contextValue id="isEHFIAvailable" boolean:value="false"/>
<contextValue id="isEnergyCollectionSupported" boolean:value="true"/>
<contextValue id="isFtraceAvailable" value="ftraceAccessError,debugfsNotAccessible"/>
<contextValue id="isFtraceAvailableKnob" value="ftraceAccessError,debugfsNotAccessible"/>
<contextValue id="isFunctionTracingAvailable" value="no"/>
<contextValue id="isGpuBusynessAvailable" value="unsupportedHardware"/>
<contextValue id="isGpuMultiRunRequired" boolean:value="false"/>
<contextValue id="isGpuWaitAvailable" value="no"/>
<contextValue id="isHTEnabled" boolean:value="false"/>
<contextValue id="isHybridCPU" boolean:value="false"/>
<contextValue id="isHybridCPU@version" int:value="1"/>
<contextValue id="isIowaitTracingAvailable" value="no"/>
<contextValue id="isL2CATAvailable" boolean:value="true"/>
<contextValue id="isL3CATAvailable" boolean:value="true"/>
<contextValue id="isL3CacheOccupancyAvailable" boolean:value="true"/>
<contextValue id="isL3LocalBWAvailable" boolean:value="true"/>
<contextValue id="isL3MonitoringSupportedByCPU" boolean:value="true"/>
<contextValue id="isL3TotalBWAvailable" boolean:value="true"/>
<contextValue id="isMaxDRAMBandwidthMeasurementSupported" boolean:value="true"/>
<contextValue id="isMdfEtwAvailable" boolean:value="false"/>
<contextValue id="isNMIWatchDogTimerRunning" boolean:value="true"/>
<contextValue id="isPAXDriverLoaded" boolean:value="false"/>
<contextValue id="isPStateAvailable" boolean:value="true"/>
<contextValue id="isPTAvailable" boolean:value="true"/>
<contextValue id="isPerfPCIeMappingAvailable" boolean:value="false"/>
<contextValue id="isPowerVRDataAvailable" value="no"/>
<contextValue id="isPtraceAvailable" boolean:value="true"/>
<contextValue id="isPtraceScopeLimited" boolean:value="false"/>
<contextValue id="isPytraceAvailable" boolean:value="true"/>
<contextValue id="isSEPDriverAvailable" boolean:value="false"/>
<contextValue id="isSGXAvailable" boolean:value="true"/>
<contextValue id="isSocwatchDriverLoaded" boolean:value="false"/>
<contextValue id="isTPSSAvailable" boolean:value="true"/>
<contextValue id="isTSXAvailable" boolean:value="false"/>
<contextValue id="isUArchUsageAvailable" boolean:value="false"/>
<contextValue id="isVSyncAvailable" value="no"/>
<contextValue id="isVTSSPPDriverAvailable" boolean:value="true"/>
<contextValue id="isXelinkAvailable" boolean:value="false"/>
<contextValue id="kernelStackWarningCheck" boolean:value="false"/>
<contextValue id="kernelsToProfile" value="*#1#1#4294967295"/>
<contextValue id="kvmGuestKallsyms" value=""/>
<contextValue id="kvmGuestModules" value=""/>
<contextValue id="kvmProfileGuest" null:value=""/>
<contextValue id="l0DevicesAvailable" boolean:value="false"/>
<contextValue id="l0GPUDevicesAvailable" boolean:value="false"/>
<contextValue id="l0LoaderStatus" value="LibNotFound"/>
<contextValue id="l0MetricConfig" value=""/>
<contextValue id="l0SamplingInterval" double:value="1"/>
<contextValue id="l0SamplingType" value="time"/>
<contextValue id="l0SysmanConfig" value=""/>
<contextValue id="l0VPUDevicesAvailable" boolean:value="false"/>
<contextValue id="loadLbrStackToDb" boolean:value="true"/>
<contextValue id="loadPebsData" boolean:value="true"/>
<contextValue id="loadRawLbrData" boolean:value="false"/>
<contextValue id="logicalCPUCount" int:value="96"/>
<contextValue id="logicalCPUCount@version" int:value="1"/>
<contextValue id="maxLocalBandwidth" unsignedLong:value="0"/>
<contextValue id="maxRegionDuration" double:value="100"/>
<contextValue id="memoryAccessBandwidthMeasuring" boolean:value="false"/>
<contextValue id="memoryObjectMinSize" int:value="1024"/>
<contextValue id="memoryType" value="Unknown"/>
<contextValue id="mpiRank" null:value=""/>
<contextValue id="mpiRank@version" int:value="2"/>
<contextValue id="mrteMode" value="auto"/>
<contextValue id="mrteType" value="java,python"/>
<contextValue id="muxGroupCount" int:value="3"/>
<contextValue id="nameThreadsAsCreationModule" boolean:value="false"/>
<contextValue id="noSignalAltstack" boolean:value="false"/>
<contextValue id="numaVersion" value="1_0"/>
<contextValue id="numaVersion@version" int:value="1"/>
<contextValue id="numaVersionCurrent" value="1_0"/>
<contextValue id="omniPathOnBoard" value="None"/>
<contextValue id="openclSourceAsm" boolean:value="true"/>
<contextValue id="openmpProcessCount" unsignedInt:value="0"/>
<contextValue id="openmpProcessCount@version" int:value="1"/>
<contextValue id="openmpThreadCount" unsignedInt:value="2"/>
<contextValue id="openmpThreadCount@version" int:value="2"/>
<contextValue id="osDetailedName" value="6.12.0-124.52.1.el10_1.x86_64 \S&#10;Kernel \r on \m&#10;"/>
<contextValue id="osDetailedName@version" int:value="1"/>
<contextValue id="osKernelRelease" value="6.12.0-124.52.1.el10_1.x86_64"/>
<contextValue id="osKernelRelease@version" int:value="1"/>
<contextValue id="osName" value="Linux"/>
<contextValue id="osName@version" int:value="1"/>
<contextValue id="packageCount" int:value="2"/>
<contextValue id="packageCount@version" int:value="1"/>
<contextValue id="pausedTime" double:value="0"/>
<contextValue id="pausedTime@version" int:value="2"/>
<contextValue id="pciClassParts" value=""/>
<contextValue id="perfForceSystemWide" boolean:value="false"/>
<contextValue id="physicalCoreCount" int:value="96"/>
<contextValue id="physicalCoreCount@version" int:value="1"/>
<contextValue id="platformType" value="157"/>
<contextValue id="pmuEventConfig" value="CPU_CLK_UNHALTED.THREAD,CPU_CLK_UNHALTED.REF_TSC:sample:sa=2700000,INST_RETIRED.ANY:sample:sa=2700000,CPU_CLK_UNHALTED.DISTRIBUTED,FP_ARITH_INST_RETIRED.SCALAR_SINGLE,FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED.SCALAR_DOUBLE,FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE,UOPS_EXECUTED.X87,UOPS_RETIRED.SLOTS:sample:sa=2000003,UOPS_EXECUTED.THREAD,FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED2.SCALAR_HALF,FP_ARITH_INST_RETIRED2.128B_PACKED_HALF,FP_ARITH_INST_RETIRED2.256B_PACKED_HALF,FP_ARITH_INST_RETIRED2.512B_PACKED_HALF,TOPDOWN.SLOTS:sample:sa=2000003,TOPDOWN.SLOTS_P:sample:sa=2000003"/>
<contextValue id="pmuSamplingInterval" double:value="1"/>
<contextValue id="populatedIoParts" value=""/>
<contextValue id="populatedIoUnits" value=""/>
<contextValue id="populatedTidValuesForIO" value=""/>
<contextValue id="preciseMultiplexing" boolean:value="false"/>
<contextValue id="preferDriverlessCollection" boolean:value="true"/>
<contextValue id="preferedGpuAdapter" value="none"/>
<contextValue id="preferredEbsCollectorOrder" value="perf,vtss,sep"/>
<contextValue id="processKernelBinaries" boolean:value="false"/>
<contextValue id="referenceFrequency" unsignedLong:value="2699999873"/>
<contextValue id="referenceFrequency@version" int:value="2"/>
<contextValue id="resolveCallsites" boolean:value="true"/>
<contextValue id="restrictPCIeBandwidthByClass" value="None"/>
<contextValue id="resultSize" long:value="87587138"/>
<contextValue id="resultSize@version" int:value="1"/>
<contextValue id="ringBuffer" int:value="0"/>
<contextValue id="ringBufferSizeInSeconds" double:value="0"/>
<contextValue id="runss:enable" boolean:value="true"/>
<contextValue id="samplingInterval" double:value="5"/>
<contextValue id="samplingMode" value="sw"/>
<contextValue id="scPids" value="1035901"/>
<contextValue id="scStartTscs" value="5790238597824248360"/>
<contextValue id="shortCollectionMux" boolean:value="false"/>
<contextValue id="shortCollectionMux@version" int:value="1"/>
<contextValue id="showGPUBandwidthHistogram" boolean:value="true"/>
<contextValue id="showInlinesByDefault" boolean:value="true"/>
<contextValue id="slowGoodFrameThreshold" double:value="40"/>
<contextValue id="stackSize" int:value="0"/>
<contextValue id="stackSizeCollect" value="0"/>
<contextValue id="stackStitching" boolean:value="true"/>
<contextValue id="stackTypeCollect" value="software"/>
<contextValue id="stackUnwindLimit" int:value="8388608"/>
<contextValue id="stackwalk" value="offline"/>
<contextValue id="stallReasonsSamplingInterval" value=""/>
<contextValue id="supportedTargetTypes" value="all"/>
<contextValue id="suppressCSVSyntaxWarnings" boolean:value="false"/>
<contextValue id="systemCollectorConfig" value=""/>
<contextValue id="systemWideContextSwitch" boolean:value="false"/>
<contextValue id="systemWideDiskIO" boolean:value="false"/>
<contextValue id="targetConcurrency" int:value="96"/>
<contextValue id="targetConcurrency@version" int:value="2"/>
<contextValue id="targetDurationType" value="short"/>
<contextValue id="targetGPU" value="none"/>
<contextValue id="targetOS" value="Linux"/>
<contextValue id="targetOS@version" int:value="1"/>
<contextValue id="targetOption" value="localhost"/>
<contextValue id="targetRingBuffer" double:value="0"/>
<contextValue id="targetType" value="launch"/>
<contextValue id="tidValuesForIO" value=""/>
<contextValue id="tmamVersion" value="5_1"/>
<contextValue id="tmamVersion@version" int:value="1"/>
<contextValue id="tmamVersionCurrent" value="5_1"/>
<contextValue id="totalElapsedTime" double:value="29.311997748100001"/>
<contextValue id="totalElapsedTime@version" int:value="3"/>
<contextValue id="traceMpi" boolean:value="false"/>
<contextValue id="tracingMode" null:value=""/>
<contextValue id="uncoreSamplingInterval" int:value="10"/>
<contextValue id="useAOCLProfile" boolean:value="false"/>
<contextValue id="useAggregatedCounting" boolean:value="false"/>
<contextValue id="useCountingMode" boolean:value="true"/>
<contextValue id="useEventBasedCounts" boolean:value="false"/>
<contextValue id="useGpuCounting" boolean:value="false"/>
<contextValue id="usePerf" boolean:value="false"/>
<contextValue id="usePerfMetrics" boolean:value="false"/>
<contextValue id="wrapperScriptContent" value=""/>
<contextValue id="wrapperScriptPath" value=""/>
</bag>

View File

@@ -0,0 +1,71 @@
<?xml version='1.0' encoding='UTF-8'?>
<bag xmlns:int="http://www.w3.org/2001/XMLSchema#int" xmlns:long="http://www.w3.org/2001/XMLSchema#long">
<message_entry_t int:status="0" cap="Collection failed" msg="Collection failed. The data cannot be displayed. " long:timeStamp="1778169489297">
<message msg="Only user space will be profiled due to credentials lack. Consider changing /proc/sys/kernel/perf_event_paranoid file for enabling kernel space profiling." int:severity="2"/>
<message msg="Collection started. To stop the collection, either press CTRL-C or enter from another console window: vtune -r /home/yx/qibotn/vtune_contract -command stop." int:severity="1"/>
<message msg="Unauthorized control server connection." int:severity="4"/>
<message msg="Collection stopped." int:severity="1"/>
</message_entry_t>
<message_entry_t int:status="1" cap="Finalization completed with warnings" msg="Result finalization has completed with warnings that may affect the representation of analysis data. Please see details below." long:timeStamp="1778169531570">
<message msg="Using result path `/home/yx/qibotn/vtune_contract'" int:severity="1"/>
<message msg="The database has been cleared, elapsed time is 0.295 seconds." int:severity="16"/>
<message msg="Raw data has been loaded to the database, elapsed time is 14.977 seconds." int:severity="16"/>
<message msg="Data transformations have been finished, elapsed time is 0.017 seconds." int:severity="16"/>
<message msg="Cannot locate file `&lt;frozen importlib._bootstrap&gt;'." int:severity="2"/>
<message msg="Cannot locate file `&lt;frozen importlib._bootstrap_external&gt;'." int:severity="2"/>
<message msg="Cannot locate file `&lt;string&gt;.ko'." int:severity="2"/>
<message msg="Cannot locate file `&lt;frozen abc&gt;.ko'." int:severity="2"/>
<message msg="Cannot locate file `&lt;frozen posixpath&gt;.ko'." int:severity="2"/>
<message msg="Cannot locate file `/usr/lib64/python3.12/encodings/cp775.py'." int:severity="2"/>
<message msg="Cannot locate file `&lt;shim&gt;.ko'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/libibverbs.so.1'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/usr/bin/grep'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/usr/bin/bash'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/intel/oneapi/mpi/2021.17/opt/mpi/libfabric/lib/prov/libmlx-fi.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/libucp.so.0'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/libuct.so.0'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/libucs.so.0'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/intel/oneapi/vtune/2025.7/lib64/libtpsstool.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/libucm.so.0'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/usr/lib64/libibverbs/libmlx5-rdmav57.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/ucx/libuct_ib.so.0'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/usr/bin/lscpu'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/libc.so.6'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/ucx/libuct_ib_mlx5.so.0'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/torch/lib/libgomp.so.1'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/torch/lib/libtorch_python.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/usr/bin/python3.12'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/libpython3.12.so.1.0'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/usr/bin/whoami'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/scipy/linalg/_decomp_interpolative.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/usr/bin/uname'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/usr/lib64/python3.12/lib-dynload/_pickle.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/libstdc++.so.6'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/scipy/spatial/_ckdtree.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/usr/lib64/python3.12/lib-dynload/_ctypes.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/numpy/_core/_multiarray_umath.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/lib64/libffi.so.8'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/mpi4py/MPI.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/scipy/interpolate/_rgi_cython.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/intel/oneapi/vtune/2025.7/lib64/runtime/libittnotify_collector.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/numpy/random/bit_generator.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/numpy/random/_pcg64.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/numpy/random/_generator.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/torch/lib/libc10.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/scipy/_cyutility.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/81d243bd2c585b0f4821__mypyc.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/intel/oneapi/mpi/2021.17/opt/mpi/libfabric/lib/libfabric.so.1'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/scipy/linalg/_flapack.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/scipy/optimize/_highspy/_core.cpython-312-x86_64-linux-gnu.so'." int:severity="2"/>
<message msg="Cannot locate debugging information for file `/home/yx/qibotn/.venv/lib64/python3.12/site-packages/torch/lib/libtorch_cpu.so'." int:severity="2"/>
<message msg="Symbol resolution has been finished, elapsed time is 22.559 seconds." int:severity="16"/>
<message msg="Deferred data transformations have been finished, elapsed time is 0.526 seconds." int:severity="16"/>
<message msg="Data model parameters have been set, elapsed time is 0.017 seconds." int:severity="16"/>
<message msg="Precomputing frequently used data has been finished, elapsed time is 0.342 seconds." int:severity="16"/>
<message msg="Redundant overtime data has been discarded, elapsed time is 0.002 seconds." int:severity="16"/>
<message msg="Raw collector data has been discarded, elapsed time is 0.000 seconds." int:severity="16"/>
<message msg="Finalizing the result took 41.182 seconds." int:severity="16"/>
<message msg="Knob values have been set, elapsed time is 0.000 seconds." int:severity="16"/>
</message_entry_t>
</bag>

View File

@@ -0,0 +1,31 @@
-r
/home/yx/qibotn/vtune_contract
--stack-stitching
--data-limit-mb=1000
--disk-space-limit=0
--mrte-type=java,python
--stack-unwind-limit=8388608
--itt-config=frame
--itt-config=task,event,counter
--stackwalk=offline
--mrte-mode=auto
--type=cpu:counters:nostack
--type=cpu:stack
--interval=10
--platform-type=157
--pmu-type=emeraldrapids_server
--event-config=CPU_CLK_UNHALTED.THREAD,CPU_CLK_UNHALTED.REF_TSC:sample:sa=2700000,INST_RETIRED.ANY:sample:sa=2700000,CPU_CLK_UNHALTED.DISTRIBUTED,FP_ARITH_INST_RETIRED.SCALAR_SINGLE,FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED.SCALAR_DOUBLE,FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE,UOPS_EXECUTED.X87,UOPS_RETIRED.SLOTS:sample:sa=2000003,UOPS_EXECUTED.THREAD,FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED2.SCALAR_HALF,FP_ARITH_INST_RETIRED2.128B_PACKED_HALF,FP_ARITH_INST_RETIRED2.256B_PACKED_HALF,FP_ARITH_INST_RETIRED2.512B_PACKED_HALF,TOPDOWN.SLOTS:sample:sa=2000003,TOPDOWN.SLOTS_P:sample:sa=2000003
--collector=perf
--count
--
mpirun
-np
2
--hostfile
/home/yx/qibotn/hostfile
-perhost
2
--map-by
numa
python
benchmark_contract_sliced.py

View File

@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<All_Configurations-All_Platforms>
<searchDirs>
<searchCategory>
<category type="u8_t">1</category>
</searchCategory>
<searchCategory>
<category type="u8_t">2</category>
</searchCategory>
<searchCategory>
<category type="u8_t">3</category>
</searchCategory>
<searchCategory>
<category type="u8_t">4</category>
</searchCategory>
</searchDirs>
</All_Configurations-All_Platforms>
</root>

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<guiEngine type="bag"/>
</root>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,129 @@
# AMPLXE_PERF_STAT
# hostname : node-3
# os release : 6.12.0-124.52.1.el10_1.x86_64
# arch : x86_64
# cpuid : GenuineIntel,6,207,2
# cpudesc : INTEL(R) XEON(R) PLATINUM 8558P
# total memory : 527171272 kB
# nrcpus online : 96
# nrcpus avail : 96
# sibling cores : 0-47
# sibling cores : 48-95
# sibling threads : 0
# sibling threads : 1
# sibling threads : 2
# sibling threads : 3
# sibling threads : 4
# sibling threads : 5
# sibling threads : 6
# sibling threads : 7
# sibling threads : 8
# sibling threads : 9
# sibling threads : 10
# sibling threads : 11
# sibling threads : 12
# sibling threads : 13
# sibling threads : 14
# sibling threads : 15
# sibling threads : 16
# sibling threads : 17
# sibling threads : 18
# sibling threads : 19
# sibling threads : 20
# sibling threads : 21
# sibling threads : 22
# sibling threads : 23
# sibling threads : 24
# sibling threads : 25
# sibling threads : 26
# sibling threads : 27
# sibling threads : 28
# sibling threads : 29
# sibling threads : 30
# sibling threads : 31
# sibling threads : 32
# sibling threads : 33
# sibling threads : 34
# sibling threads : 35
# sibling threads : 36
# sibling threads : 37
# sibling threads : 38
# sibling threads : 39
# sibling threads : 40
# sibling threads : 41
# sibling threads : 42
# sibling threads : 43
# sibling threads : 44
# sibling threads : 45
# sibling threads : 46
# sibling threads : 47
# sibling threads : 48
# sibling threads : 49
# sibling threads : 50
# sibling threads : 51
# sibling threads : 52
# sibling threads : 53
# sibling threads : 54
# sibling threads : 55
# sibling threads : 56
# sibling threads : 57
# sibling threads : 58
# sibling threads : 59
# sibling threads : 60
# sibling threads : 61
# sibling threads : 62
# sibling threads : 63
# sibling threads : 64
# sibling threads : 65
# sibling threads : 66
# sibling threads : 67
# sibling threads : 68
# sibling threads : 69
# sibling threads : 70
# sibling threads : 71
# sibling threads : 72
# sibling threads : 73
# sibling threads : 74
# sibling threads : 75
# sibling threads : 76
# sibling threads : 77
# sibling threads : 78
# sibling threads : 79
# sibling threads : 80
# sibling threads : 81
# sibling threads : 82
# sibling threads : 83
# sibling threads : 84
# sibling threads : 85
# sibling threads : 86
# sibling threads : 87
# sibling threads : 88
# sibling threads : 89
# sibling threads : 90
# sibling threads : 91
# sibling threads : 92
# sibling threads : 93
# sibling threads : 94
# sibling threads : 95
# process id : 1035901
;;1852162590870;0
;;1954556299080;1
;;4241120946016;2
;;1852848155581;3
;;66657;4
;;0;5
;;0;6
;;5745804;7
;;405292255;8
;;296278351;9
;;439721;10
;;4247142966778;11
;;4146749907087;12
;;4563086821843;13
;;201;14
;;0;15
;;0;16
;;0;17
;;0;18
;;11112975545220;19
;;11122858880946;20

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More