191 lines
7.0 KiB
Python
191 lines
7.0 KiB
Python
import os
|
|
import sys
|
|
import subprocess
|
|
import math
|
|
|
|
def get_command_result(cmd):
|
|
print cmd
|
|
results = subprocess.Popen(
|
|
cmd, stdout=subprocess.PIPE,
|
|
shell=True).stdout.readlines()
|
|
return [str(x).rstrip("\n") for x in results]
|
|
|
|
def get_cpus():
|
|
cpus = []
|
|
online_file = open('/sys/devices/system/cpu/online', 'r')
|
|
for cpurange in online_file.readlines()[0].strip().split(','):
|
|
try:
|
|
cpurange_start, cpurange_end = cpurange.split('-')
|
|
except ValueError:
|
|
cpurange_start = cpurange_end = cpurange
|
|
for cpu in range(int(cpurange_start), int(cpurange_end) + 1):
|
|
cpus.append(cpu)
|
|
return cpus
|
|
|
|
def get_omp_places_cores(cpus):
|
|
places = []
|
|
map_cpu_to_place = {}
|
|
for cpu in cpus:
|
|
if cpu not in map_cpu_to_place:
|
|
siblings_file = open('/sys/devices/system/cpu/cpu{0}/topology/thread_siblings_list'.format(cpu))
|
|
place = []
|
|
siblings = siblings_file.readlines()[0].strip().split(',')
|
|
for sibling in siblings:
|
|
place.append(int(sibling))
|
|
places.append(place)
|
|
for sibling in siblings:
|
|
map_cpu_to_place[int(sibling)] = place
|
|
return places, map_cpu_to_place
|
|
|
|
def index_of_place(places, cpu):
|
|
i = 0
|
|
for place in places:
|
|
if cpu in place:
|
|
return i
|
|
i = i + 1
|
|
return -1
|
|
|
|
def index_of_subpartition(subpartition, place):
|
|
i = 0
|
|
for placelist in subpartition:
|
|
if place in placelist:
|
|
return i
|
|
i = i + 1
|
|
return -1
|
|
|
|
def get_estimated_bind(omp_proc_bind, nthreads, places):
|
|
if omp_proc_bind == 'close':
|
|
return get_estimated_bind_close(0, 0, nthreads, places)
|
|
elif omp_proc_bind == 'spread':
|
|
return get_estimated_bind_spread(0, 0, nthreads, places)
|
|
return None
|
|
|
|
def get_estimated_bind_close(master_thread, master_cpu, nthreads, places):
|
|
map_thread_to_place = {}
|
|
nplaces = len(places)
|
|
# print 'nthreads =', nthreads
|
|
# print 'nplaces =', nplaces
|
|
if nthreads <= nplaces:
|
|
place_idx = index_of_place(places, master_cpu)
|
|
# print 'place_idx =', place_idx
|
|
for i in range(nthreads):
|
|
thread = (master_thread + i) % nthreads
|
|
map_thread_to_place[thread] = places[(place_idx + i) % nplaces]
|
|
else:
|
|
s = [0] * nplaces
|
|
for p in range(nplaces):
|
|
if nplaces - p <= nthreads % nplaces: # implementation defined
|
|
s[p] = nthreads / nplaces + 1 # ceil
|
|
else:
|
|
s[p] = nthreads / nplaces # floor
|
|
# print 's[', p, '] =', s[p]
|
|
i_begin = 0
|
|
place_idx = index_of_place(places, master_cpu)
|
|
for p in range(nplaces):
|
|
for i in range(i_begin, i_begin + s[p]):
|
|
thread = (master_thread + i) % nthreads
|
|
map_thread_to_place[thread] = places[(place_idx + p) % nplaces]
|
|
i_begin = i_begin + s[p]
|
|
return map_thread_to_place
|
|
|
|
def get_estimated_bind_spread(master_thread, master_cpu, nthreads, places):
|
|
map_thread_to_place = {}
|
|
nplaces = len(places)
|
|
if nthreads <= nplaces:
|
|
places_subpartition = []
|
|
p_begin = 0
|
|
for i in range(nthreads):
|
|
if nthreads - i <= nplaces % nthreads: # implementation defined
|
|
size_places_subpartition = nplaces / nthreads + 1 # ceil
|
|
else:
|
|
size_places_subpartition = nplaces / nthreads # floor
|
|
places_subpartition.append(places[p_begin:p_begin + size_places_subpartition])
|
|
p_begin = p_begin + size_places_subpartition
|
|
place_idx = index_of_place(places, master_cpu)
|
|
places_subpartition_idx = index_of_subpartition(places_subpartition, places[place_idx])
|
|
for i in range(nthreads):
|
|
thread = (master_thread + i) % nthreads
|
|
if thread == master_thread:
|
|
map_thread_to_place[thread] = places[place_idx]
|
|
else:
|
|
map_thread_to_place[thread] = places_subpartition[(places_subpartition_idx + i) % nthreads][0]
|
|
else:
|
|
threads = []
|
|
for i in range(nthreads):
|
|
threads.append(i)
|
|
threads_subpartition = []
|
|
i_begin = 0
|
|
for p in range(nplaces):
|
|
if nplaces - p <= nthreads % nplaces: # implementation defined
|
|
size_threads_subpartition = nthreads / nplaces + 1 # ceil
|
|
else:
|
|
size_threads_subpartition = nthreads / nplaces # floor
|
|
threads_subpartition.append(threads[i_begin:i_begin + size_threads_subpartition])
|
|
i_begin = i_begin + size_threads_subpartition
|
|
place_idx = index_of_place(places, master_cpu)
|
|
for p in range(nplaces):
|
|
for i in threads_subpartition[p]:
|
|
thread = (master_thread + i) % nthreads
|
|
map_thread_to_place[thread] = places[p]
|
|
return map_thread_to_place
|
|
|
|
def run_and_get_omp_cpu_affinity(omp_proc_bind, nthreads):
|
|
os.environ['NODES'] = '1'
|
|
os.environ['PPN'] = '1'
|
|
os.environ['HWLOC_HIDE_ERRORS'] = '1'
|
|
os.environ['OMP_PLACES'] = 'cores'
|
|
os.environ['OMP_PROC_BIND'] = omp_proc_bind
|
|
os.environ['OMP_NUM_THREADS'] = str(nthreads)
|
|
command = './show-omp-cpu-affinity'
|
|
result_map_thread_to_cpu = {}
|
|
for line in get_command_result(command):
|
|
outputs = line.split(' ')
|
|
thread = outputs[1]
|
|
cpu = outputs[3]
|
|
result_map_thread_to_cpu[int(thread)] = int(cpu)
|
|
return result_map_thread_to_cpu
|
|
|
|
def compare_result(nthreads, map_thread_to_place, result_map_thread_to_cpu):
|
|
try:
|
|
for thread in range(nthreads):
|
|
place = map_thread_to_place[thread]
|
|
if result_map_thread_to_cpu[thread] not in place:
|
|
return False
|
|
return True
|
|
except KeyError:
|
|
return False
|
|
|
|
def test_cpu_affinity(cpus, omp_proc_bind, nthreads, places):
|
|
map_thread_to_place = get_estimated_bind(omp_proc_bind, nthreads, places)
|
|
result_map_thread_to_cpu = run_and_get_omp_cpu_affinity(omp_proc_bind, nthreads)
|
|
if compare_result(nthreads, map_thread_to_place, result_map_thread_to_cpu):
|
|
result = 'SUCCESS'
|
|
else:
|
|
result = 'FAIL'
|
|
print "MAP_THREAD_TO_PLACE({0}): {1}".format(omp_proc_bind, map_thread_to_place)
|
|
print "RESULT_MAP_THREAD_TO_CPU: {0}".format(result_map_thread_to_cpu)
|
|
print "#CPU: {}, #PLACE: {}, OMP_PLACES: cores, OMP_PROC_BIND: {}, OMP_NUM_THREAD: {}, RESULT: {}".format(len(cpus), len(places), omp_proc_bind, nthreads, result)
|
|
|
|
def main():
|
|
cpus = get_cpus()
|
|
print 'CPUS:', cpus
|
|
places, map_cpu_to_place = get_omp_places_cores(cpus)
|
|
print 'PLACES:', places
|
|
print 'MAP_CPU_TO_PLACE', map_cpu_to_place
|
|
print
|
|
|
|
nplaces = len(places)
|
|
for nthreads in range(2, nplaces * 4 + 1):
|
|
if nthreads < nplaces and nplaces % nthreads > 0:
|
|
continue
|
|
if nthreads >= nplaces and nthreads % nplaces > 0:
|
|
continue
|
|
test_cpu_affinity(cpus, 'close', nthreads, places)
|
|
print
|
|
test_cpu_affinity(cpus, 'spread', nthreads, places)
|
|
print
|
|
|
|
if __name__ == '__main__':
|
|
main()
|
|
|