Enable optimized GPU runs from Python launcher

2026-04-30 18:31:31 +08:00
parent da4d56ccf7
commit e0d0673c8e
1 changed files with 135 additions and 12 deletions
--- a/makefile_and_run.py
+++ b/makefile_and_run.py
@@ -9,6 +9,8 @@


 import AMSS_NCKU_Input as input_data
+import os
+import shutil
 import subprocess
 import time

@@ -56,6 +58,111 @@ BUILD_JOBS = 64

 ##################################################################

+def _truthy(value, default=False):
+    """Interpret ``value`` as an on/off flag; ``None`` or blank text gives ``default``."""
+    if isinstance(value, bool):
+        return value
+    # Everything else is compared as trimmed, lower-cased text.
+    normalized = "" if value is None else str(value).strip().lower()
+    if not normalized:
+        return default
+    return normalized in {"1", "yes", "y", "true", "on", "enable", "enabled"}
+
+
+def _input_or_env(input_name, env_name, default=None):
+    """Return ``os.environ[env_name]`` if set, else ``input_data.<input_name>``, else ``default``."""
+    env_value = os.environ.get(env_name)
+    return env_value if env_value is not None else getattr(input_data, input_name, default)
+
+
+def _start_cuda_mps_if_requested(runtime_env):
+    """Start a CUDA MPS control daemon for this GPU run when enabled.
+
+    Enabled via the AMSS_CUDA_AUTO_MPS environment variable or
+    input_data.CUDA_Auto_MPS; defaults to on for more than one MPI process.
+    On reuse or successful start the MPS pipe/log directories are written
+    into ``runtime_env`` in place. Returns True only when this call started
+    the daemon (False when one is reused, MPS is off, or startup fails).
+    """
+    if input_data.GPU_Calculation != "yes":
+        return False
+
+    multi_rank = int(getattr(input_data, "MPI_processes", 1)) > 1
+    requested = _input_or_env("CUDA_Auto_MPS", "AMSS_CUDA_AUTO_MPS", multi_rank)
+    if not _truthy(requested, default=multi_rank):
+        return False
+
+    control_bin = shutil.which("nvidia-cuda-mps-control")
+    if not control_bin:
+        print(" CUDA MPS control command was not found; running without MPS.")
+        return False
+
+    # Per-user default locations under /tmp unless overridden.
+    uid = os.getuid()
+    pipe_dir = str(_input_or_env("CUDA_MPS_PIPE_DIRECTORY", "CUDA_MPS_PIPE_DIRECTORY",
+                                 f"/tmp/amss-ncku-mps-{uid}"))
+    log_dir = str(_input_or_env("CUDA_MPS_LOG_DIRECTORY", "CUDA_MPS_LOG_DIRECTORY",
+                                f"/tmp/amss-ncku-mps-log-{uid}"))
+    for directory in (pipe_dir, log_dir):
+        os.makedirs(directory, exist_ok=True)
+    mps_dirs = {"CUDA_MPS_PIPE_DIRECTORY": pipe_dir, "CUDA_MPS_LOG_DIRECTORY": log_dir}
+    daemon_env = runtime_env.copy()
+    daemon_env.update(mps_dirs)
+
+    # A "control" entry in the pipe directory is treated as a running daemon.
+    if os.path.exists(os.path.join(pipe_dir, "control")):
+        runtime_env.update(mps_dirs)
+        print(f" Reusing CUDA MPS daemon: {pipe_dir}")
+        return False
+
+    print(f" Starting CUDA MPS daemon for this run: {pipe_dir}")
+    launch = subprocess.run([control_bin, "-d"], env=daemon_env, text=True,
+                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    if launch.returncode != 0:
+        print(" CUDA MPS daemon did not start; running without MPS.")
+        if launch.stdout:
+            print(launch.stdout, end="")
+        return False
+
+    runtime_env.update(mps_dirs)
+    return True
+
+
+def _stop_cuda_mps(runtime_env):
+    """Send ``quit`` to nvidia-cuda-mps-control; do nothing if it is not on PATH."""
+    control_bin = shutil.which("nvidia-cuda-mps-control")
+    if control_bin:  # output is captured and discarded; the exit status is not checked
+        subprocess.run([control_bin], input="quit\n", env=runtime_env, text=True,
+                       stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+
+def _gpu_runtime_env():
+    """Return a copy of ``os.environ`` extended with the GPU run switches.
+
+    Variables already present in the environment are never overwritten.
+    """
+    env = os.environ.copy()
+    # Switches that default to "1" unless the caller exported another value.
+    for switch in ("AMSS_INTERP_FAST", "AMSS_CUDA_KEEP_RESIDENT_AFTER_STEP",
+                   "AMSS_CUDA_KEEP_ALL_LEVELS"):
+        env.setdefault(switch, "1")
+
+    # Optional switches: (environment variable, input_data attribute) pairs,
+    # copied from input_data only when that attribute is defined.
+    for env_name, input_name in (
+        ("AMSS_INTERP_FAST_COMPARE", "AMSS_Interp_Fast_Compare"),
+        ("AMSS_INTERP_FAST_COMPARE_LIMIT", "AMSS_Interp_Fast_Compare_Limit"),
+        ("AMSS_INTERP_FAST_COMPARE_TOL", "AMSS_Interp_Fast_Compare_Tol"),
+        ("AMSS_GPU_STAGE_TIMING", "AMSS_GPU_Stage_Timing"),
+        ("AMSS_GPU_STAGE_TIMING_EVERY", "AMSS_GPU_Stage_Timing_Every"),
+    ):
+        if env_name not in env and hasattr(input_data, input_name):
+            env[env_name] = str(getattr(input_data, input_name))
+    return env
+
+
+##################################################################
+


 ##################################################################
@@ -145,6 +252,8 @@ def run_ABE():
    print(                                                      )

    ## Define the command to run; cast other values to strings as needed
+    mpi_env = None
+    started_mps = False
    
    if (input_data.GPU_Calculation == "no"):
        mpi_command         = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
@@ -153,21 +262,35 @@ def run_ABE():
    elif (input_data.GPU_Calculation == "yes"):
        mpi_command         = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE_CUDA"
        mpi_command_outfile = "ABEGPU_out.log"
+        mpi_env = _gpu_runtime_env()
+        started_mps = _start_cuda_mps_if_requested(mpi_env)
+        print(" GPU optimized runtime switches:")
+        print(f"   AMSS_INTERP_FAST={mpi_env.get('AMSS_INTERP_FAST', '')}")
+        print(f"   AMSS_CUDA_KEEP_RESIDENT_AFTER_STEP={mpi_env.get('AMSS_CUDA_KEEP_RESIDENT_AFTER_STEP', '')}")
+        print(f"   AMSS_CUDA_KEEP_ALL_LEVELS={mpi_env.get('AMSS_CUDA_KEEP_ALL_LEVELS', '')}")
+        if "CUDA_MPS_PIPE_DIRECTORY" in mpi_env:
+            print(f"   CUDA_MPS_PIPE_DIRECTORY={mpi_env['CUDA_MPS_PIPE_DIRECTORY']}")
 
-    ## Execute the MPI command and stream output
-    mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
+    try:
+        ## Execute the MPI command and stream output
+        mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE,
+                                       stderr=subprocess.STDOUT, text=True, env=mpi_env)

-    ## Write ABE run output to file while printing to stdout
-    with open(mpi_command_outfile, 'w') as file0:  
-        ## Read and print output lines; also write each line to file
-        for line in mpi_process.stdout:
-            print(line, end='')  # stream output in real time
-            file0.write(line)    # write the line to file
-            file0.flush()        # flush to ensure each line is written immediately (optional)            
-    file0.close()
+        ## Write ABE run output to file while printing to stdout
+        with open(mpi_command_outfile, 'w') as file0:
+            ## Read and print output lines; also write each line to file
+            for line in mpi_process.stdout:
+                print(line, end='')  # stream output in real time
+                file0.write(line)    # write the line to file
+                file0.flush()        # flush to ensure each line is written immediately (optional)

-    ## Wait for the process to finish
-    mpi_return_code = mpi_process.wait()
+        ## Wait for the process to finish
+        mpi_return_code = mpi_process.wait()
+        if mpi_return_code != 0:
+            raise subprocess.CalledProcessError(mpi_return_code, mpi_command)
+    finally:
+        if started_mps:
+            _stop_cuda_mps(mpi_env)
    
    print(                                           )
    print( " The ABE/ABEGPU simulation is finished " )