first commit

2025-12-18 16:00:22 +08:00
commit 785f306726
69 changed files with 33171 additions and 0 deletions
--- a/greed/labtemplate.typ
+++ b/greed/labtemplate.typ
@@ -0,0 +1,166 @@
+// Font stacks shared by the template. Every stack lists the Latin face bound
+// to `times` first, followed by a second face.
+// NOTE(review): this first binding is shadowed by the very next line, so
+// "Times LT Pro" is never used.
+#let times = "Times LT Pro"
+#let times = "Times New Roman"
+// NOTE(review): song, kai, xbsong and fsong are all bound to the same two
+// faces; confirm whether distinct CJK faces were intended.
+#let song = (times, "Noto Serif CJK SC")
+#let hei = (times, "Noto Sans CJK SC")
+#let kai = (times, "Noto Serif CJK SC")
+#let xbsong = (times, "Noto Serif CJK SC")
+#let fsong = (times, "Noto Serif CJK SC")
+// NOTE(review): the serif face precedes "JetBrains Mono" here; confirm this is
+// the intended order for code text.
+#let code = (times, "JetBrains Mono")
+// Lab-report template, intended to be applied with
+// `#show: nudtlabpaper.with(...)`. It emits, in order:
+//   1. a cover page (reduced or full, selected by `minimal_cover`),
+//   2. a page of formatting instructions, page numbering pattern "i",
+//   3. the document `body`, page numbering pattern "1", with numbered
+//      headings and justified paragraphs.
+//
+// Parameters:
+//   title             - stored in the document metadata only.
+//   author1, id1      - student name / student number (metadata and full cover).
+//   advisor, jobtitle - advisor name and title (full cover).
+//   lab               - lab room (full cover).
+//   date              - experiment date (both covers).
+//   header_str        - accepted for caller compatibility; not read by this template.
+//   minimal_cover     - when true, render the reduced cover instead of the full one.
+//   body              - the report content.
+#let nudtlabpaper(title: "",
+  author1: "",
+  id1: "",
+  advisor: "",
+  jobtitle: "",
+  lab: "",
+  date: "",
+  header_str: "",
+  minimal_cover: false,
+  body) = {
+  // Set the document's basic properties.
+  set document(author: author1, title: title)
+  set page(
+    margin: (left: 30mm, right: 30mm, top: 30mm, bottom: 30mm),
+  )
+
+  // Report title printed on the cover. Defined once so that both cover
+  // variants agree; the full cover previously carried the title of a
+  // different lab report ("动态规划算法分析实验报告").
+  let report_title = "贪心算法分析实验报告"
+
+  // If minimal_cover is requested, render a reduced cover: the course and
+  // report titles, then the "实验时间" label and the date lower on the page.
+  if minimal_cover {
+    v(158pt)
+    align(center)[
+      #block(text(weight: 700, size: 30pt, font: hei, tracking: 1pt, "2025秋 -《算法设计与分析》"))
+    ]
+    align(center)[
+      #block(text(weight: 700, size: 24pt, font: song, tracking: 1pt, report_title))
+    ]
+
+
+    // Keep standard margins but push content down toward the bottom.
+    v(220pt)
+    align(center)[
+      #block(text(size: 14pt, font: song, tracking: 9pt, "实验时间"))
+    ]
+    v(2pt)
+    align(center)[
+      #block(text(size: 16pt, font: song, date))
+    ]
+    pagebreak()
+  } else {
+    // Title row.
+    v(158pt)
+    align(center)[
+      #block(text(weight: 700, size: 30pt, font: hei, tracking: 1pt, "2025秋 -《算法设计与分析》"))
+    ]
+    align(center)[
+      #block(text(weight: 700, size: 24pt, font: song, tracking: 1pt, report_title))
+    ]
+
+    v(103pt)
+    // The grid below is empty: the "实验名称" row is commented out.
+    pad(
+      left: 1em,
+      right: 1em,
+      grid(
+          // columns: (80pt, 1fr),
+          // rows: (17pt, auto),
+          // text(weight: 700, size: 16pt, font: song, "实验名称："),
+          // align(center, text(weight: "regular", size: 16pt, font: song, title)),
+          // text(""),
+          // line(length: 100%)
+      )
+      // #block(text(weight: 700, 1.75em, title))
+      // underline(text(weight: 700, size: 16pt, font: song, title))
+    )
+
+    // Author information.
+
+    v(62.5pt)
+
+    // Four columns: label, value, label, value. Each text row is followed by
+    // a thin row that draws a rule under the two value cells only.
+    grid(
+      columns: (0.25fr, 0.25fr, 0.25fr, 0.25fr),
+      rows: (20pt, 8pt, 20pt, 8pt, 20pt, 8pt, 20pt, 12pt),
+      text(size: 14pt, font: song, tracking: 9pt, "学员姓名"),
+      align(center, text(size: 14pt, font: song, author1)),
+      text(size: 14pt, font: song, tracking: 54pt, "学号"),
+      align(center, text(size: 14pt, font: times, id1)),
+      text(""),
+      line(length: 100%),
+      text(""),
+      line(length: 100%),
+      text(size: 14pt, font: song, tracking: 9pt, "指导教员"),
+      align(center, text(size: 14pt, font: song, advisor)),
+      text(size: 14pt, font: song, tracking: 54pt, "职称"),
+      align(center, text(size: 14pt, font: song, jobtitle)),
+      text(""),
+      line(length: 100%),
+      text(""),
+      line(length: 100%),
+      text(size: 14pt, font: song, tracking: 9pt, "实验室"),
+      align(center, text(size: 14pt, font: song, lab)),
+      text(size: 14pt, font: song, tracking: 9pt, "实验时间"),
+      align(center, text(size: 14pt, font: song, date)),
+      text(""),
+      line(length: 100%),
+      text(""),
+      line(length: 100%),
+    )
+
+    v(50.5pt)
+    align(center, text(font: hei, size: 15pt, "国防科技大学教育训练部制"))
+
+    pagebreak()
+  }
+
+  // Instructions page: same margins, page numbering pattern "i".
+  set page(
+    margin: (left: 30mm, right: 30mm, top: 30mm, bottom: 30mm),
+    numbering: "i",
+    number-align: center,
+  )
+
+  v(14pt)
+  align(center)[
+    #block(text(font: hei, size: 14pt, "《本科实验报告》填写说明"))
+  ]
+
+  v(14pt)
+  text("")
+  par(first-line-indent: 2em, text(font: song, size: 12pt, "实验报告内容编排应符合以下要求："))
+
+  par(first-line-indent: 2em, text(font: fsong, size: 12pt, "（1）采用A4（21cm×29.7cm）白色复印纸，单面黑字。上下左右各侧的页边距均为3cm；缺省文档网格：字号为小4号，中文为宋体，英文和阿拉伯数字为Times New Roman，每页30行，每行36字；页脚距边界为2.5cm，页码置于页脚、居中，采用小5号阿拉伯数字从1开始连续编排，封面不编页码。"))
+
+  par(first-line-indent: 2em, text(font: fsong, size: 12pt, "（2）报告正文最多可设四级标题，字体均为黑体，第一级标题字号为4号，其余各级标题为小4号；标题序号第一级用“一、”、“二、”……，第二级用“（一）”、“（二）” ……，第三级用“1.”、“2.” ……，第四级用“（1）”、“（2）” ……，分别按序连续编排。"))
+
+  par(first-line-indent: 2em, text(font: fsong, size: 12pt, "（3）正文插图、表格中的文字字号均为5号。"))
+
+  pagebreak()
+
+  // Main matter: same margins, page numbering pattern "1".
+  set page(
+    margin: (left: 30mm, right: 30mm, top: 30mm, bottom: 30mm),
+    numbering: "1",
+    number-align: center,
+  )
+
+  set heading(numbering: "1.1")
+  // set text(font: hei, lang: "zh")
+
+  // Render each heading as a full-width box: heading counter, then the
+  // heading body, in the `hei` font stack.
+  show heading: it => box(width: 100%)[
+    #v(0.50em)
+    #set text(font: hei)
+    #counter(heading).display()
+    // #h(0.5em)
+    #it.body
+  ]
+  // Main body.
+  set par(justify: true)
+
+  body
+}
+
+// Body paragraph: 2em first-line indent, `song` font stack at 10.5pt.
+#let para(t) = par(first-line-indent: 2em)[#text(font: song, size: 10.5pt)[#t]]
+// Same as `para`, but at 10pt.
+#let subpara(t) = par(first-line-indent: 2em)[#text(font: song, size: 10pt)[#t]]
+// Code box: light-grey rounded block with the content set in Consolas /
+// FangSong_GB2312.
+#let cb(t) = block(
+  fill: luma(240),
+  inset: 1pt,
+  radius: 4pt,
+  // width: 100%,
+  text(font: ("Consolas", "FangSong_GB2312"), t),
+)
--- a/greed/main.pdf
+++ b/greed/main.pdf
--- a/greed/main.typ
+++ b/greed/main.typ
@@ -0,0 +1,202 @@
+#import "labtemplate.typ": *
+// Apply the report template to the rest of this file. `minimal_cover: true`
+// asks the template for its reduced cover page.
+#show: nudtlabpaper.with(
+  author1: "程景愉", 
+  id1: "202302723005", 
+  advisor: " 胡罡",
+  jobtitle: "教授",
+  lab: "306-707",
+  date: "2025.12.18",
+  header_str: "贪心算法分析实验报告",
+  minimal_cover: true,
+)
+
+// Running page header: centred course title with a 1pt rule underneath.
+#set page(header: [
+    #set par(spacing: 6pt)
+    #align(center)[#text(size: 11pt)[《算法设计与分析》实验报告]]
+    #v(-0.3em)
+    #line(length: 100%, stroke: (thickness: 1pt))
+],)
+
+// Heading rendering while the outlines below are emitted: heading body only,
+// without the heading counter.
+// NOTE(review): presumably so the outline titles are not numbered; confirm.
+#show heading: it => box(width: 100%)[
+    #v(0.50em)
+    #set text(font: hei)
+    #it.body
+]
+
+// Table of contents (three levels), followed by the list of image figures.
+#outline(title: "目录",depth: 3, indent: 1em)
+// #pagebreak()
+#outline(
+  title: [图目录],
+  target: figure.where(kind: image),
+)
+
+// From here on, headings are rendered with the heading counter in front of
+// the heading body.
+#show heading: it => box(width: 100%)[
+    #v(0.50em)
+    #set text(font: hei)
+    #counter(heading).display()
+    #it.body
+]
+#set enum(indent: 0.5em,body-indent: 0.5em,)
+#pagebreak()
+
+= 实验介绍
+#para[
+贪心算法（Greedy Algorithm）是指在对问题求解时，总是做出在当前看来是最好的选择。也就是说，不从整体最优上加以考虑，算法得到的是在某种意义上的局部最优解。多机调度问题是经典的 NP-Hard 问题，本实验旨在通过实现和对比不同的贪心策略（List Scheduling 和 LPT），深入理解贪心算法的近似比性质，并探讨其在实际场景（如 GPU 集群调度）中的应用。
+]
+
+= 实验内容
+#para[
+本实验主要围绕多机调度问题的贪心算法展开，并扩展至在线 GPU 集群调度模拟。具体内容包括：
+]
+ 实现两种贪心策略：任意顺序列表调度 (List Scheduling, LS) 和 最长处理时间优先 (Longest Processing Time, LPT)。
+ 实现基于分支限界 (Branch and Bound) 的最优解求解算法，作为性能评估的基准。
+ 构造特定的“最坏情况”输入，验证贪心算法的理论近似比下界。
+ 通过大量随机测试样本，统计不同算法的近似比分布及运行时间，分析 $m$ (机器数) 和 $n$ (作业数) 对性能的影响。
+ （附加）模拟 GPU 集群在线调度场景，设计并对比不同的调度策略在不同负载下的表现。
+
+= 实验要求
+#para[
+针对多机调度问题，实验具体要求如下：
+]
+ 针对多机调度问题，实现 LS 和 LPT 两种贪心算法。
+ 实现遍历的最优解求解算法（分支限界法）。
+ 构造最坏情况输入，结合理论证明进行讨论。
+ 固定 $m, n$，随机产生大量样本，计算贪心解与最优解的比值（近似比），并分析其概率分布。
+ 改变 $m, n$，对比分析结果。
+ 附加：模拟 GPU 集群调度，考虑利用率 $eta$ 和用户延迟 $delta$，设计多种策略并分析。
+
+= 实验步骤
+
+== 算法设计
+
+=== 算法一：列表调度 (List Scheduling, LS)
+#para[
+LS 算法是最朴素的贪心策略。它按照作业输入的任意顺序，依次将作业分配给当前负载最小的机器。
+该算法是一种在线算法，其时间复杂度为 $O(n log m)$ (使用优先队列维护机器负载) 或 $O(n m)$ (线性扫描)。
+理论上，LS 算法的近似比为 $2 - 1/m$。
+]
+```cpp
+// 核心代码片段
+long long greedy_ls(int m, const vector<Job>& jobs) {
+    vector<long long> machines(m, 0);
+    for (const auto& job : jobs) {
+        int min_idx = 0; // Find machine with min load
+        for (int i = 1; i < m; ++i) {
+            if (machines[i] < machines[min_idx]) min_idx = i;
+        }
+        machines[min_idx] += job.duration;
+    }
+    return *max_element(machines.begin(), machines.end());
+}
+```
+
+=== 算法二：最长处理时间优先 (LPT)
+#para[
+LPT 算法在 LS 的基础上增加了预处理步骤：将所有作业按处理时间递减排序，然后依次分配给负载最小的机器。
+排序操作使得较大的作业优先被处理，从而避免了最后剩下一个大作业导致机器负载极不均衡的情况。
+该算法的时间复杂度主要由排序决定，为 $O(n log n)$。
+理论上，LPT 算法的近似比为 $4/3 - 1/(3m)$。
+]
+
+=== 算法三：最优解 (Branch and Bound)
+#para[
+为了评估贪心算法的性能，我们需要求得问题的最优解。由于多机调度是 NP-Hard 问题，我们采用深度优先搜索配合分支限界 (Branch and Bound) 来求解。
+剪枝策略包括：
+]
+1. 当前最大负载已经超过已知最优解，停止搜索。
+2. 理论下界剪枝：如果 `max(当前最大负载, (剩余作业总长 + 当前总负载)/m)` 超过已知最优解，停止搜索。
+3. 对称性剪枝：若多台机器当前负载相同，则分配给它们是等价的，只尝试第一台。
+
+== 最坏情况构造与分析
+=== LS 算法最坏情况
+#para[
+*构造方法：* 对于 $m$ 台机器，输入 $m(m-1)$ 个时长为 1 的小作业，紧接着 1 个时长为 $m$ 的大作业。
+]
+#para[
+*分析：* LS 算法会将前 $m(m-1)$ 个小作业均匀分配给 $m$ 台机器，每台机器负载为 $m-1$。最后的大作业将被分配给任意一台机器，使其最终负载变为 $(m-1) + m = 2m-1$。
+而最优解是将所有小作业均匀分配给 $m-1$ 台机器（每台负载 $m$），将大作业单独分配给剩下一台机器（负载 $m$），此时 MakeSpan 为 $m$。
+近似比为 $(2m-1)/m = 2 - 1/m$。
+本实验通过代码验证了 $m=3, 4, 5$ 时的该情况，结果与理论完全一致。
+]
+
+=== LPT 算法最坏情况
+#para[
+*构造方法：* 经典的 LPT 最坏情况由 $2m+1$ 个作业组成：时长为 $2m-1, 2m-2, dots, m+1$ 的作业各两个，外加三个时长为 $m$ 的作业。例如 $m=2$ 时，作业集为 $\{3, 3, 2, 2, 2\}$。
+]
+#para[
+*分析：* 排序后为 $3, 3, 2, 2, 2$。
+LPT 分配：M1: $3, 2, 2$ (总 7), M2: $3, 2$ (总 5)。MakeSpan = 7。
+最优解：M1: $3, 3$ (总 6), M2: $2, 2, 2$ (总 6)。MakeSpan = 6。
+近似比 $7/6 approx 1.167$。理论界 $4/3 - 1/6 = 7/6$。实验验证吻合。
+]
+
+== 实验数据与可视化
+#para[
+我们对 $m in {3, 5, 8}$ 和 $n in {10, 15, 20, 25, 30, 50, 100}$ 进行了大量随机测试；其中仅 $n <= 15$ 的样本求得了最优解并计算了近似比。
+]
+
+#figure(
+  image("results/ratio_boxplot.png", width: 80%),
+  caption: [LS 与 LPT 算法近似比分布对比],
+)
+
+#figure(
+  image("results/ratio_vs_n.png", width: 80%),
+  caption: [近似比随作业数量 n 的变化趋势],
+)
+
+#figure(
+  image("results/time_comparison.png", width: 80%),
+  caption: [算法平均运行时间对比],
+)
+
+= 实验结果分析
+#para[
+1.*近似比性能：* 从箱线图可以看出，LPT 算法的近似比极其接近 1（通常在 1.0 - 1.05 之间），性能极其优越且稳定。相比之下，LS 算法的近似比分布较宽，平均在 1.1 - 1.3 之间，且随着 $m$ 的增加，最差情况（近似比上界）有升高的趋势，符合 $2 - 1/m$ 的理论预测。
+]
+#para[
+2.*规模的影响：* 随着作业数 $n$ 的增加，LS 的近似比往往会下降并趋于稳定。这是因为大量随机作业往往能“填平”机器间的负载差异。LPT 则始终保持高效。
+]
+#para[
+3.*运行时间：* 贪心算法（LS, LPT）的运行时间极短（微秒级），且随 $n$ 线性或近线性增长。最优解算法（B&B）随 $n$ 指数级增长，当 $n >= 20$ 时已难以在短时间内求解，验证了 NP-Hard 问题的计算复杂性。
+]
+
+= 实验总结
+#para[
+本实验深入分析了多机调度问题的贪心求解策略。实验结果表明，虽然 LS 算法实现简单，但在最坏情况下性能较差。简单的排序预处理（LPT 策略）能带来巨大的性能提升，使其在绝大多数随机及构造测试中都能获得极接近最优解的结果。这启示我们在设计贪心算法时，合理的贪心顺序（如优先处理“困难”或“大”的任务）至关重要。
+]
+
+#pagebreak()
+= 附加：GPU 集群在线调度模拟
+
+== 场景描述
+#para[
+模拟一个拥有 $m=64$ 块 GPU 的集群任务调度。任务到达服从泊松过程（到达间隔服从指数分布），单机执行时间服从 $[10, 100]$ 上的均匀分布。任务支持并行 ($k$ 块 GPU)，但存在并行效率损耗：效率因子 $E_k = sigma^(log_2 k)$，其中 $sigma in [0.75, 0.95]$。系统目标是平衡 *集群利用率 ($eta$)* 和 *用户平均延迟 ($delta$)*。
+]
+
+== 调度策略设计
+#para[
+我们设计了三种策略进行对比：
+]
+ *策略 A：保守策略 (Conservative)* 总是为每个任务分配 $k=1$ 块 GPU。其思路是最大化计算资源的“有效性”，避免并行损耗。
+ *策略 B：激进策略 (Aggressive)* 总是尽可能分配最大的并行度（如 $k=32$ 或 $64$）。其思路是最小化单任务执行时间，但忽略了巨大的资源浪费。
+ *策略 C：自适应策略 (Adaptive)* 根据当前等待队列的长度动态调整 $k$。若队列为空，使用高并行度加速；若队列拥堵，降低并行度以提高吞吐量。
+
+== 模拟结果
+
+#figure(
+  image("results/gpu_sim_plots.png", width: 90%),
+  caption: [不同负载下三种策略的利用率与延迟对比],
+)
+
+#para[
+实验在轻负载 ($lambda=0.5$)、中负载 ($lambda=0.9$) 和重负载 ($lambda=1.1$) 下进行了模拟。结果显示：
+]
+ *保守策略 (Conservative)*：在所有负载下都能保持较低的延迟。
+ *激进策略 (Aggressive)*：表现极差。由于并行效率损失，导致系统迅速过载，用户延迟呈爆炸式增长。
+ *自适应策略 (Adaptive)*：表现最为均衡。在轻负载时加速任务，在重负载时保证系统稳定性。
+
+== 结论
+#para[
+在具有并行开销的资源调度场景中，盲目追求高并行度（激进策略）是不可取的。通过感知系统负载来动态调整资源分配粒度的 *自适应策略*，是更为优越的解决方案。
+]
--- a/greed/multimachine
+++ b/greed/multimachine
--- a/greed/results/algo_comparison.csv
+++ b/greed/results/algo_comparison.csv
@@ -0,0 +1,331 @@
+m,n,ls_makespan,lpt_makespan,opt_makespan,ls_time,lpt_time,opt_time,ls_ratio,lpt_ratio
+3,10,187,184,182,0.341,0.661,40.546,1.02747,1.01099
+3,10,186,166,163,0.22,0.481,33.152,1.1411,1.0184
+3,10,178,168,165,0.23,0.381,79.219,1.07879,1.01818
+3,10,214,165,162,0.261,0.4,46.478,1.32099,1.01852
+3,10,168,158,150,0.13,0.381,55.324,1.12,1.05333
+3,10,216,200,191,0.121,0.3,81.624,1.13089,1.04712
+3,10,257,233,223,0.15,0.291,96.611,1.15247,1.04484
+3,10,217,208,202,0.09,0.251,99.817,1.07426,1.0297
+3,10,181,180,180,0.181,0.3,43.962,1.00556,1
+3,10,246,223,217,0.16,0.361,77.706,1.13364,1.02765
+3,15,313,316,309,0.231,0.631,5092.6,1.01294,1.02265
+3,15,225,211,211,0.461,0.881,3654.51,1.06635,1
+3,15,296,271,270,0.561,1.092,14606.3,1.0963,1.0037
+3,15,337,300,294,0.431,0.611,6054.11,1.14626,1.02041
+3,15,331,312,309,0.25,0.712,2791.6,1.0712,1.00971
+3,15,233,208,206,0.35,0.702,5490.77,1.13107,1.00971
+3,15,275,275,273,0.25,0.541,6773.35,1.00733,1.00733
+3,15,261,245,243,0.541,1.102,9826.36,1.07407,1.00823
+3,15,311,308,298,0.441,0.982,3541.14,1.04362,1.03356
+3,15,324,305,304,0.331,0.601,10941.8,1.06579,1.00329
+3,20,433,413,-1,0.511,1.112,0,-1,-1
+3,20,358,333,-1,0.331,0.972,0,-1,-1
+3,20,409,387,-1,0.361,0.872,0,-1,-1
+3,20,434,419,-1,0.331,0.671,0,-1,-1
+3,20,430,406,-1,0.24,0.832,0,-1,-1
+3,20,366,342,-1,0.261,0.781,0,-1,-1
+3,20,348,333,-1,0.28,0.762,0,-1,-1
+3,20,444,427,-1,0.36,0.652,0,-1,-1
+3,20,418,365,-1,0.291,0.641,0,-1,-1
+3,20,387,377,-1,0.23,0.721,0,-1,-1
+3,25,435,428,-1,0.27,1.062,0,-1,-1
+3,25,537,523,-1,0.291,0.982,0,-1,-1
+3,25,537,514,-1,0.34,0.812,0,-1,-1
+3,25,509,497,-1,0.321,0.932,0,-1,-1
+3,25,474,450,-1,0.31,0.752,0,-1,-1
+3,25,481,438,-1,0.291,0.802,0,-1,-1
+3,25,501,485,-1,0.36,0.792,0,-1,-1
+3,25,484,440,-1,0.271,0.932,0,-1,-1
+3,25,549,542,-1,0.29,0.862,0,-1,-1
+3,25,487,459,-1,0.331,0.711,0,-1,-1
+3,25,489,484,-1,0.25,1.032,0,-1,-1
+3,25,491,463,-1,0.291,0.701,0,-1,-1
+3,25,553,501,-1,0.31,0.872,0,-1,-1
+3,25,468,466,-1,0.271,0.791,0,-1,-1
+3,25,515,489,-1,0.261,0.832,0,-1,-1
+3,25,486,476,-1,0.301,0.871,0,-1,-1
+3,25,471,461,-1,0.33,0.842,0,-1,-1
+3,25,521,483,-1,0.261,0.872,0,-1,-1
+3,25,533,506,-1,0.291,0.922,0,-1,-1
+3,25,447,422,-1,0.28,0.822,0,-1,-1
+3,30,631,593,-1,0.351,1.232,0,-1,-1
+3,30,600,557,-1,0.27,1.082,0,-1,-1
+3,30,602,576,-1,0.39,1.072,0,-1,-1
+3,30,573,553,-1,0.351,1.102,0,-1,-1
+3,30,491,483,-1,0.321,1.072,0,-1,-1
+3,30,635,578,-1,0.31,1.072,0,-1,-1
+3,30,624,587,-1,0.41,1.022,0,-1,-1
+3,30,577,558,-1,0.3,1.062,0,-1,-1
+3,30,557,554,-1,0.351,1.002,0,-1,-1
+3,30,681,648,-1,0.311,1.122,0,-1,-1
+3,30,567,540,-1,0.331,0.922,0,-1,-1
+3,30,612,598,-1,0.34,1.062,0,-1,-1
+3,30,580,532,-1,0.361,0.881,0,-1,-1
+3,30,560,554,-1,0.301,1.122,0,-1,-1
+3,30,587,556,-1,0.361,1.072,0,-1,-1
+3,30,584,563,-1,0.381,1.082,0,-1,-1
+3,30,583,563,-1,0.28,1.082,0,-1,-1
+3,30,627,601,-1,0.331,1.072,0,-1,-1
+3,30,552,541,-1,0.301,1.092,0,-1,-1
+3,30,590,583,-1,0.341,1.092,0,-1,-1
+3,50,919,919,-1,0.551,2.194,0,-1,-1
+3,50,863,846,-1,0.541,2.064,0,-1,-1
+3,50,832,818,-1,0.591,1.894,0,-1,-1
+3,50,1074,1042,-1,0.471,1.833,0,-1,-1
+3,50,1007,988,-1,0.471,1.944,0,-1,-1
+3,50,960,937,-1,0.511,1.924,0,-1,-1
+3,50,894,889,-1,0.571,1.944,0,-1,-1
+3,50,1032,996,-1,0.521,1.923,0,-1,-1
+3,50,868,865,-1,0.511,1.974,0,-1,-1
+3,50,990,957,-1,0.481,1.934,0,-1,-1
+3,50,814,808,-1,0.521,2.024,0,-1,-1
+3,50,972,934,-1,0.46,1.904,0,-1,-1
+3,50,1005,980,-1,0.511,2.184,0,-1,-1
+3,50,984,935,-1,0.521,1.864,0,-1,-1
+3,50,1021,993,-1,0.501,1.763,0,-1,-1
+3,50,1012,999,-1,0.451,1.963,0,-1,-1
+3,50,947,916,-1,0.471,1.913,0,-1,-1
+3,50,948,939,-1,0.451,1.994,0,-1,-1
+3,50,1004,989,-1,0.531,1.803,0,-1,-1
+3,50,913,901,-1,0.501,2.054,0,-1,-1
+3,100,1982,1955,-1,1.012,6.763,0,-1,-1
+3,100,1946,1935,-1,0.891,4.107,0,-1,-1
+3,100,1839,1828,-1,0.901,4.629,0,-1,-1
+3,100,1915,1912,-1,0.872,4.408,0,-1,-1
+3,100,1831,1802,-1,0.942,4.348,0,-1,-1
+3,100,1830,1822,-1,0.842,4.238,0,-1,-1
+3,100,1928,1886,-1,0.922,4.568,0,-1,-1
+3,100,1837,1825,-1,0.921,4.358,0,-1,-1
+3,100,1864,1858,-1,0.922,4.138,0,-1,-1
+3,100,1733,1711,-1,0.872,4.548,0,-1,-1
+3,100,1866,1832,-1,0.841,4.138,0,-1,-1
+3,100,1857,1827,-1,0.922,4.438,0,-1,-1
+3,100,1938,1930,-1,0.832,4.548,0,-1,-1
+3,100,1983,1976,-1,0.942,4.368,0,-1,-1
+3,100,1956,1951,-1,0.922,4.308,0,-1,-1
+3,100,1804,1806,-1,1.012,4.399,0,-1,-1
+3,100,1735,1735,-1,0.882,4.398,0,-1,-1
+3,100,2065,2024,-1,0.932,4.458,0,-1,-1
+3,100,1871,1850,-1,0.972,4.088,0,-1,-1
+3,100,1792,1790,-1,0.942,4.428,0,-1,-1
+5,10,160,128,128,0.551,0.541,9.638,1.25,1
+5,10,156,129,129,0.31,0.501,5.721,1.2093,1
+5,10,158,137,137,0.301,0.341,4.689,1.15328,1
+5,10,134,100,100,0.241,0.24,5.15,1.34,1
+5,10,108,82,82,0.341,0.431,2.284,1.31707,1
+5,10,114,89,88,0.271,0.33,10.53,1.29545,1.01136
+5,10,164,134,134,0.171,0.31,13.375,1.22388,1
+5,10,144,123,123,0.251,0.581,5.25,1.17073,1
+5,10,113,99,99,0.221,0.37,5.711,1.14141,1
+5,10,157,135,135,0.18,0.3,8.005,1.16296,1
+5,15,176,135,131,0.341,0.591,583.706,1.34351,1.03053
+5,15,206,184,179,0.24,0.531,3995.27,1.15084,1.02793
+5,15,195,165,165,0.27,0.531,4.779,1.18182,1
+5,15,206,183,174,0.251,0.461,2258.44,1.18391,1.05172
+5,15,141,134,131,0.311,0.591,730.011,1.07634,1.0229
+5,15,138,136,133,0.35,0.531,1305.74,1.03759,1.02256
+5,15,199,173,168,0.451,0.451,2779.8,1.18452,1.02976
+5,15,164,141,137,0.411,0.641,1209.13,1.19708,1.0292
+5,15,167,145,140,0.36,0.611,5887.82,1.19286,1.03571
+5,15,224,183,169,0.27,0.592,2172.73,1.32544,1.08284
+5,20,262,260,-1,0.461,1.042,0,-1,-1
+5,20,295,263,-1,0.37,0.762,0,-1,-1
+5,20,260,256,-1,0.371,0.771,0,-1,-1
+5,20,303,270,-1,0.38,0.882,0,-1,-1
+5,20,240,188,-1,0.361,0.731,0,-1,-1
+5,20,232,224,-1,0.35,0.742,0,-1,-1
+5,20,297,268,-1,0.271,0.841,0,-1,-1
+5,20,216,201,-1,0.35,0.862,0,-1,-1
+5,20,247,229,-1,0.331,0.822,0,-1,-1
+5,20,267,231,-1,0.301,0.831,0,-1,-1
+5,25,342,279,-1,0.371,1.112,0,-1,-1
+5,25,345,316,-1,0.421,1.112,0,-1,-1
+5,25,335,296,-1,0.511,0.952,0,-1,-1
+5,25,301,271,-1,0.41,1.032,0,-1,-1
+5,25,312,290,-1,0.39,0.921,0,-1,-1
+5,25,335,319,-1,0.441,0.992,0,-1,-1
+5,25,271,257,-1,0.391,0.942,0,-1,-1
+5,25,363,319,-1,0.341,1.042,0,-1,-1
+5,25,294,269,-1,0.43,0.872,0,-1,-1
+5,25,265,259,-1,0.401,0.832,0,-1,-1
+5,25,298,265,-1,0.361,1.092,0,-1,-1
+5,25,316,273,-1,0.421,0.972,0,-1,-1
+5,25,329,289,-1,0.39,1.031,0,-1,-1
+5,25,295,272,-1,0.431,0.962,0,-1,-1
+5,25,320,283,-1,0.38,0.921,0,-1,-1
+5,25,356,333,-1,0.391,0.992,0,-1,-1
+5,25,350,329,-1,0.401,1.122,0,-1,-1
+5,25,314,304,-1,0.361,0.972,0,-1,-1
+5,25,328,282,-1,0.401,0.861,0,-1,-1
+5,25,306,282,-1,0.331,0.972,0,-1,-1
+5,30,369,350,-1,0.431,1.082,0,-1,-1
+5,30,355,344,-1,0.4,1.173,0,-1,-1
+5,30,396,353,-1,0.471,1.163,0,-1,-1
+5,30,328,298,-1,0.431,1.243,0,-1,-1
+5,30,376,354,-1,0.491,1.142,0,-1,-1
+5,30,361,314,-1,0.411,1.132,0,-1,-1
+5,30,334,327,-1,0.431,1.082,0,-1,-1
+5,30,371,357,-1,0.451,1.062,0,-1,-1
+5,30,336,321,-1,0.411,1.273,0,-1,-1
+5,30,363,337,-1,0.431,1.152,0,-1,-1
+5,30,443,394,-1,0.531,1.273,0,-1,-1
+5,30,359,345,-1,0.511,1.403,0,-1,-1
+5,30,374,311,-1,0.481,1.112,0,-1,-1
+5,30,326,324,-1,0.451,1.302,0,-1,-1
+5,30,312,302,-1,0.461,1.252,0,-1,-1
+5,30,346,335,-1,0.441,1.222,0,-1,-1
+5,30,349,317,-1,0.481,1.172,0,-1,-1
+5,30,351,344,-1,0.531,1.192,0,-1,-1
+5,30,373,343,-1,0.491,1.132,0,-1,-1
+5,30,375,338,-1,0.401,1.132,0,-1,-1
+5,50,553,532,-1,0.752,2.043,0,-1,-1
+5,50,563,543,-1,0.642,2.234,0,-1,-1
+5,50,566,551,-1,0.711,2.134,0,-1,-1
+5,50,575,550,-1,0.701,9.098,0,-1,-1
+5,50,624,584,-1,0.691,2.234,0,-1,-1
+5,50,521,503,-1,0.661,2.324,0,-1,-1
+5,50,622,563,-1,0.651,2.054,0,-1,-1
+5,50,537,496,-1,0.631,1.944,0,-1,-1
+5,50,531,503,-1,0.662,2.023,0,-1,-1
+5,50,581,574,-1,0.731,2.043,0,-1,-1
+5,50,567,540,-1,0.672,2.204,0,-1,-1
+5,50,643,630,-1,0.741,1.914,0,-1,-1
+5,50,568,515,-1,0.721,2.044,0,-1,-1
+5,50,601,554,-1,0.751,2.064,0,-1,-1
+5,50,568,526,-1,0.602,2.204,0,-1,-1
+5,50,611,565,-1,0.731,2.144,0,-1,-1
+5,50,584,551,-1,0.681,2.053,0,-1,-1
+5,50,596,558,-1,0.771,2.104,0,-1,-1
+5,50,588,543,-1,0.681,2.084,0,-1,-1
+5,50,585,558,-1,0.681,2.094,0,-1,-1
+5,100,1095,1071,-1,1.303,5.039,0,-1,-1
+5,100,1163,1138,-1,1.332,4.709,0,-1,-1
+5,100,1197,1175,-1,1.453,4.749,0,-1,-1
+5,100,1094,1075,-1,1.302,4.679,0,-1,-1
+5,100,1083,1063,-1,1.242,4.549,0,-1,-1
+5,100,1136,1096,-1,1.302,4.599,0,-1,-1
+5,100,1035,1009,-1,1.343,4.669,0,-1,-1
+5,100,1069,1054,-1,1.353,4.698,0,-1,-1
+5,100,1212,1165,-1,1.382,4.758,0,-1,-1
+5,100,1178,1155,-1,1.373,4.919,0,-1,-1
+5,100,1062,1037,-1,1.332,4.889,0,-1,-1
+5,100,1130,1108,-1,1.362,4.509,0,-1,-1
+5,100,1099,1068,-1,1.313,4.678,0,-1,-1
+5,100,1095,1084,-1,1.333,5.009,0,-1,-1
+5,100,1155,1120,-1,1.343,4.849,0,-1,-1
+5,100,1151,1080,-1,1.383,4.869,0,-1,-1
+5,100,1171,1138,-1,1.273,4.648,0,-1,-1
+5,100,1204,1177,-1,1.312,4.739,0,-1,-1
+5,100,1138,1129,-1,1.342,4.659,0,-1,-1
+5,100,1228,1187,-1,1.202,4.809,0,-1,-1
+8,10,129,99,99,0.471,0.571,3.447,1.30303,1
+8,10,99,96,96,0.501,0.29,2.284,1.03125,1
+8,10,115,100,100,0.34,0.29,2.254,1.15,1
+8,10,83,83,83,0.461,0.3,2.234,1,1
+8,10,123,95,95,0.22,0.301,2.935,1.29474,1
+8,10,86,86,86,0.391,0.37,2.695,1,1
+8,10,95,95,95,0.381,0.721,2.916,1,1
+8,10,119,92,92,0.351,0.37,2.395,1.29348,1
+8,10,115,83,83,0.251,0.311,2.314,1.38554,1
+8,10,128,99,99,0.331,0.621,2.325,1.29293,1
+8,15,150,111,102,0.581,0.581,146.756,1.47059,1.08824
+8,15,161,109,109,1.553,3.337,60.944,1.47706,1
+8,15,116,109,109,1.573,2.405,45.105,1.06422,1
+8,15,151,111,106,2.384,3.526,210.084,1.42453,1.04717
+8,15,164,150,150,2.234,3.026,28.403,1.09333,1
+8,15,131,120,120,1.883,2.375,45.105,1.09167,1
+8,15,158,144,144,1.683,2.645,27.892,1.09722,1
+8,15,152,113,113,2.063,2.144,27.412,1.34513,1
+8,15,141,103,101,1.322,2.064,55.734,1.39604,1.0198
+8,15,98,77,76,1.774,1.954,83.096,1.28947,1.01316
+8,20,189,147,-1,1.944,4.739,0,-1,-1
+8,20,177,151,-1,2.554,3.486,0,-1,-1
+8,20,144,119,-1,2.826,3.737,0,-1,-1
+8,20,154,132,-1,2.235,3.727,0,-1,-1
+8,20,211,164,-1,2.325,3.647,0,-1,-1
+8,20,180,157,-1,1.824,4.839,0,-1,-1
+8,20,197,172,-1,2.003,4.899,0,-1,-1
+8,20,177,166,-1,2.295,3.196,0,-1,-1
+8,20,177,157,-1,1.974,2.805,0,-1,-1
+8,20,194,150,-1,1.733,4.728,0,-1,-1
+8,25,185,162,-1,3.337,4.498,0,-1,-1
+8,25,228,169,-1,2.254,3.296,0,-1,-1
+8,25,221,211,-1,1.182,2.465,0,-1,-1
+8,25,212,176,-1,0.611,1.202,0,-1,-1
+8,25,211,191,-1,0.551,1.082,0,-1,-1
+8,25,219,183,-1,0.461,1.142,0,-1,-1
+8,25,220,170,-1,0.531,1.132,0,-1,-1
+8,25,194,167,-1,0.441,1.172,0,-1,-1
+8,25,205,188,-1,0.451,1.162,0,-1,-1
+8,25,200,185,-1,0.671,1.202,0,-1,-1
+8,25,186,154,-1,0.531,0.962,0,-1,-1
+8,25,219,166,-1,0.451,1.142,0,-1,-1
+8,25,207,174,-1,0.491,1.062,0,-1,-1
+8,25,208,176,-1,0.611,1.123,0,-1,-1
+8,25,196,169,-1,0.571,1.192,0,-1,-1
+8,25,229,188,-1,0.661,1.092,0,-1,-1
+8,25,207,179,-1,0.641,1.042,0,-1,-1
+8,25,172,153,-1,0.701,0.932,0,-1,-1
+8,25,212,185,-1,0.511,1.252,0,-1,-1
+8,25,221,179,-1,0.521,0.872,0,-1,-1
+8,30,245,215,-1,0.621,1.352,0,-1,-1
+8,30,248,207,-1,0.541,1.473,0,-1,-1
+8,30,247,239,-1,0.701,1.433,0,-1,-1
+8,30,257,225,-1,0.541,1.353,0,-1,-1
+8,30,234,207,-1,0.641,1.112,0,-1,-1
+8,30,226,195,-1,0.661,1.483,0,-1,-1
+8,30,239,193,-1,0.782,1.433,0,-1,-1
+8,30,261,213,-1,0.681,1.383,0,-1,-1
+8,30,233,196,-1,0.581,1.373,0,-1,-1
+8,30,257,200,-1,0.751,1.503,0,-1,-1
+8,30,218,197,-1,0.681,1.292,0,-1,-1
+8,30,267,227,-1,0.622,1.202,0,-1,-1
+8,30,202,194,-1,0.672,1.412,0,-1,-1
+8,30,234,203,-1,0.601,1.242,0,-1,-1
+8,30,234,202,-1,0.672,1.392,0,-1,-1
+8,30,246,189,-1,0.671,1.443,0,-1,-1
+8,30,261,249,-1,0.672,1.302,0,-1,-1
+8,30,281,237,-1,0.591,1.633,0,-1,-1
+8,30,241,228,-1,0.541,1.403,0,-1,-1
+8,30,254,220,-1,0.541,1.473,0,-1,-1
+8,50,351,315,-1,1.192,2.535,0,-1,-1
+8,50,337,315,-1,1.072,2.424,0,-1,-1
+8,50,401,391,-1,1.082,2.605,0,-1,-1
+8,50,397,387,-1,0.972,2.635,0,-1,-1
+8,50,420,394,-1,1.021,2.514,0,-1,-1
+8,50,382,349,-1,0.882,2.565,0,-1,-1
+8,50,372,348,-1,0.952,2.404,0,-1,-1
+8,50,361,324,-1,1.022,2.144,0,-1,-1
+8,50,397,375,-1,1.042,2.584,0,-1,-1
+8,50,436,393,-1,0.912,2.595,0,-1,-1
+8,50,406,365,-1,0.902,2.745,0,-1,-1
+8,50,390,360,-1,1.112,2.595,0,-1,-1
+8,50,358,338,-1,1.012,2.585,0,-1,-1
+8,50,412,389,-1,0.942,2.425,0,-1,-1
+8,50,399,361,-1,1.032,2.415,0,-1,-1
+8,50,424,362,-1,0.882,2.374,0,-1,-1
+8,50,365,342,-1,0.992,2.425,0,-1,-1
+8,50,379,355,-1,0.982,2.424,0,-1,-1
+8,50,387,361,-1,0.822,2.274,0,-1,-1
+8,50,410,366,-1,0.952,2.485,0,-1,-1
+8,100,714,689,-1,1.834,5.65,0,-1,-1
+8,100,756,731,-1,1.884,5.64,0,-1,-1
+8,100,742,714,-1,1.874,5.49,0,-1,-1
+8,100,691,648,-1,1.854,5.24,0,-1,-1
+8,100,761,732,-1,1.964,5.29,0,-1,-1
+8,100,639,615,-1,1.833,5.17,0,-1,-1
+8,100,718,700,-1,1.903,5.259,0,-1,-1
+8,100,715,696,-1,1.933,5.38,0,-1,-1
+8,100,649,626,-1,2.004,5.28,0,-1,-1
+8,100,758,739,-1,1.744,5.31,0,-1,-1
+8,100,733,701,-1,1.974,5.239,0,-1,-1
+8,100,718,688,-1,1.763,5.34,0,-1,-1
+8,100,713,666,-1,1.893,5.2,0,-1,-1
+8,100,772,755,-1,1.824,5.32,0,-1,-1
+8,100,705,679,-1,2.004,4.849,0,-1,-1
+8,100,777,726,-1,1.814,5.059,0,-1,-1
+8,100,776,738,-1,1.883,5.08,0,-1,-1
+8,100,713,680,-1,1.804,5.51,0,-1,-1
+8,100,734,692,-1,2.014,5.38,0,-1,-1
+8,100,721,709,-1,1.883,5.39,0,-1,-1
--- a/greed/results/gpu_sim_plots.png
+++ b/greed/results/gpu_sim_plots.png
--- a/greed/results/gpu_sim_results.csv
+++ b/greed/results/gpu_sim_results.csv
@@ -0,0 +1,10 @@
+Load,Strategy,Utilization (eta),Avg Delay Penalty (delta),Score (Balanced)
+0.5,conservative,0.4012140025347487,0.0,0.5987859974652513
+0.5,aggressive,0.9990193308369786,34.41715150502985,3.4426958196660067
+0.5,adaptive,0.8222579304480057,5.087451279471799e-07,0.17774212042650714
+0.9,conservative,0.724052992773303,0.00013269873256010987,0.27596027709995297
+0.9,aggressive,0.9999425933302338,595.1345244746948,59.51350985413925
+0.9,adaptive,0.9766266112625738,0.033832922246208194,0.026756680962047044
+1.1,conservative,0.8882862947134675,0.06546476541988962,0.11826018182852142
+1.1,aggressive,0.9998654986492419,852.7230561640825,85.27244011775902
+1.1,adaptive,0.9388200169307546,0.7324962497082855,0.13442960804007398
--- a/greed/results/ratio_boxplot.png
+++ b/greed/results/ratio_boxplot.png
--- a/greed/results/ratio_vs_n.png
+++ b/greed/results/ratio_vs_n.png
--- a/greed/results/time_comparison.png
+++ b/greed/results/time_comparison.png
--- a/greed/results/worst_case_verification.csv
+++ b/greed/results/worst_case_verification.csv
@@ -0,0 +1,5 @@
+case_type,m,n,input_desc,greedy_res,opt_res,ratio,theory_bound
+LS_Worst,3,7,"m*(m-1) 1s + one m",5,3,1.66667,1.66667
+LS_Worst,4,13,"m*(m-1) 1s + one m",7,4,1.75,1.75
+LS_Worst,5,21,"m*(m-1) 1s + one m",9,5,1.8,1.8
+LPT_Worst,2,5,"{3,3,2,2,2}",7,6,1.16667,1.16667
--- a/greed/src/gpu_sim.py
+++ b/greed/src/gpu_sim.py
@@ -0,0 +1,224 @@
+import heapq
+import random
+import math
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+class Task:
+    def __init__(self, id, arrival_time, duration, sigma):
+        self.id = id
+        self.arrival_time = arrival_time
+        self.base_duration = duration # Time on 1 GPU
+        self.sigma = sigma
+        self.start_time = -1
+        self.finish_time = -1
+        self.assigned_k = 0
+
+    def get_exec_time(self, k):
+        # Efficiency = sigma ^ log2(k)
+        # Time = base / (k * efficiency)
+        if k == 1: return self.base_duration
+        eff = self.sigma ** math.log2(k)
+        return self.base_duration / (k * eff)
+
+class Event:
+    def __init__(self, time, type, data):
+        self.time = time
+        self.type = type # 'ARRIVAL' or 'FINISH'
+        self.data = data
+    
+    def __lt__(self, other):
+        return self.time < other.time
+
+class Cluster:
+    def __init__(self, total_gpus=64):
+        self.total_gpus = total_gpus
+        self.free_gpus = total_gpus
+        self.running_tasks = [] # List of (task, finish_time)
+
+class Simulator:
+    def __init__(self, strategy_name, arrival_rate, max_tasks=1000):
+        self.strategy_name = strategy_name
+        self.arrival_rate = arrival_rate
+        self.max_tasks = max_tasks
+        self.events = []
+        self.cluster = Cluster()
+        self.queue = []
+        self.finished_tasks = []
+        self.current_time = 0.0
+        self.total_gpu_busy_time = 0.0 # Integral of busy GPUs over time
+        self.last_update_time = 0.0
+
+    def schedule_events(self):
+        # Generate all arrivals upfront or dynamically
+        t = 0
+        for i in range(self.max_tasks):
+            dt = random.expovariate(self.arrival_rate)
+            t += dt
+            duration = random.uniform(10, 100)
+            sigma = random.uniform(0.75, 0.95)
+            task = Task(i, t, duration, sigma)
+            heapq.heappush(self.events, Event(t, 'ARRIVAL', task))
+
+    def update_metrics(self):
+        dt = self.current_time - self.last_update_time
+        busy_gpus = self.cluster.total_gpus - self.cluster.free_gpus
+        self.total_gpu_busy_time += busy_gpus * dt
+        self.last_update_time = self.current_time
+
+    def get_k(self, task):
+        # Strategies
+        available = self.cluster.free_gpus
+        q_len = len(self.queue)
+        
+        possible_ks = [1, 2, 4, 8, 16, 32, 64]
+        valid_ks = [k for k in possible_ks if k <= available]
+        
+        if not valid_ks: return 0
+        
+        if self.strategy_name == 'conservative':
+            # Always k=1
+            return 1 if 1 in valid_ks else 0
+
+        elif self.strategy_name == 'aggressive':
+            # Use max possible parallel, up to limit (e.g. 16 to avoid total waste)
+            # Or just max available
+            return valid_ks[-1]
+            
+        elif self.strategy_name == 'adaptive':
+            # Based on queue length
+            if q_len == 0:
+                target = 32 # High speed if no contention
+            elif q_len < 5:
+                target = 16
+            elif q_len < 10:
+                target = 8
+            elif q_len < 20:
+                target = 4
+            elif q_len < 50:
+                target = 2
+            else:
+                target = 1
+            
+            # Find largest valid k <= target
+            best_k = 1
+            for k in valid_ks:
+                if k <= target:
+                    best_k = k
+            return best_k
+        
+        return 1
+
+    def run(self):
+        self.schedule_events()
+        
+        while self.events or self.cluster.running_tasks:
+            if not self.events and not self.cluster.running_tasks:
+                break
+                
+            # Peek next event
+            if not self.events:
+                next_time = float('inf')
+            else:
+                next_time = self.events[0].time
+            
+            # Jump time
+            self.current_time = next_time
+            self.update_metrics()
+            
+            event = heapq.heappop(self.events)
+            
+            if event.type == 'ARRIVAL':
+                task = event.data
+                self.queue.append(task)
+            elif event.type == 'FINISH':
+                task = event.data
+                self.cluster.free_gpus += task.assigned_k
+                self.finished_tasks.append(task)
+            
+            # Try to schedule waiting tasks
+            # We iterate queue. Note: Standard queue is FIFO.
+            # We can't easily remove from middle if we skip, so we only look at head
+            # OR we can try to fit small tasks? 
+            # Simple FIFO: look at head. If can schedule, do it. Else stop (or continue?)
+            # Let's do Strict FIFO for simplicity and fairness
+            
+            while self.queue:
+                head_task = self.queue[0]
+                k = self.get_k(head_task)
+                if k > 0:
+                    # Assign
+                    self.queue.pop(0)
+                    head_task.assigned_k = k
+                    head_task.start_time = self.current_time
+                    exec_time = head_task.get_exec_time(k)
+                    head_task.finish_time = self.current_time + exec_time
+                    
+                    self.cluster.free_gpus -= k
+                    heapq.heappush(self.events, Event(head_task.finish_time, 'FINISH', head_task))
+                else:
+                    # Cannot schedule head task
+                    break
+
+    def calculate_results(self):
+        total_time = self.current_time
+        avg_utilization = self.total_gpu_busy_time / (total_time * 64)
+        
+        delays = []
+        for t in self.finished_tasks:
+            expected_finish = t.arrival_time + t.base_duration
+            if t.finish_time <= expected_finish:
+                delta = 0
+            else:
+                overdue = t.finish_time - expected_finish
+                # Normalized penalty
+                delta = (overdue / t.base_duration) ** 2
+            delays.append(delta)
+        
+        avg_delay = sum(delays) / len(delays) if delays else 0
+        return avg_utilization, avg_delay
+
+def run_simulations():
+    strategies = ['conservative', 'aggressive', 'adaptive']
+    # Load: Light (0.5), Medium (0.9), Heavy (1.1)
+    loads = [0.5, 0.9, 1.1] 
+    
+    results = []
+    
+    print("Running GPU Simulations...")
+    for load in loads:
+        for strat in strategies:
+            # Run multiple times to average? Just once for this demo with 1000 tasks
+            sim = Simulator(strat, load, max_tasks=1000)
+            sim.run()
+            eta, delta = sim.calculate_results()
+            results.append({
+                'Load': load,
+                'Strategy': strat,
+                'Utilization (eta)': eta,
+                'Avg Delay Penalty (delta)': delta,
+                'Score (Balanced)': (1-eta) + 0.1 * delta # Example lambda=0.1
+            })
+            print(f"Load {load}, Strat {strat}: Eta={eta:.3f}, Delta={delta:.3f}")
+
+    df = pd.DataFrame(results)
+    df.to_csv("results/gpu_sim_results.csv", index=False)
+    
+    # Plotting
+    plt.figure(figsize=(12, 5))
+    
+    plt.subplot(1, 2, 1)
+    sns.barplot(data=df, x='Load', y='Utilization (eta)', hue='Strategy')
+    plt.title('Cluster Utilization')
+    
+    plt.subplot(1, 2, 2)
+    sns.barplot(data=df, x='Load', y='Avg Delay Penalty (delta)', hue='Strategy')
+    plt.title('User Average Delay Penalty')
+    
+    plt.tight_layout()
+    plt.savefig('results/gpu_sim_plots.png')
+    plt.close()
+
+if __name__ == "__main__":
+    run_simulations()
--- a/greed/src/multimachine.cpp
+++ b/greed/src/multimachine.cpp
@@ -0,0 +1,273 @@
+#include <algorithm>
+#include <chrono>
+#include <climits>
+#include <cmath>
+#include <ctime>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+#include <random>
+#include <stdexcept>
+#include <vector>
+
+using namespace std;
+
+// --- Data Structures ---
+
+// One job to be scheduled. Kept as a plain aggregate: callers in this file
+// build it with brace initialization such as {id, duration}.
+struct Job {
+    int id;              // identifier assigned by whoever builds the job list
+    long long duration;  // processing time added to the chosen machine's load
+};
+
+// Record of one experiment instance: problem size, the makespan of each
+// algorithm and a timing for each.
+// NOTE(review): this struct is not referenced anywhere in the visible part of
+// this file; run_experiments writes its CSV rows directly — confirm it is needed.
+struct ExperimentResult {
+    int n;                          // presumably the number of jobs — confirm
+    int m;                          // presumably the number of machines — confirm
+    long long greedy_ls_makespan;   // makespan from list scheduling
+    long long greedy_lpt_makespan;  // makespan from LPT
+    long long optimal_makespan; // -1 if failed
+    double ls_time_us;              // timings; the _us suffix suggests microseconds
+    double lpt_time_us;
+    double opt_time_us;
+};
+
+// --- Algorithms ---
+
+// Greedy 1: List Scheduling (Arbitrary/Online)
+// Places each job, in the order given, on the machine that currently has the
+// smallest load (the lowest-indexed machine on ties) and returns the makespan,
+// i.e. the largest machine load once every job has been placed.
+//   m    - number of machines; must be positive when there are jobs to place.
+//   jobs - jobs in the order they are to be assigned.
+// Returns 0 for an empty job list.
+// Throws std::invalid_argument if jobs is non-empty and m <= 0 (previously this
+// indexed into an empty load vector).
+long long greedy_ls(int m, const vector<Job>& jobs) {
+    if (jobs.empty()) return 0;
+    if (m <= 0) {
+        throw invalid_argument("greedy_ls: machine count must be positive");
+    }
+
+    vector<long long> machines(m, 0);
+    for (const auto& job : jobs) {
+        // min_element returns the first minimum, the same machine a strict '<'
+        // left-to-right scan would pick.
+        *min_element(machines.begin(), machines.end()) += job.duration;
+    }
+    return *max_element(machines.begin(), machines.end());
+}
+
+// Greedy 2: LPT (Longest Processing Time)
+// Sorts the jobs by decreasing duration, then places each one on the machine
+// that currently has the smallest load (the lowest-indexed machine on ties)
+// and returns the resulting makespan.
+//   m    - number of machines; must be positive when there are jobs to place.
+//   jobs - taken by value on purpose: the local copy is sorted, the caller's
+//          vector is left untouched.
+// Returns 0 for an empty job list.
+// Throws std::invalid_argument if jobs is non-empty and m <= 0 (previously this
+// indexed into an empty load vector).
+long long greedy_lpt(int m, vector<Job> jobs) {
+    if (jobs.empty()) return 0;
+    if (m <= 0) {
+        throw invalid_argument("greedy_lpt: machine count must be positive");
+    }
+
+    sort(jobs.begin(), jobs.end(), [](const Job& a, const Job& b) {
+        return a.duration > b.duration;
+    });
+
+    vector<long long> machines(m, 0);
+    // A linear scan for the least-loaded machine is kept because m is small;
+    // a min-priority queue would only pay off for a large machine count.
+    for (const auto& job : jobs) {
+        *min_element(machines.begin(), machines.end()) += job.duration;
+    }
+    return *max_element(machines.begin(), machines.end());
+}
+
+// Optimal Solver: Branch and Bound
+// Global variables for recursion to avoid passing too many args.
+// solve_optimal initializes all of them before each search; dfs reads and
+// updates them, so the solver is not reentrant.
+long long best_makespan;      // best makespan known so far; lowered by dfs on improvement
+int G_m;                      // number of machines for the current search
+vector<Job> G_jobs;           // jobs in the order dfs assigns them
+vector<long long> G_machines; // current load of each machine in the partial schedule
+long long start_time_opt;     // clock() value taken when the search started
+bool time_out;                // set by dfs once the time budget is exceeded
+
+// Depth-first branch and bound over job-to-machine assignments.
+//   job_idx     - index into G_jobs of the next job to place.
+//   current_max - largest machine load in the partial schedule built so far.
+// Works on the solver globals: lowers best_makespan whenever a complete
+// schedule beats it, and sets time_out once the time budget is exhausted, after
+// which every pending call returns immediately.
+void dfs(int job_idx, long long current_max) {
+    if (time_out) return;
+
+    // Timeout: stop after 1 second measured with clock() since start_time_opt.
+    // The elapsed time is computed in floating point. Dividing the integer tick
+    // count by CLOCKS_PER_SEC truncates to whole seconds, which pushed the
+    // "> 1.0" cut-off out to two full seconds.
+    const double elapsed_sec =
+        static_cast<double>(clock() - start_time_opt) / CLOCKS_PER_SEC;
+    if (elapsed_sec > 1.0) {
+        time_out = true;
+        return;
+    }
+
+    // Pruning: a partial schedule whose max load already reaches the best known
+    // makespan cannot lead to an improvement.
+    if (current_max >= best_makespan) return;
+
+    // Base case: all jobs assigned. The check above guarantees this schedule is
+    // strictly better than the best one recorded so far.
+    if (static_cast<size_t>(job_idx) == G_jobs.size()) {
+        best_makespan = current_max;
+        return;
+    }
+
+    long long job_len = G_jobs[job_idx].duration;
+
+    // Try to assign the job to each machine in turn.
+    for (int i = 0; i < G_m; ++i) {
+        // Symmetry breaking: machines with zero load are interchangeable, so only
+        // the first one encountered is tried and the scan stops there.
+        if (G_machines[i] == 0) {
+            G_machines[i] += job_len;
+            dfs(job_idx + 1, max(current_max, G_machines[i]));
+            G_machines[i] -= job_len;
+            break; // Don't try other empty machines
+        }
+
+        // Descend only if this machine would stay strictly below the best makespan.
+        if (G_machines[i] + job_len < best_makespan) {
+            G_machines[i] += job_len;
+            dfs(job_idx + 1, max(current_max, G_machines[i]));
+            G_machines[i] -= job_len;
+        }
+    }
+}
+
+// Computes the optimal makespan for scheduling jobs on m identical machines
+// using branch and bound (dfs), seeded with the LPT makespan as upper bound.
+//   m    - number of machines; must be positive when there are jobs to place.
+//   jobs - taken by value; the local copy is sorted by decreasing duration.
+// Returns 0 for an empty job list, the optimal makespan on success, and -1 if
+// the search hit its time limit.
+// Throws std::invalid_argument if jobs is non-empty and m <= 0.
+// Assumes job durations are non-negative, which the pruning in dfs also relies on.
+long long solve_optimal(int m, vector<Job> jobs) {
+    if (jobs.empty()) return 0;
+    if (m <= 0) {
+        throw invalid_argument("solve_optimal: machine count must be positive");
+    }
+
+    // Longest jobs first: the by-value parameter is sorted in place, so no
+    // second copy of the job list is needed.
+    sort(jobs.begin(), jobs.end(), [](const Job& a, const Job& b) {
+        return a.duration > b.duration;
+    });
+
+    // Heuristic: LPT gives a good initial upper bound.
+    best_makespan = greedy_lpt(m, jobs);
+
+    // Trivial lower bound: no schedule can finish before the longest job does,
+    // nor before the average load per machine (rounded up). If LPT already
+    // meets it, LPT is optimal and the exponential search can be skipped.
+    long long total = 0;
+    for (const auto& job : jobs) total += job.duration;
+    const long long lower_bound = max(jobs.front().duration, (total + m - 1) / m);
+    if (best_makespan <= lower_bound) return best_makespan;
+
+    G_m = m;
+    G_jobs = jobs;
+    G_machines.assign(m, 0);
+    time_out = false;
+    start_time_opt = clock();
+
+    dfs(0, 0);
+
+    if (time_out) return -1;
+    return best_makespan;
+}
+
+// --- Test Generation ---
+
+// Builds n jobs with ids 0..n-1 and durations drawn uniformly from the
+// inclusive integer range [min_val, max_val]. A fresh generator is seeded from
+// random_device on every call, so results differ from call to call.
+vector<Job> generate_jobs(int n, int min_val, int max_val) {
+    vector<Job> result(n);
+
+    random_device seed_source;
+    mt19937 engine(seed_source());
+    uniform_int_distribution<> pick_duration(min_val, max_val);
+
+    int next_id = 0;
+    for (Job& slot : result) {
+        slot = {next_id, (long long)pick_duration(engine)};
+        ++next_id;
+    }
+    return result;
+}
+
+// --- Main Experiments ---
+
+// Runs the random-instance benchmark and writes one CSV row per instance to
+// results/algo_comparison.csv.
+// For every combination of machine count m and job count n it generates random
+// jobs (durations in [10, 100]), times list scheduling and LPT, and for
+// n <= 18 also runs the exact solver. Ratio columns hold greedy / optimal, or
+// -1 when no optimal value is available (solver skipped or timed out).
+// If the output file cannot be opened, an error is printed and nothing is run.
+void run_experiments() {
+    ofstream out("results/algo_comparison.csv");
+    if (!out) {
+        // Without this check a missing results/ directory silently discards every
+        // row while the function still reports success.
+        cerr << "Error: cannot open results/algo_comparison.csv for writing "
+             << "(does the results/ directory exist?)" << endl;
+        return;
+    }
+    out << "m,n,ls_makespan,lpt_makespan,opt_makespan,ls_time,lpt_time,opt_time,ls_ratio,lpt_ratio\n";
+
+    cout << "Running random experiments..." << endl;
+
+    vector<int> ms = {3, 5, 8};
+    vector<int> ns = {10, 15, 20, 25, 30, 50, 100};
+
+    for (int m : ms) {
+        for (int n : ns) {
+            // Instances with n > 20 skip the exact solver (see the n <= 18 check
+            // below), so more repetitions are affordable there.
+            int runs = 10;
+            if (n > 20) runs = 20;
+
+            for (int r = 0; r < runs; ++r) {
+                vector<Job> jobs = generate_jobs(n, 10, 100);
+
+                auto t1 = chrono::high_resolution_clock::now();
+                long long ls_res = greedy_ls(m, jobs);
+                auto t2 = chrono::high_resolution_clock::now();
+
+                auto t3 = chrono::high_resolution_clock::now();
+                long long lpt_res = greedy_lpt(m, jobs);
+                auto t4 = chrono::high_resolution_clock::now();
+
+                // -1 / 0 mean "optimal not computed" unless overwritten below.
+                long long opt_res = -1;
+                double opt_dur = 0;
+
+                // Only run optimal for small n
+                if (n <= 18) {
+                    auto t5 = chrono::high_resolution_clock::now();
+                    opt_res = solve_optimal(m, jobs);
+                    auto t6 = chrono::high_resolution_clock::now();
+                    opt_dur = chrono::duration<double, micro>(t6 - t5).count();
+                }
+
+                // All timings are reported in microseconds.
+                double ls_dur = chrono::duration<double, micro>(t2 - t1).count();
+                double lpt_dur = chrono::duration<double, micro>(t4 - t3).count();
+
+                // A ratio is only meaningful when an optimal value exists.
+                double ls_ratio = (opt_res != -1 && opt_res != 0) ? (double)ls_res / opt_res : -1.0;
+                double lpt_ratio = (opt_res != -1 && opt_res != 0) ? (double)lpt_res / opt_res : -1.0;
+
+                out << m << "," << n << ","
+                    << ls_res << "," << lpt_res << "," << opt_res << ","
+                    << ls_dur << "," << lpt_dur << "," << opt_dur << ","
+                    << ls_ratio << "," << lpt_ratio << "\n";
+            }
+        }
+    }
+    out.close();
+    cout << "Experiments complete. Results saved." << endl;
+}
+
+// Builds known bad inputs for both greedy algorithms, compares each greedy
+// makespan with the optimal one and writes the measured ratio next to the
+// theoretical bound into results/worst_case_verification.csv.
+// The ratio column is -1 when the exact solver produced no usable value
+// (timeout or zero), the same convention run_experiments uses.
+// If the output file cannot be opened, an error is printed and nothing is run.
+void verify_worst_cases() {
+    ofstream out("results/worst_case_verification.csv");
+    if (!out) {
+        cerr << "Error: cannot open results/worst_case_verification.csv for writing "
+             << "(does the results/ directory exist?)" << endl;
+        return;
+    }
+    out << "case_type,m,n,input_desc,greedy_res,opt_res,ratio,theory_bound\n";
+
+    // Case 1: LS Worst Case
+    // m machines. Input: m*(m-1) jobs of size 1, then 1 job of size m.
+    // Example m=3. 6 jobs of size 1, 1 job of size 3.
+    // Greedy: [1,1,3], [1,1], [1,1] -> Max 5.
+    // Opt: [3], [1,1,1], [1,1,1] -> Max 3.
+    // Ratio 5/3 approx 1.666. Theory 2 - 1/3 = 1.666.
+
+    vector<int> test_ms = {3, 4, 5};
+    for (int m : test_ms) {
+        vector<Job> jobs;
+        int num_small = m * (m - 1);
+        for(int i=0; i<num_small; ++i) jobs.push_back({i, 1});
+        jobs.push_back({num_small, (long long)m});
+
+        long long res_ls = greedy_ls(m, jobs);
+        long long res_opt = solve_optimal(m, jobs);
+        // solve_optimal returns -1 on timeout; dividing by it would yield a
+        // meaningless negative ratio.
+        double ratio = (res_opt != -1 && res_opt != 0) ? (double)res_ls / res_opt : -1.0;
+        double bound = 2.0 - 1.0/m;
+
+        out << "LS_Worst," << m << "," << jobs.size() << ","
+            << "\"m*(m-1) 1s + one m\"" << ","
+            << res_ls << "," << res_opt << "," << ratio << "," << bound << "\n";
+    }
+
+    // Case 2: LPT Worst Case
+    // Known example: m=2, Jobs {3, 3, 2, 2, 2}
+    // LPT: M1[3, 2, 2] (7), M2[3, 2] (5). Max 7.
+    // Opt: M1[3, 3] (6), M2[2, 2, 2] (6). Max 6.
+    // Ratio 7/6 = 1.1666. Theory 4/3 - 1/(3m) = 1.33 - 0.166 = 1.166.
+    {
+        int m = 2;
+        vector<Job> jobs = { {0,3}, {1,3}, {2,2}, {3,2}, {4,2} };
+        long long res_lpt = greedy_lpt(m, jobs);
+        long long res_opt = solve_optimal(m, jobs); // Should be fast
+        double ratio = (res_opt != -1 && res_opt != 0) ? (double)res_lpt / res_opt : -1.0;
+        double bound = 4.0/3.0 - 1.0/(3.0*m);
+
+        out << "LPT_Worst," << m << "," << jobs.size() << ","
+            << "\"{3,3,2,2,2}\"" << ","
+            << res_lpt << "," << res_opt << "," << ratio << "," << bound << "\n";
+    }
+
+    out.close();
+    cout << "Worst case verification complete." << endl;
+}
+
+// Entry point: runs the hand-built worst-case checks first, then the random
+// benchmark. Both write their CSV output under results/.
+int main() {
+    verify_worst_cases();
+    run_experiments();
+    return 0;
+}
--- a/greed/src/plot_algo.py
+++ b/greed/src/plot_algo.py
@@ -0,0 +1,58 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import os
+
+def plot_core_algo():
+    if not os.path.exists("results/algo_comparison.csv"):
+        print("Core algo results not found.")
+        return
+
+    df = pd.read_csv("results/algo_comparison.csv")
+    
+    # 1. Approximation Ratio Distribution (Boxplot)
+    # Filter valid ratios (where opt was computed)
+    df_ratios = df[df['ls_ratio'] != -1]
+    
+    if not df_ratios.empty:
+        plt.figure(figsize=(10, 6))
+        # Melt for seaborn
+        df_melt = df_ratios.melt(id_vars=['m', 'n'], value_vars=['ls_ratio', 'lpt_ratio'], 
+                                 var_name='Algorithm', value_name='Ratio')
+        
+        sns.boxplot(data=df_melt, x='m', y='Ratio', hue='Algorithm')
+        plt.title('Approximation Ratio Distribution by Machine Count (m)')
+        plt.ylabel('Approximation Ratio (Greedy / Optimal)')
+        plt.xlabel('Number of Machines (m)')
+        plt.savefig('results/ratio_boxplot.png')
+        plt.close()
+        
+        # Plot vs N
+        plt.figure(figsize=(10, 6))
+        sns.lineplot(data=df_melt, x='n', y='Ratio', hue='Algorithm', style='m', markers=True)
+        plt.title('Approximation Ratio vs Job Count (n)')
+        plt.savefig('results/ratio_vs_n.png')
+        plt.close()
+
+    # 2. Running Time Comparison
+    plt.figure(figsize=(10, 6))
+    df_time = df.groupby(['n', 'm']).mean().reset_index()
+    # Log scale for time
+    plt.plot(df_time['n'], df_time['ls_time'], label='List Scheduling', marker='o')
+    plt.plot(df_time['n'], df_time['lpt_time'], label='LPT', marker='x')
+    
+    # Only plot Opt time where available (it drops to 0/empty for large n)
+    df_opt = df_time[df_time['opt_time'] > 0]
+    if not df_opt.empty:
+        plt.plot(df_opt['n'], df_opt['opt_time'], label='Optimal (B&B)', marker='s')
+        
+    plt.yscale('log')
+    plt.title('Average Running Time vs Input Size (n)')
+    plt.ylabel('Time (microseconds)')
+    plt.xlabel('Number of Jobs (n)')
+    plt.legend()
+    plt.savefig('results/time_comparison.png')
+    plt.close()
+
+# Generate the plots only when this file is executed as a script.
+if __name__ == "__main__":
+    plot_core_algo()
--- a/greed/task.txt
+++ b/greed/task.txt
@@ -0,0 +1,27 @@
+对多机调度算法进行分析，具体要求如下：
+
+    针对多机调度问题，实现基于两种贪心策略的贪心算法；
+    针对多机调度问题，实现遍历的最优解求解算法（也可以用回溯等其它算法）；
+    针对两种贪心策略，构造问题输入，使得贪心算法结果接近最差，结合证明过程展开讨论；
+    以处理机数量m, 作业数量n为输入规模，固定m, n，随机产生大量测试样本，用两种贪心算法分别求解，并计算最优解（无法在合理时间内完成最优解计算则记录为“最优解求解失败”）及近似解上界，对贪心解近似比的概率分布展开分析；
+    改变m和n，对不同组合的结果进行对比分析，并撰写实验报告。
+
+附加：模拟一个GPU集群在线调度问题，该集群有m块GPU，共享开放给全校师生。该集群有以下特点：
+
+    用户提交任务的时间点符合泊松分布，单个任务使用单块GPU所需的时间符合均匀分布。
+    假设提交的任务均具有高度并行性，可拆分到任意多块GPU并行执行，但是由于节点间通信、机架间通信等开销，k块GPU并行时单块效率降为原来的 $\sigma^{\log_2 k}$ 倍。例如2块GPU并行时，单块GPU性能为 $\sigma$；4块GPU并行时，单块GPU的性能为 $\sigma^2$。对于不同任务，$\sigma$ 为[0.75, 0.95]之间均匀分布的小数。
+    GPU数量m = 64，并行运算时通常使用2的整幂次块GPU，如2、4、8、16、32、64。
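+    对任务i，用户期望的完成时间为任务提交时刻 $t_i$，加上单块GPU执行任务所需时间 $\tau_i$。
+    系统有两个关键指标：集群利用率η，用户平均延迟δ。对于η，即任务期内集群所有GPU的平均利用率。对于δ，在用户期望时间之内完成的任务其延迟为0，超出之后按平方惩罚：设任务i结束时间为 $t_i'$，则当 $t_i' \le t_i + \tau_i$ 时，其延迟为 $\delta_i = 0$；当 $t_i' > t_i + \tau_i$ 时，其延迟为 $\delta_i = \left(\dfrac{t_i' - t_i - \tau_i}{\tau_i}\right)^2$。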
+
+ 请针对该场景：
+
+    考虑多种优化目标：1）仅考虑η；2）仅考虑δ；3）均衡的优化目标(1−η)+λδ，其中λ为设置的常数平衡因子。
+
+    模拟生成多组任务集（注意考虑轻负载、中等负载、重负载等不同情况）。
+
+    设计两种以上调度策略。
+
+    使用调度策略对计算过程进行模拟，按照不同的优化目标对结果进行分析对比，撰写实验报告。
+
+