// algo2025/greed/src/multimachine.cpp

#include <algorithm>
#include <chrono>
#include <climits>
#include <cmath>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <random>
#include <stdexcept>
#include <vector>

using namespace std;

// --- Data Structures ---

// One unit of work to be placed on a machine.
// Plain aggregate: initialise positionally as {id, duration}.
struct Job {
    int id;              // Caller-assigned identifier; the schedulers in this file never read it.
    long long duration;  // Processing time, added to the load of the machine that receives the job.
};

// Record describing one experiment run: the instance size, the makespan found
// by each algorithm, and how long each algorithm took.
// Plain aggregate; fields are initialised positionally in declaration order.
struct ExperimentResult {
    int n;                          // Number of jobs in the instance.
    int m;                          // Number of machines in the instance.
    long long greedy_ls_makespan;   // Makespan reported by list scheduling.
    long long greedy_lpt_makespan;  // Makespan reported by LPT.
    long long optimal_makespan;     // Makespan reported by the exact solver; -1 if it failed.
    double ls_time_us;              // List-scheduling run time (presumably microseconds, per the _us suffix).
    double lpt_time_us;             // LPT run time (presumably microseconds, per the _us suffix).
    double opt_time_us;             // Exact-solver run time (presumably microseconds, per the _us suffix).
};

// --- Algorithms ---

// Greedy 1: List Scheduling (Arbitrary/Online)
long long greedy_ls(int m, const vector<Job>& jobs) {
    if (jobs.empty()) return 0;
    vector<long long> machines(m, 0);
    for (const auto& job : jobs) {
        // Find machine with min load
        int min_idx = 0;
        for (int i = 1; i < m; ++i) {
            if (machines[i] < machines[min_idx]) {
                min_idx = i;
            }
        }
        machines[min_idx] += job.duration;
    }
    return *max_element(machines.begin(), machines.end());
}

// Greedy 2: LPT (Longest Processing Time)
long long greedy_lpt(int m, vector<Job> jobs) { // Note: pass by value to sort copy
    if (jobs.empty()) return 0;
    sort(jobs.begin(), jobs.end(), [](const Job& a, const Job& b) {
        return a.duration > b.duration;
    });

    vector<long long> machines(m, 0);
    // Optimization: Use a min-priority queue if m is large, but for small m linear scan is fine/faster due to cache
    for (const auto& job : jobs) {
        int min_idx = 0;
        for (int i = 1; i < m; ++i) {
            if (machines[i] < machines[min_idx]) {
                min_idx = i;
            }
        }
        machines[min_idx] += job.duration;
    }
    return *max_element(machines.begin(), machines.end());
}

// Optimal Solver: Branch and Bound
// Global variables for recursion to avoid passing too many args
long long best_makespan;
int G_m;
vector<Job> G_jobs;
vector<long long> G_machines;
long long start_time_opt;
bool time_out;

// Branch-and-bound step: tries every machine for job number `job_idx` and
// recurses on the next job.
//
// job_idx      - index into G_jobs of the job to place next.
// current_max  - largest machine load on the current partial assignment.
//
// Reads and updates the file-level search state: G_jobs, G_m, G_machines,
// best_makespan, start_time_opt and time_out. On return G_machines is restored
// to the state it had on entry. When the time limit is exceeded, time_out is
// set and the whole recursion unwinds without exploring further.
void dfs(int job_idx, long long current_max) {
    if (time_out) return;

    // Time limit: give up after one second of processor time (std::clock).
    // The elapsed tick count is converted to double BEFORE dividing; an integer
    // division would truncate to whole seconds and only trigger after ~2 s.
    constexpr double kTimeLimitSeconds = 1.0;
    const double elapsed_seconds =
        static_cast<double>(std::clock() - start_time_opt) / CLOCKS_PER_SEC;
    if (elapsed_seconds > kTimeLimitSeconds) {
        time_out = true;
        return;
    }

    // Pruning: a partial assignment whose max load already matches or exceeds
    // the best known makespan cannot lead to an improvement.
    if (current_max >= best_makespan) return;

    // Base case: every job is assigned, and (by the check above) this
    // assignment is strictly better than the best one seen so far.
    if (static_cast<std::size_t>(job_idx) == G_jobs.size()) {
        best_makespan = current_max;
        return;
    }

    const long long job_len = G_jobs[job_idx].duration;

    for (int i = 0; i < G_m; ++i) {
        // Symmetry breaking: all machines with zero load are interchangeable,
        // so only the first one encountered is tried and the scan stops there.
        if (G_machines[i] == 0) {
            G_machines[i] += job_len;
            dfs(job_idx + 1, std::max(current_max, G_machines[i]));
            G_machines[i] -= job_len;
            break;
        }

        // Only descend when placing the job here keeps this machine strictly
        // below the best known makespan.
        if (G_machines[i] + job_len < best_makespan) {
            G_machines[i] += job_len;
            dfs(job_idx + 1, std::max(current_max, G_machines[i]));
            G_machines[i] -= job_len;
        }
    }
}

long long solve_optimal(int m, vector<Job> jobs) {
    if (jobs.empty()) return 0;

    // Heuristic: LPT gives a good initial bound
    vector<Job> sorted_jobs = jobs;
    sort(sorted_jobs.begin(), sorted_jobs.end(), [](const Job& a, const Job& b) {
        return a.duration > b.duration;
    });

    best_makespan = greedy_lpt(m, sorted_jobs);
    G_m = m;
    G_jobs = sorted_jobs;
    G_machines.assign(m, 0);
    time_out = false;
    start_time_opt = clock();

    dfs(0, 0);

    if (time_out) return -1;
    return best_makespan;
}

// --- Test Generation ---

vector<Job> generate_jobs(int n, int min_val, int max_val) {
    vector<Job> jobs(n);
    random_device rd;
    mt19937 gen(rd());
    uniform_int_distribution<> dis(min_val, max_val);
    for (int i = 0; i < n; ++i) {
        jobs[i] = {i, (long long)dis(gen)};
    }
    return jobs;
}

// --- Main Experiments ---

void run_experiments() {
    ofstream out("results/algo_comparison.csv");
    out << "m,n,ls_makespan,lpt_makespan,opt_makespan,ls_time,lpt_time,opt_time,ls_ratio,lpt_ratio\n";

    cout << "Running random experiments..." << endl;

    vector<int> ms = {3, 5, 8};
    vector<int> ns = {10, 15, 20, 25, 30, 50, 100};

    for (int m : ms) {
        for (int n : ns) {
            int runs = 10; // More runs for faster algos
            if (n > 20) runs = 20;

            for (int r = 0; r < runs; ++r) {
                vector<Job> jobs = generate_jobs(n, 10, 100);

                auto t1 = chrono::high_resolution_clock::now();
                long long ls_res = greedy_ls(m, jobs);
                auto t2 = chrono::high_resolution_clock::now();

                auto t3 = chrono::high_resolution_clock::now();
                long long lpt_res = greedy_lpt(m, jobs);
                auto t4 = chrono::high_resolution_clock::now();

                long long opt_res = -1;
                double opt_dur = 0;

                // Only run optimal for small n
                if (n <= 18) {
                    auto t5 = chrono::high_resolution_clock::now();
                    opt_res = solve_optimal(m, jobs);
                    auto t6 = chrono::high_resolution_clock::now();
                    opt_dur = chrono::duration<double, micro>(t6 - t5).count();
                }

                double ls_dur = chrono::duration<double, micro>(t2 - t1).count();
                double lpt_dur = chrono::duration<double, micro>(t4 - t3).count();

                double ls_ratio = (opt_res != -1 && opt_res != 0) ? (double)ls_res / opt_res : -1.0;
                double lpt_ratio = (opt_res != -1 && opt_res != 0) ? (double)lpt_res / opt_res : -1.0;

                out << m << "," << n << ","
                    << ls_res << "," << lpt_res << "," << opt_res << ","
                    << ls_dur << "," << lpt_dur << "," << opt_dur << ","
                    << ls_ratio << "," << lpt_ratio << "\n";
            }
        }
    }
    out.close();
    cout << "Experiments complete. Results saved." << endl;
}

void verify_worst_cases() {
    ofstream out("results/worst_case_verification.csv");
    out << "case_type,m,n,input_desc,greedy_res,opt_res,ratio,theory_bound\n";

    // Case 1: LS Worst Case
    // m machines. Input: m*(m-1) jobs of size 1, then 1 job of size m.
    // Example m=3. 6 jobs of size 1, 1 job of size 3.
    // Greedy: [1,1,3], [1,1], [1,1] -> Max 5.
    // Opt: [3], [1,1,1], [1,1,1] -> Max 3.
    // Ratio 5/3 approx 1.666. Theory 2 - 1/3 = 1.666.

    vector<int> test_ms = {3, 4, 5};
    for (int m : test_ms) {
        vector<Job> jobs;
        int num_small = m * (m - 1);
        for(int i=0; i<num_small; ++i) jobs.push_back({i, 1});
        jobs.push_back({num_small, (long long)m});

        long long res_ls = greedy_ls(m, jobs);
        long long res_opt = solve_optimal(m, jobs);
        double ratio = (double)res_ls / res_opt;
        double bound = 2.0 - 1.0/m;

        out << "LS_Worst," << m << "," << jobs.size() << ","
            << "\"m*(m-1) 1s + one m\"" << ","
            << res_ls << "," << res_opt << "," << ratio << "," << bound << "\n";
    }

    // Case 2: LPT Worst Case
    // Known example: m=2, Jobs {3, 3, 2, 2, 2}
    // LPT: M1[3, 2, 2] (7), M2[3, 2] (5). Max 7.
    // Opt: M1[3, 3] (6), M2[2, 2, 2] (6). Max 6.
    // Ratio 7/6 = 1.1666. Theory 4/3 - 1/(3m) = 1.33 - 0.166 = 1.166.
    {
        int m = 2;
        vector<Job> jobs = { {0,3}, {1,3}, {2,2}, {3,2}, {4,2} };
        long long res_lpt = greedy_lpt(m, jobs);
        long long res_opt = solve_optimal(m, jobs); // Should be fast
        double ratio = (double)res_lpt / res_opt;
        double bound = 4.0/3.0 - 1.0/(3.0*m);

        out << "LPT_Worst," << m << "," << jobs.size() << ","
            << "\"{3,3,2,2,2}\"" << ","
            << res_lpt << "," << res_opt << "," << ratio << "," << bound << "\n";
    }

    out.close();
    cout << "Worst case verification complete." << endl;
}

// Program entry point: first checks the hand-built worst-case instances,
// then runs the random-instance comparison.
int main() {
    verify_worst_cases();
    run_experiments();
    // Falling off the end of main is equivalent to `return 0;`.
}