# algo2025/backtracking/plotter.py

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Global plot style: seaborn's "whitegrid" theme for every figure below.
sns.set_theme(style="whitegrid")
# Font priority list for sans-serif text. SimHei is tried first so Chinese
# characters render when that font is installed; DejaVu Sans (shipped with
# matplotlib) is the fallback so hosts without SimHei still resolve a font
# instead of emitting repeated findfont warnings.
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
# Draw minus signs as ASCII hyphens rather than U+2212; commonly needed with
# CJK fonts, which may lack the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

# Directory (relative to the current working directory) that holds the input
# CSV files and receives the generated PNG files.
output_dir = "backtracking"
# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

def save_plot(filename: str) -> None:
    """Save the current pyplot figure into ``output_dir`` and close it.

    Args:
        filename: Name of the image file (e.g. ``"plot.png"``); it is joined
            onto the module-level ``output_dir``.

    Raises:
        Any exception raised by ``plt.tight_layout`` or ``plt.savefig`` is
        propagated to the caller after the figure has been closed.
    """
    try:
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, filename), dpi=300)
    finally:
        # Close even when layout or saving fails so a broken figure does not
        # stay registered with pyplot.
        plt.close()

# 1. Monte Carlo Estimation
# Reads mc_estimation.csv and draws "estimated_nodes" against "n" as a line
# chart with a logarithmic y axis. Any failure is reported and the script
# moves on to the next section.
try:
    mc_csv_path = os.path.join(output_dir, "mc_estimation.csv")
    df_mc = pd.read_csv(mc_csv_path)

    plt.figure(figsize=(8, 5))
    # seaborn draws on the current axes and hands them back, so the labels
    # below are applied to that same axes object.
    mc_axes = sns.lineplot(x="n", y="estimated_nodes", data=df_mc, marker="o")
    mc_axes.set_title("Monte Carlo Estimation of Search Tree Size")
    mc_axes.set_xlabel("Number of Item Types (n)")
    mc_axes.set_ylabel("Estimated Nodes (Log Scale)")
    mc_axes.set_yscale("log")

    save_plot("mc_estimation.png")
except Exception as e:
    print(f"Error plotting MC: {e}")

# 2. Cost Approximation Analysis
# Two plots are produced from two different CSV files. Each has its own
# try block so that a missing or malformed file for one plot does not prevent
# the other plot from being generated.

# 2a. Distribution of the ratio per search-tree level, for one problem size.
try:
    df_level = pd.read_csv(os.path.join(output_dir, "cost_approx_level.csv"))
    # Only rows for a single n are plotted (e.g., n=15 or 20).
    target_n = 20
    df_level_n = df_level[df_level["n"] == target_n]

    if df_level_n.empty:
        # Report the skip explicitly instead of silently producing no figure.
        print(
            f"Skipping cost_ratio_level.png: no rows with n={target_n} "
            "in cost_approx_level.csv"
        )
    else:
        plt.figure(figsize=(10, 6))
        sns.boxplot(data=df_level_n, x="level", y="ratio")
        plt.title(f"Cost Function Approximation Ratio vs Level (n={target_n})")
        plt.xlabel("Search Tree Level")
        plt.ylabel("Ratio (Bound / True Value)")
        # Reference line at ratio 1.0.
        plt.axhline(1.0, color='r', linestyle='--')
        save_plot("cost_ratio_level.png")
except Exception as e:
    print(f"Error plotting Cost Analysis: {e}")

# 2b. Average ratio as a function of the input size n.
try:
    df_n = pd.read_csv(os.path.join(output_dir, "cost_approx_n.csv"))
    plt.figure(figsize=(8, 5))
    sns.lineplot(data=df_n, x="n", y="avg_ratio", marker="o")
    plt.title("Average Cost Approximation Ratio vs Input Size")
    plt.xlabel("Number of Item Types (n)")
    plt.ylabel("Average Ratio")
    save_plot("cost_ratio_n.png")
except Exception as e:
    print(f"Error plotting Cost Analysis: {e}")

# 3. New Cost Function Comparison
# Reads new_cost_analysis.csv and draws one line chart per metric, with one
# line per "method" value. Any failure is reported and the script moves on.
try:
    df_new = pd.read_csv(os.path.join(output_dir, "new_cost_analysis.csv"))

    # One entry per chart:
    # (y column, title, y-axis label, use log-scale y axis, output file name)
    new_cost_charts = (
        (
            "nodes",
            "Nodes Visited: Fractional vs Simple Bound",
            "Nodes Visited (Log Scale)",
            True,
            "new_cost_nodes.png",
        ),
        (
            "time_us",
            "Execution Time: Fractional vs Simple Bound",
            "Time (microseconds)",
            False,
            "new_cost_time.png",
        ),
    )

    for y_column, chart_title, y_label, log_y, png_name in new_cost_charts:
        plt.figure(figsize=(8, 5))
        sns.lineplot(data=df_new, x="n", y=y_column, hue="method", marker="o")
        plt.title(chart_title)
        plt.xlabel("n")
        plt.ylabel(y_label)
        if log_y:
            plt.yscale("log")
        save_plot(png_name)
except Exception as e:
    print(f"Error plotting New Cost Analysis: {e}")

# 4. Multiple Knapsack Comparison
# Reads multiple_knapsack.csv and draws two line charts against n, one line
# per "method" value: nodes visited (log-scaled y axis) and execution time.
# Any failure is reported instead of aborting the script.
try:
    mk_csv_path = os.path.join(output_dir, "multiple_knapsack.csv")
    df_mk = pd.read_csv(mk_csv_path)

    # Chart 1: nodes visited.
    nodes_axes = plt.figure(figsize=(8, 5)).gca()
    sns.lineplot(x="n", y="nodes", hue="method", data=df_mk, marker="o", ax=nodes_axes)
    nodes_axes.set_title("Multiple Knapsack: Nodes Visited Comparison")
    nodes_axes.set_xlabel("n")
    nodes_axes.set_ylabel("Nodes Visited")
    nodes_axes.set_yscale("log")
    save_plot("mk_nodes.png")

    # Chart 2: execution time.
    time_axes = plt.figure(figsize=(8, 5)).gca()
    sns.lineplot(x="n", y="time_us", hue="method", data=df_mk, marker="o", ax=time_axes)
    time_axes.set_title("Multiple Knapsack: Execution Time Comparison")
    time_axes.set_xlabel("n")
    time_axes.set_ylabel("Time (microseconds)")
    save_plot("mk_time.png")
except Exception as e:
    print(f"Error plotting Multiple Knapsack: {e}")