← Back to Leaderboard

SFT Scaling Law

Agent: SLDAgent
Model: o4-mini
Best R²: 0.999264
Mean R²: 0.995574
Min R²: 0.981141
Runs: 5

All Runs (sorted by R²)

Best Run 2 R² = 0.999264
Python
import numpy as np
from scipy.optimize import minimize

# EVOLVE-BLOCK-START
def scaling_law_func(data_points, params):
    """
    4-parameter logistic-saturating law:
      L(D) = L_inf + R / (1 + (D / D0)^alpha)
    params = [logD0, logalpha, logR, L_inf]
    """
    D = np.atleast_1d(np.squeeze(data_points)).astype(np.float64)
    p = np.ravel(params).astype(np.float64)
    D0    = np.exp(p[0])
    alpha = np.exp(p[1])
    R     = np.exp(p[2])
    L_inf = p[3]
    denom = 1.0 + np.power(D / D0, alpha)
    # avoid div-by-zero
    return L_inf + R / (denom + 1e-12)

def fit_scaling_law(data_points, loss_values):
    """
    Fit L(D) = L_inf + R / (1 + (D / D0)^alpha)
    by minimizing combined MSE + log-MSE for robust fit.
    Returns params = [logD0, logalpha, logR, L_inf].
    """
    # Prepare data
    D = np.atleast_1d(np.squeeze(data_points)).astype(np.float64)
    L = np.ravel(loss_values).astype(np.float64)
    D = np.maximum(D, 1e-8)
    eps = 1e-8

    Lmin, Lmax = L.min(), L.max()
    logL = np.log(L + eps)

    # Bounds for [logD0, logalpha, logR, L_inf]
    bounds = [
        (np.log(D.min() * 0.01), np.log(D.max() * 100.0)),   # logD0
        (-5.0, 5.0),                                          # logalpha
        (np.log(eps), np.log(max(Lmax - Lmin, eps) * 10.0)),  # logR
        (0.0, Lmin)                                           # L_inf
    ]

    # Multi-start seeds
    D0_seeds = [np.median(D), D.min() * 5.0, D.max() / 5.0]
    alpha_seeds = [0.3, 0.5, 1.0]
    R0 = max(Lmax - Lmin, 1e-2)
    Linf0 = Lmin

    best_p, best_obj = None, np.inf

    for D0_guess in D0_seeds:
        for alpha_guess in alpha_seeds:
            p0 = np.array([
                np.log(D0_guess),
                np.log(alpha_guess),
                np.log(R0),
                Linf0
            ], dtype=np.float64)

            def obj(p):
                pred = scaling_law_func(D, p)
                mse = np.mean((pred - L) ** 2)
                log_mse = np.mean((np.log(pred + eps) - logL) ** 2)
                return mse + log_mse

            res = minimize(
                obj,
                p0,
                method='L-BFGS-B',
                bounds=bounds,
                options={'ftol': 1e-9, 'maxiter': 3000}
            )
            if res.success and res.fun < best_obj:
                best_obj, best_p = res.fun, res.x

    # Fallback if no start succeeded
    if best_p is None:
        best_p = np.array([
            np.log(np.median(D)),
            np.log(0.5),
            np.log(R0),
            Linf0
        ], dtype=np.float64)

    return best_p
# EVOLVE-BLOCK-END
#2 Run 5 R² = 0.999261
#3 Run 3 R² = 0.999118
#4 Run 4 R² = 0.999086
#5 Run 1 R² = 0.981141