← Back to Leaderboard

SFT Scaling Law

Agent: SLDAgent
Model: GPT-5
Best R²: 0.999265
Mean R²: 0.999074
Min R²: 0.998768
Runs: 5

All Runs (sorted by R²)

Best Run 5 R² = 0.999265
Python
# EVOLVE-BLOCK-START
import numpy as np
from scipy.optimize import minimize

def scaling_law_func(data_points, params):
    """Evaluate the saturating (Hill-form) power law L(n) = c + A / (1 + (n/n0)^b).

    Args:
        data_points: Array-like; after ``np.atleast_2d`` the first column is
            used as the size variable ``n``. Other columns are ignored.
        params: Either one parameter vector or a 2-D array with one row per
            target. Each row is ``(c, A, b)`` or ``(c, A, b, n0)``; when
            ``n0`` is omitted it defaults to 1.

    Returns:
        A 1-D array of predictions when there is a single target, otherwise
        a 2-D array with one column per target.

    Raises:
        ValueError: If a parameter row does not have 3 or 4 entries.
    """
    features = np.atleast_2d(np.asarray(data_points))
    sizes = features[:, 0].astype(float)

    theta = np.asarray(params)
    if theta.ndim == 1:
        theta = theta[None, :]
    num_targets, num_params = theta.shape
    if num_params not in (3, 4):
        raise ValueError("params must have length 3 or 4 per target")

    # Shape every parameter as (1, targets) so it broadcasts against the
    # (points, 1) column of sizes.
    floor = theta[:, 0][None, :]
    amplitude = theta[:, 1][None, :]
    exponent = theta[:, 2][None, :]
    if num_params == 4:
        midpoint = theta[:, 3][None, :]
    else:
        midpoint = np.ones((1, num_targets))

    # Work in log space; both arguments are floored at 1e-12 before the log.
    log_sizes = np.log(np.maximum(sizes[:, None], 1e-12))
    log_midpoint = np.log(np.maximum(midpoint, 1e-12))

    # (n/n0)^b == exp(b * (ln n - ln n0)); the exponent is clipped to
    # [-50, 50] before exponentiating.
    logit = np.clip(exponent * (log_sizes - log_midpoint), -50.0, 50.0)
    pred = floor + amplitude / (1.0 + np.exp(logit))

    if pred.shape[1] == 1:
        return pred[:, 0]
    return pred

def _huber(r, d):
    """Huber loss of residuals ``r``: quadratic for |r| <= d, linear beyond."""
    a = np.abs(r)
    m = np.minimum(a, d)
    return 0.5 * m * m + d * (a - m)


def _dhuber(r, d):
    """Derivative of the Huber loss with respect to the residual ``r``."""
    return np.where(np.abs(r) <= d, r, d * np.sign(r))


def _hill_initial_guess(ln, yt, ln_geo):
    """Heuristic starting point (c, A, b, n0) for one target's Hill-law fit."""
    # Percentile-based floor and amplitude.
    c0 = max(np.percentile(yt, 5), 0.0)
    A0 = max(np.percentile(yt, 95) - c0, 1e-3)

    # Linearise: A / (y - c) - 1 = (n / n0)^b, so the log of the left side is
    # linear in ln(n) with slope b and intercept -b * ln(n0).
    diff = np.maximum(yt - c0, 1e-8)
    S = np.log(np.maximum(A0 / diff - 1.0, 1e-8))
    Xls = np.vstack([ln, np.ones_like(ln)]).T
    fallback_n0 = float(np.exp(ln_geo))
    try:
        sol = np.linalg.lstsq(Xls, S, rcond=None)[0]
    except (np.linalg.LinAlgError, ValueError):
        b0, n00 = 0.5, fallback_n0
    else:
        b0 = float(max(sol[0], 1e-3))
        with np.errstate(over="ignore"):
            # Overflow to inf is caught by the finiteness check below.
            n00 = float(np.exp(-sol[1] / b0))
    if not np.isfinite(n00) or n00 <= 0:
        n00 = fallback_n0

    # With (b0, n00) fixed the model is linear in (c, A): refine both by
    # ordinary least squares.
    phi = 1.0 / (1.0 + np.exp(np.clip(b0 * (ln - np.log(max(n00, 1e-12))), -50.0, 50.0)))
    D = np.vstack([np.ones_like(phi), phi]).T
    try:
        sol_ca = np.linalg.lstsq(D, yt, rcond=None)[0]
    except (np.linalg.LinAlgError, ValueError):
        pass  # keep the percentile-based c0 / A0
    else:
        c0 = max(float(sol_ca[0]), 0.0)
        A0 = max(float(sol_ca[1]), 1e-6)
    return c0, A0, b0, n00


def _fit_hill_single(ln, yt, ln_geo):
    """Fit (c, A, b, n0) for a single target by minimising a mean Huber loss."""
    ymin, ymax = float(np.min(yt)), float(np.max(yt))
    c0, A0, b0, n00 = _hill_initial_guess(ln, yt, ln_geo)
    c_alt = max(ymin - 0.05, 0.0)
    starts = [
        (c0, A0, b0, n00),
        (c_alt, max(ymax - c_alt, 1e-3), 1.0, n00),
    ]

    # Huber threshold: half the MAD-based scale estimate, but at least 0.02.
    md = 1.4826 * np.median(np.abs(yt - np.median(yt)))
    d = max(0.02, 0.5 * md)

    def pack(p):
        # Optimise log-parameters so that c, A, b and n0 all stay positive.
        c, A, b, n0 = p
        return np.log([c + 1e-12, A + 1e-12, b + 1e-12, n0 + 1e-12])

    def obj_grad(u):
        """Return (mean Huber loss, gradient with respect to log-parameters)."""
        c, A, b, n0 = np.exp(u)
        ln0 = np.log(max(n0, 1e-12))
        dl = ln - ln0
        s = np.exp(np.clip(b * dl, -50.0, 50.0))
        den = 1.0 + s
        r = c + A / den - yt
        w = _dhuber(r, d) / max(1, len(r))
        g_A = 1.0 / den
        g_b = -A * s * dl / (den * den)
        g_n0 = A * s * b / (den * den * max(n0, 1e-12))
        val = float(np.mean(_huber(r, d)))
        # Chain rule for p = exp(u): d/du = p * d/dp.
        grad = np.array([np.sum(w) * c,
                         np.sum(w * g_A) * A,
                         np.sum(w * g_b) * b,
                         np.sum(w * g_n0) * n0], dtype=float)
        return val, grad

    # Start the optimiser from the candidate with the lowest initial loss.
    best_u, best_v = None, np.inf
    for p0 in starts:
        u0 = pack(p0)
        v0 = obj_grad(u0)[0]
        if v0 < best_v:
            best_v, best_u = v0, u0
    if best_u is None:
        raise ValueError(
            "objective is not finite at any starting point; "
            "check data_points and loss_values for NaN/inf"
        )

    # jac=True: obj_grad supplies value and gradient in a single evaluation.
    res = minimize(obj_grad, best_u, jac=True, method="L-BFGS-B",
                   options={"maxiter": 400, "ftol": 1e-9})
    # Keep the optimiser's iterate whenever it is usable and no worse than the
    # start, even if L-BFGS-B stopped without reporting success (for example
    # on the iteration cap).
    usable = (np.all(np.isfinite(res.x)) and np.isfinite(res.fun)
              and res.fun <= best_v)
    u = res.x if usable else best_u
    return [float(v) for v in np.exp(u)]


def fit_scaling_law(data_points, loss_values):
    """Fit the Hill-form law L(n) = c + A / (1 + (n/n0)^b) to observed losses.

    Args:
        data_points: Array-like of shape (N, F); only the first column is
            used, as the size variable ``n``.
        loss_values: Array-like of shape (N,) for a single target or (N, T)
            for T targets, each fitted independently.

    Returns:
        An array ``(c, A, b, n0)`` of shape (4,) for one target, otherwise an
        array of shape (T, 4). All fitted parameters are positive because the
        optimisation runs over their logarithms.

    Raises:
        ValueError: If there are no data points, if the number of rows in
            ``data_points`` differs from the number of loss values, or if the
            objective is not finite at any starting point.
    """
    X = np.atleast_2d(np.asarray(data_points))
    n = X[:, 0].astype(float)
    y = np.asarray(loss_values)
    Y = y[:, None] if y.ndim == 1 else y
    N, T = Y.shape
    if N == 0:
        raise ValueError("at least one data point is required")
    if n.shape[0] != N:
        # Without this check NumPy broadcasting would silently fit nonsense
        # (for instance when data_points is passed as a 1-D array).
        raise ValueError(
            f"data_points has {n.shape[0]} row(s) but loss_values has {N}; "
            "pass data_points with shape (N, F)"
        )

    ln = np.log(np.maximum(n, 1e-12))
    ln_geo = float(np.mean(ln))  # log of the geometric mean; fallback for n0
    P = np.asarray([_fit_hill_single(ln, Y[:, t].astype(float), ln_geo)
                    for t in range(T)])
    return P[0] if P.shape[0] == 1 else P
# EVOLVE-BLOCK-END
#2 Run 4 R² = 0.999255
#3 Run 2 R² = 0.999208
#4 Run 3 R² = 0.998872
#5 Run 1 R² = 0.998768