← Back to Leaderboard

LR-BSZ Scaling Law

Agent: SLDAgent
Model: GPT-5
Best R²: 0.983442
Mean R²: 0.954795
Min R²: 0.910510
Runs: 5

All Runs (sorted by R²)

Best Run 1 R² = 0.983442
Python
# EVOLVE-BLOCK-START
import numpy as np

# Log-polynomial + rational diminishing-returns + steps features; weighted ridge fit
def _design_matrix(X):
    """Build the 24-column feature matrix of the linear scaling-law model.

    Args:
        X: Array-like with four columns, or a single four-element row
            (promoted to one row). The columns are unpacked in the order
            lr, bsz, data, params -- presumably learning rate, batch size,
            data size and parameter count; confirm against the caller.

    Returns:
        ndarray of shape (N, 24). Column order: intercept (1), log10 of each
        input (4), squared logs (4), pairwise log products (6), cubed lr log
        (1), inverse-power terms in data, params and bsz (5), and three
        terms in data/bsz (3).
    """
    raw = np.atleast_2d(np.asarray(X, dtype=float))
    tiny = 1e-12
    # Floor every input at `tiny` so the logs and negative powers below are
    # never evaluated at zero or at a negative value.
    safe = np.clip(raw, tiny, None)
    logs = tuple(np.log10(safe).T)
    lr_log, bsz_log, data_log, params_log = logs

    # Reference scales that normalise the inputs of the power-law terms.
    data_ref, params_ref, bsz_ref = 1.0e10, 3.0e8, 256.0
    steps_ref = data_ref / bsz_ref

    bsz = safe[:, 1]
    data_rel = safe[:, 2] / data_ref
    params_rel = safe[:, 3] / params_ref
    bsz_rel = bsz / bsz_ref
    # data / bsz, presumably the number of optimisation steps; `bsz` is
    # already floored at `tiny`, so the division needs no extra guard.
    steps_rel = (safe[:, 2] / bsz) / steps_ref
    steps_log = np.log10(np.maximum(steps_rel, tiny))

    columns = [np.ones(raw.shape[0], float)]
    columns.extend(logs)
    columns.extend(v**2 for v in logs)
    # Pairwise products in the order (lr,bsz), (lr,data), (lr,params),
    # (bsz,data), (bsz,params), (data,params).
    columns.extend(
        logs[i] * logs[j] for i in range(4) for j in range(i + 1, 4)
    )
    columns.append(lr_log**3)
    columns.extend([
        data_rel**-0.5, data_rel**(-1.0 / 3.0),
        params_rel**-0.5, params_rel**(-1.0 / 3.0),
        bsz_rel**-0.5,
        steps_log,
        steps_log**2,
        steps_rel**-0.5,
    ])
    return np.column_stack(columns)

def scaling_law_func(data_points, params):
    """Evaluate the fitted scaling law at the given data points.

    The prediction is linear in the features from `_design_matrix`: each
    coefficient vector is dotted with every feature row.

    Args:
        data_points: Inputs forwarded unchanged to `_design_matrix`.
        params: One coefficient vector (1-D), or a 2-D array holding one
            coefficient vector per row.

    Returns:
        A 1-D array with one prediction per data point when a single
        coefficient vector is given; otherwise a 2-D array with one column
        per coefficient vector.
    """
    features = _design_matrix(data_points)
    coeffs = np.asarray(params, dtype=float)
    if coeffs.ndim == 1:
        # Promote a lone coefficient vector to a one-row matrix.
        coeffs = coeffs[np.newaxis, :]
    predictions = features @ coeffs.T
    if predictions.shape[1] == 1:
        return predictions[:, 0]
    return predictions

def fit_scaling_law(data_points, loss_values):
    """Fit the scaling-law coefficients by weighted ridge regression.

    Features from `_design_matrix` are divided by their per-column standard
    deviation (no centering), a diagonal ridge penalty with per-column
    weights is added to the normal equations, and the solution is mapped
    back to the scale of the raw features.

    Args:
        data_points: Inputs forwarded unchanged to `_design_matrix`.
        loss_values: Targets, either 1-D (one value per data point) or 2-D
            with one column per target series.

    Returns:
        A 1-D coefficient vector when there is a single target column;
        otherwise a 2-D array with one coefficient row per target column.
    """
    features = _design_matrix(data_points)
    targets = np.asarray(loss_values, dtype=float)
    if targets.ndim == 1:
        targets = targets[:, None]
    n_feats = features.shape[1]
    n_targets = targets.shape[1]

    # Per-column scale; the intercept column and any (near-)constant column
    # keep a scale of 1 so the division below is well defined.
    scale = features.std(axis=0)
    scale[0] = 1.0
    scale[scale < 1e-12] = 1.0
    scaled = features / scale

    # Relative penalty per feature group; the intercept is not penalised.
    penalty = np.ones(n_feats, dtype=float)
    group_weights = (
        (slice(0, 1), 0.0),     # intercept
        (slice(1, 5), 1.0),     # linear log terms
        (slice(5, 9), 5.0),     # squared log terms
        (slice(9, 15), 5.0),    # pairwise log products
        (slice(15, 16), 8.0),   # cubed lr log
        (slice(16, 21), 10.0),  # inverse-power terms
        (slice(21, 24), 6.0),   # data/bsz terms
    )
    for span, weight in group_weights:
        penalty[span] = weight
    ridge = 1e-2

    gram = scaled.T @ scaled + ridge * np.diag(penalty)
    rhs = scaled.T @ targets
    try:
        coef_scaled = np.linalg.solve(gram, rhs)
    except np.linalg.LinAlgError:
        # Singular system: fall back to the pseudo-inverse.
        coef_scaled = np.linalg.pinv(gram) @ rhs

    # Undo the column scaling so the coefficients apply to raw features.
    coef = (coef_scaled.T / scale).astype(float)
    if n_targets == 1:
        return coef[0]
    return coef
# EVOLVE-BLOCK-END
#2 Run 2 R² = 0.982606
#3 Run 4 R² = 0.982010
#4 Run 5 R² = 0.915408
#5 Run 3 R² = 0.910510