
LR & Batch Size Scaling Law

Agent: terminus-2
Model: GPT-5
Best R²: -0.451316
Mean R²: -0.692941
Min R²: -0.773483
Runs: 4

All Runs (sorted by R²)

#1 Run 1 (best) R² = -0.451316
Python
# Auto-generated scaling law
# lm_loss ≈ a0 + a1*L + a2*L^2 + b1*B + b2*B^2 + c1*D + d1*P
# where L=log10(lr), B=log10(bsz), D=log10(data_size), P=log10(non_embedding_param_size)

import math


def _eval_formula(x, w):
    # Evaluate the fitted quadratic-in-log form for a single data point.
    lr = float(x.get('lr', 0.0))
    bsz = float(x.get('bsz', 0.0))
    data_size = float(x.get('data_size', 0.0))
    p = float(x.get('non_embedding_param_size', 0.0))
    # Clamp non-positive values before taking log10 to avoid a math domain error.
    L = math.log10(lr if lr > 0 else 1e-12)
    B = math.log10(bsz if bsz > 0 else 1e-12)
    D = math.log10(data_size if data_size > 0 else 1e-12)
    P = math.log10(p if p > 0 else 1e-12)
    a0, a1, a2, b1, b2, c1, d1 = w
    return a0 + a1*L + a2*L*L + b1*B + b2*B*B + c1*D + d1*P

_COEFFS = {
  "all_data": [
    9.520327923915133,
    0.7972975713031198,
    0.13716991794202735,
    -0.4500903878962123,
    0.09891540904376367,
    -0.277570570267418,
    -0.3053374704220124
  ]
}
_FALLBACK = _COEFFS["all_data"]  # used when an unknown group is requested

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predicts output variables based on input variables according to a discovered scaling law.

    Args:
        input_data: A list of dictionaries, where each dictionary is a single data
                    point containing input variable names as keys and their
                    corresponding values.
        group: The name of the experimental group for which to make predictions.
                The functional form of the law must be the same for all groups,
                but the constant parameters/coefficients can differ per group.

    Returns:
        A list of dictionaries, corresponding to the input_data list, with each
        dictionary containing the predicted output variable(s).
    """
    w = _COEFFS.get(group, _FALLBACK)
    out = []
    for x in input_data:
        y = _eval_formula(x, w)
        out.append({'lm_loss': float(y)})
    return out
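For reference, a minimal usage sketch of the function above; the input values below are made up for illustration and are not taken from the benchmark data.

Python
# Illustrative only: these inputs are hypothetical, not benchmark data points.
sample = [
    {'lr': 3e-4, 'bsz': 256, 'data_size': 1e10, 'non_embedding_param_size': 1e8},
    {'lr': 1e-3, 'bsz': 1024, 'data_size': 1e11, 'non_embedding_param_size': 1e9},
]
for point, pred in zip(sample, law(sample, group='all_data')):
    print(point['lr'], point['bsz'], '->', round(pred['lm_loss'], 4))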
#2 Run 2 R² = -0.773483
#3 Run 3 R² = -0.773483
#4 Run 4 R² = -0.773483
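The coefficients above appear to come from a least-squares fit of the quadratic-in-log form. A minimal sketch of how such a fit could be reproduced is below, assuming a hypothetical list `runs` of training-run dicts that use the same field names plus a measured `lm_loss`; nothing beyond the formula's variables is confirmed by the submission.

Python
import math
import numpy as np

def fit_coeffs(runs):
    # `runs` is a hypothetical list of dicts with lr, bsz, data_size,
    # non_embedding_param_size and a measured lm_loss; it is not part of
    # the original submission.
    X, y = [], []
    for r in runs:
        L = math.log10(r['lr'])
        B = math.log10(r['bsz'])
        D = math.log10(r['data_size'])
        P = math.log10(r['non_embedding_param_size'])
        # Design matrix matches the evaluated form: [1, L, L^2, B, B^2, D, P].
        X.append([1.0, L, L*L, B, B*B, D, P])
        y.append(r['lm_loss'])
    # Ordinary least squares for [a0, a1, a2, b1, b2, c1, d1].
    w, *_ = np.linalg.lstsq(np.asarray(X), np.asarray(y), rcond=None)
    return w.tolist()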