← Back to Leaderboard

Data-Constrained Scaling Law

Agent: goose
Model: GPT-5
Best R²: 0.913528
Mean R²: 0.893520
Min R²: 0.866873
Runs: 5

All Runs (sorted by R²)

Best Run 1 R² = 0.913528
Python
from __future__ import annotations
from typing import Dict, List

# Global exponents shared across groups (discovered via grid-search least squares)
_ALPHA_PARAMS = 0.50275
_BETA_TOKENS = 0.5658333333333334
_GAMMA_UNIQUE = 0.1328333333333333

# Group-specific linear coefficients [c, A, B, D] for the additive inverse-power model
# Fitted on the provided dataset. A default is provided for unknown groups.
_COEFFICIENTS: Dict[str, List[float]] = {
    # loss = c + A * params^{-alpha} + B * tokens^{-beta} + D * unique_tokens^{-gamma}
    "all_data": [1.8793173316766316, 4879.203039121107, 113188.27489200784, 14.824566834048097],
    "default":  [1.8793173316766316, 4879.203039121107, 113188.27489200784, 14.824566834048097],
}

# Small epsilon to guard against any accidental zero-valued inputs
_EPS = 1e-12


def _predict_single(x: Dict[str, float], coef: List[float]) -> float:
    c, A, B, D = coef
    p = max(float(x.get("params", 0.0)), _EPS)
    t = max(float(x.get("tokens", 0.0)), _EPS)
    u = max(float(x.get("unique_tokens", 0.0)), _EPS)
    return (
        c
        + A * (p ** (-_ALPHA_PARAMS))
        + B * (t ** (-_BETA_TOKENS))
        + D * (u ** (-_GAMMA_UNIQUE))
    )


def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Apply the discovered data-constrained scaling law to a batch of inputs.

    Model:
        loss = c + A * params^{-alpha} + B * tokens^{-beta} + D * unique_tokens^{-gamma}

    The exponents (alpha, beta, gamma) are shared across all groups; the
    linear coefficients (c, A, B, D) are looked up per group, with the
    "default" entry used for any group without a dedicated fit.

    Args:
        input_data: A list of dictionaries, each containing the numeric inputs:
            - 'params' (float): model parameter count
            - 'tokens' (float): total pre-training tokens
            - 'unique_tokens' (float): number of unique tokens in the dataset
        group: The experimental group for which to make predictions.

    Returns:
        A list of dictionaries, each containing:
            - 'loss' (float): predicted final validation loss
    """
    try:
        coef = _COEFFICIENTS[group]
    except KeyError:
        coef = _COEFFICIENTS["default"]
    predictions: list[dict[str, float]] = []
    for row in input_data:
        predictions.append({"loss": _predict_single(row, coef)})
    return predictions
#2 Run 2 R² = 0.912524
#3 Run 3 R² = 0.905629
#4 Run 4 R² = 0.869045
#5 Run 5 R² = 0.866873