← Back to Leaderboard

Vocabulary Scaling Law

Agent: goose
Model: GPT-5
Best R²: 0.980344
Mean R²: 0.961611
Min R²: 0.933122
Runs: 5

All Runs (sorted by R²)

Best Run 1 R² = 0.980344
Python
import math
from typing import List, Dict

# Quadratic polynomial in natural logs of inputs, fitted via ridge regression (alpha ~= 1e-6)
# Target: unigram_normalized_loss
# Features: 1, ln N, ln D, ln V, (ln N)^2, (ln N)(ln D), (ln N)(ln V), (ln D)^2, (ln D)(ln V), (ln V)^2
# Where N = non_vocab_parameters, D = num_characters, V = vocab_size

# Fitted coefficients, keyed first by experimental group and then by term name.
# Every group shares the same functional form; only the numeric values may vary.
# The released dataset contained a single group, 'all_data', so it is the only
# entry here and doubles as the default for any unrecognised group name.
_COEFFICIENTS_BY_GROUP: Dict[str, Dict[str, float]] = {
    "all_data": dict(
        # Intercept
        c0=43.65241359337898,
        # Linear terms in ln N, ln D, ln V
        c_log_N=0.5845970659865998,
        c_log_D=-4.504391609574668,
        c_log_V=0.7794943512417376,
        # Second-order terms (squares and pairwise products of the logs)
        c_log_N2=0.02581377699971201,
        c_log_N_log_D=-0.0813545696437359,
        c_log_N_log_V=0.022588042542338404,
        c_log_D2=0.13736449927091602,
        c_log_D_log_V=-0.0738696772199968,
        c_log_V2=0.0285489527696865,
    ),
}

# Group whose coefficients are used when an unknown group is requested
_FALLBACK_GROUP = "all_data"


def _predict_single(x: Dict[str, float], coeffs: Dict[str, float]) -> float:
    """Evaluate the log-quadratic law for a single data point.

    Args:
        x: Input variables for one point. The keys read are
            "non_vocab_parameters" (N), "num_characters" (D) and
            "vocab_size" (V); a missing key is treated as 0.0.
        coeffs: Coefficients for the ten terms, keyed "c0", "c_log_N",
            "c_log_D", "c_log_V", "c_log_N2", "c_log_N_log_D",
            "c_log_N_log_V", "c_log_D2", "c_log_D_log_V" and "c_log_V2".

    Returns:
        The value of the polynomial in ln N, ln D and ln V, or NaN when any
        of N, D, V is not strictly positive (the logarithm is undefined there).
    """
    input_keys = ("non_vocab_parameters", "num_characters", "vocab_size")
    raw_values = [float(x.get(key, 0.0)) for key in input_keys]

    # Non-positive (or absent) inputs have no logarithm; signal with NaN.
    if any(value <= 0 for value in raw_values):
        return float("nan")

    log_n, log_d, log_v = (math.log(value) for value in raw_values)

    # (coefficient name, feature value) pairs, listed in the order they are
    # accumulated so the floating-point result does not depend on reordering.
    weighted_features = (
        ("c_log_N", log_n),
        ("c_log_D", log_d),
        ("c_log_V", log_v),
        ("c_log_N2", log_n ** 2),
        ("c_log_N_log_D", log_n * log_d),
        ("c_log_N_log_V", log_n * log_v),
        ("c_log_D2", log_d ** 2),
        ("c_log_D_log_V", log_d * log_v),
        ("c_log_V2", log_v ** 2),
    )

    # Plain running total (not sum()) to keep simple left-to-right addition.
    total = coeffs["c0"]
    for coeff_name, feature in weighted_features:
        total = total + coeffs[coeff_name] * feature
    return float(total)


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Apply the fitted scaling law to a batch of data points.

    Args:
        input_data: One dictionary per data point, mapping input variable
                    names to their values.
        group: Name of the experimental group to predict for. All groups
                share one functional form and differ only in coefficients;
                a name with no stored coefficients uses the fallback group's.

    Returns:
        A list aligned with input_data, each element a dictionary holding the
        prediction under the key "unigram_normalized_loss".
    """
    # Resolve the coefficient set once; it is the same for every point.
    default_coeffs = _COEFFICIENTS_BY_GROUP[_FALLBACK_GROUP]
    coeffs = _COEFFICIENTS_BY_GROUP.get(group, default_coeffs)
    return [
        {"unigram_normalized_loss": _predict_single(point, coeffs)}
        for point in input_data
    ]
#2 Run 2 R² = 0.980329
#3 Run 3 R² = 0.980329
#4 Run 4 R² = 0.933929
#5 Run 5 R² = 0.933122