
Parallel Scaling Law

Agent: goose
Model: GPT-5
Best R²: 0.999958
Mean R²: 0.999635
Min R²: 0.999387
Runs: 5

All Runs (sorted by R²)

#1 Run 1 R² = 0.999958 (Best)
Python
from __future__ import annotations
from typing import Dict, List

# Discovered scaling law (shared exponents across groups):
#   loss = a_g + b_g * num_params^(-alpha) + d_g * parallel_size^(-beta)
# where g is the experimental group (e.g., 'stack', 'pile').
# The exponents (alpha, beta) are group-invariant; the coefficients a_g, b_g, d_g
# are group-specific.

# Global (group-invariant) exponents found by grid-search + least squares
_ALPHA = 0.2608
_BETA = 0.5071

# Group-specific coefficients (fitted on the provided dataset)
_GROUP_PARAMS: Dict[str, Dict[str, float]] = {
    # group: {a, b, d}
    "pile": {"a": 1.3820165417362469, "b": 118.17512888515972, "d": 0.10096113933098574},
    "stack": {"a": 0.7493041023195879, "b": 67.2875703412857, "d": 0.06574974881954163},
}

# Fallback parameters if an unknown group is provided. We take the simple
# average of known groups to avoid errors and provide a reasonable guess
# while retaining the same functional form.
if _GROUP_PARAMS:
    _FALLBACK = {
        "a": sum(p["a"] for p in _GROUP_PARAMS.values()) / len(_GROUP_PARAMS),
        "b": sum(p["b"] for p in _GROUP_PARAMS.values()) / len(_GROUP_PARAMS),
        "d": sum(p["d"] for p in _GROUP_PARAMS.values()) / len(_GROUP_PARAMS),
    }
else:
    _FALLBACK = {"a": 0.0, "b": 0.0, "d": 0.0}


def _predict_loss(num_params: float, parallel_size: float, params: Dict[str, float]) -> float:
    # Guard against invalid inputs
    if num_params <= 0:
        raise ValueError("num_params must be positive")
    if parallel_size <= 0:
        raise ValueError("parallel_size must be positive")
    return (
        params["a"]
        + params["b"] * (num_params ** (-_ALPHA))
        + params["d"] * (parallel_size ** (-_BETA))
    )


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predicts output variables based on input variables according to a discovered scaling law.

    Args:
        input_data: A list of dictionaries, where each dictionary is a single data
                    point containing input variable names as keys and their
                    corresponding values. Expected keys: 'num_params', 'parallel_size'.
        group: The name of the experimental group for which to make predictions.
                The functional form of the law is the same for all groups,
                but the constant parameters/coefficients differ per group.

    Returns:
        A list of dictionaries, corresponding to the input_data list, with each
        dictionary containing the predicted output variable(s). Keys: 'loss'.
    """
    params = _GROUP_PARAMS.get(group, _FALLBACK)
    outputs: List[Dict[str, float]] = []
    for row in input_data:
        # Index directly so a missing key raises a clear KeyError rather than
        # float() failing on None.
        n = float(row["num_params"])
        p = float(row["parallel_size"])
        pred = _predict_loss(n, p, params)
        outputs.append({"loss": float(pred)})
    return outputs
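
For reference, a hypothetical call to law (the input values below are illustrative and not taken from the fitted dataset; the 'other' group shows the averaged-fallback path):

Python
points = [
    {"num_params": 1.0e8, "parallel_size": 1},
    {"num_params": 1.0e8, "parallel_size": 8},
]
print(law(points, "pile"))    # -> [{'loss': ...}, {'loss': ...}]
print(law(points, "stack"))   # group-specific coefficients for 'stack'
print(law(points, "other"))   # unknown group: uses the averaged fallback coefficients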
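
The comments above state that the shared exponents were found by grid search plus least squares. A minimal sketch of that kind of procedure is shown below; the dataset layout (lists of (num_params, parallel_size, loss) tuples per group) and the search grids are assumptions for illustration, since the actual fitting data is not shown on this page. With (alpha, beta) fixed, the model is linear in (a_g, b_g, d_g), so each group reduces to an ordinary least-squares solve.

Python
import numpy as np

def fit_exponents(data_by_group, alpha_grid, beta_grid):
    """Return (alpha, beta, coeffs_by_group) minimizing the total squared error."""
    best = None
    for alpha in alpha_grid:
        for beta in beta_grid:
            total_sse = 0.0
            coeffs = {}
            for group, rows in data_by_group.items():
                n = np.array([r[0] for r in rows], dtype=float)
                p = np.array([r[1] for r in rows], dtype=float)
                y = np.array([r[2] for r in rows], dtype=float)
                # Design matrix for loss = a + b * n^(-alpha) + d * p^(-beta)
                X = np.column_stack([np.ones_like(n), n ** (-alpha), p ** (-beta)])
                sol, *_ = np.linalg.lstsq(X, y, rcond=None)
                resid = y - X @ sol
                total_sse += float(resid @ resid)
                coeffs[group] = {"a": sol[0], "b": sol[1], "d": sol[2]}
            if best is None or total_sse < best[0]:
                best = (total_sse, alpha, beta, coeffs)
    _, alpha, beta, coeffs = best
    return alpha, beta, coeffs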
#2 Run 2 R² = 0.999958
#3 Run 3 R² = 0.999441
#4 Run 4 R² = 0.999433
#5 Run 5 R² = 0.999387