SLD - U-shaped Scaling Law

All Runs (sorted by R²)

Best Run 1 R² = 0.376596

▼

Python

from typing import List, Dict

# Discovered functional form (shared across groups):
#   y = d + a * ((x - c)**2) / (1 + b * ((x - c)**2))
# where:
#   - x is log_flops
#   - y is the predicted brier_score
#   - (a, b, c, d) are group-specific constants
# This form is a saturating bowl centred at x = c: U-shaped when a > 0 and
# inverted-U-shaped when a < 0. For b > 0, y approaches d + a / b as |x - c| grows.

# Fitted constants for each benchmark group, keyed by group name.
# Each entry supplies the four constants that law() plugs into
#   y = d + a * t / (1 + b * t),   t = (log_flops - c) ** 2
# with:
#   'a' - scale of the saturating term (negative for hindu_knowledge and parsinlu_qa_mc),
#   'b' - saturation rate in the denominator (positive for every group),
#   'c' - centre of the bowl on the log_flops axis,
#   'd' - predicted value at log_flops == c.
# NOTE(review): the b values 1000.0 and 0.001 look like the bounds of the grid
# search over b -- confirm against explain.md.
PARAMS: Dict[str, Dict[str, float]] = {
    # Fitted via grid-search over c and b with linear least squares for a and d (see explain.md)
    'abstract_narrative_understanding': {'a': 0.13395361132733768, 'b': 0.1584893192461114, 'c': -0.8996294548824371, 'd': -0.6633218562832404},
    'analogical_similarity': {'a': 124.33853714716155, 'b': 1000.0, 'c': -0.8996294548824371, 'd': -0.6633823698387435},
    'arc': {'a': 0.6201543020597179, 'b': 2.5118864315095824, 'c': -0.8996294548824371, 'd': -0.25249340822304334},
    'arithmetic': {'a': 45.30452598924281, 'b': 79.43282347242821, 'c': -0.8996294548824371, 'd': -0.7553992280671666},
    'conceptual_combinations': {'a': 7.186631573231778, 'b': 31.622776601683793, 'c': -0.7753098165335611, 'd': -0.6151787648441417},
    'hellaswag': {'a': 0.7981556898735167, 'b': 3.981071705534973, 'c': -0.8678001480465772, 'd': -0.19577493649254435},
    'hindu_knowledge': {'a': -125.65727220964706, 'b': 1000.0, 'c': -0.6533515330526072, 'd': -0.308362822442369},
    'mmlu': {'a': 0.12319687240192848, 'b': 0.7943282347242822, 'c': 1.073944803905969, 'd': -0.5430288350323806},
    'parsinlu_qa_mc': {'a': -0.05675351773277077, 'b': 0.001, 'c': 0.8722019589804288, 'd': -0.3915881996663963},
}


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Evaluate the saturating-bowl scaling law for one experimental group.

    For every row the prediction is

        brier_score = d + a * t / (1 + b * t),   t = (log_flops - c) ** 2

    where (a, b, c, d) are the constants stored in PARAMS for ``group``.
    The functional form is shared by all groups; only the constants differ.

    Args:
        input_data: Data points to predict for. Each row is a dictionary that
                    must provide a 'log_flops' entry convertible to float.
        group: Name of the experimental group whose constants are used.

    Returns:
        One ``{'brier_score': value}`` dictionary per input row, in input order.

    Raises:
        ValueError: If ``group`` has no entry in PARAMS.
        KeyError: If a row does not contain 'log_flops'.
    """
    coeffs = PARAMS.get(group)
    if coeffs is None:
        raise ValueError(f"Unknown group '{group}'. Known groups: {sorted(PARAMS.keys())}")

    scale, saturation, centre, offset = (coeffs[name] for name in ('a', 'b', 'c', 'd'))

    def _evaluate(log_flops: float) -> float:
        # Squared distance from the centre drives both numerator and denominator.
        squared_distance = (log_flops - centre) ** 2
        saturated = squared_distance / (1.0 + saturation * squared_distance)
        return offset + scale * saturated

    return [
        {'brier_score': float(_evaluate(float(point['log_flops'])))}
        for point in input_data
    ]

#2 Run 2 R² = 0.233327

▼

Python

from __future__ import annotations

import math
from typing import Dict

# Discovered U-shaped scaling law (convex quadratic with a per-group vertex)
# brier_score_hat = A_g * (log_flops - x0_g)**2 + y0_g
# If an unknown group is provided, fall back to global parameters.

# Coefficient table for the quadratic law evaluated by law():
#   brier_score_hat = A * (log_flops - x0) ** 2 + y0
# Keys of each entry:
#   "A"  - curvature (law() additionally clamps it to be >= 0),
#   "x0" - log_flops position of the vertex,
#   "y0" - predicted value at the vertex.
# The "__global__" entry is what law() uses when the requested group is unknown or None.
_PARAMS: Dict[str, Dict[str, float]] = {
    # Per-group coefficients (A >= 0 to ensure U-shape)
    "abstract_narrative_understanding": {
        "A": 0.03214388218595266,
        "x0": -1.9797057339438404,
        "y0": -0.6692579971226988,
    },
    "analogical_similarity": {
        "A": 0.00020609102288613097,
        "x0": -1.974158844930027,
        "y0": -0.5450904289774107,
    },
    "arc": {
        "A": 0.009434861805795676,
        "x0": -1.9797057339438404,
        "y0": -0.14428225628245808,
    },
    "arithmetic": {
        "A": 0.013609532311191466,
        "x0": -1.9722285414494285,
        "y0": -0.33307724639575065,
    },
    "conceptual_combinations": {
        "A": 0.009595500593550611,
        "x0": -1.9665433001166273,
        "y0": -0.47675901651862695,
    },
    "hellaswag": {
        "A": 0.006971187846219622,
        "x0": -1.9797057339438404,
        "y0": -0.09468762206014317,
    },
    "hindu_knowledge": {
        "A": 0.007301246345498951,
        "x0": 1.9175770859897108,
        "y0": -0.45176952034934426,
    },
    "mmlu": {
        "A": 0.012742411942846674,
        "x0": 2.498277609393177,
        "y0": -0.5604740985794635,
    },
    "parsinlu_qa_mc": {
        "A": 0.00014937797818099304,
        "x0": -1.9797057339438404,
        "y0": -0.4357720632980487,
    },
    # Global fallback (used when group is unknown)
    "__global__": {
        "A": 0.014783837901881494,
        "x0": -1.9797057339438404,
        "y0": -0.43768867398318495,
    },
}


def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict brier_score from compute with a convex per-group parabola.

    The law is the same for every group:

        brier_score_hat = A_g * (log_flops - x0_g)**2 + y0_g

    with A_g clamped to be non-negative so the curve never opens downwards.

    Args:
        input_data: Rows to predict for. Each row should carry 'log_flops'; when
                    that entry is missing or None, a positive 'flops' entry is
                    converted with log10 instead.
        group: Name of the experimental group. Unknown names (and None) use the
                "__global__" coefficients.

    Returns:
        One ``{"brier_score": value}`` dictionary per input row, in input order.

    Raises:
        ValueError: If a row has neither a usable 'log_flops' nor a positive 'flops'.
    """
    fallback = _PARAMS["__global__"]
    coeffs = fallback if group is None else _PARAMS.get(group, fallback)

    # Clamp the curvature at zero so the parabola stays convex.
    curvature = max(0.0, float(coeffs.get("A", 0.0)))
    vertex_x = float(coeffs.get("x0", 0.0))
    vertex_y = float(coeffs.get("y0", 0.0))

    def _log_compute(row) -> float:
        log_flops = row.get("log_flops")
        if log_flops is not None:
            return float(log_flops)
        flops = row.get("flops")
        if flops is not None and flops > 0:
            # Only raw compute was given: move it onto the log10 scale.
            return math.log10(float(flops))
        raise ValueError("Each input row must contain 'log_flops' or a positive 'flops'.")

    return [
        {"brier_score": float(curvature * (_log_compute(row) - vertex_x) ** 2 + vertex_y)}
        for row in input_data
    ]

#3 Run 3 R² = 0.232024

▼

Python

from __future__ import annotations
from typing import Dict, List

# Discovered U-shaped scaling law parameters per group, fitted on /app/data
# Model: brier_score = y0 + a * (log_flops - x0) ** 2
# Keys of each entry (as read by _predict_single):
#   "a"  - curvature of the parabola,
#   "x0" - log_flops position of the vertex,
#   "y0" - predicted brier_score at the vertex.
# If an unknown group is requested, law() falls back to the "__global__" entry.
_COEFS: Dict[str, Dict[str, float]] = {
    "mmlu": {"a": 0.01169773590789709, "x0": 2.6968603736796997, "y0": -0.5655432103249903},
    "parsinlu_qa_mc": {"a": 0.00034209457028094674, "x0": -2.5910947966754305, "y0": -0.43916728492647705},
    "arithmetic": {"a": 0.012246848542454692, "x0": -2.3491144570504465, "y0": -0.3492357139274325},
    "hindu_knowledge": {"a": 0.00757799492499591, "x0": 1.8390625647228829, "y0": -0.45066511033217826},
    "analogical_similarity": {"a": 0.00023197297948281156, "x0": -2.4115838253545965, "y0": -0.5459677024337186},
    "conceptual_combinations": {"a": 0.008958201903805205, "x0": -2.1651260644031494, "y0": -0.481736135841471},
    "hellaswag": {"a": 0.005890914852546286, "x0": -2.5910947966754305, "y0": -0.10808171765479378},
    "arc": {"a": 0.007939175580261961, "x0": -2.5910947966754305, "y0": -0.16192561080590254},
    "abstract_narrative_understanding": {"a": 0.02656654490315, "x0": -2.5910947966754305, "y0": -0.7224324643921198},
    "__global__": {"a": 0.01213867881111136, "x0": -2.5910947966754305, "y0": -0.4612036153597828},
}


def _predict_single(log_flops: float, coefs: Dict[str, float]) -> float:
    a = float(coefs["a"])
    x0 = float(coefs["x0"])
    y0 = float(coefs["y0"])
    return y0 + a * (log_flops - x0) ** 2


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predicts output variables based on input variables according to a discovered scaling law.

    The law is brier_score = y0 + a * (log_flops - x0) ** 2, with (a, x0, y0)
    taken from _COEFS for the requested group.

    Input handling is best-effort and never raises for missing inputs:
      * a non-None 'log_flops' entry is used directly;
      * otherwise a positive 'flops' entry is converted with log10;
      * otherwise (missing, None or non-positive inputs) the prediction is NaN.

    Args:
        input_data: A list of dictionaries, where each dictionary is a single data
                    point containing input variable names as keys and their
                    corresponding values.
        group: The name of the experimental group for which to make predictions.
                The functional form of the law must be the same for all groups,
                but the constant parameters/coefficients can differ per group.
                Unknown groups fall back to the "__global__" coefficients.

    Returns:
        A list of dictionaries, corresponding to the input_data list, with each
        dictionary containing the predicted output variable(s) under 'brier_score'.
    """
    # Imported once per call rather than once per row; kept function-local
    # because the module's top-level imports do not include math.
    import math

    # Retrieve coefficients for the requested group; fall back to global if not found.
    coefs = _COEFS.get(group, _COEFS["__global__"])  # type: ignore[index]
    nan = float("nan")

    outputs: List[Dict[str, float]] = []
    for row in input_data:
        log_flops = row.get("log_flops")
        if log_flops is not None:
            x = float(log_flops)  # type: ignore[arg-type]
        else:
            # 'log_flops' is absent (or None): derive it from raw 'flops' when possible.
            flops = row.get("flops")
            fl = float(flops) if flops is not None else nan  # type: ignore[arg-type]
            # Non-positive or NaN flops have no log10; propagate NaN instead of raising.
            x = math.log10(fl) if fl > 0 else nan
        y_hat = _predict_single(x, coefs)
        outputs.append({"brier_score": float(y_hat)})
    return outputs

#4 Run 4 R² = -1.000000

▼

Python

from __future__ import annotations

import math
from typing import Dict, List

# Quadratic (parabolic) scaling law in vertex form:
#   brier_score = A_g + B_g * (log_flops - C_g)**2
# The curve is U-shaped when B_g > 0 and inverted-U-shaped when B_g < 0.
# The functional form is identical across groups; only (A_g, B_g, C_g) vary.
# Coefficients were fitted on the provided dataset (/app/data) using
# least-squares per group (see explain.md for details).

# Per-group coefficients (A, B, C)
# Tuple order matters: _predict_single unpacks each entry as
#   A - predicted value at the vertex,
#   B - curvature,
#   C - log_flops position of the vertex,
# and evaluates A + B * (log_flops - C) ** 2.
# B is negative (curve opens downwards) for every group listed here except "mmlu".
COEFS: Dict[str, tuple[float, float, float]] = {
    "abstract_narrative_understanding": (
        7.970033279539426,
        -0.001002095718967947,
        92.17033191535235,
    ),
    "analogical_similarity": (
        -0.5304185448899755,
        -0.019175879672698386,
        0.7277707192544342,
    ),
    "arc": (
        -0.013302783633511131,
        -0.036868206393668695,
        1.5951344248085169,
    ),
    "arithmetic": (
        -0.1409780699201859,
        -0.12997814962868381,
        0.905421790691835,
    ),
    "conceptual_combinations": (
        -0.3764895974128954,
        -0.07148356706471518,
        0.6779596990512674,
    ),
    "hellaswag": (
        0.004186441953299702,
        -0.03367064575568232,
        1.4560376278632392,
    ),
    "hindu_knowledge": (
        -0.40326908437141745,
        -0.034402388960080924,
        -0.452635870593496,
    ),
    "mmlu": (
        -0.5667445812898363,
        0.011476264280523035,
        2.7435075277399577,
    ),
    "parsinlu_qa_mc": (
        -0.39171748895915726,
        -0.05656739537407165,
        0.8742300814130766,
    ),
}

# Global fallback coefficients (A, B, C) learned on all groups combined.
# Same (A, B, C) order as the COEFS entries; used by _predict_single for any
# group name that is not a key of COEFS.
GLOBAL_COEFS = (
    -0.9443866011285826,
    0.002644673247271381,
    -14.628568661252253,
)


def _predict_single(log_flops: float, group: str) -> float:
    """Evaluate A + B * (log_flops - C) ** 2 for one point.

    The (A, B, C) triple comes from COEFS for ``group``; groups without an
    entry use GLOBAL_COEFS.
    """
    intercept, curvature, vertex = COEFS.get(group, GLOBAL_COEFS)
    offset = log_flops - vertex
    return intercept + curvature * offset ** 2


def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predicts output variables based on input variables according to a discovered scaling law.

    This implementation uses a group-specific quadratic law in vertex form
    (same functional form for all groups):

        brier_score = A_g + B_g * (log_flops - C_g)**2

    If "log_flops" is missing or None but "flops" is present and positive, it is
    converted via log10.

    Args:
        input_data: A list of dicts with keys including either "log_flops" or
            "flops" (>0). Only these inputs are used.
        group: Experimental group name. The form of the law is the same for all
            groups; coefficients differ by group. Unknown groups fall back to a
            global fit.

    Returns:
        A list with dicts containing the predicted "brier_score" for each row.

    Raises:
        ValueError: If a row has no usable "log_flops" and its "flops" entry is
            missing, None, non-numeric, NaN or not strictly positive.
    """
    outputs: list[dict[str, float]] = []

    for row in input_data:
        log_flops = row.get("log_flops")
        if log_flops is not None:
            x = float(log_flops)  # already log10 scale in dataset
        else:
            # A missing, None or non-numeric 'flops' is mapped to NaN so that it is
            # reported through the same ValueError as a non-positive value
            # (float(None) would otherwise escape as a TypeError).
            try:
                fl = float(row.get("flops"))
            except (TypeError, ValueError):
                fl = float("nan")
            # `not (fl > 0.0)` is also true for NaN, so no separate isnan check is needed.
            if not (fl > 0.0):
                raise ValueError(
                    "Each input row must include 'log_flops' or a positive 'flops'."
                )
            x = math.log10(fl)

        y_hat = _predict_single(x, group)
        outputs.append({"brier_score": float(y_hat)})

    return outputs

#5 Run 5 R² = -1.000000

▼

Python

from __future__ import annotations

import math
from typing import Dict, List

# Coefficients fitted on the provided dataset at /app/data
# We use a single functional form for all groups (vertex form of a quadratic):
#   brier_score = alpha[g] + beta[g] * (log_flops - x0[g])**2
# where (alpha, beta, x0) are constants that depend on the group.
# These were obtained via least-squares fitting (equivalent to converting from
# the polynomial coefficients c2, c1, c0 with beta=c2, x0=-c1/(2*c2), alpha=y(x0)).

# Per-group constants of the vertex-form quadratic evaluated by _predict_vertex:
#   brier_score = alpha + beta * (log_flops - x0) ** 2
# Keys of each entry:
#   "alpha" - predicted value at the vertex,
#   "beta"  - curvature (negative for every group listed here except "mmlu"),
#   "x0"    - log_flops position of the vertex.
PARAMS: Dict[str, Dict[str, float]] = {
    # alpha, beta, x0 per group
    "abstract_narrative_understanding": {
        "alpha": 7.970033279539732,
        "beta": -0.001002095718967912,
        "x0": 92.1703319153556,
    },
    "analogical_similarity": {
        "alpha": -0.530418544889976,
        "beta": -0.019175879672698435,
        "x0": 0.7277707192544356,
    },
    "arc": {
        "alpha": -0.013302783633511228,
        "beta": -0.036868206393668744,
        "x0": 1.5951344248085157,
    },
    "arithmetic": {
        "alpha": -0.1409780699201859,
        "beta": -0.12997814962868387,
        "x0": 0.9054217906918345,
    },
    "conceptual_combinations": {
        "alpha": -0.3764895974128952,
        "beta": -0.07148356706471508,
        "x0": 0.6779596990512687,
    },
    "hellaswag": {
        "alpha": 0.0041864419532996605,
        "beta": -0.033670645755682356,
        "x0": 1.4560376278632394,
    },
    "hindu_knowledge": {
        "alpha": -0.4032690843714171,
        "beta": -0.034402388960081354,
        "x0": -0.4526358705934933,
    },
    "mmlu": {
        "alpha": -0.5667445812898321,
        "beta": 0.011476264280523694,
        "x0": 2.7435075277398155,
    },
    "parsinlu_qa_mc": {
        "alpha": -0.39171748895915737,
        "beta": -0.05656739537407183,
        "x0": 0.8742300814130773,
    },
}

# Fallback global fit (across all groups), used if an unknown group name is passed.
# Stored in standard polynomial form y = c2*x^2 + c1*x + c0 (not vertex form);
# law() passes these three values to _predict_poly.
GLOBAL_FALLBACK = {"c2": 0.002644673247271387, "c1": 0.07737556836857276, "c0": -0.3784396938370407}


def _predict_vertex(log_flops: float, params: Dict[str, float]) -> float:
    """Evaluate vertex-form quadratic with given parameters."""
    return params["alpha"] + params["beta"] * (log_flops - params["x0"]) ** 2


def _predict_poly(log_flops: float, c2: float, c1: float, c0: float) -> float:
    """Evaluate standard polynomial form y = c2*x^2 + c1*x + c0."""
    return c2 * log_flops * log_flops + c1 * log_flops + c0


def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict brier_score for each row with the group's quadratic scaling law.

    Known groups are evaluated in vertex form with their PARAMS entry; any other
    group name is evaluated with the polynomial-form GLOBAL_FALLBACK fit.

    Input handling never raises for unusable rows:
      * an int/float 'log_flops' entry is used directly;
      * otherwise an int/float 'flops' entry is converted with log10 when it is
        positive;
      * in every other case, or when the resulting value is not finite, the
        prediction is NaN.

    Args:
        input_data: A list of dictionaries, one per data point, mapping input
                    variable names to their values.
        group: The name of the experimental group for which to make predictions.
                The functional form is the same for all groups; only the
                constants differ.

    Returns:
        A list with one ``{"brier_score": value}`` dictionary per input row, in
        input order.
    """
    nan = float("nan")
    group_params = PARAMS.get(group)

    # Choose the evaluator once; the choice depends only on the group.
    if group_params is None:
        def _evaluate(x: float) -> float:
            return _predict_poly(x, GLOBAL_FALLBACK["c2"], GLOBAL_FALLBACK["c1"], GLOBAL_FALLBACK["c0"])
    else:
        def _evaluate(x: float) -> float:
            return _predict_vertex(x, group_params)

    def _log_compute(row) -> float:
        log_flops = row.get("log_flops")
        if isinstance(log_flops, (int, float)):
            return float(log_flops)  # primary expected input
        flops = row.get("flops")
        if isinstance(flops, (int, float)):
            # Best-effort path for callers that supply raw 'flops' only.
            return math.log10(float(flops)) if flops > 0 else nan
        return nan

    predictions: list[dict[str, float]] = []
    for row in input_data:
        x = _log_compute(row)
        y = _evaluate(x) if math.isfinite(x) else nan
        predictions.append({"brier_score": float(y)})
    return predictions