SLD - U-shaped Scaling Law

All Runs (sorted by R²)

Best Run 1 R² = 0.300870

▼

Python

from __future__ import annotations

from typing import Dict, List


def _params() -> Dict[str, Dict[str, float]]:
    """
    Learned parameters for each group for the U-shaped law:
    brier_score = a * (log_flops - c) ** 2 + d
    """
    return {
        # Fitted via least-squares with a>=0 enforced
        "abstract_narrative_understanding": {"a": 0.04001825364162668, "c": -1.3996294548824372, "d": -0.6199287929106076},
        "analogical_similarity": {"a": 0.00010371220793670686, "c": -1.3996294548824372, "d": -0.5438329258237591},
        "arc": {"a": 0.011434159908664807, "c": -1.3996294548824372, "d": -0.1276202057679939},
        "arithmetic": {"a": 0.0162306500936723, "c": -1.3996294548824372, "d": -0.3083157507005531},
        "conceptual_combinations": {"a": 0.01196725341226211, "c": -1.3996294548824372, "d": -0.4625683460391293},
        "hellaswag": {"a": 0.00839362107171478, "c": -1.3996294548824372, "d": -0.081992924539709},
        "hindu_knowledge": {"a": 0.00982758033980399, "c": 1.359497342333281, "d": -0.4441339558691414},
        "mmlu": {"a": 0.017046194119479145, "c": 1.9313646383491184, "d": -0.5466050695778857},
        "parsinlu_qa_mc": {"a": 1e-09, "c": -1.3996294548824372, "d": -0.4342412802172517},
    }


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Evaluate the U-shaped scaling law for every row of ``input_data``.

    Every group uses the same functional form::

        brier_score = a * (log_flops - c)^2 + d

    with ``(a, c, d)`` looked up per group via ``_params()``.

    Args:
        input_data: Rows to score. The value under ``'log_flops'`` is used;
            a row without that key is scored as if ``log_flops`` were 0.0.
        group: Name of the experimental group. A name that ``_params()`` does
            not know falls back to the fixed prior
            ``a=0.01, c=0.0, d=-0.3``.

    Returns:
        One ``{'brier_score': float}`` dictionary per input row, in input order.
    """
    fitted = _params()
    if group in fitted:
        coeffs = fitted[group]
    else:
        # Unknown group: small curvature centred at 0 keeps the U-shape.
        coeffs = {"a": 0.01, "c": 0.0, "d": -0.3}

    # A negative curvature would flip the parabola, so it is clamped to zero.
    curvature = 0.0
    if coeffs["a"] >= 0:
        curvature = float(coeffs["a"])
    center = float(coeffs["c"])
    offset = float(coeffs["d"])

    return [
        {"brier_score": float(curvature * (float(row.get("log_flops", 0.0)) - center) ** 2 + offset)}
        for row in input_data
    ]

#2 Run 2 R² = -1.000000

▼

Python

from __future__ import annotations

import math
from typing import Dict, List


# Per-group coefficients of the quadratic law
#   brier_score = a * (log_flops**2) + b * log_flops + c
# Original note: fitted via least squares on the provided dataset.
# Each row below is (group, a, b, c).
COEFS: Dict[str, Dict[str, float]] = {
    name: {"a": quad, "b": lin, "c": const}
    for name, quad, lin, const in (
        ("abstract_narrative_understanding", -0.001002095718967912, 0.18472699005645873, -0.5431407140744655),
        ("analogical_similarity", -0.019175879672698435, 0.0279112874834725, -0.5405750537735581),
        ("arc", -0.036868206393668744, 0.11761949039897288, -0.1071122327154294),
        ("arithmetic", -0.12997814962868387, 0.23537009797522832, -0.2475326777122078),
        ("conceptual_combinations", -0.07148356706471508, 0.09692595522861085, -0.40934554313141813),
        ("hellaswag", -0.033670645755682356, 0.09805145434945438, -0.06719686154646047),
        ("hindu_knowledge", -0.034402388960081354, -0.031143510554884814, -0.4103174193780911),
        ("mmlu", 0.011476264280523694, -0.06297043488789662, -0.480364650219835),
        ("parsinlu_qa_mc", -0.05656739537407183, 0.0989058373264011, -0.43495071806820146),
    )
}

# Coefficients used when the requested group is not a key of COEFS.
DEFAULT: Dict[str, float] = dict(
    a=0.002644673247271387,
    b=0.07737556836857276,
    c=-0.3784396938370407,
)


def _predict_single(log_flops: float, group: str) -> float:
    """
    Evaluate the group's quadratic at one ``log_flops`` value.

    Args:
        log_flops: The point at which to evaluate the quadratic.
        group: Key into ``COEFS``; an unknown key selects ``DEFAULT``.

    Returns:
        ``a * log_flops**2 + b * log_flops + c`` for the selected coefficients.
    """
    coef = COEFS[group] if group in COEFS else DEFAULT
    quadratic_term = coef["a"] * (log_flops ** 2)
    linear_term = coef["b"] * log_flops
    return quadratic_term + linear_term + coef["c"]


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predict ``brier_score`` for every row of ``input_data``.

    Args:
        input_data: Rows to score. A row is read as follows:
            a non-``None`` ``'log_flops'`` is used directly; otherwise a
            positive ``'flops'`` is converted with a base-10 logarithm.
        group: Name of the experimental group, passed through to
            ``_predict_single`` for every row. The functional form is the
            same for all groups; only the coefficients differ.

    Returns:
        One ``{"brier_score": float}`` dictionary per input row, in input order.

    Raises:
        ValueError: If a row offers neither a non-``None`` ``'log_flops'``
            nor a positive ``'flops'``.
    """

    def _log_flops_of(row: Dict[str, float]) -> float:
        # An explicit log-scale value always takes precedence.
        direct = row.get("log_flops")
        if direct is not None:
            return float(direct)
        # Otherwise assume raw flops and take log10 of it.
        raw = row.get("flops")
        if raw is not None and raw > 0:
            return math.log10(float(raw))
        raise ValueError("Each input datum must include 'log_flops' or a positive 'flops'.")

    return [
        {"brier_score": float(_predict_single(_log_flops_of(row), group))}
        for row in input_data
    ]

#3 Run 3 R² = -1.000000

▼

Python

from __future__ import annotations

import math
from typing import Dict, List


def _coefficients_by_group() -> Dict[str, Dict[str, float]]:
    """Quadratic coefficients fitted on the provided dataset.

    The functional form is: brier_score = a2 * x^2 + a1 * x + a0
    where x = log_flops (base-10). Coefficients are per group.
    """
    return {
        "abstract_narrative_understanding": {
            "a2": -0.001002095718967912,
            "a1": 0.18472699005645873,
            "a0": -0.5431407140744655,
        },
        "analogical_similarity": {
            "a2": -0.019175879672698435,
            "a1": 0.0279112874834725,
            "a0": -0.5405750537735581,
        },
        "arc": {
            "a2": -0.036868206393668744,
            "a1": 0.11761949039897288,
            "a0": -0.1071122327154294,
        },
        "arithmetic": {
            "a2": -0.12997814962868387,
            "a1": 0.23537009797522832,
            "a0": -0.2475326777122078,
        },
        "conceptual_combinations": {
            "a2": -0.07148356706471508,
            "a1": 0.09692595522861085,
            "a0": -0.40934554313141813,
        },
        "hellaswag": {
            "a2": -0.033670645755682356,
            "a1": 0.09805145434945438,
            "a0": -0.06719686154646047,
        },
        "hindu_knowledge": {
            "a2": -0.034402388960081354,
            "a1": -0.031143510554884814,
            "a0": -0.4103174193780911,
        },
        "mmlu": {
            "a2": 0.011476264280523694,
            "a1": -0.06297043488789662,
            "a0": -0.480364650219835,
        },
        "parsinlu_qa_mc": {
            "a2": -0.05656739537407183,
            "a1": 0.0989058373264011,
            "a0": -0.43495071806820146,
        },
    }


# Global fallback coefficients (across all groups)
_GLOBAL_COEFS = {"a2": 0.002644673247271387, "a1": 0.07737556836857276, "a0": -0.3784396938370407}


def _predict_quadratic(log_flops: float, coefs: Dict[str, float]) -> float:
    return coefs["a2"] * (log_flops ** 2) + coefs["a1"] * log_flops + coefs["a0"]


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predict ``brier_score`` for every row with the group's quadratic in ``log_flops``.

    Args:
        input_data: Rows to score. For each row the abscissa ``x`` is chosen as:
            a non-``None`` ``'log_flops'`` if present; otherwise
            ``log10(flops)`` when a non-``None`` ``'flops'`` is positive;
            otherwise ``0.0`` (this covers non-positive ``flops`` and rows
            with neither key).
        group: Name of the experimental group. Groups missing from
            ``_coefficients_by_group()`` use ``_GLOBAL_COEFS``.

    Returns:
        One ``{"brier_score": float}`` dictionary per input row, in input order.
    """
    coefs = _coefficients_by_group().get(group, _GLOBAL_COEFS)

    def _abscissa(row: Dict[str, float]) -> float:
        direct = row.get("log_flops")
        if direct is not None:
            return float(direct)
        raw = row.get("flops")
        if raw is None:
            # Nothing usable in the row: evaluate the law at x = 0.
            return 0.0
        flops = float(raw)
        # log10 is undefined for non-positive input, so those rows also use x = 0.
        return math.log10(flops) if flops > 0 else 0.0

    return [
        {"brier_score": float(_predict_quadratic(_abscissa(row), coefs))}
        for row in input_data
    ]

#4 Run 4 R² = -1.000000

▼

Python

from typing import List, Dict


# Quadratic "U-shaped" scaling law (parabolic) for brier_score as a function of log_flops:
#   brier_score = a[group] * x^2 + b[group] * x + c[group]
# where x = log_flops and (a, b, c) are group-specific coefficients fitted via OLS.

# Per-group coefficients fitted from /app/data
_COEFS = {
    # group: (a, b, c)
    "abstract_narrative_understanding": (-0.001002095718968019, 0.18472699005645857, -0.5431407140744654),
    "analogical_similarity": (-0.019175879672698144, 0.02791128748347238, -0.540575053773558),
    "arc": (-0.03686820639366876, 0.1176194903989729, -0.10711223271542945),
    "arithmetic": (-0.12997814962868384, 0.2353700979752282, -0.24753267771220774),
    "conceptual_combinations": (-0.07148356706471536, 0.09692595522861094, -0.40934554313141797),
    "hellaswag": (-0.033670645755682356, 0.09805145434945439, -0.06719686154646048),
    "hindu_knowledge": (-0.03440238896008094, -0.031143510554884568, -0.41031741937809096),
    "mmlu": (0.011476264280523023, -0.06297043488789655, -0.48036465021983477),
    "parsinlu_qa_mc": (-0.05656739537407183, 0.09890583732640096, -0.43495071806820157),
}

# Fallback coefficients (fit on all groups combined)
_GLOBAL = (0.0026446732472713928, 0.07737556836857278, -0.3784396938370408)


def _predict_one(x: float, group: str) -> float:
    """Evaluate ``a*x*x + b*x + c`` with the group's triple from ``_COEFS``, or ``_GLOBAL`` if the group is absent."""
    triple = _COEFS.get(group)
    if triple is None:
        triple = _GLOBAL
    quad, lin, const = triple
    return quad * x * x + lin * x + const


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predict ``brier_score`` for every row by delegating to ``_predict_one``.

    Args:
        input_data: Rows to score; every row must contain ``'log_flops'``.
        group: Name of the experimental group, passed to ``_predict_one``
            for each row. The functional form is shared across groups;
            only the coefficients differ.

    Returns:
        One ``{"brier_score": float}`` dictionary per input row, in input order.

    Raises:
        KeyError: If a row does not contain ``'log_flops'``.
    """

    def _score(row: Dict[str, float]) -> Dict[str, float]:
        if "log_flops" in row:
            # float() coerces ints / numeric strings before evaluation.
            return {"brier_score": float(_predict_one(float(row["log_flops"]), group))}
        raise KeyError("Each input row must include 'log_flops'.")

    return [_score(row) for row in input_data]

#5 Run 5 R² = -1.000000

▼

Python

from __future__ import annotations

from typing import Dict, List


# Quadratic (parabolic) scaling law per group:
#   brier_score = A_g * (log_flops)**2 + B_g * (log_flops) + C_g
# The same functional form is used across groups; only (A_g, B_g, C_g) differ.


_PARAMS: Dict[str, Dict[str, float]] = {
    # Fitted via ordinary least squares on the provided dataset
    # groups: arc, conceptual_combinations, abstract_narrative_understanding, arithmetic,
    #         parsinlu_qa_mc, hellaswag, analogical_similarity, mmlu, hindu_knowledge
    "arc": {"A": -0.03686820639366876, "B": 0.1176194903989729, "C": -0.10711223271542945},
    "conceptual_combinations": {"A": -0.07148356706471536, "B": 0.09692595522861094, "C": -0.40934554313141797},
    "abstract_narrative_understanding": {"A": -0.001002095718968019, "B": 0.18472699005645857, "C": -0.5431407140744654},
    "arithmetic": {"A": -0.12997814962868384, "B": 0.2353700979752282, "C": -0.24753267771220774},
    "parsinlu_qa_mc": {"A": -0.05656739537407183, "B": 0.09890583732640096, "C": -0.43495071806820157},
    "hellaswag": {"A": -0.033670645755682356, "B": 0.09805145434945439, "C": -0.06719686154646048},
    "analogical_similarity": {"A": -0.019175879672698144, "B": 0.02791128748347238, "C": -0.540575053773558},
    "mmlu": {"A": 0.011476264280523023, "B": -0.06297043488789655, "C": -0.48036465021983477},
    "hindu_knowledge": {"A": -0.03440238896008094, "B": -0.031143510554884568, "C": -0.41031741937809096},
}


# Global fallback (in case of an unseen group)
_GLOBAL: Dict[str, float] = {
    "A": 0.0026446732472713928,
    "B": 0.07737556836857278,
    "C": -0.3784396938370408,
}


def _predict_single(log_flops: float, coeffs: Dict[str, float]) -> float:
    return coeffs["A"] * (log_flops ** 2) + coeffs["B"] * log_flops + coeffs["C"]


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predict ``brier_score`` for every row with the group's quadratic in ``log_flops``.

    Args:
        input_data: Rows to score; every row must contain ``'log_flops'``.
        group: Name of the experimental group. Its coefficients are read from
            ``_PARAMS``; a group without an entry uses ``_GLOBAL``. The
            functional form is the same for all groups.

    Returns:
        One ``{'brier_score': float}`` dictionary per input row, in input order.

    Raises:
        KeyError: If a row does not contain ``'log_flops'``.
    """
    # Coefficients depend only on the group, so resolve them once up front.
    coeffs = _PARAMS[group] if group in _PARAMS else _GLOBAL

    def _score(row: Dict[str, float]) -> Dict[str, float]:
        if "log_flops" in row:
            # float() coerces ints / numeric strings before evaluation.
            return {"brier_score": float(_predict_single(float(row["log_flops"]), coeffs))}
        raise KeyError("Each input row must include 'log_flops'.")

    return [_score(row) for row in input_data]