SLD - U-shaped Scaling Law - opencode + GPT-5

All Runs (sorted by R²)

Best Run 1 R² = 0.302510

▼

Python

from __future__ import annotations
from typing import List, Dict

# Per-group coefficients of the fitted quadratic in vertex form:
#     brier_score = a * (log_flops - c)**2 + b
# c is the log_flops position of the vertex, b the value at the vertex and
# a the curvature.
# NOTE: 'parsinlu_qa_mc' has a == 0.0, so its prediction is the constant b.
_PARAMS: Dict[str, Dict[str, float]] = {
    name: {'a': a, 'b': b, 'c': c}
    for name, (a, b, c) in (
        ('mmlu', (0.01625188241125213, -0.5485520085426114, 2.0141120689193435)),
        ('parsinlu_qa_mc', (0.0, -0.4342414968542909, 1.1106711713084738)),
        ('arithmetic', (0.015354018314906261, -0.31591913431197544, -1.5743719559664417)),
        ('hindu_knowledge', (0.01020143688094949, -0.4433503348958553, 1.2992805614553293)),
        ('analogical_similarity', (0.00015458082325933974, -0.5443035235943459, -1.6056066401185167)),
        ('conceptual_combinations', (0.011559745311168344, -0.4646413787056248, -1.4823777596427932)),
        ('hellaswag', (0.007609182534322093, -0.08846707981157574, -1.6953621257789337)),
        ('arc', (0.010326836994446163, -0.1361076284330853, -1.6953621257789337)),
        ('abstract_narrative_understanding', (0.03559099077642667, -0.644905899063743, -1.6953621257789337)),
        # Fallback entry, used when an unseen group is requested
        ('__default__', (0.016435167540703028, -0.42693678124584933, -1.6953621257789337)),
    )
}


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Evaluate the fitted quadratic for every data point of one group.

    The prediction is ``a * (log_flops - c)**2 + b``, with ``(a, b, c)`` read
    from ``_PARAMS`` for ``group``. The ``'__default__'`` entry is used when
    the group is not in the table.

    Args:
        input_data: A list of data points; each one must provide a
                    ``'log_flops'`` value.
        group: The name of the experimental group whose coefficients are used.

    Returns:
        A list with one ``{'brier_score': value}`` dictionary per input data
        point, in input order.

    Raises:
        KeyError: If a data point has no ``'log_flops'`` key.
    """
    coeffs = _PARAMS.get(group, _PARAMS['__default__'])
    curvature, offset, vertex = (float(coeffs[name]) for name in ('a', 'b', 'c'))

    def _score(point: Dict[str, float]) -> float:
        # Validate before reading so the caller gets the descriptive message.
        if 'log_flops' not in point:
            raise KeyError("Each input data point must contain 'log_flops'.")
        log_flops = float(point['log_flops'])
        return float(curvature * (log_flops - vertex) ** 2 + offset)

    return [{'brier_score': _score(point)} for point in input_data]

#2 Run 2 R² = 0.297759

▼

Python

from __future__ import annotations

from math import log10
from typing import Dict, List

# Per-group coefficients of the fitted quadratic
#     brier_score_hat = d + k * (log_flops - m)**2
# Every k below is positive, so each curve is convex (U-shaped) with its
# minimum value d reached at log_flops = m.
PARAMS: Dict[str, Dict[str, float]] = {
    "mmlu": dict(k=0.01704616125434527, m=1.9313637641589874, d=-0.5466048579664784),
    "parsinlu_qa_mc": dict(k=0.04264969208461222, m=1.1106711713084738, d=-0.4639186971129139),
    "arithmetic": dict(k=0.016230746051166, m=-1.399629454882437, d=-0.3083157190410354),
    "hindu_knowledge": dict(k=0.009827575915393679, m=1.359498558187776, d=-0.444134127307656),
    "analogical_similarity": dict(k=0.0001037118511611998, m=-1.399629454882437, d=-0.5438333453891991),
    "conceptual_combinations": dict(k=0.01196734428335446, m=-1.399629454882437, d=-0.46256826937707934),
    "hellaswag": dict(k=0.008393616154251387, m=-1.399629454882437, d=-0.0819928507768235),
    "arc": dict(k=0.011434208975433508, m=-1.399629454882437, d=-0.12762040957756218),
    "abstract_narrative_understanding": dict(k=0.04001830596210182, m=-1.399629454882437, d=-0.6199291547329363),
}

# Coefficients applied when the requested group is not a key of PARAMS
FALLBACK = dict(k=0.01857402239367786, m=-1.399629454882437, d=-0.41593049559167405)


def _predict_one(log_flops: float, params: Dict[str, float]) -> float:
    """Return ``d + k * (log_flops - m)**2`` for one ``log_flops`` value.

    Args:
        log_flops: The input value the quadratic is evaluated at.
        params: Mapping providing the coefficients ``"k"``, ``"m"`` and ``"d"``.

    Returns:
        The value of the quadratic at ``log_flops``.
    """
    curvature, vertex, offset = params["k"], params["m"], params["d"]
    deviation = log_flops - vertex
    return offset + curvature * deviation ** 2


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predict the Brier score for every data point of one group.

    Coefficients are taken from ``PARAMS`` for ``group``, or from ``FALLBACK``
    when the group is not listed, and the prediction itself is computed by
    ``_predict_one``.

    Args:
        input_data: A list of data points. Each one must provide a non-None
                    ``"log_flops"`` or, failing that, a non-None ``"flops"``
                    (converted with ``log10``).
        group: The name of the experimental group whose coefficients are used.

    Returns:
        A list with one ``{"brier_score": value}`` dictionary per input data
        point, in input order.

    Raises:
        KeyError: If a data point has neither a usable ``"log_flops"`` nor a
            usable ``"flops"`` value.
    """
    coeffs = PARAMS.get(group, FALLBACK)

    predictions: List[Dict[str, float]] = []
    for point in input_data:
        log_value = point.get("log_flops")
        raw_flops = point.get("flops")
        if log_value is not None:
            x = float(log_value)
        elif raw_flops is not None:
            # Raw flops were supplied instead; move them onto the log10 scale.
            x = log10(float(raw_flops))
        else:
            raise KeyError("Each input row must contain 'log_flops' or 'flops'.")
        predictions.append({"brier_score": float(_predict_one(x, coeffs))})
    return predictions

#3 Run 3 R² = -1.000000

▼

Python

from typing import List, Dict

# Quadratic scaling law in x = log_flops:
#   brier_score_hat = a_g * x^2 + b_g * x + c_g
# with one (a_g, b_g, c_g) triple per group g.
# Per the original note, the coefficients come from a least-squares fit
# (numpy.polyfit, degree=2) on the dataset at /app/data.
# NOTE: a is negative for every group below except "mmlu", so those curves
# open downwards (inverted U) rather than being U-shaped.

COEFFS: Dict[str, Dict[str, float]] = {
    "abstract_narrative_understanding": {"a": -0.001002095718967912, "b": 0.18472699005645873, "c": -0.5431407140744655},
    "analogical_similarity": {"a": -0.019175879672698435, "b": 0.0279112874834725, "c": -0.5405750537735581},
    "arc": {"a": -0.036868206393668744, "b": 0.11761949039897288, "c": -0.1071122327154294},
    "arithmetic": {"a": -0.12997814962868387, "b": 0.23537009797522832, "c": -0.2475326777122078},
    "conceptual_combinations": {"a": -0.07148356706471508, "b": 0.09692595522861085, "c": -0.40934554313141813},
    "hellaswag": {"a": -0.033670645755682356, "b": 0.09805145434945438, "c": -0.06719686154646047},
    "hindu_knowledge": {"a": -0.034402388960081354, "b": -0.031143510554884814, "c": -0.4103174193780911},
    "mmlu": {"a": 0.011476264280523694, "b": -0.06297043488789662, "c": -0.480364650219835},
    "parsinlu_qa_mc": {"a": -0.05656739537407183, "b": 0.0989058373264011, "c": -0.43495071806820146},
}

# Coefficients applied when the requested group is not a key of COEFFS
GLOBAL_COEFFS = {
    "a": 0.002644673247271387,
    "b": 0.07737556836857276,
    "c": -0.3784396938370407,
}


def _predict(log_flops: float, coeffs: Dict[str, float]) -> float:
    """Return ``a * log_flops**2 + b * log_flops + c`` for one input value.

    Args:
        log_flops: The input value the polynomial is evaluated at.
        coeffs: Mapping providing the coefficients ``"a"``, ``"b"`` and ``"c"``.

    Returns:
        The value of the polynomial at ``log_flops``.
    """
    quad, lin, const = coeffs["a"], coeffs["b"], coeffs["c"]
    quadratic_term = quad * (log_flops ** 2)
    linear_term = lin * log_flops
    return quadratic_term + linear_term + const


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predict the Brier score for every data point of one group.

    Coefficients are taken from ``COEFFS`` for ``group``, or from
    ``GLOBAL_COEFFS`` when the group is not listed, and the polynomial is
    evaluated by ``_predict``.

    Args:
        input_data: A list of data points; each one must provide a
                    ``"log_flops"`` value.
        group: The name of the experimental group whose coefficients are used.

    Returns:
        A list with one ``{"brier_score": value}`` dictionary per input data
        point, in input order.

    Raises:
        KeyError: If a data point has no ``"log_flops"`` key.
    """
    chosen = COEFFS.get(group, GLOBAL_COEFFS)

    def _require_log_flops(point: Dict[str, float]) -> float:
        # Validate before reading so the caller gets the descriptive message.
        if "log_flops" not in point:
            raise KeyError("Each input item must include 'log_flops'.")
        return float(point["log_flops"])

    return [
        {"brier_score": float(_predict(_require_log_flops(point), chosen))}
        for point in input_data
    ]

#4 Run 4 R² = -1.000000

▼

Python

from __future__ import annotations
from typing import List, Dict

# Per-group coefficients of the quadratic in vertex form, fitted on /app/data
# (per the original note):
#     brier_score = A + B * (log_flops - C)**2
# A is the value at the vertex, B the curvature and C the vertex position.
# NOTE: B is negative for every group below except "mmlu", so those curves
# have a maximum (not a minimum) at log_flops = C.
PARAMS: Dict[str, Dict[str, float]] = {
    name: {"A": value_at_vertex, "B": curvature, "C": vertex}
    for name, (value_at_vertex, curvature, vertex) in (
        ("abstract_narrative_understanding", (7.970033279539732, -0.001002095718967912, 92.1703319153556)),
        ("analogical_similarity", (-0.530418544889976, -0.019175879672698435, 0.7277707192544356)),
        ("arc", (-0.013302783633511214, -0.036868206393668744, 1.5951344248085157)),
        ("arithmetic", (-0.1409780699201859, -0.12997814962868387, 0.9054217906918345)),
        ("conceptual_combinations", (-0.3764895974128952, -0.07148356706471508, 0.6779596990512687)),
        ("hellaswag", (0.0041864419532996605, -0.033670645755682356, 1.4560376278632394)),
        ("hindu_knowledge", (-0.4032690843714171, -0.034402388960081354, -0.4526358705934933)),
        ("mmlu", (-0.5667445812898321, 0.011476264280523694, 2.7435075277398155)),
        ("parsinlu_qa_mc", (-0.39171748895915737, -0.05656739537407183, 0.8742300814130773)),
    )
}

# Pooled coefficients applied when the requested group is not a key of PARAMS
DEFAULT_PARAMS = dict(A=-0.9443866011285821, B=0.002644673247271387, C=-14.62856866125223)


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Evaluate the vertex-form quadratic for every data point of one group.

    The prediction is ``A + B * (log_flops - C)**2``, with ``(A, B, C)`` read
    from ``PARAMS`` for ``group`` or from ``DEFAULT_PARAMS`` when the group is
    not listed.

    Args:
        input_data: A list of data points; each one must provide a
                    ``"log_flops"`` value.
        group: The name of the experimental group whose coefficients are used.

    Returns:
        A list with one ``{"brier_score": value}`` dictionary per input data
        point, in input order.

    Raises:
        KeyError: If a data point has no ``"log_flops"`` key (raised by the
            dictionary lookup itself).
    """
    chosen = PARAMS.get(group, DEFAULT_PARAMS)
    # value at the vertex, curvature (U-shaped if > 0, inverted if < 0),
    # and position of the vertex
    at_vertex, curvature, vertex = (float(chosen[key]) for key in ("A", "B", "C"))

    return [
        {"brier_score": float(at_vertex + curvature * (float(point["log_flops"]) - vertex) ** 2)}
        for point in input_data
    ]

#5 Run 5 R² = -1.000000

▼

Python

from __future__ import annotations

import math
from typing import Dict, List

# Quadratic scaling law in x = log_flops, one coefficient triple per group:
#     y = A*x^2 + B*x + C
# Per the original note, the coefficients come from a least-squares fit on
# the provided dataset.
# NOTE: A is negative for every named group except "mmlu", so those curves
# open downwards.
COEFFICIENTS: Dict[str, Dict[str, float]] = {
    # Fallback entry, used when an unknown group is requested
    "GLOBAL": {
        "A": 0.002644673247271387,
        "B": 0.07737556836857276,
        "C": -0.3784396938370407,
    },
    # Named groups
    "abstract_narrative_understanding": {
        "A": -0.001002095718967912,
        "B": 0.18472699005645873,
        "C": -0.5431407140744655,
    },
    "analogical_similarity": {
        "A": -0.019175879672698435,
        "B": 0.0279112874834725,
        "C": -0.5405750537735581,
    },
    "arc": {
        "A": -0.036868206393668744,
        "B": 0.11761949039897288,
        "C": -0.1071122327154294,
    },
    "arithmetic": {
        "A": -0.12997814962868387,
        "B": 0.23537009797522832,
        "C": -0.2475326777122078,
    },
    "conceptual_combinations": {
        "A": -0.07148356706471508,
        "B": 0.09692595522861085,
        "C": -0.40934554313141813,
    },
    "hellaswag": {
        "A": -0.033670645755682356,
        "B": 0.09805145434945438,
        "C": -0.06719686154646047,
    },
    "hindu_knowledge": {
        "A": -0.034402388960081354,
        "B": -0.031143510554884814,
        "C": -0.4103174193780911,
    },
    "mmlu": {
        "A": 0.011476264280523694,
        "B": -0.06297043488789662,
        "C": -0.480364650219835,
    },
    "parsinlu_qa_mc": {
        "A": -0.05656739537407183,
        "B": 0.0989058373264011,
        "C": -0.43495071806820146,
    },
}


def _predict_single(x: float, coefs: Dict[str, float]) -> float:
    """Return ``A * x**2 + B * x + C`` for one input value.

    Args:
        x: The input value the polynomial is evaluated at.
        coefs: Mapping providing the coefficients ``"A"``, ``"B"`` and ``"C"``.

    Returns:
        The value of the polynomial at ``x``.
    """
    quad, lin, const = coefs["A"], coefs["B"], coefs["C"]
    quadratic_term = quad * (x ** 2)
    linear_term = lin * x
    return quadratic_term + linear_term + const


def _extract_x(d: Dict[str, float]) -> float:
    """Return the ``log_flops`` value to use for one input record.

    ``log_flops`` is used when it is present and not ``None``. Otherwise the
    value is derived as ``log10(flops)`` from a strictly positive ``flops``.
    A ``log_flops`` of ``None`` is treated as missing so that a record that
    also carries a valid ``flops`` still resolves instead of failing inside
    ``float(None)``.

    Args:
        d: One input data point.

    Returns:
        The value of ``log_flops`` (given or derived) as a float.

    Raises:
        KeyError: If the record has neither a non-None ``log_flops`` nor a
            positive ``flops``.
    """
    log_flops = d.get("log_flops")
    if log_flops is not None:
        return float(log_flops)
    # Fallback: compute log10 if raw flops provided. Convert once up front so
    # the positivity check and the logarithm see the same float value.
    raw_flops = d.get("flops")
    if raw_flops is not None:
        flops = float(raw_flops)
        if flops > 0:
            return math.log10(flops)
    raise KeyError("Input dict must contain 'log_flops' or positive 'flops'.")


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predict the Brier score for every data point of one group.

    Coefficients are taken from ``COEFFICIENTS`` for ``group``, or from its
    ``"GLOBAL"`` entry when the group is not listed. The input value of each
    data point is obtained with ``_extract_x`` and the polynomial is evaluated
    by ``_predict_single``.

    Args:
        input_data: A list of data points, each providing the inputs that
                    ``_extract_x`` reads (``"log_flops"`` or ``"flops"``).
        group: The name of the experimental group whose coefficients are used.

    Returns:
        A list with one ``{"brier_score": value}`` dictionary per input data
        point, in input order.

    Raises:
        KeyError: Propagated from ``_extract_x`` when a data point provides no
            usable input.
    """
    # Unseen groups fall back to the pooled "GLOBAL" coefficients.
    chosen = COEFFICIENTS.get(group, COEFFICIENTS["GLOBAL"])
    return [
        {"brier_score": float(_predict_single(_extract_x(point), chosen))}
        for point in input_data
    ]