SLD - U-shaped Scaling Law - codex + o4-mini

Best Run 1 R² = -1.000000

▼

Python

from typing import List, Dict

# Fitted parabola parameters (a, b, c) for every group, used as
#     brier_score = a * x^2 + b * x + c
# where x is the 'log_flops' value of a data point.
_COEFS: Dict[str, tuple[float, float, float]] = {
    'mmlu': (
        0.011476264280523694, -0.06297043488789662, -0.480364650219835,
    ),
    'parsinlu_qa_mc': (
        -0.05656739537407183, 0.0989058373264011, -0.43495071806820146,
    ),
    'arithmetic': (
        -0.12997814962868387, 0.23537009797522832, -0.2475326777122078,
    ),
    'hindu_knowledge': (
        -0.034402388960081354, -0.031143510554884814, -0.4103174193780911,
    ),
    'analogical_similarity': (
        -0.019175879672698435, 0.0279112874834725, -0.5405750537735581,
    ),
    'conceptual_combinations': (
        -0.07148356706471508, 0.09692595522861085, -0.40934554313141813,
    ),
    'hellaswag': (
        -0.033670645755682356, 0.09805145434945438, -0.06719686154646047,
    ),
    'arc': (
        -0.036868206393668744, 0.11761949039897288, -0.1071122327154294,
    ),
    'abstract_narrative_understanding': (
        -0.001002095718967912, 0.18472699005645873, -0.5431407140744655,
    ),
}

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Evaluate the group's quadratic scaling law on a batch of data points.

    Each prediction is ``a * x * x + b * x + c`` where
    ``x = float(point['log_flops'])`` and ``(a, b, c)`` is the ``_COEFS``
    entry for ``group``.

    Args:
        input_data: Data points as dictionaries. Each one must provide a
                    'log_flops' entry that ``float()`` can convert; any
                    other keys are ignored.
        group: Key into ``_COEFS`` selecting which coefficients to apply.

    Returns:
        One ``{'brier_score': value}`` dictionary per data point, in the
        same order as ``input_data``.

    Raises:
        ValueError: If ``group`` has no entry in ``_COEFS``.
        KeyError: If a data point has no 'log_flops' key.
    """
    # Resolve the coefficients first so an unknown group fails before any
    # data point is touched.
    try:
        quad, lin, const = _COEFS[group]
    except KeyError:
        raise ValueError(f"Unknown group: {group}")

    def _evaluate(row):
        # Coerce to float so ints and numeric strings are accepted.
        x = float(row['log_flops'])
        return quad * x * x + lin * x + const

    return [{'brier_score': _evaluate(row)} for row in input_data]

#2 Run 2 R² = -1.000000

▼

Python

from typing import List, Dict

# Rounded per-group parabola parameters, stored by name.
# Model: brier_score = a * x^2 + b * x + c, with x taken from 'log_flops'.
# Entry order is significant: it is echoed in the "Available groups" error text.
_COEFFS: Dict[str, Dict[str, float]] = {
    'abstract_narrative_understanding': {
        'a': -0.001002, 'b': 0.184727, 'c': -0.543141,
    },
    'analogical_similarity': {
        'a': -0.019176, 'b': 0.027911, 'c': -0.540575,
    },
    'conceptual_combinations': {
        'a': -0.071484, 'b': 0.096926, 'c': -0.409346,
    },
    'mmlu': {
        'a': 0.011476, 'b': -0.062970, 'c': -0.480365,
    },
    'arithmetic': {
        'a': -0.129978, 'b': 0.235370, 'c': -0.247533,
    },
    'arc': {
        'a': -0.036868, 'b': 0.117619, 'c': -0.107112,
    },
    'parsinlu_qa_mc': {
        'a': -0.056567, 'b': 0.098906, 'c': -0.434951,
    },
    'hellaswag': {
        'a': -0.033671, 'b': 0.098051, 'c': -0.067197,
    },
    'hindu_knowledge': {
        'a': -0.034402, 'b': -0.031144, 'c': -0.410317,
    },
}

def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Compute the quadratic brier_score prediction for each data point.

    Args:
        input_data: Data points as dicts; each needs a non-None 'log_flops'
                    value. The value is used as-is (no float conversion).
        group: Name of a group present in ``_COEFFS``.

    Returns:
        A list with one ``{'brier_score': value}`` dict per data point,
        in input order.

    Raises:
        ValueError: If ``group`` is not a key of ``_COEFFS``.
        KeyError: If a data point's 'log_flops' is absent or None.
    """
    if group not in _COEFFS:
        raise ValueError(f"Unknown group '{group}'. Available groups: {list(_COEFFS.keys())}")

    fit = _COEFFS[group]
    quad, lin, const = fit['a'], fit['b'], fit['c']

    def _score(row):
        x = row.get('log_flops')
        # A stored None is treated the same as a missing key.
        if x is None:
            raise KeyError("Each input_data point must contain 'log_flops'.")
        return quad * x * x + lin * x + const

    return [{'brier_score': _score(row)} for row in input_data]

#3 Run 3 R² = -1.000000

▼

Python

from typing import List, Dict

# Coefficients (a, b, c) of a*x^2 + b*x + c for each group, where x is the
# 'log_flops' value of a data point. Defined once at module level so the
# table is not rebuilt on every call to law().
_PARAMS: Dict[str, tuple] = {
    'mmlu': (0.011476264280523694, -0.06297043488789662, -0.480364650219835),
    'arithmetic': (-0.12997814962868387, 0.23537009797522832, -0.2475326777122078),
    'abstract_narrative_understanding': (-0.001002095718967912, 0.18472699005645873, -0.5431407140744655),
    'conceptual_combinations': (-0.07148356706471508, 0.09692595522861085, -0.40934554313141813),
    'hellaswag': (-0.033670645755682356, 0.09805145434945438, -0.06719686154646047),
    'parsinlu_qa_mc': (-0.05656739537407183, 0.0989058373264011, -0.43495071806820146),
    'arc': (-0.036868206393668744, 0.11761949039897288, -0.1071122327154294),
    'analogical_similarity': (-0.019175879672698435, 0.0279112874834725, -0.5405750537735581),
    'hindu_knowledge': (-0.034402388960081354, -0.031143510554884814, -0.4103174193780911),
}


def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Predicts output variables based on input variables according to a discovered scaling law.

    The prediction for each data point is ``a * x**2 + b * x + c`` where
    ``x`` is the point's 'log_flops' value and ``(a, b, c)`` are the
    coefficients registered for ``group``.

    Args:
        input_data: A list of dictionaries, where each dictionary is a single data
                    point containing input variable names as keys and their
                    corresponding values. Each point must provide 'log_flops'.
        group: The name of the experimental group for which to make predictions.
               The functional form of the law is the same for all groups,
               but the coefficients differ per group.

    Returns:
        A list of dictionaries, corresponding to the input_data list, with each
        dictionary containing the predicted output variable(s) under the key 'brier_score'.

    Raises:
        ValueError: If ``group`` has no registered coefficients.
        KeyError: If a data point's 'log_flops' is missing or None.
    """
    if group not in _PARAMS:
        raise ValueError(f"Unknown group: {group}")
    a, b, c = _PARAMS[group]

    predictions: List[Dict[str, float]] = []
    for data in input_data:
        x = data.get('log_flops')
        # Fail with a clear message instead of letting None reach the
        # arithmetic below, where it would surface as an opaque TypeError.
        if x is None:
            raise KeyError("Each input_data point must contain 'log_flops'.")
        predictions.append({'brier_score': a * x**2 + b * x + c})
    return predictions

#4 Run 4 R² = -1.000000

▼

Python

from typing import List, Dict

"""
Module implementing the discovered scaling law for U-shaped performance curves.
"""

# Per-group [a, b, c] lists for the parabola y = a*x^2 + b*x + c,
# where x is a data point's 'log_flops' and y is the predicted 'brier_score'.
COEFFS: Dict[str, List[float]] = {
    "abstract_narrative_understanding": [
        -0.001002095718967912, 0.18472699005645873, -0.5431407140744655,
    ],
    "analogical_similarity": [
        -0.019175879672698435, 0.0279112874834725, -0.5405750537735581,
    ],
    "arc": [
        -0.036868206393668744, 0.11761949039897288, -0.1071122327154294,
    ],
    "arithmetic": [
        -0.12997814962868387, 0.23537009797522832, -0.2475326777122078,
    ],
    "conceptual_combinations": [
        -0.07148356706471508, 0.09692595522861085, -0.40934554313141813,
    ],
    "hellaswag": [
        -0.033670645755682356, 0.09805145434945438, -0.06719686154646047,
    ],
    "hindu_knowledge": [
        -0.034402388960081354, -0.031143510554884814, -0.4103174193780911,
    ],
    "mmlu": [
        0.011476264280523694, -0.06297043488789662, -0.480364650219835,
    ],
    "parsinlu_qa_mc": [
        -0.05656739537407183, 0.0989058373264011, -0.43495071806820146,
    ],
}

def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """
    Apply the group's quadratic scaling law to every data point.

    Args:
        input_data: Data points as dicts; each must have a 'log_flops' key.
                    The stored value is used directly in the arithmetic.
        group: Key of ``COEFFS`` whose coefficients should be applied.

    Returns:
        A list of ``{'brier_score': value}`` dicts, one per data point and
        in the same order as ``input_data``.

    Raises:
        ValueError: If ``group`` is not a key of ``COEFFS``.
        KeyError: If a data point has no 'log_flops' key.
    """
    if group not in COEFFS:
        raise ValueError(f"Unknown group: {group}")
    quad, lin, const = COEFFS[group]

    def _score(row):
        # Only key presence is checked; the value itself is not validated.
        if 'log_flops' not in row:
            raise KeyError("Each input_data entry must contain 'log_flops'.")
        x = row['log_flops']
        return quad * x * x + lin * x + const

    return [{'brier_score': _score(row)} for row in input_data]

#5 Run 5 R² = -1.000000

▼

U-shaped Scaling Law

All Runs (sorted by R²)