SLD - U-shaped Scaling Law - claude-code + claude-haiku-4-5

All Runs (sorted by R²)

Best Run 1 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict ``brier_score`` from ``log_flops`` using a per-group quadratic.

    Every group shares the functional form

        brier_score = a * log_flops**2 + b * log_flops + c

    and differs only in the fitted constants ``(a, b, c)``.

    Args:
        input_data: Data points to score. Each point is read through its
            ``'log_flops'`` entry; other entries are ignored.
        group: Name of the experimental group whose constants are applied.

    Returns:
        One ``{'brier_score': value}`` dictionary per input point, in the
        same order as ``input_data``.

    Raises:
        ValueError: If ``group`` has no fitted constants.
        KeyError: If a data point has no ``'log_flops'`` entry.
    """
    # Fitted constants, stored as (a, b, c) for y = a*x**2 + b*x + c.
    # Key order is significant: it is echoed in the unknown-group message.
    quadratic_by_group = {
        'abstract_narrative_understanding': (-0.00100210, 0.18472699, -0.54314071),
        'analogical_similarity': (-0.01917588, 0.02791129, -0.54057506),
        'arc': (-0.03686821, 0.11761949, -0.10711223),
        'arithmetic': (-0.12997815, 0.23537010, -0.24753268),
        'conceptual_combinations': (-0.07148357, 0.09692596, -0.40934554),
        'hellaswag': (-0.03367065, 0.09805145, -0.06719686),
        'hindu_knowledge': (-0.03440239, -0.03114351, -0.41031742),
        'mmlu': (0.01147626, -0.06297043, -0.48036465),
        'parsinlu_qa_mc': (-0.05656740, 0.09890584, -0.43495072),
    }

    fitted = quadratic_by_group.get(group)
    if fitted is None:
        raise ValueError(f"Unknown group: {group}. Available groups: {list(quadratic_by_group.keys())}")
    a, b, c = fitted

    def _quadratic(x):
        # Term order is kept as a*x**2 + b*x + c so float rounding is stable.
        return a * (x ** 2) + b * x + c

    return [{'brier_score': _quadratic(row['log_flops'])} for row in input_data]

#2 Run 2 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict ``brier_score`` from ``log_flops`` using a per-group cubic.

    The shared functional form is

        brier_score = a + b*x + c*x**2 + d*x**3,   x = log_flops

    with the constants ``(a, b, c, d)`` looked up by group.

    Args:
        input_data: Data points to score; each must provide ``'log_flops'``.
        group: Name of the experimental group whose constants are applied.

    Returns:
        One ``{'brier_score': value}`` dictionary per input point, in input
        order.

    Raises:
        ValueError: If ``group`` has no fitted constants.
        KeyError: If a data point has no ``'log_flops'`` entry.
    """
    # Constants per group in ascending-power order: (a, b, c, d).
    # Key order is significant: it is echoed in the unknown-group message.
    cubic_by_group = {
        'mmlu': (-0.46800858115028976, -0.09051398063175257, -0.021489975492820463, 0.038972564334377374),
        'parsinlu_qa_mc': (-0.42695972888959444, 0.09922397715919735, -0.07412182634871955, 0.007862320749733887),
        'arithmetic': (-0.19604559143901135, 0.19459746009506773, -0.2546595928062467, 0.0821918665299763),
        'hindu_knowledge': (-0.3996961291750994, -0.13304218476057397, -0.044744391152483166, 0.17732908490551472),
        'analogical_similarity': (-0.5304705634090436, 0.022810292626678275, -0.04236743730754639, 0.013462997120128843),
        'conceptual_combinations': (-0.4078801162832065, 0.0937775827548764, -0.07532025741514908, 0.004479619863008543),
        'hellaswag': (-0.05190495110743704, 0.0986602590963027, -0.06726358022494805, 0.015045684664276419),
        'arc': (-0.08891677759778377, 0.11834389177138817, -0.0768395846098985, 0.0179024774544576),
        'abstract_narrative_understanding': (-0.5499928694872032, 0.18445421113956606, 0.014050582245724086, -0.006741835673370389),
    }

    fitted = cubic_by_group.get(group)
    if fitted is None:
        raise ValueError(f"Unknown group: {group}. Supported groups: {list(cubic_by_group.keys())}")
    intercept, linear, quadratic, cubic = fitted

    def _score(row):
        x = row['log_flops']
        # Terms are summed in ascending-power order, left to right.
        return {'brier_score': intercept + linear * x + quadratic * (x ** 2) + cubic * (x ** 3)}

    return list(map(_score, input_data))

#3 Run 3 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict ``brier_score`` from ``log_flops`` using a per-group quartic.

    The shared functional form is

        brier_score = a*x**4 + b*x**3 + c*x**2 + d*x + e,   x = log_flops

    with the constants ``a`` .. ``e`` looked up by group.

    Args:
        input_data: Data points to score; each must provide ``'log_flops'``.
        group: Name of the experimental group whose constants are applied.

    Returns:
        One ``{'brier_score': value}`` dictionary per input point, in input
        order.

    Raises:
        ValueError: If ``group`` has no fitted constants.
        KeyError: If a data point has no ``'log_flops'`` entry.
    """
    # Constants per group in descending-power order: (a, b, c, d, e).
    # Key order is significant: it is echoed in the unknown-group message.
    quartic_by_group = {
        'abstract_narrative_understanding': (0.00297357, -0.01574499, 0.01733127, 0.19073961, -0.55204005),
        'analogical_similarity': (-0.02879407, 0.07981207, -0.04569143, -0.02378954, -0.52241379),
        'arc': (0.00112476, 0.01449702, -0.07559866, 0.12072136, -0.08969112),
        'arithmetic': (-0.15560601, 0.40313018, -0.23428989, -0.02766946, -0.16636060),
        'conceptual_combinations': (-0.09769580, 0.11494576, -0.00120401, 0.02484563, -0.41118812),
        'hellaswag': (0.00058158, 0.01328482, -0.06662193, 0.09988958, -0.05230534),
        'hindu_knowledge': (0.03736588, 0.18072138, -0.07440077, -0.13412345, -0.39668110),
        'mmlu': (0.01953715, 0.01668490, -0.03592157, -0.07667280, -0.46762245),
        'parsinlu_qa_mc': (0.00604901, -0.01045242, -0.06744806, 0.11201012, -0.43112421),
    }

    fitted = quartic_by_group.get(group)
    if fitted is None:
        raise ValueError(f"Unknown group: {group}. Available groups: {list(quartic_by_group.keys())}")
    a, b, c, d, e = fitted

    def _quartic(x):
        # Terms are summed from the highest power down, left to right.
        return a * (x ** 4) + b * (x ** 3) + c * (x ** 2) + d * x + e

    return [{'brier_score': _quartic(row['log_flops'])} for row in input_data]

#4 Run 4 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict ``brier_score`` from ``log_flops`` using a per-group quadratic.

    The shared functional form is

        brier_score = a + b*x + c*x**2,   x = log_flops

    with the constants ``(a, b, c)`` looked up by group.

    Args:
        input_data: Data points to score. Each point is queried with
            ``.get('log_flops')``.
        group: Name of the experimental group whose constants are applied.

    Returns:
        One ``{'brier_score': value}`` dictionary per input point, in input
        order.

    Raises:
        ValueError: If ``group`` has no fitted constants, or if a data point's
            ``'log_flops'`` is absent or ``None``.
    """
    # Constants per group in ascending-power order: (a, b, c).
    quadratic_by_group = {
        'abstract_narrative_understanding': (-0.543141, 0.184727, -0.001002),
        'analogical_similarity': (-0.540575, 0.027911, -0.019176),
        'arc': (-0.107112, 0.117619, -0.036868),
        'arithmetic': (-0.247533, 0.235370, -0.129978),
        'conceptual_combinations': (-0.409346, 0.096926, -0.071484),
        'hellaswag': (-0.067197, 0.098051, -0.033671),
        'hindu_knowledge': (-0.410317, -0.031144, -0.034402),
        'mmlu': (-0.480365, -0.062970, 0.011476),
        'parsinlu_qa_mc': (-0.434951, 0.098906, -0.056567),
    }

    fitted = quadratic_by_group.get(group)
    if fitted is None:
        raise ValueError(f"Unknown group: {group}")
    a, b, c = fitted

    def _score(row):
        x = row.get('log_flops')
        # An explicit None is treated the same as a missing key.
        if x is None:
            raise ValueError(f"Missing 'log_flops' in input data: {row}")
        return {'brier_score': a + b * x + c * (x ** 2)}

    return [_score(row) for row in input_data]

#5 Run 5 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict ``brier_score`` from ``log_flops`` using a per-group quadratic.

    The shared functional form is

        brier_score = a * log_flops**2 + b * log_flops + c

    with the constants ``(a, b, c)`` looked up by group. The sign of ``a``
    sets whether a group's curve opens upward or downward.

    Args:
        input_data: Data points to score; each must provide ``'log_flops'``.
        group: Name of the experimental group whose constants are applied.

    Returns:
        One ``{'brier_score': value}`` dictionary per input point, in input
        order.

    Raises:
        ValueError: If ``group`` has no fitted constants.
        KeyError: If a data point has no ``'log_flops'`` entry.
    """
    # Constants per group in descending-power order: (a, b, c).
    # Key order is significant: it is echoed in the unknown-group message.
    fitted_by_group = {
        'abstract_narrative_understanding': (-0.00100210, 0.18472699, -0.54314071),
        'analogical_similarity': (-0.01917588, 0.02791129, -0.54057505),
        'arc': (-0.03686821, 0.11761949, -0.10711223),
        'arithmetic': (-0.12997815, 0.23537010, -0.24753268),
        'conceptual_combinations': (-0.07148357, 0.09692596, -0.40934554),
        'hellaswag': (-0.03367065, 0.09805145, -0.06719686),
        'hindu_knowledge': (-0.03440239, -0.03114351, -0.41031742),
        'mmlu': (0.01147626, -0.06297043, -0.48036465),
        'parsinlu_qa_mc': (-0.05656740, 0.09890584, -0.43495072),
    }

    fitted = fitted_by_group.get(group)
    if fitted is None:
        raise ValueError(f"Unknown group: {group}. Available groups: {list(fitted_by_group.keys())}")
    a, b, c = fitted

    # The single-element inner loop binds x once per row so it can be reused
    # in both the squared and the linear term.
    return [
        {'brier_score': a * (x ** 2) + b * x + c}
        for row in input_data
        for x in (row['log_flops'],)
    ]