SLD - U-shaped Scaling Law - claude-code + claude-sonnet-4-5

All Runs (sorted by R²)

Best Run 1 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Evaluate the per-group quadratic scaling law on a batch of data points.

    The model is ``brier_score = a * log_flops**2 + b * log_flops + c``. The
    functional form is shared by every group; only the coefficients
    ``(a, b, c)`` change from group to group.

    Args:
        input_data: Data points to evaluate. Each dictionary must provide a
                    'log_flops' entry; no other key is read.
        group: Name of the experimental group whose coefficients are used.

    Returns:
        One ``{'brier_score': value}`` dictionary per input point, in the
        same order as ``input_data``.

    Raises:
        ValueError: If ``group`` has no entry in the coefficient table.
        KeyError: If a data point (a dict) has no 'log_flops' entry.
    """

    # (a, b, c) for every group. The original author's note says these were
    # obtained by fitting the training data with scipy.optimize.curve_fit.
    # Key order matters: it is echoed verbatim in the error message below.
    coefficients = {
        'mmlu': (0.01147626, -0.06297043, -0.48036465),
        'parsinlu_qa_mc': (-0.05656740, 0.09890584, -0.43495072),
        'arithmetic': (-0.12997815, 0.23537010, -0.24753268),
        'hindu_knowledge': (-0.03440239, -0.03114351, -0.41031742),
        'analogical_similarity': (-0.01917588, 0.02791129, -0.54057506),
        'conceptual_combinations': (-0.07148357, 0.09692596, -0.40934554),
        'hellaswag': (-0.03367065, 0.09805145, -0.06719686),
        'arc': (-0.03686821, 0.11761949, -0.10711223),
        'abstract_narrative_understanding': (-0.00100210, 0.18472699, -0.54314071),
    }

    # Reject unknown groups before touching any data point.
    if group not in coefficients:
        raise ValueError(f"Unknown group: {group}. Available groups: {list(coefficients.keys())}")

    a, b, c = coefficients[group]

    def predict(point):
        # Read the input once, then evaluate a*x^2 + b*x + c left to right.
        x = point['log_flops']
        return {'brier_score': a * (x ** 2) + b * x + c}

    return [predict(point) for point in input_data]

#2 Run 2 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict the Brier score from log-FLOPs with a group-specific quadratic.

    Every group uses the same formula,
    ``brier_score = a * log_flops**2 + b * log_flops + c``, and differs only
    in the constants ``a``, ``b`` and ``c``.

    Args:
        input_data: Data points to evaluate. Only the 'log_flops' entry of
                    each dictionary is read.
        group: Name of the experimental group selecting the constants.

    Returns:
        A list aligned with ``input_data`` whose elements are
        ``{'brier_score': prediction}`` dictionaries.

    Raises:
        ValueError: If ``group`` is not one of the fitted groups.
        KeyError: If a data point (a dict) lacks the 'log_flops' entry.
    """

    # Constants of a*x^2 + b*x + c per group; the original note states they
    # were determined by fitting to the training data.
    fitted = {
        'abstract_narrative_understanding': {'a': -0.001002, 'b': 0.184727, 'c': -0.543141},
        'analogical_similarity': {'a': -0.019176, 'b': 0.027911, 'c': -0.540575},
        'arc': {'a': -0.036868, 'b': 0.117619, 'c': -0.107112},
        'arithmetic': {'a': -0.129978, 'b': 0.235370, 'c': -0.247533},
        'conceptual_combinations': {'a': -0.071484, 'b': 0.096926, 'c': -0.409346},
        'hellaswag': {'a': -0.033671, 'b': 0.098051, 'c': -0.067197},
        'hindu_knowledge': {'a': -0.034402, 'b': -0.031144, 'c': -0.410317},
        'mmlu': {'a': 0.011476, 'b': -0.062970, 'c': -0.480365},
        'parsinlu_qa_mc': {'a': -0.056567, 'b': 0.098906, 'c': -0.434951},
    }

    # Table values are never None, so None reliably signals an unknown group.
    selected = fitted.get(group)
    if selected is None:
        raise ValueError(f"Unknown group: {group}. Available groups: {list(fitted.keys())}")

    a, b, c = selected['a'], selected['b'], selected['c']

    # Pull out each x first so 'log_flops' is looked up once per data point.
    x_values = (point['log_flops'] for point in input_data)
    return [{'brier_score': a * x**2 + b * x + c} for x in x_values]

#3 Run 3 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Apply the quadratic scaling law ``y = a*x^2 + b*x + c`` for one group.

    Here ``x`` is the 'log_flops' value of a data point and ``y`` is the
    predicted 'brier_score'. The formula is common to all groups; the
    constants are looked up per group.

    Args:
        input_data: Data points to evaluate; each dictionary must contain a
                    'log_flops' entry (other entries are not read).
        group: Name of the experimental group to predict for.

    Returns:
        A list with one ``{'brier_score': y}`` dictionary per data point,
        in input order.

    Raises:
        ValueError: If ``group`` is not present in the fitted table.
        KeyError: If a data point (a dict) has no 'log_flops' entry.
    """

    # One row per group: (name, a, b, c). Row order is kept by the dict
    # built below and therefore appears unchanged in the error message.
    rows = (
        ('mmlu', 0.01147626, -0.06297043, -0.48036465),
        ('parsinlu_qa_mc', -0.05656740, 0.09890584, -0.43495072),
        ('arithmetic', -0.12997815, 0.23537010, -0.24753268),
        ('hindu_knowledge', -0.03440239, -0.03114351, -0.41031742),
        ('analogical_similarity', -0.01917588, 0.02791129, -0.54057506),
        ('conceptual_combinations', -0.07148357, 0.09692596, -0.40934554),
        ('hellaswag', -0.03367065, 0.09805145, -0.06719686),
        ('arc', -0.03686821, 0.11761949, -0.10711223),
        ('abstract_narrative_understanding', -0.00100210, 0.18472699, -0.54314071),
    )
    table = {name: (quad, lin, const) for name, quad, lin, const in rows}

    if group not in table:
        raise ValueError(f"Unknown group: {group}. Available groups: {list(table.keys())}")

    a, b, c = table[group]

    def predict(entry):
        # x = log_flops, y = brier_score
        x = entry['log_flops']
        return {'brier_score': a * x**2 + b * x + c}

    return list(map(predict, input_data))

#4 Run 4 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict 'brier_score' from 'log_flops' with a per-group quadratic.

    The model is ``y = a + b*x + c*x^2`` with ``x = log_flops`` and
    ``y = brier_score``. Note the coefficient naming used here: ``a`` is the
    constant term and ``c`` multiplies the squared term.

    Args:
        input_data: Data points to evaluate. Each dictionary must hold a
                    'log_flops' entry; nothing else is read.
        group: Name of the experimental group whose coefficients apply.

    Returns:
        A list of ``{'brier_score': y}`` dictionaries, one per data point
        and in the same order.

    Raises:
        ValueError: If ``group`` is not in the coefficient table.
        KeyError: If a data point (a dict) is missing 'log_flops'.
    """

    # group -> (a, b, c) for y = a + b*x + c*x^2, rounded to four decimals.
    table = {
        'mmlu': (-0.4804, -0.0630, 0.0115),
        'parsinlu_qa_mc': (-0.4350, 0.0989, -0.0566),
        'arithmetic': (-0.2475, 0.2354, -0.1300),
        'hindu_knowledge': (-0.4103, -0.0311, -0.0344),
        'analogical_similarity': (-0.5406, 0.0279, -0.0192),
        'conceptual_combinations': (-0.4093, 0.0969, -0.0715),
        'hellaswag': (-0.0672, 0.0981, -0.0337),
        'arc': (-0.1071, 0.1176, -0.0369),
        'abstract_narrative_understanding': (-0.5431, 0.1847, -0.0010),
    }

    if group not in table:
        raise ValueError(f"Unknown group: {group}")

    constant, linear, quadratic = table[group]

    # Inner generator reads 'log_flops' once per point; the sum is evaluated
    # as (constant + linear*x) + quadratic*x^2.
    return [
        {'brier_score': constant + linear * x + quadratic * x**2}
        for x in (point['log_flops'] for point in input_data)
    ]

#5 Run 5 R² = -1.000000

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Evaluate ``brier_score = a + b * log_flops + c * log_flops^2`` per point.

    The quadratic form is the same for every group and lets the prediction
    vary non-linearly with compute; the constants ``a``, ``b`` and ``c`` are
    chosen by ``group``.

    Args:
        input_data: Data points to evaluate. Only the 'log_flops' entry of
                    each dictionary is used.
        group: Name of the experimental group to make predictions for.

    Returns:
        A list matching ``input_data`` element for element, each item being
        a ``{'brier_score': value}`` dictionary.

    Raises:
        ValueError: If ``group`` has no fitted constants.
        KeyError: If a data point (a dict) does not contain 'log_flops'.
    """
    # Fitted constants, split by role. All three dictionaries share the same
    # keys; the key order of `intercepts` is what the error message reports.
    intercepts = {  # a: constant term
        'abstract_narrative_understanding': -0.543141,
        'analogical_similarity': -0.540575,
        'arc': -0.107112,
        'arithmetic': -0.247533,
        'conceptual_combinations': -0.409346,
        'hellaswag': -0.067197,
        'hindu_knowledge': -0.410317,
        'mmlu': -0.480365,
        'parsinlu_qa_mc': -0.434951,
    }
    slopes = {  # b: multiplies log_flops
        'abstract_narrative_understanding': 0.184727,
        'analogical_similarity': 0.027911,
        'arc': 0.117619,
        'arithmetic': 0.235370,
        'conceptual_combinations': 0.096926,
        'hellaswag': 0.098051,
        'hindu_knowledge': -0.031144,
        'mmlu': -0.062970,
        'parsinlu_qa_mc': 0.098906,
    }
    curvatures = {  # c: multiplies log_flops squared
        'abstract_narrative_understanding': -0.001002,
        'analogical_similarity': -0.019176,
        'arc': -0.036868,
        'arithmetic': -0.129978,
        'conceptual_combinations': -0.071484,
        'hellaswag': -0.033671,
        'hindu_knowledge': -0.034402,
        'mmlu': 0.011476,
        'parsinlu_qa_mc': -0.056567,
    }

    if group not in intercepts:
        raise ValueError(f"Unknown group: {group}. Known groups: {list(intercepts.keys())}")

    a = intercepts[group]
    b = slopes[group]
    c = curvatures[group]

    def score(point):
        x = point['log_flops']
        return a + b * x + c * x ** 2

    return [{'brier_score': score(point)} for point in input_data]