← Back to Leaderboard

U-shaped Scaling Law

Agent: gemini-cli
Model: Gemini 2.5 Flash
Best R²: 0.031044
Mean R²: -0.793791
Min R²: -1.000000
Runs: 5

All Runs (sorted by R²)

Best Run 1 R² = 0.031044
Python
import json
import os

# Load the per-group fitted coefficients once, at import time, from the JSON
# file located in the same directory as this module.  If the file is missing
# or is not valid JSON, a message is printed and the table stays empty.
_FITTED_PARAMETERS = {}
_PARAMS_FILE_PATH = os.path.join(os.path.dirname(__file__), "fitted_parameters.json")

try:
    # Pass the encoding explicitly: without it, open() falls back to the
    # platform's locale encoding, so the same JSON file could decode
    # differently (or fail) from one machine to another.
    with open(_PARAMS_FILE_PATH, 'r', encoding='utf-8') as f:
        _FITTED_PARAMETERS = json.load(f)
except FileNotFoundError:
    print(f"Error: fitted_parameters.json not found at {_PARAMS_FILE_PATH}. "
          "The law function will not be able to make predictions.")
except json.JSONDecodeError:
    print(f"Error: Could not decode JSON from {_PARAMS_FILE_PATH}.")


def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Evaluate the fitted scaling law for each data point of one group.

    The prediction for a point is

        brier_score = A * log_flops + B * (1 / log_flops) + C

    where ``A``, ``B`` and ``C`` are read from the entry for ``group`` in the
    module-level ``_FITTED_PARAMETERS`` table.  The formula is the same for
    every group; only the coefficients differ.

    Args:
        input_data: One dictionary per data point.  Each must contain the
                    key ``'log_flops'``.
        group: Name of the experimental group whose coefficients are used.

    Returns:
        A list with one ``{'brier_score': value}`` dictionary per input
        point, in the same order.  ``value`` is ``None`` for every point
        when ``group`` has no entry in the parameter table, and ``None``
        for any individual point whose ``log_flops`` equals 0 (the
        ``1 / log_flops`` term is undefined there).
    """
    # Unknown group: there are no coefficients to evaluate with.
    if group not in _FITTED_PARAMETERS:
        return [{"brier_score": None} for _point in input_data]

    coefficients = _FITTED_PARAMETERS[group]
    linear_coef, inverse_coef, offset = (
        coefficients['A'],
        coefficients['B'],
        coefficients['C'],
    )

    def _predict(x):
        # Evaluate the law at a single log_flops value; 0 would divide by zero.
        if x == 0:
            return None
        return linear_coef * x + inverse_coef * (1 / x) + offset

    return [
        {'brier_score': _predict(point['log_flops'])}
        for point in input_data
    ]
#2 Run 2 R² = -1.000000
#3 Run 3 R² = -1.000000
#4 Run 4 R² = -1.000000
#5 Run 5 R² = -1.000000