SLD - Parallel Scaling Law - gemini-cli + Gemini 2.5 Flash

Best Run 1 R² = 0.999575

▼

Python

import math

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Evaluate the fitted power law ``loss = K * num_params**a * parallel_size**b``.

    The functional form is shared by every group; only the constants
    ``(K, a, b)`` are group-specific and are hard-coded below.

    Args:
        input_data: Data points to evaluate. Each dictionary must provide
                    the keys 'num_params' and 'parallel_size', both with
                    positive values.
        group: Name of the experimental group whose constants are used.
               Supported values are "stack" and "pile".

    Returns:
        One dictionary per input data point, in the same order, each of the
        form ``{"loss": <predicted value>}``.

    Raises:
        ValueError: If ``group`` is not supported, if a data point lacks
                    (or has ``None`` for) either input, or if either input
                    is not strictly positive.
    """
    # (K, a, b) for each supported group.
    coefficients = {
        "stack": (4.4301, -0.0664, -0.0221),
        "pile": (7.6843, -0.0645, -0.0189),
    }

    if group not in coefficients:
        raise ValueError(f"Unknown group: {group}. Supported groups are {list(coefficients)}")

    scale, size_exponent, parallel_exponent = coefficients[group]

    def _predict(point: dict[str, float]) -> dict[str, float]:
        # Validate a single data point, then apply the power law to it.
        n = point.get("num_params")
        p = point.get("parallel_size")

        if n is None or p is None:
            raise ValueError("Input data point must contain 'num_params' and 'parallel_size'.")

        # The law was fitted in log space, so non-positive inputs are rejected.
        if n <= 0 or p <= 0:
            raise ValueError("Input values 'num_params' and 'parallel_size' must be positive.")

        return {"loss": scale * (n ** size_exponent) * (p ** parallel_exponent)}

    return [_predict(point) for point in input_data]

#2 Run 2 R² = 0.999572

▼

#3 Run 3 R² = 0.999568

▼

Python

import json
import os

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Evaluate the fitted power law ``loss = C * num_params**alpha * parallel_size**beta``.

    The constants are read on every call from ``fitted_params.json``. The
    file located next to this module is used when it exists; otherwise
    ``/app/fitted_params.json`` is opened.

    Args:
        input_data: Data points to evaluate. Each dictionary must provide
                    the keys 'num_params' and 'parallel_size'.
        group: Name of the experimental group; must be a top-level key of
               the JSON file, mapping to an object with 'C', 'alpha' and
               'beta'.

    Returns:
        One dictionary per input data point, in the same order, each of the
        form ``{'loss': <predicted value>}``.

    Raises:
        ValueError: If the JSON file has no entry for ``group``.
    """
    # Prefer the parameter file that sits beside this module and fall back
    # to the fixed /app location when it is absent.
    beside_module = os.path.join(os.path.dirname(__file__), "fitted_params.json")
    source_path = beside_module if os.path.exists(beside_module) else "/app/fitted_params.json"

    with open(source_path, 'r') as handle:
        table = json.load(handle)

    if group not in table:
        raise ValueError(f"No fitted parameters found for group: {group}")

    entry = table[group]
    C, alpha, beta = entry['C'], entry['alpha'], entry['beta']

    # L = C * N^alpha * P^beta, evaluated point by point.
    return [
        {'loss': C * (point['num_params'] ** alpha) * (point['parallel_size'] ** beta)}
        for point in input_data
    ]

#4 Run 4 R² = -1.000000

▼

Python

import json
import os

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Evaluate the fitted law ``loss = A * num_params**B * parallel_size**D + C``.

    The constants are read on every call from ``fitted_params.json``. The
    file located next to this module is used when it exists; otherwise
    ``/app/fitted_params.json`` is opened.

    Args:
        input_data: Data points to evaluate. Each dictionary must provide
                    the keys 'num_params' and 'parallel_size'.
        group: Name of the experimental group; must be a top-level key of
               the JSON file, mapping to an object with 'A', 'B', 'D' and
               'C'.

    Returns:
        One dictionary per input data point, in the same order, each of the
        form ``{'loss': <predicted value>}``.

    Raises:
        ValueError: If the JSON file has no entry for ``group``.
        FileNotFoundError: If the parameter file exists in neither location.
    """
    # Resolve the parameter file relative to this module so the function
    # works wherever it is deployed. Previously the module-relative path was
    # computed and then unconditionally overwritten by the /app path, which
    # is now kept only as a fallback.
    script_dir = os.path.dirname(__file__)
    params_file_path = os.path.join(script_dir, 'fitted_params.json')
    if not os.path.exists(params_file_path):
        params_file_path = '/app/fitted_params.json'

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(params_file_path, 'r', encoding='utf-8') as f:
        fitted_params = json.load(f)

    if group not in fitted_params:
        raise ValueError(f"No fitted parameters found for group: {group}")

    group_params = fitted_params[group]
    A = group_params['A']
    B = group_params['B']
    D = group_params['D']
    C = group_params['C']

    # Apply the scaling law: loss = A * (num_params ** B) * (parallel_size ** D) + C
    return [
        {'loss': A * (data_point['num_params'] ** B) * (data_point['parallel_size'] ** D) + C}
        for data_point in input_data
    ]

Parallel Scaling Law

All Runs (sorted by R²)