SLD - Domain Mixture Scaling Law - gemini-cli + Gemini 2.5 Flash

All Runs (sorted by R²)

#1 Run 1 (Best) R² = 0.968329

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict the loss of each of the five domains from its mixture proportion.

    Every domain i in 1..5 uses the same functional form,

        loss_domain_i = C + A / (proportion_domain_i + B)

    where the constants (A, B, C) are looked up per group and per domain.

    Args:
        input_data: A list of data points. Each data point is a dictionary that
                    maps input variable names (``proportion_domain_1`` ..
                    ``proportion_domain_5``) to their values.
        group: Name of the experimental group whose constants should be used.
               The functional form is shared by all groups; only the constants
               differ.

    Returns:
        One dictionary per data point, in input order, with the keys
        ``loss_domain_1`` .. ``loss_domain_5``. A loss is NaN when the matching
        proportion is absent from the data point or the group is unknown.
    """

    # (A, B, C) per group; the tuples are ordered domain_1 .. domain_5.
    coefficients = {
        '70M': (
            (0.0488, 0.0576, 2.5662),
            (0.0141, 0.0636, 3.5963),
            (0.0084, 0.0155, 3.0578),
            (0.0288, 0.0377, 1.5025),
            (0.1127, 0.1952, 3.3529),
        ),
        '160M': (
            (0.0402, 0.0519, 2.2834),
            (0.0083, 0.0445, 3.2866),
            (0.0073, 0.0143, 2.7768),
            (0.0255, 0.0375, 1.2831),
            (0.1205, 0.2034, 2.9952),
        ),
        '305M': (
            (0.0374, 0.0498, 2.1469),
            (0.0097, 0.0528, 3.1226),
            (0.0059, 0.0117, 2.6482),
            (0.0240, 0.0370, 1.1838),
            (0.1097, 0.1856, 2.8383),
        ),
        '410M': (
            (0.0350, 0.0476, 2.0943),
            (0.0057, 0.0351, 3.0684),
            (0.0059, 0.0115, 2.5829),
            (0.0241, 0.0379, 1.1439),
            (0.1109, 0.1828, 2.7604),
        ),
    }

    def predict(point, index):
        # Loss of domain `index` for one data point, or NaN if it cannot be computed.
        share_key = f'proportion_domain_{index}'
        if share_key not in point or group not in coefficients:
            return float('nan')
        a, b, c = coefficients[group][index - 1]
        # Every fitted B is positive, so the denominator stays non-zero for
        # non-negative proportions.
        return c + a / (point[share_key] + b)

    return [
        {f'loss_domain_{index}': predict(point, index) for index in range(1, 6)}
        for point in input_data
    ]

#2 Run 2 R² = 0.968235

▼

Python

import json
import os

# Load the fitted parameters from the JSON file
def load_fitted_params():
    """
    Read the fitted parameters from ``fitted_params.json``.

    The file is looked up in the directory of this module first and then
    under ``/app``; the first location that exists is used.

    Returns:
        The decoded JSON content of the file. If neither location exists, or
        anything else goes wrong while locating or reading the file, the error
        is printed and an empty dict is returned instead.
    """
    file_name = 'fitted_params.json'
    try:
        # Building the candidates inside the try keeps a missing `__file__`
        # on the same fallback path as any other loading failure.
        candidates = (
            os.path.join(os.path.dirname(__file__), file_name),
            os.path.join('/app', file_name),
        )
        for candidate in candidates:
            if os.path.exists(candidate):
                with open(candidate, 'r') as handle:
                    return json.load(handle)
        raise FileNotFoundError("fitted_params.json not found in current directory or /app.")
    except Exception as e:
        print(f"Error loading fitted parameters: {e}")
        # Callers get an empty mapping rather than an exception.
        return {}

# Loaded once when the module is imported. Holds an empty dict if loading
# failed (load_fitted_params() prints the error instead of raising).
FITTED_PARAMS = load_fitted_params()

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict the loss of each of the five domains from its mixture proportion.

    Every domain i in 1..5 uses the same functional form,

        loss_domain_i = A / (proportion_domain_i + k) + B

    where the constants (A, k, B) are read from ``FITTED_PARAMS`` per group
    and per domain.

    Args:
        input_data: A list of data points. Each data point is a dictionary that
                    maps input variable names (``proportion_domain_1`` ..
                    ``proportion_domain_5``) to their values.
        group: Name of the experimental group whose constants should be used.
               The functional form is shared by all groups; only the constants
               differ.

    Returns:
        One dictionary per data point, in input order, with the keys
        ``loss_domain_1`` .. ``loss_domain_5``. A loss is None when the group
        has no (or empty) parameters, when the proportion is missing or None,
        or when the domain's parameters are missing or contain None.
    """
    domain_ids = range(1, 6)
    group_params = FITTED_PARAMS.get(group)

    if not group_params:
        # Unknown group (or no parameters loaded at all): warn once and answer
        # every data point with None for all five losses.
        print(f"Warning: Group '{group}' not found in fitted parameters. Returning None for losses.")
        loss_keys = [f'loss_domain_{i}' for i in domain_ids]
        return [dict.fromkeys(loss_keys) for _ in input_data]

    results = []
    for point in input_data:
        losses = {}
        for i in domain_ids:
            share = point.get(f'proportion_domain_{i}')
            coeffs = group_params.get(f'domain_{i}')

            usable = (
                share is not None
                and coeffs is not None
                and all(value is not None for value in coeffs.values())
            )
            if usable:
                a, k, b = coeffs['A'], coeffs['k'], coeffs['B']
                # Per the original author, k was bounded to >= 0.0001 during
                # fitting, which keeps the denominator away from zero at
                # proportion 0.
                losses[f'loss_domain_{i}'] = a / (share + k) + b
            else:
                losses[f'loss_domain_{i}'] = None
        results.append(losses)

    return results

#3 Run 3 R² = 0.902224

▼

Python

import json
import numpy as np

# Global variable to store loaded parameters
_FITTED_PARAMETERS = None


def _load_parameters():
    """
    Return the fitted parameters, reading them from disk on first use.

    The content of ``/app/fitted_parameters.json`` is decoded and cached in
    the module-level ``_FITTED_PARAMETERS``. If the file does not exist, an
    error is printed and an empty dict is cached instead, so later calls do
    not try to read the file again.
    """
    global _FITTED_PARAMETERS
    if _FITTED_PARAMETERS is not None:
        return _FITTED_PARAMETERS

    try:
        with open('/app/fitted_parameters.json', 'r') as handle:
            loaded = json.load(handle)
    except FileNotFoundError:
        print("Error: fitted_parameters.json not found. Please run analyze_data.py first.")
        loaded = {}

    _FITTED_PARAMETERS = loaded
    return _FITTED_PARAMETERS

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict the loss of each of the five domains from its mixture proportion.

    Every domain i in 1..5 uses the same logarithmic form,

        loss_domain_i = A - B * log(proportion_domain_i + 1e-6)

    where the constants (A, B) come from ``_load_parameters()`` per group and
    per domain.

    Args:
        input_data: A list of data points. Each data point is a dictionary that
                    maps input variable names (``proportion_domain_1`` ..
                    ``proportion_domain_5``) to their values.
        group: Name of the experimental group whose constants should be used.
               The functional form is shared by all groups; only the constants
               differ.

    Returns:
        One dictionary per data point, in input order, with the keys
        ``loss_domain_1`` .. ``loss_domain_5``. A loss falls back to 0.0 when
        the group is unknown, the proportion is missing from the data point,
        or the domain has no fitted parameters.
    """
    fitted = _load_parameters()
    domain_ids = range(1, 6)

    if group not in fitted:
        # No parameters for this group: warn and answer with all-zero losses.
        print(f"Warning: Group '{group}' not found in fitted parameters. Cannot make predictions.")
        return [{f'loss_domain_{i}': 0.0 for i in domain_ids} for _ in input_data]

    per_domain = fitted[group]

    def predict(point, i):
        # Loss of domain `i` for one data point, or 0.0 if inputs/parameters are missing.
        share_key = f'proportion_domain_{i}'
        domain_key = f'domain_{i}'
        if share_key in point and domain_key in per_domain:
            intercept = per_domain[domain_key]['A']
            slope = per_domain[domain_key]['B']
            # The 1e-6 offset keeps the logarithm finite for a proportion of exactly zero.
            return intercept - slope * np.log(point[share_key] + 1e-6)
        return 0.0

    return [
        {f'loss_domain_{i}': predict(point, i) for i in domain_ids}
        for point in input_data
    ]

#4 Run 4 R² = 0.809195

▼

Python

import numpy as np

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict the loss of each of the five domains from its mixture proportion.

    Every domain i in 1..5 uses the same functional form,

        loss_domain_i = A + B / max(proportion_domain_i + 1e-6, 1e-10)

    where the constants (A, B) are looked up per group and per domain.

    Args:
        input_data: A list of data points. Each data point is a dictionary that
                    maps input variable names (``proportion_domain_1`` ..
                    ``proportion_domain_5``) to their values.
        group: Name of the experimental group whose constants should be used.
               The functional form is shared by all groups; only the constants
               differ.

    Returns:
        One dictionary per data point, in input order. It holds a
        ``loss_domain_i`` entry for every domain whose proportion is present
        in the data point; domains with a missing proportion are left out.

    Raises:
        ValueError: If ``group`` has no fitted parameters.
    """

    # (A, B) per group; the tuples are ordered domain_1 .. domain_5.
    coefficients = {
        "70M": (
            (2.7954155842226505, 6.194945290313312e-07),
            (3.666992583441158, 1.5143629378910318e-07),
            (3.1770876336775062, 4.2355238205837483e-07),
            (1.6957346873000834, 5.706010522162532e-07),
            (3.5171284546533235, 4.146173044780857e-07),
        ),
        "160M": (
            (2.4794515045367107, 5.809575446413638e-07),
            (3.3325754438215687, 1.3956202936091392e-07),
            (2.8819574855777637, 4.035983480572172e-07),
            (1.4542888945325934, 5.087695900137449e-07),
            (3.168009255701514, 4.2690655326847836e-07),
        ),
        "305M": (
            (2.331439324565376, 5.665932970655655e-07),
            (3.174052635323413, 1.3213118365722513e-07),
            (2.737698767011762, 4.179249834595783e-07),
            (1.3457382302771388, 4.872370934063391e-07),
            (3.000827223421394, 4.3358908846628955e-07),
        ),
        "410M": (
            (2.269862140426057, 5.620207432145631e-07),
            (3.101893008310117, 1.2838329492243058e-07),
            (2.6724486393426763, 4.2580340959834756e-07),
            (1.3053577148261022, 4.74009961564549e-07),
            (2.925409380072483, 4.492046744951497e-07),
        ),
    }

    if group not in coefficients:
        raise ValueError(f"Unknown group: {group}")

    offset = 1e-6  # Shifts the proportion so that an explicit 0 stays usable.
    floor = 1e-10  # Lower bound on the denominator.

    results = []
    for point in input_data:
        losses = {}
        for i, (base, scale) in enumerate(coefficients[group], start=1):
            share_key = f'proportion_domain_{i}'
            if share_key in point:
                denominator = np.maximum(point[share_key] + offset, floor)
                losses[f'loss_domain_{i}'] = base + scale / denominator
        results.append(losses)

    return results

#5 Run 5 R² = -1.000000

▼

Python

import json
import numpy as np

def law(
    input_data: list[dict[str, float]],
    group: str,
    fitted_params: "dict[str, dict[str, dict[str, float]]] | None" = None,
) -> list[dict[str, float]]:
    """
    Predict the loss of each of the five domains from its mixture proportion.

    Every domain i in 1..5 uses the same power-law form,

        loss_domain_i = A * max(proportion_domain_i + epsilon, epsilon) ** B + C

    where the constants (A, B, C) are looked up per group and per domain.

    Args:
        input_data: A list of data points. Each data point is a dictionary that
                    maps input variable names (``proportion_domain_1`` ..
                    ``proportion_domain_5``) to their values.
        group: Name of the experimental group whose constants should be used.
               The functional form is shared by all groups; only the constants
               differ.
        fitted_params: Optional parameter table of the shape
               ``{group: {"1": {"A": ..., "B": ..., "C": ...}, ..., "5": {...}}}``
               (domain keys are the domain numbers as strings). When omitted,
               the table is parsed from the JSON string
               ``fitted_params_json_str``.

    Returns:
        One dictionary per data point, in input order, with the keys
        ``loss_domain_1`` .. ``loss_domain_5``. A loss is NaN when the
        proportion is missing from the data point, the domain has no
        parameters, or any of its parameters is NaN.

    Raises:
        ValueError: If ``group`` has no entry in the parameter table.
    """
    epsilon = 1e-6  # Must match the epsilon used during fitting

    if fitted_params is None:
        # NOTE(review): `fitted_params_json_str` is not defined in the visible
        # source; it is presumably the JSON text a generator script was meant
        # to embed here. Confirm it exists at module level, or pass
        # `fitted_params` explicitly.
        fitted_params = json.loads(fitted_params_json_str)

    if group not in fitted_params:
        raise ValueError(f"Unknown group: {group}")

    group_params = fitted_params[group]

    predictions = []
    for data_point in input_data:
        predicted_losses = {}
        for domain_idx in range(1, 6):  # Domains 1 to 5
            proportion_key = f'proportion_domain_{domain_idx}'
            loss_key = f'loss_domain_{domain_idx}'

            if proportion_key not in data_point:
                predicted_losses[loss_key] = float('nan')
                continue

            # The parameter table is keyed by the domain number as a string.
            params = group_params.get(str(domain_idx))
            if params is None:
                predicted_losses[loss_key] = float('nan')
                continue

            A, B, C = params['A'], params['B'], params['C']
            if np.isnan(A) or np.isnan(B) or np.isnan(C):
                predicted_losses[loss_key] = float('nan')  # No valid parameters
                continue

            # Clamp the base at epsilon so it stays positive for any exponent B.
            base = np.maximum(data_point[proportion_key] + epsilon, epsilon)
            predicted_losses[loss_key] = A * base ** B + C
        predictions.append(predicted_losses)
    return predictions