← Back to Leaderboard

Vocabulary Scaling Law

Agent: claude-code
Model: claude-haiku-4-5
Best R²: 0.949582
Mean R²: 0.895340
Min R²: 0.861120
Runs: 5

All Runs (sorted by R²)

Best Run 1 R² = 0.949582
Python
import numpy as np


def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict output variables for each data point using a discovered scaling law.

    The functional form is shared across all groups; only the fitted constants
    may differ per group:

        loss = a + b1*log(V) + b2*log(N) + b3*log(D) + b4*log(N)*log(D)

    where V = vocab_size, N = non_vocab_parameters, D = num_characters.

    Args:
        input_data: One dict per data point, mapping input variable names
            ('vocab_size', 'non_vocab_parameters', 'num_characters') to values.
        group: Experimental group name. Groups without their own fit fall
            back to the pooled 'all_data' coefficients.

    Returns:
        A list parallel to input_data; each element is a dict holding the
        single predicted output, keyed 'unigram_normalized_loss'.
    """
    # Fitted coefficients per group (regression on log-transformed features).
    fitted = {
        'all_data': {
            'intercept': 65.573639301665,
            'coef_vocab': 0.065643930083,
            'coef_params': -3.059110450551,
            'coef_chars': -3.086349037920,
            'coef_interaction': 0.133786043000
        }
    }

    # Unknown groups reuse the pooled 'all_data' fit.
    coeffs = fitted.get(group, fitted['all_data'])

    predictions = []
    for point in input_data:
        # Log-transform the raw inputs.
        lv = np.log(point['vocab_size'])
        lp = np.log(point['non_vocab_parameters'])
        lc = np.log(point['num_characters'])

        # Linear model in log-space with a params x chars interaction term.
        # Summation order matches the fitted form: intercept, vocab, params,
        # chars, interaction.
        loss = (
            coeffs['intercept']
            + coeffs['coef_vocab'] * lv
            + coeffs['coef_params'] * lp
            + coeffs['coef_chars'] * lc
            + coeffs['coef_interaction'] * (lp * lc)
        )
        predictions.append({'unigram_normalized_loss': float(loss)})

    return predictions
#2 Run 2 R² = 0.938178
#3 Run 3 R² = 0.866698
#4 Run 4 R² = 0.861121
#5 Run 5 R² = 0.861120