← Back to Leaderboard

SFT Scaling Law

Agent: claude-code
Model: claude-haiku-4-5
Best R²: 0.948364
Mean R²: 0.893891
Min R²: 0.786727
Runs: 5

All Runs (sorted by R²)

Best Run 1 R² = 0.948364
Python
def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict SFT loss from SFT data size via a per-group logarithmic scaling law.

    Functional form (identical for every group, coefficients fitted per group):

        sft_loss = a - b * log(sft_data_size)

    Args:
        input_data: One dict per data point; each must contain the key
                    'sft_data_size' mapping to a positive float.
        group: Name of the experimental group whose fitted coefficients
               should be used. Must be one of the keys in the table below.

    Returns:
        A list parallel to ``input_data``; each element is a dict with the
        single key 'sft_loss' holding the predicted loss.

    Raises:
        ValueError: If ``group`` is not a known group name.
    """
    import math

    # Fitted coefficients {a: intercept, b: log-slope}, one entry per
    # (model, dataset) experimental group.
    params = {
        "('MBZUAI/LaMini-GPT-124M', 'flan')": {'a': 5.9882655454224425, 'b': 0.28582842496758415},
        "('MBZUAI/LaMini-GPT-124M', 'gigaword')": {'a': 4.737755543864644, 'b': 0.2633818404689799},
        "('MBZUAI/LaMini-GPT-124M', 'wikiword')": {'a': 3.391898489600237, 'b': 0.1380796078259761},
        "('MBZUAI/LaMini-GPT-774M', 'flan')": {'a': 4.783598285711526, 'b': 0.21223122975241257},
        "('MBZUAI/LaMini-GPT-774M', 'gigaword')": {'a': 4.204573308225127, 'b': 0.23807369864162195},
        "('MBZUAI/LaMini-GPT-774M', 'wikiword')": {'a': 2.652870267476408, 'b': 0.09602793910113468},
        "('cerebras/Cerebras-GPT-1.3B', 'flan')": {'a': 3.4606313078949356, 'b': 0.12168554598363243},
        "('cerebras/Cerebras-GPT-1.3B', 'gigaword')": {'a': 3.820355957753342, 'b': 0.19060958263967723},
        "('cerebras/Cerebras-GPT-1.3B', 'wikiword')": {'a': 2.950335145432975, 'b': 0.10204513762986303},
        "('cerebras/Cerebras-GPT-256M', 'flan')": {'a': 4.2106324635240435, 'b': 0.14467116014683556},
        "('cerebras/Cerebras-GPT-256M', 'gigaword')": {'a': 4.218447739714603, 'b': 0.22564133332313316},
        "('cerebras/Cerebras-GPT-256M', 'wikiword')": {'a': 3.928449799282596, 'b': 0.16694369370158185},
        "('facebook/bart-base', 'flan')": {'a': 5.722811839840599, 'b': 0.2788501405317051},
        "('facebook/bart-base', 'gigaword')": {'a': 5.410755825061724, 'b': 0.3347248646956692},
        "('facebook/bart-base', 'wikiword')": {'a': 4.492633730872094, 'b': 0.241355286392432},
        "('facebook/bart-large', 'flan')": {'a': 4.453518961316551, 'b': 0.1981433193894148},
        "('facebook/bart-large', 'gigaword')": {'a': 5.242535976034646, 'b': 0.33392092315554134},
        "('facebook/bart-large', 'wikiword')": {'a': 2.7193492501969665, 'b': 0.1081314220510578},
        "('facebook/opt-1.3b', 'flan')": {'a': 3.003527124075447, 'b': 0.10226900929213374},
        "('facebook/opt-1.3b', 'gigaword')": {'a': 4.06738375106869, 'b': 0.2250385091589151},
        "('facebook/opt-1.3b', 'wikiword')": {'a': 2.227609752184879, 'b': 0.06738256750517932},
        "('facebook/opt-350m', 'flan')": {'a': 4.096427283011327, 'b': 0.15677711010750922},
        "('facebook/opt-350m', 'gigaword')": {'a': 4.7892172166877485, 'b': 0.28416051186028063},
        "('facebook/opt-350m', 'wikiword')": {'a': 2.969051299948199, 'b': 0.10931922575322839},
        "('facebook/opt-6.7b', 'flan')": {'a': 2.206063530252997, 'b': 0.03597445331653896},
        "('facebook/opt-6.7b', 'gigaword')": {'a': 2.1626361802570546, 'b': 0.02785806595901192},
        "('facebook/opt-6.7b', 'wikiword')": {'a': 1.9623193503403864, 'b': 0.05153332239800601},
        "('google/mt5-base', 'flan')": {'a': 4.09880753315647, 'b': 0.16562129414738644},
        "('google/mt5-base', 'gigaword')": {'a': 3.3355539188668133, 'b': 0.08234869144321573},
        "('google/mt5-base', 'wikiword')": {'a': 3.8616481733826316, 'b': 0.19493726215183346},
        "('google/mt5-large', 'flan')": {'a': 3.228231895001962, 'b': 0.11550187850708908},
        "('google/mt5-large', 'gigaword')": {'a': 3.4146676288749753, 'b': 0.09533853606014771},
        "('google/mt5-large', 'wikiword')": {'a': 3.3530996656383256, 'b': 0.15291379237134867},
        "('gpt2', 'flan')": {'a': 6.1798663808272165, 'b': 0.29617705498721825},
        "('gpt2', 'gigaword')": {'a': 4.817199540073447, 'b': 0.2808748579274315},
        "('gpt2', 'wikiword')": {'a': 3.4933775690283317, 'b': 0.14749971597990033},
        "('t5-base', 'flan')": {'a': 3.2855166640801268, 'b': 0.1171722866230908},
        "('t5-base', 'gigaword')": {'a': 1.4802511540185386, 'b': 0.06962929672117146},
        "('t5-base', 'wikiword')": {'a': 2.132300509025821, 'b': 0.06785004659432728},
        "('t5-small', 'flan')": {'a': 3.752075117502899, 'b': 0.1344429088427537},
        "('t5-small', 'gigaword')": {'a': 1.6089334542349931, 'b': 0.07229854242895724},
        "('t5-small', 'wikiword')": {'a': 2.5944002606124483, 'b': 0.09053553066033125},
    }

    # EAFP lookup: surface an unknown group as ValueError, matching the
    # contract callers rely on.
    try:
        coeffs = params[group]
    except KeyError:
        raise ValueError(f'Unknown group: {group}')

    intercept = coeffs['a']
    slope = coeffs['b']
    log = math.log  # hoist the attribute lookup out of the loop

    return [
        {'sft_loss': intercept - slope * log(point['sft_data_size'])}
        for point in input_data
    ]
#2 Run 2 R² = 0.948364
#3 Run 3 R² = 0.893000
#4 Run 4 R² = 0.893000
#5 Run 5 R² = 0.786727