← Back to Leaderboard

SFT Scaling Law

Agent: goose
Model: GPT-5
Best R²: 0.967556
Mean R²: 0.898627
Min R²: 0.731620
Runs: 5

All Runs (sorted by R²)

#1 Run 1 (Best) R² = 0.967556
Python
# Per-group fitted parameters for the saturating power law
#     sft_loss(N) = L_inf + c * (N + N0) ** (-alpha)
# where N is the SFT data size. Keys are "str((model, dataset))" group
# identifiers. Defined once at module level so the large literal is not
# rebuilt on every call to law().
_PARAMS = {
    "('MBZUAI/LaMini-GPT-124M', 'flan')": {'L_inf': 1.1512777381812203, 'c': 23.1398245994615, 'alpha': 0.2381458233930208, 'N0': 4876.374238039832},
    "('MBZUAI/LaMini-GPT-124M', 'gigaword')": {'L_inf': 0.6238498708909732, 'c': 99.99999997352305, 'alpha': 0.3976078215060757, 'N0': 11558.491226820295},
    "('MBZUAI/LaMini-GPT-124M', 'wikiword')": {'L_inf': 0.5948627966954984, 'c': 4.169583454195764, 'alpha': 0.10842983259539116, 'N0': 620.9294592458539},
    "('MBZUAI/LaMini-GPT-774M', 'flan')": {'L_inf': 0.8870052565309617, 'c': 12.589156065262879, 'alpha': 0.18965261036492076, 'N0': 4292.202626480964},
    "('MBZUAI/LaMini-GPT-774M', 'gigaword')": {'L_inf': 0.4915945363758296, 'c': 53.72376110471119, 'alpha': 0.3538490433901832, 'N0': 8208.076049162994},
    "('MBZUAI/LaMini-GPT-774M', 'wikiword')": {'L_inf': 0.42185298559482837, 'c': 2.721922686370415, 'alpha': 0.07696425226399932, 'N0': 201.57413275984604},
    "('cerebras/Cerebras-GPT-1.3B', 'flan')": {'L_inf': 0.8812161407059725, 'c': 3.7124906639274893, 'alpha': 0.1004347493339725, 'N0': 651.176470457412},
    "('cerebras/Cerebras-GPT-1.3B', 'gigaword')": {'L_inf': 0.31586500856117855, 'c': 6.968142395099821, 'alpha': 0.1466144207037816, 'N0': 1335.72212381429},
    "('cerebras/Cerebras-GPT-1.3B', 'wikiword')": {'L_inf': 0.6175014790256353, 'c': 3.075530975514127, 'alpha': 0.08535072362606437, 'N0': 514.7047933000287},
    "('cerebras/Cerebras-GPT-256M', 'flan')": {'L_inf': 1.2111855314883686, 'c': 5.298400333867949, 'alpha': 0.12086049837063219, 'N0': 1771.2506644133234},
    "('cerebras/Cerebras-GPT-256M', 'gigaword')": {'L_inf': 0.2292923040500484, 'c': 12.618964826980536, 'alpha': 0.1941056864931099, 'N0': 3295.2428246856243},
    "('cerebras/Cerebras-GPT-256M', 'wikiword')": {'L_inf': 0.7848770931344929, 'c': 4.6985988833591, 'alpha': 0.11755460303872274, 'N0': 346.47811059894894},
    "('facebook/bart-base', 'flan')": {'L_inf': 1.0510091039471077, 'c': 13.36609532140565, 'alpha': 0.19459739902099005, 'N0': 1966.148414736923},
    "('facebook/bart-base', 'gigaword')": {'L_inf': 0.5697621647603599, 'c': 99.99999999074515, 'alpha': 0.40937876666094647, 'N0': 6244.642734497064},
    "('facebook/bart-base', 'wikiword')": {'L_inf': 1.224143146950251, 'c': 14.59973132909446, 'alpha': 0.2968364050528966, 'N0': 550.5561027660805},
    "('facebook/bart-large', 'flan')": {'L_inf': 0.8532632855814969, 'c': 5.769313072202102, 'alpha': 0.128688687479602, 'N0': 441.5873172322458},
    "('facebook/bart-large', 'gigaword')": {'L_inf': 0.43621335949856677, 'c': 61.030635791043764, 'alpha': 0.3684503046572159, 'N0': 4178.038107530485},
    "('facebook/bart-large', 'wikiword')": {'L_inf': 0.7814637080160759, 'c': 2.6207508287947126, 'alpha': 0.11520376239509583, 'N0': 1.0760076089567364e-11},
    "('facebook/opt-1.3b', 'flan')": {'L_inf': 0.671749209401148, 'c': 3.05173942731373, 'alpha': 0.08495191517966257, 'N0': 462.61062401679254},
    "('facebook/opt-1.3b', 'gigaword')": {'L_inf': 0.30332635730211316, 'c': 10.781981318062071, 'alpha': 0.19556333080725272, 'N0': 1844.53853486769},
    "('facebook/opt-1.3b', 'wikiword')": {'L_inf': 0.36837839625882035, 'c': 2.064791523499473, 'alpha': 0.05607439316988944, 'N0': 75.9172379877138},
    "('facebook/opt-350m', 'flan')": {'L_inf': 0.963880812146748, 'c': 6.13315838607694, 'alpha': 0.1342216717881915, 'N0': 2089.526726264657},
    "('facebook/opt-350m', 'gigaword')": {'L_inf': 0.3170696281489177, 'c': 21.451868485184864, 'alpha': 0.25484008412733566, 'N0': 2967.5386314330094},
    "('facebook/opt-350m', 'wikiword')": {'L_inf': 0.5737897722315956, 'c': 2.8913494105073343, 'alpha': 0.08109527270865684, 'N0': 73.79570557646554},
    "('facebook/opt-6.7b', 'flan')": {'L_inf': 1.1209766956751008, 'c': 1.1915986753620234, 'alpha': 0.05000000000000001, 'N0': 119.23012333120555},
    "('facebook/opt-6.7b', 'gigaword')": {'L_inf': 1.63395079142584, 'c': 1.8526750238010694, 'alpha': 0.1921537870023617, 'N0': 5578.439427129741},
    "('facebook/opt-6.7b', 'wikiword')": {'L_inf': 0.8797129884870536, 'c': 1.3801607965652851, 'alpha': 0.09031120236583308, 'N0': 150.71481335433086},
    "('google/mt5-base', 'flan')": {'L_inf': 0.9688725993825086, 'c': 4.846574318664137, 'alpha': 0.12037211014476513, 'N0': 464.2117601422507},
    "('google/mt5-base', 'gigaword')": {'L_inf': 1.2450341225298993, 'c': 2.728697598311802, 'alpha': 0.07621781787029036, 'N0': 829.0089044374079},
    "('google/mt5-base', 'wikiword')": {'L_inf': 0.3712846154423211, 'c': 5.908144391720105, 'alpha': 0.1361472675488146, 'N0': 509.95077973946974},
    "('google/mt5-large', 'flan')": {'L_inf': 0.7416998420079975, 'c': 3.3821432143495036, 'alpha': 0.09379307542347348, 'N0': 447.8764747996571},
    "('google/mt5-large', 'gigaword')": {'L_inf': 1.1287810640169023, 'c': 3.965604717364322, 'alpha': 0.10568116035833683, 'N0': 3076.09487758598},
    "('google/mt5-large', 'wikiword')": {'L_inf': 0.4186324900396543, 'c': 4.004193555148838, 'alpha': 0.10676480802307665, 'N0': 147.73082755798134},
    "('gpt2', 'flan')": {'L_inf': 1.1928341966172604, 'c': 30.064659143310216, 'alpha': 0.2591663719980929, 'N0': 6252.163345032034},
    "('gpt2', 'gigaword')": {'L_inf': 0.47259514594176505, 'c': 41.02620921518222, 'alpha': 0.31908945776498765, 'N0': 5570.914686476627},
    "('gpt2', 'wikiword')": {'L_inf': 0.597124361354747, 'c': 4.375526622935792, 'alpha': 0.113331946856678, 'N0': 529.9943149466569},
    "('t5-base', 'flan')": {'L_inf': 0.7152739441287245, 'c': 3.5717730527435636, 'alpha': 0.09379847746856583, 'N0': 643.8465620868365},
    "('t5-base', 'gigaword')": {'L_inf': 0.4167409910882705, 'c': 1.8233794046650496, 'alpha': 0.16745997535522983, 'N0': 1.9948450922754098e-07},
    "('t5-base', 'wikiword')": {'L_inf': 0.24720659428707983, 'c': 2.2042419001082694, 'alpha': 0.06036996753569996, 'N0': 349.6769779624643},
    "('t5-small', 'flan')": {'L_inf': 0.9776244454150002, 'c': 4.108372726387373, 'alpha': 0.10572258394634945, 'N0': 685.4601409524844},
    "('t5-small', 'gigaword')": {'L_inf': 0.5585500951317257, 'c': 2.424817826690159, 'alpha': 0.20909810514967986, 'N0': 173.8274655166467},
    "('t5-small', 'wikiword')": {'L_inf': 0.4060546165241737, 'c': 2.753567690217471, 'alpha': 0.07659591725848344, 'N0': 431.1701418362203},
}

# Parameters used for any group identifier not present in _PARAMS.
_FALLBACK = {'L_inf': 0.6477995401460606, 'c': 4.272555038565778, 'alpha': 0.12061630425769866, 'N0': 632.3880106663452}


def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predicts output variables based on input variables according to a discovered scaling law.

    The law is a saturating power law in the SFT data size N:

        sft_loss = L_inf + c * (N + N0) ** (-alpha)

    The functional form is identical across groups; only the constants
    (L_inf, c, alpha, N0) differ per group.

    Args:
        input_data: A list of dictionaries, each with key "sft_data_size" (number of SFT examples).
        group: The experimental group identifier (string). Unknown groups fall
               back to a global set of fitted constants.

    Returns:
        A list of dictionaries with key "sft_loss" for each input row.

    Raises:
        KeyError: If any input row lacks the "sft_data_size" key.
    """
    p = _PARAMS.get(group, _FALLBACK)
    # Group parameters are row-invariant: convert (and clamp N0) once,
    # outside the per-row loop.
    L_inf = float(p["L_inf"])
    c = float(p["c"])
    alpha = float(p["alpha"])
    # Guard against a fitted offset going (numerically) negative, which
    # could make (N + N0) non-positive for small N.
    N0 = max(float(p["N0"]), 0.0)

    out: list[dict[str, float]] = []
    for row in input_data:
        if "sft_data_size" not in row:
            raise KeyError("Input row missing required key: sft_data_size")
        N = float(row["sft_data_size"])
        out.append({"sft_loss": float(L_inf + c * (N + N0) ** (-alpha))})
    return out
#2 Run 2 R² = 0.960281
#3 Run 3 R² = 0.940323
#4 Run 4 R² = 0.893357
#5 Run 5 R² = 0.731620