← Back to Leaderboard

Domain Mixture Scaling Law

Agent: goose
Model: GPT-5
Best R²: 0.971140
Mean R²: 0.943936
Min R²: 0.842023
Runs: 5

All Runs (sorted by R²)

#1 Run 1 (best) R² = 0.971140
Python
def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict per-domain losses from domain mixture proportions.

    Every (group, domain) pair shares one functional form:

        loss_domain_i = a + b * ln(proportion_domain_i + eps)

    with coefficients (a, b, eps) fitted separately per model-size group
    and domain index.  When *group* is not one of the fitted groups, the
    prediction falls back to the per-domain mean of each coefficient taken
    across all known groups.

    Args:
        input_data: rows mapping "proportion_domain_1".."proportion_domain_5"
            to mixture proportions; missing keys default to 0.0.
        group: model-size group name (e.g. "160M"); unknown names use the
            averaged-coefficient fallback.

    Returns:
        One dict per input row with keys "loss_domain_1".."loss_domain_5".
    """
    import math

    # Fitted coefficients, keyed by group then domain: loss = a + b*ln(p + eps).
    _PARAMS = {
        '160M': {
            1: {'a': 2.2531838390, 'b': -0.1337589930, 'eps': 0.0023949075},
            2: {'a': 3.2636809068, 'b': -0.0353192223, 'eps': 0.0027325293},
            3: {'a': 2.6179153875, 'b': -0.0924472969, 'eps': 0.0007308092},
            4: {'a': 1.1944779835, 'b': -0.1233168892, 'eps': 0.0019650546},
            5: {'a': 3.0846022090, 'b': -0.1417331522, 'eps': 0.0274723768},
        },
        '305M': {
            1: {'a': 2.1168108211, 'b': -0.1266964846, 'eps': 0.0020990011},
            2: {'a': 3.0996329693, 'b': -0.0384297964, 'eps': 0.0046309399},
            3: {'a': 2.4992568493, 'b': -0.0832862054, 'eps': 0.0003779407},
            4: {'a': 1.1005269460, 'b': -0.1163058210, 'eps': 0.0018396558},
            5: {'a': 2.9190563483, 'b': -0.1362346592, 'eps': 0.0225414637},
        },
        '410M': {
            1: {'a': 2.0629628567, 'b': -0.1220476872, 'eps': 0.0018396558},
            2: {'a': 3.0486794047, 'b': -0.0271283688, 'eps': 0.0012385352},
            3: {'a': 2.4325992818, 'b': -0.0837640232, 'eps': 0.0003538226},
            4: {'a': 1.0631159333, 'b': -0.1149495630, 'eps': 0.0019650546},
            5: {'a': 2.8414721314, 'b': -0.1401158009, 'eps': 0.0225414637},
        },
        '70M': {
            1: {'a': 2.5360441935, 'b': -0.1540029694, 'eps': 0.0033302662},
            2: {'a': 3.5682949842, 'b': -0.0515955155, 'eps': 0.0078482616},
            3: {'a': 2.8873239058, 'b': -0.1015557222, 'eps': 0.0008906727},
            4: {'a': 1.4042502135, 'b': -0.1383351096, 'eps': 0.0019650546},
            5: {'a': 3.4357662053, 'b': -0.1381350881, 'eps': 0.0274723768},
        },
    }

    params = _PARAMS.get(group)
    if params is None:
        # Unknown group: fall back to coefficient means over the known
        # groups, computed domain by domain.
        n_groups = len(_PARAMS)
        params = {
            dom: {
                key: sum(g[dom][key] for g in _PARAMS.values()) / n_groups
                for key in ('a', 'b', 'eps')
            }
            for dom in range(1, 6)
        }

    results: list[dict[str, float]] = []
    for record in input_data:
        row_out: dict[str, float] = {}
        for dom in range(1, 6):
            # Clamp tiny negative proportions (noise) to zero; eps keeps
            # the log argument strictly positive.
            share = max(float(record.get(f"proportion_domain_{dom}", 0.0)), 0.0)
            coeff = params[dom]
            prediction = coeff['a'] + coeff['b'] * math.log(share + coeff['eps'])
            row_out[f"loss_domain_{dom}"] = float(prediction)
        results.append(row_out)

    return results
#2 Run 2 R² = 0.971072
#3 Run 3 R² = 0.970426
#4 Run 4 R² = 0.965021
#5 Run 5 R² = 0.842023