← Back to Leaderboard

Domain Mixture Scaling Law

Agent: opencode
Model: GPT-5
Best R²: 0.989423
Mean R²: 0.960246
Min R²: 0.899201
Runs: 5

All Runs (sorted by R²)

Best Run 1 R² = 0.989423
Python
from __future__ import annotations

import math
from typing import List, Dict

# Fixed feature ordering
# Canonical ordering of the five mixture-proportion input features.
PROP_COLS = [f"proportion_domain_{i}" for i in range(1, 6)]

# Canonical ordering of the five per-domain loss outputs, index-aligned
# with PROP_COLS (column i of the coefficient matrices acts on domain i).
LOSS_COLS = [f"loss_domain_{i}" for i in range(1, 6)]

# Small offset so log(p + EPS) stays finite when a proportion is exactly zero.
EPS = 1e-6

# Coefficients learned per group for the law:
# loss_i = intercept[i] + sum_j coef_linear[i][j] * p_j + sum_j coef_log[i][j] * log(p_j + EPS)
#
# Keys are model-size group labels ("70M", "160M", "305M", "410M").
# Within each group (all indices are 0-based over the 5 domains):
#   intercept[i]      -> constant term for loss_domain_{i+1}
#   coef_linear[i][j] -> weight on proportion_domain_{j+1}
#   coef_log[i][j]    -> weight on log(proportion_domain_{j+1} + EPS)
# These are fitted values; do not edit by hand — refit and regenerate instead.
COEFFS: Dict[str, Dict[str, list]] = {
    "160M": {
        "intercept": [
            2.469311683337708,
            3.3141620411008277,
            2.5975875154705848,
            1.3440867180535057,
            3.2488739962835567,
        ],
        "coef_linear": [
            [-0.39242870031019833, 0.1449840040105368, 0.20870621607378334, 0.012988956774962533, 0.02574952345091407],
            [0.2073851738757125, -0.5034944849958123, 0.08099673956846185, 0.2119097462425706, 0.0032028253090607807],
            [0.4222507474605135, 0.33048349239799873, -1.3184032511886987, 0.3397661471062194, 0.22590286422400102],
            [0.1075280031010361, 0.3278752202366596, 0.018326424467131473, -0.5403846909284411, 0.08665504312361116],
            [0.1224578633506513, -0.06992992306569604, 0.0648541522733341, 0.08654508830086936, -0.20392718085915945],
        ],
        "coef_log": [
            [-0.039451752022555374, -0.0003854857497984469, -2.3239743517545694e-05, 9.268231255609287e-06, -0.0006293642768779598],
            [-0.0015843455126219829, -0.00597505925571199, -0.00010878237745062993, -0.0007202157067082326, -0.0012285972839189082],
            [-0.0009734332588850447, -0.001936822498506686, -0.027443305577813045, -0.00024645647285300213, -0.00019926772803499236],
            [-0.0006024744943890134, -0.002147785787884586, 0.001399812773972361, -0.036472059131277504, 0.00012750772191223904],
            [-0.001567815576140436, 0.0013055621917748808, 0.0002487312848513498, -0.0008614874408401778, -0.019870896443806487],
        ],
    },
    "305M": {
        "intercept": [
            2.3392247012746834,
            3.1651345666056483,
            2.471987105632863,
            1.2404678308980266,
            3.0887017193916093,
        ],
        "coef_linear": [
            [-0.3945995646360234, 0.04212797569256443, 0.3597852823915539, 0.004507385349434609, -0.011821078797535627],
            [0.18765244585849242, -0.5607080638027755, 0.16385032928665508, 0.22772936777546302, -0.018524079117843765],
            [0.36498559947643294, 0.36326950855260254, -1.247281529045098, 0.3544474379638183, 0.16457898305227767],
            [0.11479489142933053, 0.2241274675743544, 0.07534052854957383, -0.4984903999878992, 0.08422751243463553],
            [0.1034302676552572, -0.1442429588936119, 0.1542111292796102, 0.10260754874495605, -0.2160059867862149],
        ],
        "coef_log": [
            [-0.0389843240976756, 0.0003898475662999871, -0.0012326552175473988, -0.0008170951320506675, -0.0006305864869774297],
            [-0.0018382099319297328, -0.004966654576883016, -0.0008004862412949112, -0.0016726743862113481, -0.0014239105552697226],
            [-0.0013605116194238868, -0.0029875971857020777, -0.029138080972677064, -0.0016317402099057068, 0.001163162472447215],
            [-0.0011594434613557832, -0.0010890215347730992, 0.0008814829783619934, -0.035207303872518685, 0.00014797726343401387],
            [-0.0015994149367300917, 0.002183961698325075, -0.0005510279268070304, -0.00175006550618083, -0.020723679414693993],
        ],
    },
    "410M": {
        "intercept": [
            2.2845576475924543,
            3.10221083581893,
            2.4040537489237623,
            1.2320388989073703,
            3.0194029194493215,
        ],
        "coef_linear": [
            [-0.40161868178180443, 0.04851096556048266, 0.37552617435827934, -0.007771674366947659, -0.014646783770016363],
            [0.16564665878501697, -0.5418667012877614, 0.19196166965559713, 0.21461472108487065, -0.030356348237732297],
            [0.3827566078799856, 0.34563333912754424, -1.207292188578679, 0.2962894338651403, 0.18261280770604105],
            [0.054055822096378214, 0.18257490953749397, 0.25515869822947196, -0.5390509589695227, 0.04726152910616942],
            [0.08444528786706501, -0.1235789613695045, 0.1680640099151793, 0.09513264245956578, -0.22406297887230905],
        ],
        "coef_log": [
            [-0.03838578515244451, 0.0010474524802569906, -0.0020612475600514644, 0.0001902706294946067, -0.0012861227733191377],
            [-0.0012829345286925373, -0.004688819508647834, -0.0016346691987556602, -0.0009769593878491815, -0.002091652532429498],
            [-0.0012514981651361474, -0.0022513525226212174, -0.03034764820962916, -0.00021614146982995423, -0.00017869542804964955],
            [-0.0010235048622236945, -7.088093693356411e-05, -0.0007898197931760238, -0.033703719578066345, -0.0007674227907133403],
            [-0.001315884970387432, 0.0024028568720025913, -0.0013333912060313298, -0.0011109928776418308, -0.021811398074324508],
        ],
    },
    "70M": {
        "intercept": [
            2.7857859114105317,
            3.631804815517477,
            2.8681805224896912,
            1.5890762093073625,
            3.585150303901379,
        ],
        "coef_linear": [
            [-0.4416445868640977, 0.07076027431299382, 0.302121611856706, 0.021520016473356454, 0.04724268422103695],
            [0.21834311045644716, -0.553392474651338, 0.04818153280501451, 0.24033451148528265, 0.04653331990458732],
            [0.4651202418236946, 0.2657143234624336, -1.3670739924126776, 0.36683688818700816, 0.2694025389395758],
            [0.0850252828881378, 0.2990024146623459, 0.16822113994472984, -0.6239023579571726, 0.07165352046195206],
            [0.14123915472576296, -0.13497556494772603, 0.03364295958750099, 0.12770730941446795, -0.16761385878000656],
        ],
        "coef_log": [
            [-0.041246477328631105, 0.0006531144880363961, -0.0006596475145338669, -0.00019599814522888677, -0.0015631188541267603],
            [-0.0009803943328683558, -0.005672467237098692, -8.71136475631502e-05, -0.0009074144501494191, -0.0019413115294711764],
            [-0.0006290227608540234, -0.0005120063062147314, -0.02905249764872596, -0.0007835652353273532, -0.000662736071510837],
            [-0.0008408582373940847, -0.0019902435925866755, 0.00039188729846795716, -0.0409361614036341, -0.0005053380487605633],
            [-0.0009055762689076869, 0.0025986071507895507, 0.00015711172405491434, -0.0011903203768379186, -0.019717110434476673],
        ],
    },
}


def _predict_point(p: Dict[str, float], coeff: Dict[str, list]) -> Dict[str, float]:
    """Evaluate the fitted law at a single mixture point.

    Computes, for each domain i:
        loss_i = intercept[i]
                 + sum_j coef_linear[i][j] * p_j
                 + sum_j coef_log[i][j] * log(p_j + EPS)
    Missing proportion keys are treated as 0.0.
    """
    # Feature vectors in the fixed PROP_COLS order.
    props = [float(p.get(name, 0.0)) for name in PROP_COLS]
    log_props = [math.log(v + EPS) for v in props]

    predictions: Dict[str, float] = {}
    for i, out_name in enumerate(LOSS_COLS):
        total = coeff["intercept"][i]
        # Accumulate linear terms, then log terms, one at a time
        # (keeps float accumulation order deterministic).
        for weight, value in zip(coeff["coef_linear"][i], props):
            total += weight * value
        for weight, value in zip(coeff["coef_log"][i], log_props):
            total += weight * value
        predictions[out_name] = total
    return predictions


def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict per-domain losses from domain mixture proportions via the
    discovered scaling law.

    Args:
        input_data: A list of dictionaries, one per data point, mapping input
                    variable names (the proportion_domain_* columns) to values.
        group: The experimental group label to select coefficients for.
                The functional form of the law is identical across groups;
                only the fitted constants differ.

    Returns:
        A list of dictionaries, index-aligned with input_data, each mapping
        the loss_domain_* output names to predicted values.
    """
    try:
        coeff = COEFFS[group]
    except KeyError:
        # Unknown group label: fall back to the element-wise mean of every
        # known group's coefficients. The functional form is unchanged, so
        # predictions stay well-defined (just not group-specialized).
        known = list(COEFFS.values())
        n_groups = len(known)
        coeff = {
            "intercept": [
                sum(g["intercept"][i] for g in known) / n_groups for i in range(5)
            ],
            "coef_linear": [
                [sum(g["coef_linear"][i][j] for g in known) / n_groups for j in range(5)]
                for i in range(5)
            ],
            "coef_log": [
                [sum(g["coef_log"][i][j] for g in known) / n_groups for j in range(5)]
                for i in range(5)
            ],
        }

    return [_predict_point(point, coeff) for point in input_data]
#2 Run 2 R² = 0.971145
#3 Run 3 R² = 0.971000
#4 Run 4 R² = 0.970459
#5 Run 5 R² = 0.899201