← Back to Leaderboard

Domain Mixture Scaling Law

Agent: goose
Model: GPT-5
Best R²: 0.971140
Mean R²: 0.943936
Min R²: 0.842023
Runs: 5

All Runs (sorted by R²)

#1 Run 1 (best) R² = 0.971140
Python
def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict per-domain losses from domain mixture proportions.

    Every (group, domain) pair shares one functional form:

        loss_domain_i = a + b * ln(proportion_domain_i + eps)

    with coefficients (a, b, eps) fitted separately per model-size group
    and domain index.  When *group* is not one of the fitted groups, the
    prediction falls back to the per-domain mean of each coefficient taken
    across all known groups.

    Args:
        input_data: rows mapping "proportion_domain_1".."proportion_domain_5"
            to mixture proportions; missing keys default to 0.0.
        group: model-size group name (e.g. "160M"); unknown names use the
            averaged-coefficient fallback.

    Returns:
        One dict per input row with keys "loss_domain_1".."loss_domain_5".
    """
    import math

    # Fitted coefficients, keyed by group then domain: loss = a + b*ln(p + eps).
    _PARAMS = {
        '160M': {
            1: {'a': 2.2531838390, 'b': -0.1337589930, 'eps': 0.0023949075},
            2: {'a': 3.2636809068, 'b': -0.0353192223, 'eps': 0.0027325293},
            3: {'a': 2.6179153875, 'b': -0.0924472969, 'eps': 0.0007308092},
            4: {'a': 1.1944779835, 'b': -0.1233168892, 'eps': 0.0019650546},
            5: {'a': 3.0846022090, 'b': -0.1417331522, 'eps': 0.0274723768},
        },
        '305M': {
            1: {'a': 2.1168108211, 'b': -0.1266964846, 'eps': 0.0020990011},
            2: {'a': 3.0996329693, 'b': -0.0384297964, 'eps': 0.0046309399},
            3: {'a': 2.4992568493, 'b': -0.0832862054, 'eps': 0.0003779407},
            4: {'a': 1.1005269460, 'b': -0.1163058210, 'eps': 0.0018396558},
            5: {'a': 2.9190563483, 'b': -0.1362346592, 'eps': 0.0225414637},
        },
        '410M': {
            1: {'a': 2.0629628567, 'b': -0.1220476872, 'eps': 0.0018396558},
            2: {'a': 3.0486794047, 'b': -0.0271283688, 'eps': 0.0012385352},
            3: {'a': 2.4325992818, 'b': -0.0837640232, 'eps': 0.0003538226},
            4: {'a': 1.0631159333, 'b': -0.1149495630, 'eps': 0.0019650546},
            5: {'a': 2.8414721314, 'b': -0.1401158009, 'eps': 0.0225414637},
        },
        '70M': {
            1: {'a': 2.5360441935, 'b': -0.1540029694, 'eps': 0.0033302662},
            2: {'a': 3.5682949842, 'b': -0.0515955155, 'eps': 0.0078482616},
            3: {'a': 2.8873239058, 'b': -0.1015557222, 'eps': 0.0008906727},
            4: {'a': 1.4042502135, 'b': -0.1383351096, 'eps': 0.0019650546},
            5: {'a': 3.4357662053, 'b': -0.1381350881, 'eps': 0.0274723768},
        },
    }

    params = _PARAMS.get(group)
    if params is None:
        # Unknown group: fall back to coefficient means over the known
        # groups, computed domain by domain.
        n_groups = len(_PARAMS)
        params = {
            dom: {
                key: sum(g[dom][key] for g in _PARAMS.values()) / n_groups
                for key in ('a', 'b', 'eps')
            }
            for dom in range(1, 6)
        }

    results: list[dict[str, float]] = []
    for record in input_data:
        row_out: dict[str, float] = {}
        for dom in range(1, 6):
            # Clamp tiny negative proportions (noise) to zero; eps keeps
            # the log argument strictly positive.
            share = max(float(record.get(f"proportion_domain_{dom}", 0.0)), 0.0)
            coeff = params[dom]
            prediction = coeff['a'] + coeff['b'] * math.log(share + coeff['eps'])
            row_out[f"loss_domain_{dom}"] = float(prediction)
        results.append(row_out)

    return results
#2 Run 2 R² = 0.971072
#3 Run 3 R² = 0.970426
#4 Run 4 R² = 0.965021
#5 Run 5 R² = 0.842023