SLD - Domain Mixture Scaling Law - mini-swe-agent + GPT-5

All Runs (sorted by R²)

Best Run 1 R² = 0.902224

▼

Python

# Auto-generated scaling law function
# Formula: loss_domain_i = a_{group,i} + b_{group,i} * log(1 / max(proportion_domain_i, EPS))
# EPS is a lower bound on the proportion, so zero or tiny proportions give a finite loss.
import math
from typing import List, Dict

EPS = 1e-06

COEFFS = {
  "70M": {
    "domain_1": {
      "a": 2.69918728265047,
      "b": 0.05301834148938907
    },
    "domain_2": {
      "a": 3.6412455144352482,
      "b": 0.012884478287480374
    },
    "domain_3": {
      "a": 3.064817132296828,
      "b": 0.03887742050158043
    },
    "domain_4": {
      "a": 1.5883178939323082,
      "b": 0.049537521925556285
    },
    "domain_5": {
      "a": 3.4914427487846966,
      "b": 0.03541574159783458
    }
  },
  "160M": {
    "domain_1": {
      "a": 2.390023453469063,
      "b": 0.04954831984215471
    },
    "domain_2": {
      "a": 3.3089972406617325,
      "b": 0.011847039218535506
    },
    "domain_3": {
      "a": 2.775060340500154,
      "b": 0.03703330098796477
    },
    "domain_4": {
      "a": 1.3585320071946532,
      "b": 0.044165574068738486
    },
    "domain_5": {
      "a": 3.141635642438281,
      "b": 0.036409448055197705
    }
  },
  "305M": {
    "domain_1": {
      "a": 2.244509680794174,
      "b": 0.048262573639627
    },
    "domain_2": {
      "a": 3.151664339501828,
      "b": 0.011228110004841948
    },
    "domain_3": {
      "a": 2.6272256718047498,
      "b": 0.03831525904067123
    },
    "domain_4": {
      "a": 1.2540806183376132,
      "b": 0.04228713975871266
    },
    "domain_5": {
      "a": 2.9742561684134405,
      "b": 0.03681503714017352
    }
  },
  "410M": {
    "domain_1": {
      "a": 2.1839855870092397,
      "b": 0.04779885696477952
    },
    "domain_2": {
      "a": 3.0802841205472307,
      "b": 0.010883493163969587
    },
    "domain_3": {
      "a": 2.559912445312702,
      "b": 0.0390346468996943
    },
    "domain_4": {
      "a": 1.2161032438866803,
      "b": 0.04115602093820587
    },
    "domain_5": {
      "a": 2.8980194286471335,
      "b": 0.038035588514786826
    }
  },
  "GLOBAL": {
    "domain_1": {
      "a": 2.379426500980737,
      "b": 0.04965702298398756
    },
    "domain_2": {
      "a": 3.2955478037865107,
      "b": 0.0117107801687068
    },
    "domain_3": {
      "a": 2.756753897478608,
      "b": 0.03831515685747756
    },
    "domain_4": {
      "a": 1.354258440837814,
      "b": 0.04428656417280338
    },
    "domain_5": {
      "a": 3.1263384970708885,
      "b": 0.03666895382699818
    }
  }
}

def _select_group_key(group: str) -> str:
    """Resolve *group* to a key that exists in COEFFS.

    Resolution order:
      1. exact match on a string group name;
      2. case-insensitive match on a string group name;
      3. the "GLOBAL" entry if present, otherwise the first key of COEFFS.

    Non-string groups skip steps 1 and 2 and go straight to the fallback.
    """
    if isinstance(group, str):
        if group in COEFFS:
            return group
        wanted = group.lower()
        found = next((name for name in COEFFS if name.lower() == wanted), None)
        if found is not None:
            return found
    if "GLOBAL" in COEFFS:
        return "GLOBAL"
    # Last resort: whichever group was declared first.
    return list(COEFFS)[0]

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict per-domain losses from per-domain mixture proportions.

    For every domain ``i`` found in the selected coefficient table:

        loss_domain_i = a_i + b_i * log(1 / max(proportion_domain_i, EPS))

    Args:
        input_data: One dict per data point. Each proportion is read from
            ``proportion_domain_<i>``, falling back to the underscore-less
            ``proportion_domain<i>``. A missing, ``None`` or non-numeric
            value is treated as 0.0.
        group: Experimental group name; resolved by ``_select_group_key``,
            which falls back to a default table for unknown names.

    Returns:
        One dict per input row, keyed ``loss_domain_<i>``, in ascending
        domain order.
    """
    table = COEFFS[_select_group_key(group)]

    def digits_of(name: str) -> str:
        return ''.join(ch for ch in name if ch.isdigit())

    def rank(name: str) -> int:
        # Keys without any digit sort after every numbered domain.
        digits = digits_of(name)
        return int(digits) if digits else 9999

    # Everything that depends only on the group is worked out once here
    # rather than once per row.
    plan = []
    for name in sorted(table, key=rank):
        suffix = digits_of(name)
        coeff = table[name]
        plan.append((
            "proportion_domain_" + suffix,
            "proportion_domain" + suffix,  # legacy spelling without underscore
            "loss_domain_" + suffix,
            float(coeff.get("a", 0.0)),
            float(coeff.get("b", 0.0)),
        ))

    results: list[dict[str, float]] = []
    for row in input_data:
        predicted: dict[str, float] = {}
        for primary, legacy, target, intercept, slope in plan:
            raw = row.get(primary)
            if raw is None:
                raw = row.get(legacy)
            if raw is None:
                raw = 0.0
            try:
                share = float(raw)
            except Exception:
                # Unparseable values are deliberately treated as "absent".
                share = 0.0
            predicted[target] = float(intercept + slope * math.log(1.0 / max(share, EPS)))
        results.append(predicted)
    return results

#2 Run 2 R² = 0.899201

▼

Python

from typing import List, Dict
import math

# Fitted coefficients for the linear-in-log proportions model:
# loss_domain_i = intercept_i + sum_j beta_ij * log(proportion_domain_j + eps)
COEFS = {
  "160M": {
    "loss_domain_1": {
      "intercept": 2.4042025803406553,
      "betas": {
        "proportion_domain_1": -0.04878456902733,
        "proportion_domain_2": -0.001374141990715709,
        "proportion_domain_3": 0.00425143168670042,
        "proportion_domain_4": -0.004036425589256114,
        "proportion_domain_5": 0.007660516801797778
      }
    },
    "loss_domain_2": {
      "intercept": 3.329935868696654,
      "betas": {
        "proportion_domain_1": 0.002613273350161564,
        "proportion_domain_2": -0.011891996557419415,
        "proportion_domain_3": 2.5572674116154508e-05,
        "proportion_domain_4": 0.003256629390122456,
        "proportion_domain_5": -0.005952205705195364
      }
    },
    "loss_domain_3": {
      "intercept": 2.8006343825165314,
      "betas": {
        "proportion_domain_1": 0.0038463371713819104,
        "proportion_domain_2": 0.0010271048724917586,
        "proportion_domain_3": -0.03834514983761013,
        "proportion_domain_4": 0.003370871701657902,
        "proportion_domain_5": -0.004992444549614571
      }
    },
    "loss_domain_4": {
      "intercept": 1.3842254371292473,
      "betas": {
        "proportion_domain_1": 0.0001770125184695076,
        "proportion_domain_2": 0.0019008052563317707,
        "proportion_domain_3": 0.00040985897229494966,
        "proportion_domain_4": -0.04321725706040489,
        "proportion_domain_5": 0.005203775943861618
      }
    },
    "loss_domain_5": {
      "intercept": 3.22070848376419,
      "betas": {
        "proportion_domain_1": 0.005681276339156933,
        "proportion_domain_2": 0.004826630941776033,
        "proportion_domain_3": -0.00156340623248917,
        "proportion_domain_4": 0.005473022220321205,
        "proportion_domain_5": -0.030039287316337895
      }
    }
  },
  "305M": {
    "loss_domain_1": {
      "intercept": 2.254097761686427,
      "betas": {
        "proportion_domain_1": -0.047618308465165196,
        "proportion_domain_2": -0.0013550046237553216,
        "proportion_domain_3": 0.004003192579882288,
        "proportion_domain_4": -0.0042909223510698554,
        "proportion_domain_5": 0.006758159630738282
      }
    },
    "loss_domain_2": {
      "intercept": 3.166848403701224,
      "betas": {
        "proportion_domain_1": 0.0023474329227610603,
        "proportion_domain_2": -0.011422287483401284,
        "proportion_domain_3": 4.890168638858063e-06,
        "proportion_domain_4": 0.0026987505976661923,
        "proportion_domain_5": -0.006498741023938888
      }
    },
    "loss_domain_3": {
      "intercept": 2.6500359123770685,
      "betas": {
        "proportion_domain_1": 0.0035548451771600445,
        "proportion_domain_2": 0.0011888369505355075,
        "proportion_domain_3": -0.03964115398330717,
        "proportion_domain_4": 0.0028959099742507494,
        "proportion_domain_5": -0.004828358464409514
      }
    },
    "loss_domain_4": {
      "intercept": 1.2751944568968825,
      "betas": {
        "proportion_domain_1": -0.00024923215104993846,
        "proportion_domain_2": 0.0015693015233153626,
        "proportion_domain_3": 0.0004352251902864513,
        "proportion_domain_4": -0.041431801189773,
        "proportion_domain_5": 0.00487564464080058
      }
    },
    "loss_domain_5": {
      "intercept": 3.0477226649403506,
      "betas": {
        "proportion_domain_1": 0.005424541586794049,
        "proportion_domain_2": 0.004752807485933067,
        "proportion_domain_3": -0.0015473014680650466,
        "proportion_domain_4": 0.004775541162988269,
        "proportion_domain_5": -0.030879417992197157
      }
    }
  },
  "410M": {
    "loss_domain_1": {
      "intercept": 2.1969109104720936,
      "betas": {
        "proportion_domain_1": -0.04711053566759858,
        "proportion_domain_2": -0.0005929598868873091,
        "proportion_domain_3": 0.0032935545744610275,
        "proportion_domain_4": -0.0034299883497122844,
        "proportion_domain_5": 0.006245418652814477
      }
    },
    "loss_domain_2": {
      "intercept": 3.096774846113359,
      "betas": {
        "proportion_domain_1": 0.002683578481767577,
        "proportion_domain_2": -0.01079532012122322,
        "proportion_domain_3": -0.0006061709329291659,
        "proportion_domain_4": 0.003289915492216653,
        "proportion_domain_5": -0.007059325448456723
      }
    },
    "loss_domain_3": {
      "intercept": 2.5854698484676923,
      "betas": {
        "proportion_domain_1": 0.0036493802056689617,
        "proportion_domain_2": 0.0014981486567392278,
        "proportion_domain_3": -0.04057579585320029,
        "proportion_domain_4": 0.003422309711002145,
        "proportion_domain_5": -0.005304641068008599
      }
    },
    "loss_domain_4": {
      "intercept": 1.2374283294627488,
      "betas": {
        "proportion_domain_1": -0.0007019754734980677,
        "proportion_domain_2": 0.0023220834389344196,
        "proportion_domain_3": 0.00022483249867104974,
        "proportion_domain_4": -0.040255057354183485,
        "proportion_domain_5": 0.0043998574943246395
      }
    },
    "loss_domain_5": {
      "intercept": 2.9734625677861892,
      "betas": {
        "proportion_domain_1": 0.005474979456664107,
        "proportion_domain_2": 0.005292433569369652,
        "proportion_domain_3": -0.0021983339298417347,
        "proportion_domain_4": 0.005328130514437292,
        "proportion_domain_5": -0.031848404298810445
      }
    }
  },
  "70M": {
    "loss_domain_1": {
      "intercept": 2.709753097020149,
      "betas": {
        "proportion_domain_1": -0.05223448519596348,
        "proportion_domain_2": -0.002096832291136182,
        "proportion_domain_3": 0.0049309629806633245,
        "proportion_domain_4": -0.005121637241168704,
        "proportion_domain_5": 0.008582633603168494
      }
    },
    "loss_domain_2": {
      "intercept": 3.6589940901202946,
      "betas": {
        "proportion_domain_1": 0.0024594854221674217,
        "proportion_domain_2": -0.013037306540415955,
        "proportion_domain_3": 0.00018013301869309603,
        "proportion_domain_4": 0.0025694674548725075,
        "proportion_domain_5": -0.0055192991657138785
      }
    },
    "loss_domain_3": {
      "intercept": 3.0882519411499176,
      "betas": {
        "proportion_domain_1": 0.004138978755961604,
        "proportion_domain_2": 0.000999612740187218,
        "proportion_domain_3": -0.040138664176247195,
        "proportion_domain_4": 0.002610113875372775,
        "proportion_domain_5": -0.00489486237526904
      }
    },
    "loss_domain_4": {
      "intercept": 1.6123830068952152,
      "betas": {
        "proportion_domain_1": -0.0002626923872205353,
        "proportion_domain_2": 0.001840626589019361,
        "proportion_domain_3": 0.0004871656033250259,
        "proportion_domain_4": -0.048580221686945084,
        "proportion_domain_5": 0.0053741041526707105
      }
    },
    "loss_domain_5": {
      "intercept": 3.567736623439916,
      "betas": {
        "proportion_domain_1": 0.005906206376564464,
        "proportion_domain_2": 0.00463119283202685,
        "proportion_domain_3": -0.0015851069219014631,
        "proportion_domain_4": 0.004999662780701288,
        "proportion_domain_5": -0.029220774259506147
      }
    }
  }
}

# Input keys read from every row by law(); each loss sums one beta term per key.
PROP_COLS = ['proportion_domain_1', 'proportion_domain_2', 'proportion_domain_3', 'proportion_domain_4', 'proportion_domain_5']
# Output keys written to every prediction dict by law().
LOSS_COLS = ['loss_domain_1', 'loss_domain_2', 'loss_domain_3', 'loss_domain_4', 'loss_domain_5']
# Added to each proportion (after clamping it at 0) before the log is taken.
EPS = 1e-6

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict every domain loss from the logs of all domain proportions.

    Functional form (identical for every group, coefficients differ):

        loss_k = intercept_k + sum_j beta_kj * log(max(proportion_domain_j, 0) + EPS)

    Args:
        input_data: One dict per data point holding the keys in PROP_COLS.
            A missing proportion is treated as 0.0.
        group: Name of the coefficient set to use; must be a key of COEFS.

    Returns:
        One dict per input row with a float for each key in LOSS_COLS.

    Raises:
        ValueError: If ``group`` has no fitted coefficients.
    """
    if group not in COEFS:
        raise ValueError(f"Unknown group '{group}'. Available groups: {list(COEFS.keys())}")
    fitted = COEFS[group]

    predictions = []
    for row in input_data:
        # The log features are shared by all five losses, so take them once.
        log_share = {
            col: math.log(max(float(row.get(col, 0.0)), 0.0) + EPS)
            for col in PROP_COLS
        }
        record = {}
        for target in LOSS_COLS:
            fit = fitted[target]
            total = float(fit["intercept"])
            for col in PROP_COLS:
                total += float(fit["betas"][col]) * log_share[col]
            record[target] = total
        predictions.append(record)
    return predictions

#3 Run 3 R² = 0.873244

▼

Python

# Auto-generated scaling law implementation
from __future__ import annotations
import math
from typing import Dict, List

# Lower bound applied to each proportion in _predict_one before the log/power transform.
EPS = 1e-9
# Formula selector read by _predict_one: "power" picks the power-law branch,
# any other value picks the a + b * log(p) branch.
MODEL_FAMILY = "log"

# Per-group, per-domain coefficients for the log model (a + b * log(p)).
# Domain keys are the strings "1".."5" here; law() passes the selected table
# through _normalize_coefs, which converts them to ints before lookup.
PARAMS: dict[str, dict[str, dict[str, float]]] = {
  "70M": {
    "1": {
      "a": 2.73390692,
      "b": -0.03347999
    },
    "2": {
      "a": 3.65073552,
      "b": -0.00812046
    },
    "3": {
      "a": 3.10822759,
      "b": -0.02380902
    },
    "4": {
      "a": 1.62816371,
      "b": -0.03102932
    },
    "5": {
      "a": 3.50046709,
      "b": -0.02257996
    }
  },
  "160M": {
    "1": {
      "a": 2.42222453,
      "b": -0.03132683
    },
    "2": {
      "a": 3.31767555,
      "b": -0.00747288
    },
    "3": {
      "a": 2.81638464,
      "b": -0.02268263
    },
    "4": {
      "a": 1.39405071,
      "b": -0.02766535
    },
    "5": {
      "a": 3.1508949,
      "b": -0.02322456
    }
  },
  "305M": {
    "1": {
      "a": 2.275788,
      "b": -0.03052741
    },
    "2": {
      "a": 3.15991008,
      "b": -0.00707973
    },
    "3": {
      "a": 2.66991027,
      "b": -0.02347551
    },
    "4": {
      "a": 1.28807431,
      "b": -0.02649081
    },
    "5": {
      "a": 2.98356485,
      "b": -0.02351572
    }
  },
  "410M": {
    "1": {
      "a": 2.21485658,
      "b": -0.03025062
    },
    "2": {
      "a": 3.08823094,
      "b": -0.00686847
    },
    "3": {
      "a": 2.60339221,
      "b": -0.02391696
    },
    "4": {
      "a": 1.24921386,
      "b": -0.02577835
    },
    "5": {
      "a": 2.90760213,
      "b": -0.02431623
    }
  }
}

# Fallback median parameters across groups (for unseen groups).
# Keys are the domain indices as strings ("1".."5"); _normalize_coefs turns
# them into ints before they are looked up.
DEFAULT_PARAMS: dict[str, dict[str, float]] = {
  "1": {
    "a": 2.34900627,
    "b": -0.03092712
  },
  "2": {
    "a": 3.23879282,
    "b": -0.0072763
  },
  "3": {
    "a": 2.74314745,
    "b": -0.02364227
  },
  "4": {
    "a": 1.34106251,
    "b": -0.02707808
  },
  "5": {
    "a": 3.06722987,
    "b": -0.02337014
  }
}

def _normalize_coefs(coefs_any) -> dict[int, dict[str, float]]:
    try:
        return {int(k): v for k, v in coefs_any.items()}
    except Exception:
        return coefs_any

def _predict_one(proportions: dict[str, float], coefs: dict[int, dict[str, float]]) -> dict[str, float]:
    """Predict the five domain losses for a single data point.

    Args:
        proportions: Row holding ``proportion_domain_1`` .. ``proportion_domain_5``;
            a missing key counts as 0.0. Every proportion is floored at EPS.
        coefs: Coefficients keyed by integer domain index. A domain absent
            from this mapping uses the normalised DEFAULT_PARAMS entry.

    Returns:
        Dict keyed ``loss_domain_1`` .. ``loss_domain_5``.
    """
    fallback = _normalize_coefs(DEFAULT_PARAMS)
    use_power = MODEL_FAMILY == "power"
    predictions: dict[str, float] = {}
    for domain in range(1, 6):
        share = float(proportions.get(f"proportion_domain_{domain}", 0.0))
        if not share > EPS:
            # Also catches NaN, for which every comparison is false.
            share = EPS
        params = coefs.get(domain)
        if params is None:
            params = fallback.get(domain, {})
        if use_power:
            offset = float(params.get("c0", 0.0))
            scale = float(params.get("c1", 0.0))
            exponent = float(params.get("alpha", 0.0))
            value = offset + scale * (share ** (-exponent))
        else:
            intercept = float(params.get("a", 0.0))
            slope = float(params.get("b", 0.0))
            value = intercept + slope * math.log(share)
        predictions[f"loss_domain_{domain}"] = float(value)
    return predictions

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict per-domain losses for every row using the group's coefficients.

    Args:
        input_data: One dict of input variables per data point.
        group: Experimental group name. A name that is not a key of PARAMS
            uses DEFAULT_PARAMS instead of raising.

    Returns:
        One prediction dict per input row, in input order, as produced by
        ``_predict_one``.
    """
    selected = PARAMS.get(group, DEFAULT_PARAMS)
    # The tables use string domain keys; prediction looks them up by int.
    by_domain = _normalize_coefs(selected)
    results = []
    for point in input_data:
        results.append(_predict_one(point, by_domain))
    return results

#4 Run 4 R² = 0.862092

▼

Python

# Auto-generated scaling law implementation
# Functional form (shared across groups):
#   For each k in 1..5:
#     loss_domain_k = intercept_k(group) + sum_j weights_kj(group) * log(proportion_domain_j + EPS)
# Where EPS = 1e-09 for numerical stability when proportions are very small.

from math import log
from typing import List, Dict

EPS = 1e-09

COEFFS = {
  "160M": {
    "loss_domain_1": {
      "intercept": 2.4300949275826182,
      "weights": [
        -0.031068969226910582,
        -0.001251970877444727,
        0.0030660145955268667,
        -0.0031923386958230627,
        0.005666961729262434
      ]
    },
    "loss_domain_2": {
      "intercept": 3.337559043041566,
      "weights": [
        0.0016673078859499708,
        -0.0074170755968738225,
        3.951373748949835e-05,
        0.0021209041901585806,
        -0.003650890207385123
      ]
    },
    "loss_domain_3": {
      "intercept": 2.8401818458556174,
      "weights": [
        0.002460792554312405,
        0.0007625714631167231,
        -0.023536811266176717,
        0.002221882872273453,
        -0.0031186174972866886
      ]
    },
    "loss_domain_4": {
      "intercept": 1.416827910017676,
      "weights": [
        -8.308604862155428e-06,
        0.0012190503004574596,
        0.00032399676863148686,
        -0.027078844363725037,
        0.004010845050737586
      ]
    },
    "loss_domain_5": {
      "intercept": 3.224543906049842,
      "weights": [
        0.003905427350717464,
        0.0034498008479645842,
        -0.0011438176738371758,
        0.0038638961855052053,
        -0.019328119858345046
      ]
    }
  },
  "305M": {
    "loss_domain_1": {
      "intercept": 2.280243138043643,
      "weights": [
        -0.030323002210805715,
        -0.001211700929338305,
        0.0028947830670476163,
        -0.003324913376723421,
        0.005113800264772562
      ]
    },
    "loss_domain_2": {
      "intercept": 3.1750876801913237,
      "weights": [
        0.00150916925021668,
        -0.007107484565596503,
        2.2128083411678632e-05,
        0.0017728620875118904,
        -0.003972454524642374
      ]
    },
    "loss_domain_3": {
      "intercept": 2.6913398939739,
      "weights": [
        0.0022708344859972973,
        0.0008607850265603164,
        -0.024331792555795703,
        0.001910605212900081,
        -0.0029724526398083326
      ]
    },
    "loss_domain_4": {
      "intercept": 1.3069649592755754,
      "weights": [
        -0.0002767577345750335,
        0.0010150273630255805,
        0.0003398702171026176,
        -0.02596334582661601,
        0.003801676322267153
      ]
    },
    "loss_domain_5": {
      "intercept": 3.0528011051041433,
      "weights": [
        0.0037594659546520076,
        0.0034195441188783746,
        -0.0011368975113422175,
        0.0034318689601916725,
        -0.01984609557908483
      ]
    }
  },
  "410M": {
    "loss_domain_1": {
      "intercept": 2.222420776701192,
      "weights": [
        -0.0299923842986247,
        -0.0007219779189471512,
        0.002445903709102556,
        -0.0027711721407406966,
        0.004740901934744721
      ]
    },
    "loss_domain_2": {
      "intercept": 3.104708939329123,
      "weights": [
        0.0017336451412636902,
        -0.006704834426433428,
        -0.00036440290744305196,
        0.0021542694049179936,
        -0.004373253420089363
      ]
    },
    "loss_domain_3": {
      "intercept": 2.627413222019661,
      "weights": [
        0.0023403919816331487,
        0.0010679528945524257,
        -0.02491463877855555,
        0.002264951898769409,
        -0.003300544791425321
      ]
    },
    "loss_domain_4": {
      "intercept": 1.268317554857684,
      "weights": [
        -0.0005477525270926719,
        0.001495543684942191,
        0.00019725923029861728,
        -0.025223453889896486,
        0.0034567257553997934
      ]
    },
    "loss_domain_5": {
      "intercept": 2.9787362389662326,
      "weights": [
        0.0038069030391143168,
        0.003769753651834665,
        -0.0015455123055104936,
        0.003789696705093632,
        -0.020506176044717026
      ]
    }
  },
  "70M": {
    "loss_domain_1": {
      "intercept": 2.738245966324205,
      "weights": [
        -0.033247355909198334,
        -0.0017297973305077973,
        0.0035157990300376508,
        -0.003918220474936535,
        0.006317934127290131
      ]
    },
    "loss_domain_2": {
      "intercept": 3.6680087213236505,
      "weights": [
        0.0015828220455031585,
        -0.008139958599724776,
        0.00014447856565839406,
        0.0016881696288695383,
        -0.0033595783088106986
      ]
    },
    "loss_domain_3": {
      "intercept": 3.1304945178100705,
      "weights": [
        0.0026532896643926083,
        0.0007600084197338211,
        -0.024637174874146756,
        0.001757289151853728,
        -0.0030248438725573442
      ]
    },
    "loss_domain_4": {
      "intercept": 1.6498551367668615,
      "weights": [
        -0.0002889005529800089,
        0.001194603021950033,
        0.00037667659236133267,
        -0.030432861224971082,
        0.004231626123725879
      ]
    },
    "loss_domain_5": {
      "intercept": 3.5718538704762426,
      "weights": [
        0.004053652875855752,
        0.0033280287851346077,
        -0.0011542215280026637,
        0.0035637709733213644,
        -0.01878431722025504
      ]
    }
  },
  "DEFAULT": {
    "loss_domain_1": {
      "intercept": 2.417751202162915,
      "weights": [
        -0.03115792791138483,
        -0.001228861764059495,
        0.0029806251004286725,
        -0.0033016611720559285,
        0.005459899514017462
      ]
    },
    "loss_domain_2": {
      "intercept": 3.3213410959714156,
      "weights": [
        0.0016232360807333748,
        -0.007342338297157132,
        -3.957063022087023e-05,
        0.0019340513278645007,
        -0.00383904411523189
      ]
    },
    "loss_domain_3": {
      "intercept": 2.8223573699148123,
      "weights": [
        0.002431327171583865,
        0.0008628294509908216,
        -0.024355104368668678,
        0.0020386822839491676,
        -0.003104114700269422
      ]
    },
    "loss_domain_4": {
      "intercept": 1.4104913902294491,
      "weights": [
        -0.00028042985487746743,
        0.001231056092593816,
        0.0003094507020985136,
        -0.027174626326302152,
        0.003875218313032603
      ]
    },
    "loss_domain_5": {
      "intercept": 3.2069837801491152,
      "weights": [
        0.003881362305084885,
        0.0034917818509530582,
        -0.0012451122546731377,
        0.0036623082060279685,
        -0.019616177175600488
      ]
    }
  }
}

# Input keys in the order law() builds its log-feature vector; each "weights"
# list in COEFFS is zipped against the features in this order.
PROP_ORDER = ["proportion_domain_1", "proportion_domain_2", "proportion_domain_3", "proportion_domain_4", "proportion_domain_5"]
# Output keys, in the order law() writes them into each prediction dict.
LOSS_ORDER = ["loss_domain_1", "loss_domain_2", "loss_domain_3", "loss_domain_4", "loss_domain_5"]

def _select_group(group: str) -> Dict[str, dict]:
    """Return the coefficient table for *group*, falling back to "DEFAULT".

    Lookup order: exact key match, then case-insensitive match, then the
    "DEFAULT" table. A group that is not a string (``None``, a number, an
    unhashable object, ...) can never name a table, so it goes straight to
    the fallback instead of raising from ``.lower()`` or from hashing.

    Args:
        group: Requested experimental group name.

    Returns:
        Mapping from loss name to its fitted entry (each entry holds an
        "intercept" and a "weights" list).
    """
    if not isinstance(group, str):
        return COEFFS["DEFAULT"]
    if group in COEFFS:
        return COEFFS[group]
    # Lower-case the requested name once, not once per candidate.
    wanted = group.lower()
    for name, table in COEFFS.items():
        if name.lower() == wanted:
            return table
    return COEFFS["DEFAULT"]

def law(input_data: List[Dict[str, float]], group: str) -> List[Dict[str, float]]:
    """Predict every domain loss as a linear function of log-proportions.

    For each loss key ``k`` in LOSS_ORDER:

        loss_k = intercept_k + sum_j weights_kj * log(max(proportion_j, 0) + EPS)

    with ``j`` running over PROP_ORDER.

    Args:
        input_data: One dict per data point; a proportion missing from a
            row is treated as 0.0.
        group: Experimental group name, resolved by ``_select_group``.

    Returns:
        One dict per input row holding a float for each key in LOSS_ORDER.
    """
    table = _select_group(group)
    results = []
    for row in input_data:
        # Log features in canonical order; negatives are clamped to zero first.
        features = []
        for name in PROP_ORDER:
            share = max(float(row.get(name, 0.0)), 0.0)
            features.append(log(share + EPS))
        record = {}
        for target in LOSS_ORDER:
            fit = table[target]
            weighted = sum(w * x for w, x in zip(fit["weights"], features))
            record[target] = float(fit["intercept"] + weighted)
        results.append(record)
    return results

#5 Run 5 R² = 0.825888

▼

Python

def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict per-domain losses with a per-domain power law.

    For each domain ``i`` in 1..5:

        loss_domain_i = a_i + b_i * (max(proportion_domain_i, 0) + EPS) ** (-alpha_i)

    Args:
        input_data: One dict per data point; every row must contain
            'proportion_domain_1' .. 'proportion_domain_5'.
        group: Name of the fitted parameter set ('160M', '305M', '410M', '70M').

    Returns:
        One dict per input row with keys 'loss_domain_1' .. 'loss_domain_5'.

    Raises:
        ValueError: If ``group`` has no fitted parameters.
        KeyError: If a row lacks one of the required proportion keys.
    """
    EPS = 1e-06
    # (a, b, alpha) for domains 1..5, per group.
    _PARAMS = {
        '160M': (
            (2.447836676230049, 0.01947604243193669, 0.25),
            (3.324865267668977, 0.0046611055397499865, 0.25),
            (2.8536617392943797, 0.013668198149884528, 0.25),
            (1.4245257489216498, 0.017073641554700338, 0.25),
            (3.1503851621943806, 0.014361051003233668, 0.25),
        ),
        '305M': (
            (2.300638607022169, 0.01899092355571077, 0.25),
            (3.1667447973834975, 0.004413698745158715, 0.25),
            (2.7084277465750337, 0.014151032173366376, 0.25),
            (1.3172396091096554, 0.016350509608112452, 0.25),
            (2.9829538687037322, 0.014576042006844646, 0.25),
        ),
        '410M': (
            (2.239353613802408, 0.018832858223664272, 0.25),
            (3.0948104946214414, 0.004286808779512701, 0.25),
            (2.6426284303607965, 0.014417587188864664, 0.25),
            (1.2776219014512213, 0.015907819042873157, 0.25),
            (2.9069132394663955, 0.01509328547538018, 0.25),
        ),
        '70M': (
            (2.761596918863867, 0.020779709248785288, 0.25),
            (3.6586060682487, 0.005059595461040137, 0.25),
            (3.1473798666330657, 0.014345011881448757, 0.25),
            (1.6623493783999272, 0.019149169215257496, 0.25),
            (3.4999972282096126, 0.013952976677258505, 0.25),
        ),
    }
    if group not in _PARAMS:
        raise ValueError(f"Unknown group '{group}'. Available groups: {list(_PARAMS.keys())}")
    fitted = _PARAMS[group]

    results: list[dict[str, float]] = []
    for row in input_data:
        record = {}
        for index, (a, b, alpha) in enumerate(fitted, start=1):
            source = f'proportion_domain_{index}'
            if source not in row:
                raise KeyError(f"Missing required input key: {source}")
            # Negative proportions are clamped to zero before EPS is added.
            share = max(float(row[source]), 0.0)
            record[f'loss_domain_{index}'] = float(a + b * (share + EPS) ** (-alpha))
        results.append(record)
    return results