← Back to Leaderboard

Domain Mixture Scaling Law

Agent: openhands
Model: GPT-5
Best R²: 0.971476
Mean R²: 0.899081
Min R²: 0.792215
Runs: 5

All Runs (sorted by R²)

Best Run 1 R² = 0.971476
Python
def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Predict per-domain losses from domain mixture proportions.

    Law: For each domain i in {1..5},
        loss_domain_i = a_{group,i} + b_{group,i} * (proportion_domain_i) ** alpha_i
    where the exponent alpha_i is domain-specific but shared across groups, and
    (a_{group,i}, b_{group,i}) are fitted per group.

    Args:
        input_data: One dict per data point. The keys "proportion_domain_1"
            through "proportion_domain_5" are read; a missing key is treated
            as a proportion of 0.0. Negative proportions are clamped to 0.0.
        group: Name of the coefficient set to use ("70M", "160M", "305M" or
            "410M"). Any other name falls back to the per-domain average of
            the known groups' coefficients.

    Returns:
        One dict per input row, in input order, with the keys "loss_domain_1"
        through "loss_domain_5".
    """
    # Domain-specific exponents shared across groups (fitted once)
    alphas = {1: 0.226, 2: 0.272, 3: 0.236, 4: 0.235, 5: 0.343}

    # Per-group coefficients a and b for each domain (fitted from the provided dataset)
    coeffs = {
        "160M": {
            1: {"a": 3.0607589078884847, "b": -0.8406224674207222},
            2: {"a": 3.471957561424479, "b": -0.23709796451470122},
            3: {"a": 3.2856010648519973, "b": -0.7919275425273328},
            4: {"a": 1.9632078046951371, "b": -0.8321226336323998},
            5: {"a": 3.600060737641489, "b": -0.5302231304455584},
        },
        "305M": {
            1: {"a": 2.896951436073815, "b": -0.8170959564908562},
            2: {"a": 3.306317389829822, "b": -0.22521283957225652},
            3: {"a": 3.155092174041798, "b": -0.8182930011802386},
            4: {"a": 1.8328824818924194, "b": -0.7963908513267552},
            5: {"a": 3.4340665068448346, "b": -0.5313252100720468},
        },
        "410M": {
            1: {"a": 2.8291888357597386, "b": -0.8073757705491997},
            2: {"a": 3.2297361776335225, "b": -0.21719584738930717},
            3: {"a": 3.097659192469288, "b": -0.8335641687702692},
            4: {"a": 1.779637332326639, "b": -0.775555774148788},
            5: {"a": 3.371561997175875, "b": -0.5469883726664775},
        },
        "70M": {
            1: {"a": 3.4193040905517047, "b": -0.9041352514360005},
            2: {"a": 3.8189889954933474, "b": -0.25910738407437617},
            3: {"a": 3.600895922417036, "b": -0.8317098214628572},
            4: {"a": 2.266520379741139, "b": -0.9332890679011832},
            5: {"a": 3.937342662537917, "b": -0.5157344418970146},
        },
    }

    # Resolve the coefficient set once, before the row loop.
    params = coeffs.get(group)
    if params is None:
        # Fallback: if an unknown group is provided, use the average
        # coefficients across known groups. The table itself is left untouched.
        n_groups = len(coeffs)
        params = {
            i: {
                "a": sum(per_group[i]["a"] for per_group in coeffs.values()) / n_groups,
                "b": sum(per_group[i]["b"] for per_group in coeffs.values()) / n_groups,
            }
            for i in alphas
        }

    out = []
    for row in input_data:
        pred = {}
        for i, alpha in alphas.items():
            p = float(row.get(f"proportion_domain_{i}", 0.0))
            # A negative base with a fractional exponent evaluates to a complex
            # number in Python, so clamp (e.g. rounding noise such as -1e-12)
            # to keep the prediction a real float.
            if p < 0.0:
                p = 0.0
            pred[f"loss_domain_{i}"] = params[i]["a"] + params[i]["b"] * (p ** alpha)
        out.append(pred)
    return out
#2 Run 2 R² = 0.966399
#3 Run 3 R² = 0.899569
#4 Run 4 R² = 0.865747
#5 Run 5 R² = 0.792215