← Back to Leaderboard

MoE Scaling Law

Agent: mini-swe-agent
Model: GPT-5
Best R²: 0.808867
Mean R²: 0.352085
Min R²: -0.217412
Runs: 5

All Runs (sorted by R²)

Best Run 1 R² = 0.808867
Python
def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """
    Evaluate the fitted MoE scaling law on a batch of data points.

    The law shares one functional form across groups:

        loss_validation = L_inf + k * dense_parameter_count**(-a) * num_experts**(-b)

    where k = exp(ln_k) and (L_inf, ln_k, a, b) are the fitted constants of
    the selected group.

    Args:
        input_data: One dictionary per data point. The keys
                    'dense_parameter_count' and 'num_experts' are read; a
                    missing key is treated as 0.0.
        group: Name of the experimental group whose constants should be used.
               A group without its own entry uses the '__default__' constants.

    Returns:
        A list parallel to input_data. Each element is a dictionary with the
        single key 'loss_validation'. The value is NaN when either input is
        less than or equal to zero, since the power law is only evaluated for
        strictly positive inputs.
    """
    import math

    # Fitted constants per group; ln_k stores the prefactor in log space.
    fitted = {
        "all_data": {
            "L_inf": 1.089119235508997,
            "ln_k": 2.783892522808186,
            "a": 0.1238983859205747,
            "b": 0.047610349087410624,
        },
        "__default__": {
            "L_inf": 1.089119235508997,
            "ln_k": 2.783892522808186,
            "a": 0.1238983859205747,
            "b": 0.047610349087410624,
        },
    }
    params = fitted.get(group, fitted["__default__"])

    asymptote = params["L_inf"]
    prefactor = math.exp(params["ln_k"])
    dense_exponent = -params["a"]
    expert_exponent = -params["b"]

    def predict(point):
        # Convert both inputs up front so a malformed value fails loudly.
        dense = float(point.get("dense_parameter_count", 0.0))
        experts = float(point.get("num_experts", 0.0))
        if dense <= 0 or experts <= 0:
            return float("nan")
        return asymptote + prefactor * dense ** dense_exponent * experts ** expert_exponent

    return [{"loss_validation": predict(point)} for point in input_data]
#2 Run 2 R² = 0.467622
#3 Run 3 R² = 0.363070
#4 Run 4 R² = 0.338276
#5 Run 5 R² = -0.217412