import numpy as np
from scipy.optimize import least_squares
# EVOLVE-BLOCK-START
def scaling_law_func(data_points, params):
    """
    Predict loss as:

        loss = b + a * num_params^(-alpha) * parallel_size^(-beta)

    evaluated in a numerically stable exp(.) form.

    params = [a, alpha, beta, b] (4 parameters)
    """
    pts = np.atleast_2d(np.asarray(data_points, dtype=float))
    num_params = pts[:, 0]
    parallel_size = pts[:, 1]
    a, alpha, beta, b = params
    # tiny eps keeps the logarithms finite if an input is exactly zero
    log_decay = (alpha * np.log(num_params + 1e-12)
                 + beta * np.log(parallel_size + 1e-12))
    return b + a * np.exp(-log_decay)
def fit_scaling_law(data_points, loss_values):
    """
    Fit the 4-parameter scaling law

        loss = b + a * num_params^(-alpha) * parallel_size^(-beta)

    via:
      1) three baseline offsets b0 in {0.0, 0.5*min(y), 0.9*min(y)}
      2) log-linear least squares to initialize [log a, alpha, beta]
      3) refinement of all of [a, alpha, beta, b] with bounded
         trust-region least squares (scipy 'trf'; true Levenberg-
         Marquardt does not support bounds) using an analytic
         Jacobian on the residuals f(data; p) - y.

    Parameters
    ----------
    data_points : array-like, shape (m, 2)
        Columns are (num_params, parallel_size).
    loss_values : array-like, shape (m,)
        Observed loss values.

    Returns
    -------
    np.ndarray
        Optimized params [a, alpha, beta, b], or a conservative
        fallback if every start fails.
    """
    D = np.atleast_2d(np.asarray(data_points, dtype=float))
    y = np.ravel(np.asarray(loss_values, dtype=float))
    n, s = D[:, 0], D[:, 1]
    eps = 1e-12
    # precompute logs once; shared by the init, residuals, and Jacobian
    ln_n = np.log(n + eps)
    ln_s = np.log(s + eps)
    y_min = y.min()

    # bounds: a > 0, 0 <= alpha, beta <= 10, 0 <= b <= min(y).
    # least_squares requires lb < ub elementwise, so keep the upper
    # bound on b strictly positive even when min(y) <= 0 — otherwise
    # every start raises and only the fallback is ever returned.
    lb = np.array([eps, 0.0, 0.0, 0.0])
    ub = np.array([np.inf, 10.0, 10.0, max(y_min, eps)])

    def residuals(p):
        a, alpha, beta, b = p
        # f = b + a * exp(-alpha ln n - beta ln s)
        return b + a * np.exp(-alpha * ln_n - beta * ln_s) - y

    def jac(p):
        a, alpha, beta, b = p
        exp_term = np.exp(-alpha * ln_n - beta * ln_s)
        # columns: df/da, df/dalpha, df/dbeta, df/db
        return np.vstack([exp_term,
                          -a * exp_term * ln_n,
                          -a * exp_term * ln_s,
                          np.ones_like(y)]).T

    best_cost = np.inf
    best_params = None
    # multi-start over baseline guesses for b
    for frac in (0.0, 0.5, 0.9):
        b0 = max(0.0, frac * y_min)
        # log-linear init: log(y - b0) ~ log a - alpha ln n - beta ln s
        L = np.log(np.clip(y - b0, eps, None))
        A = np.vstack([np.ones_like(L), -ln_n, -ln_s]).T
        (loga0, alpha0, beta0), *_ = np.linalg.lstsq(A, L, rcond=None)
        p0 = np.array([max(np.exp(loga0), eps), alpha0, beta0, b0],
                      dtype=float)
        # project the start into the feasible box: the log-linear init
        # can produce alpha0/beta0 > 10 (or < 0), and least_squares
        # raises "x0 is infeasible" for any start outside [lb, ub]
        p0 = np.clip(p0, lb, ub)
        try:
            res = least_squares(residuals, p0, jac=jac,
                                bounds=(lb, ub),
                                xtol=1e-12, ftol=1e-12)
        except Exception:
            continue  # degenerate start (e.g. singular init) — try next
        if res.success and res.cost < best_cost:
            best_cost = res.cost
            best_params = res.x
    if best_params is None:
        # conservative fallback if every start failed
        return np.array([1e-3, 0.5, 0.1, eps], dtype=float)
    return best_params
# EVOLVE-BLOCK-END