# EVOLVE-BLOCK-START
import numpy as np
# Per-column reference scales: _phi divides each of the four input columns by
# the matching entry before taking the logarithm.
_A = np.asarray((1e-3, 256.0, 1e10, 1e8), dtype=float)

# Lower / upper clip bounds applied to the exponent before np.exp is called in
# scaling_law_func and fit_scaling_law.
_UMIN = -9.0
_UMAX = 7.0
def _phi(X):
    """Return the log-quadratic feature matrix for the given inputs.

    Each input column is divided by the matching entry of ``_A`` and
    log-transformed.  The output columns are, in order: the four logs, the
    four half-squares ``0.5 * z_i**2``, and the six pairwise products
    ``z_i * z_j`` for ``i < j``.

    Parameters
    ----------
    X : array_like
        Input points; converted to a float array with at least two
        dimensions.  After dividing by ``_A`` it must have four columns.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N, 14)``.
    """
    points = np.atleast_2d(np.asarray(X, float))
    logs = np.log(points / _A)
    # Unpacking into exactly four names keeps the "must be four columns" check.
    za, zb, zc, zd = logs.T
    cols = (za, zb, zc, zd)

    linear = list(cols)
    half_squares = [0.5 * col**2 for col in cols]
    # (i, j) with i < j enumerates the pairs as 12, 13, 14, 23, 24, 34.
    cross = [
        cols[i] * cols[j]
        for i in range(4)
        for j in range(i + 1, 4)
    ]
    return np.column_stack(linear + half_squares + cross)
def scaling_law_func(data_points, params):
    """Evaluate ``L0 + A * exp(clip(Phi @ w, _UMIN, _UMAX))``.

    ``Phi`` is the feature matrix produced by ``_phi(data_points)`` and ``P``
    is its number of columns.

    Parameters
    ----------
    data_points : array_like
        Inputs forwarded unchanged to ``_phi``.
    params : array_like
        Either a 1-D vector ``[L0, A, w_0, ..., w_{P-1}]`` or a 2-D array
        holding one such parameter vector per row.

    Returns
    -------
    numpy.ndarray
        Shape ``(N,)`` for 1-D ``params``; shape ``(N, T)`` for 2-D ``params``
        with ``T`` rows.
    """
    features = _phi(data_points)
    n_feat = features.shape[1]
    theta = np.asarray(params, float)

    if theta.ndim != 1:
        # One parameter set per row: broadcast offset/amplitude over points.
        offset = theta[:, 0][None, :]
        amplitude = theta[:, 1][None, :]
        exponent = features @ theta[:, 2:2 + n_feat].T
    else:
        offset, amplitude = theta[0], theta[1]
        exponent = features @ theta[2:2 + n_feat]

    # Clipping the exponent keeps np.exp within a bounded range.
    return offset + amplitude * np.exp(np.clip(exponent, _UMIN, _UMAX))
def fit_scaling_law(data_points, loss_values):
    """Fit the parameters of ``L0 + A * exp(Phi @ w)`` to observed losses.

    ``Phi`` is the feature matrix from ``_phi(data_points)``.  For a fixed
    offset ``L0`` the weights ``w`` come from a ridge regression of
    ``log(y - L0)`` on ``Phi``, and the amplitude ``A`` from a closed-form
    least-squares rescale.  ``L0`` and the ridge strength are chosen by a
    coarse grid search; ``L0`` is then refined by golden-section search at the
    selected ridge strength.

    Parameters
    ----------
    data_points : array_like
        Inputs forwarded unchanged to ``_phi``.
    loss_values : array_like
        Observed losses; flattened to 1-D, one value per row of ``Phi``.

    Returns
    -------
    numpy.ndarray
        1-D array ``[L0, A, w_0, ..., w_{P-1}]`` where ``P`` is the number of
        columns of ``Phi``.  This is the 1-D layout ``scaling_law_func``
        accepts.
    """
    Phi = _phi(data_points)
    y = np.asarray(loss_values, float).ravel()
    P = Phi.shape[1]
    gram = Phi.T @ Phi
    lam_grid = np.array([1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1])

    # Candidate offsets lie strictly below the smallest observed loss so that
    # y - L0 stays positive and its logarithm is defined.
    ymin = float(np.min(y))
    lo = ymin - 0.9
    hi = ymin - 0.03
    if not (lo < hi):
        lo = ymin - 0.2
        hi = ymin - 1e-3
    L0_grid = np.linspace(lo, hi, 25)

    def solve_w(L0, lam):
        """Return ``(mse, w, A)`` for a fixed offset and ridge strength."""
        r = y - L0
        r = np.where(r > 1e-8, r, 1e-8)  # floor keeps the log finite
        t = np.log(r)
        M = gram.copy()
        M.flat[::P + 1] += lam  # add lam to the diagonal (ridge penalty)
        bt = Phi.T @ t
        try:
            w = np.linalg.solve(M, bt)
        except np.linalg.LinAlgError:
            # Same ridge problem written as an augmented least-squares system.
            w, *_ = np.linalg.lstsq(
                np.vstack([Phi, np.sqrt(lam) * np.eye(P)]),
                np.concatenate([t, np.zeros(P)]),
                rcond=None,
            )
        u = Phi @ w
        e = np.exp(np.clip(u, _UMIN, _UMAX))
        # Least-squares amplitude for r ~ A * e, kept strictly positive.
        A = (e @ r) / max(e @ e, 1e-12)
        A = float(max(A, 1e-8))
        yhat = L0 + A * e
        mse = ((yhat - y) ** 2).mean()
        return mse, w, A

    # Coarse search over L0 and ridge strength.
    best = (np.inf, hi, 1.0, np.zeros(P), lam_grid[0])
    for L0 in L0_grid:
        for lam in lam_grid:
            mse, w, A = solve_w(L0, lam)
            if mse < best[0]:
                best = (mse, L0, A, w, lam)
    mse_b, L0_b, A_b, w_b, lam_b = best

    # Refine L0 with golden-section search around the coarse optimum.
    gl, gh = max(lo, L0_b - 0.3), min(hi, L0_b + 0.15)
    if gl >= gh:
        gl, gh = lo, hi
    phi = (np.sqrt(5) - 1) / 2
    c = gh - phi * (gh - gl)
    d = gl + phi * (gh - gl)
    mc, wc, Ac = solve_w(c, lam_b)
    md, wd, Ad = solve_w(d, lam_b)
    for _ in range(14):
        if mc < md:
            gh, md, wd, Ad = d, mc, wc, Ac
            d = c
            c = gh - phi * (gh - gl)
            mc, wc, Ac = solve_w(c, lam_b)
        else:
            gl, mc, wc, Ac = c, md, wd, Ad
            c = d
            d = gl + phi * (gh - gl)
            md, wd, Ad = solve_w(d, lam_b)

    if mc < md:
        mse_r, L0_r, w_r, A_r = mc, c, wc, Ac
    else:
        mse_r, L0_r, w_r, A_r = md, d, wd, Ad

    # Golden-section search assumes a unimodal objective, which is not
    # guaranteed here.  Keep the coarse-grid optimum when it is strictly
    # better so refinement can never return a worse fit than one already found.
    if mse_b < mse_r:
        L0_r, w_r, A_r = L0_b, w_b, A_b

    return np.concatenate([[L0_r, A_r], w_r])
# EVOLVE-BLOCK-END