# EVOLVE-BLOCK-START
import numpy as np
def scaling_law_func(data_points, params):
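    """Evaluate a multi-output power-law scaling model.

    Inputs are clipped to (1e-12, 1], so features are assumed pre-normalized.
    The parameter vector is decoded by length, trying layouts in order:
      1. per-output exponents, L = T*(F+2):   [b (T), W (T*F), a (T)]
      2. shared exponent,      L = T*(F+1)+1: [b (T), W (T*F), a (1)]
      3. affine fallback,      L = T*(F+1):   rows [b_t, w_t] applied to [1, X]
    Otherwise up to five leading params are tiled as constant outputs.
    Returns an (N, T) array of predictions.
    """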
X = np.atleast_2d(np.asarray(data_points, float))
N, F = X.shape
p = np.asarray(params, float).ravel()
L = p.size
Xc = np.clip(X, 1e-12, 1.0)
# Preferred: per-output alpha, layout L = T*(F+2) => [b(T), W(T*F), a(T)]
if L > 0 and L % (F + 2) == 0:
T = L // (F + 2)
b = p[:T]
W = p[T:T + T * F].reshape(T, F)
a = p[-T:]
Z = Xc[:, None, :] ** a[None, :, None]
return (Z * W[None]).sum(2) + b
# Shared alpha: L = (F+1)*T + 1
if L > 1 and (L - 1) % (F + 1) == 0:
T = (L - 1) // (F + 1)
        b = p[:T]
        W = p[T:T + T * F].reshape(T, F)
        a = float(p[-1])
return (Xc ** a) @ W.T + b
# Linear fallback: L = (F+1)*T
if L > 0 and L % (F + 1) == 0:
T = L // (F + 1)
B = p.reshape(T, F + 1)
ZA = np.concatenate([np.ones((N, 1)), X], 1)
return ZA @ B.T
    # Degenerate fallback: no recognized layout; broadcast leading params.
    if L == 0:
        return np.zeros((N, 1))
    T = min(5, L)
    return np.tile(p[:T][None, :], (N, 1))
def fit_scaling_law(data_points, loss_values):
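    """Fit the per-output power-law model and return its parameter vector.

    Strategy: (1) pick a shared exponent alpha by coarse grid search plus
    golden-section refinement, scoring ridge fits by normalized MSE;
    (2) refine alpha per output with a quadratic penalty pulling it toward
    the shared value; (3) pack [b, W.ravel(), a] so the result matches the
    per-output layout that scaling_law_func decodes first.
    """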
X = np.atleast_2d(np.asarray(data_points, float))
Y = np.asarray(loss_values, float)
    if Y.ndim == 1:
        Y = Y[:, None]
    N, F = X.shape
    T = Y.shape[1]
Xc = np.clip(X, 1e-12, 1.0)
lam_b = 1e-8
lam_grid = np.array([1e-6, 3e-6, 1e-5, 3e-5, 1e-4, 3e-4], float)
a_min, a_max = 0.2, 3.0
phi = (1 + 5 ** 0.5) / 2
def nmse_cols(y, yhat):
v = np.var(y, axis=0, ddof=0) + 1e-12
return np.mean(((yhat - y) ** 2) / v)
def solve_scaled(Z, Ymat, lam):
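        # Scale columns to unit RMS so one ridge strength suits all features;
        # solve the regularized normal equations, then undo the scaling on W.
        # The intercept carries only the tiny fixed penalty lam_b.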
s = np.sqrt((Z ** 2).mean(0)) + 1e-12
Zs = Z / s
ZA = np.concatenate([np.ones((N, 1)), Zs], 1)
A = ZA.T @ ZA
A[0, 0] += lam_b
A[1:, 1:] += lam * np.eye(F)
try:
B = np.linalg.solve(A, ZA.T @ Ymat)
except np.linalg.LinAlgError:
B = np.linalg.lstsq(A + 1e-12 * np.eye(F + 1), ZA.T @ Ymat, rcond=None)[0]
b = B[0, :]
W = (B[1:, :].T) / s
return W, b
def best_ridge(Z, Ymat):
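        # Caveat: ridge training error is non-decreasing in lam, so scoring on
        # training NMSE effectively always selects the smallest grid value; a
        # held-out or GCV criterion would be needed for this grid to matter.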
best, Wb = np.inf, None
for lam in lam_grid:
W, b = solve_scaled(Z, Ymat, lam)
val = nmse_cols(Ymat, Z @ W.T + b)
if val < best:
best, Wb = val, (W, b)
return best, Wb
def shared_obj(a):
Z = Xc ** float(a)
v, _ = best_ridge(Z, Y)
return v
# Shared alpha (multi-output) via coarse grid + golden section
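    # Each golden-section step shrinks the bracket by 1/phi, so 22 iterations
    # cut the initial width (at most 1.4) by a factor of roughly 2.5e-5.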
grid = np.linspace(0.3, 2.5, 9)
a_shared = grid[np.argmin([shared_obj(a) for a in grid])]
Lb, Ub = max(a_min, a_shared - 0.7), min(a_max, a_shared + 0.7)
c = Ub - (Ub - Lb) / phi
d = Lb + (Ub - Lb) / phi
vc, vd = shared_obj(c), shared_obj(d)
for _ in range(22):
if vc < vd:
Ub, d, vd = d, c, vc
c = Ub - (Ub - Lb) / phi
vc = shared_obj(c)
else:
Lb, c, vc = c, d, vd
d = Lb + (Ub - Lb) / phi
vd = shared_obj(d)
a_shared = c if vc < vd else d
# Per-output refinement with coupling penalty to a_shared and lam selection
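    # The quadratic term rho*(a - a_shared)^2 shrinks each output's exponent
    # toward the shared one, stabilizing outputs with flat per-alpha error.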
rho = 1e-3
bs, Ws, alphas = [], [], []
def fit_for_alpha(y, a):
Z = Xc ** float(a)
best, Wb = np.inf, None
for lam in lam_grid:
W, b = solve_scaled(Z, y, lam)
pred = Z @ W.T + b
v = float(np.var(y, ddof=0) + 1e-12)
nm = float(np.mean((pred - y) ** 2) / v)
if nm < best:
best, Wb = nm, (b[0], W[0])
return best, Wb
for t in range(T):
y = Y[:, t:t+1]
# coarse search
vals = [fit_for_alpha(y, a) for a in grid]
idx = np.argmin([nm + rho * (a - a_shared) ** 2 for (nm, _), a in zip(vals, grid)])
a0 = grid[idx]
# refine by golden section
Lb, Ub = max(a_min, a0 - 0.7), min(a_max, a0 + 0.7)
        c = Ub - (Ub - Lb) / phi
        d = Lb + (Ub - Lb) / phi
        nc, Bc = fit_for_alpha(y, c)
        vc = nc + rho * (c - a_shared) ** 2
        nd, Bd = fit_for_alpha(y, d)
        vd = nd + rho * (d - a_shared) ** 2
for _ in range(22):
if vc < vd:
Ub, d, vd, Bd = d, c, vc, Bc
c = Ub - (Ub - Lb) / phi
                nc, Bc = fit_for_alpha(y, c)
                vc = nc + rho * (c - a_shared) ** 2
else:
Lb, c, vc, Bc = c, d, vd, Bd
d = Lb + (Ub - Lb) / phi
                nd, Bd = fit_for_alpha(y, d)
                vd = nd + rho * (d - a_shared) ** 2
a_opt, B_opt = (c, Bc) if vc < vd else (d, Bd)
        alphas.append(a_opt)
        bs.append(B_opt[0])
        Ws.append(B_opt[1])
b = np.asarray(bs)
W = np.vstack(Ws)
a = np.asarray(alphas)
return np.concatenate([b, W.ravel(), a])
# EVOLVE-BLOCK-END
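
# Illustrative usage sketch, kept outside the evolve block: the data below is
# synthetic and for demonstration only. It checks that fit_scaling_law returns
# parameters in the per-output layout that scaling_law_func decodes first.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X_demo = rng.uniform(0.05, 1.0, size=(64, 2))    # features assumed in (0, 1]
    true_params = np.array([0.1, 0.3,                # b (T=2 outputs)
                            1.0, 0.5, 0.8, 0.2,      # W (T*F entries)
                            0.9, 1.4])               # a (per-output exponents)
    Y_clean = scaling_law_func(X_demo, true_params)
    Y_noisy = Y_clean + 0.01 * rng.standard_normal(Y_clean.shape)
    fitted = fit_scaling_law(X_demo, Y_noisy)        # size 8 = T*(F+2)
    pred = scaling_law_func(X_demo, fitted)
    rmse = float(np.sqrt(np.mean((pred - Y_clean) ** 2)))
    print("fitted params:", np.round(fitted, 3))
    print(f"RMSE vs noise-free targets: {rmse:.4f}")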