# EVOLVE-BLOCK-START
import numpy as np
from scipy.optimize import minimize
_P0 = 1e8
def scaling_law_func(data_points, params):
X = np.atleast_2d(np.asarray(data_points, dtype=float))
e, p = X[:, 0], X[:, 1]
p_norm = np.clip(p / _P0, 1e-12, None)
e_eff = np.clip(np.log1p(e), 1e-12, None)
pr = np.asarray(params, dtype=float)
if pr.ndim == 1: pr = pr[None, :]
T, K = pr.shape
if K < 6: pr = np.pad(pr, ((0, 0), (0, 6 - K)), mode='constant')
elif K > 6: pr = pr[:, :6]
a, b, d, alpha, beta, m = (pr[:, i] for i in range(6))
alpha = np.maximum(alpha, 1e-12)
beta = np.maximum(beta, 1e-12)
m = np.maximum(m, 0.0)
Xp = p_norm[:, None] ** (-alpha[None, :])
G = (e_eff[:, None] + m[None, :]) ** (-beta[None, :])
pred = a[None, :] + b[None, :]*Xp + d[None, :]*Xp*G
return pred[:, 0] if pred.shape[1] == 1 else pred
def fit_scaling_law(data_points, loss_values):
X = np.atleast_2d(np.asarray(data_points, dtype=float))
y = np.asarray(loss_values, dtype=float)
if y.ndim == 1: y = y[:, None]
N, T = y.shape
p_norm = np.clip(X[:, 1] / _P0, 1e-12, None)
e_eff = np.clip(np.log1p(X[:, 0]), 1e-12, None)
def pseudo_huber(r, d): return d*d * (np.sqrt(1.0 + (r/d)**2) - 1.0)
def mad(x):
m = np.median(x)
return np.median(np.abs(x - m))
def solve_abd(alpha, beta, m, yt, lam=1e-6, delta=None, iters=2):
Xp = p_norm ** (-alpha)
G = (e_eff + m) ** (-beta)
Xpg = Xp * G
F = np.column_stack([np.ones(N), Xp, Xpg])
w = np.ones(N, dtype=float)
for _ in range(max(1, iters)):
W = np.diag(w)
C = F.T @ W @ F + lam * np.eye(3)
rhs = F.T @ W @ yt
try: a_, b_, d_ = map(float, np.linalg.solve(C, rhs))
except np.linalg.LinAlgError: a_, b_, d_ = map(float, np.linalg.lstsq(C, rhs, rcond=None)[0])
b_ = max(b_, 0.0); d_ = max(d_, 0.0)
r = yt - (a_ + b_*Xp + d_*Xpg)
di = max(0.12, 1.4826 * mad(r)) if delta is None else delta
w = 1.0 / np.maximum(1.0, np.abs(r) / di)
a_ = float(np.clip(np.average(yt - (b_*Xp + d_*Xpg), weights=w), 1.2, 5.0))
return a_, b_, d_, Xp, G, Xpg
def make_obj(yt):
base = max(0.15, 0.5 * 1.4826 * mad(yt))
def obj(x):
alpha = np.clip(x[0], 1e-6, 2.5)
beta = np.clip(x[1], 1e-6, 2.5)
m = np.clip(x[2], 0.0, 2.0)
a, b, d, Xp, G, Xpg = solve_abd(alpha, beta, m, yt, lam=1e-6, delta=base, iters=2)
r = (a + b*Xp + d*Xpg) - yt
reg = 1e-4*(alpha*alpha + beta*beta) + 1e-5*m + 1e-7*(b*b + d*d)
return float(np.mean(pseudo_huber(r, base))) + reg
return obj
grid_a = np.array([0.1, 0.2, 0.3, 0.5, 0.8, 1.2])
grid_b = np.array([0.1, 0.2, 0.3, 0.5, 0.8, 1.2])
grid_m = np.array([0.0, 0.2, 0.5, 1.0])
params_out = np.zeros((T, 6), dtype=float)
rng = np.random.default_rng(123)
for t in range(T):
yt = y[:, t]
obj = make_obj(yt)
best = np.inf; x_best = np.array([0.5, 0.5, 0.25], dtype=float)
for aa in grid_a:
for bb in grid_b:
for mm in grid_m:
x0 = np.array([aa, bb, mm], dtype=float)
v = obj(x0)
if v < best: best, x_best = v, x0
bounds = [(1e-6, 2.5), (1e-6, 2.5), (0.0, 2.0)]
starts = [x_best, [0.3,0.4,0.2], [0.8,0.6,0.5], [1.2,0.8,0.3]] + \
[rng.uniform([0.1,0.1,0.0],[1.5,1.5,1.2]) for _ in range(6)]
best = np.inf; xb = x_best.copy()
for s in starts:
x0 = np.array(s, dtype=float)
res = minimize(obj, x0, method='L-BFGS-B', bounds=bounds,
options={'maxiter': 400, 'ftol': 1e-9})
val = res.fun if res.success else obj(x0)
if val < best: best, xb = val, (res.x if res.success else x0)
alpha = float(np.clip(xb[0], 1e-6, 2.5))
beta = float(np.clip(xb[1], 1e-6, 2.5))
m = float(np.clip(xb[2], 0.0, 2.0))
a, b, d, _, _, _ = solve_abd(alpha, beta, m, yt, lam=1e-6, iters=3)
params_out[t] = np.array([a, b, d, alpha, beta, m], dtype=float)
return params_out[0] if T == 1 else params_out
# EVOLVE-BLOCK-END