# EVOLVE-BLOCK-START
import numpy as np
_P0 = 1e9  # parameter scale for conditioning


def scaling_law_func(data_points, params):
    """Evaluate the scaling law ``L0 + A * (P / _P0) ** (-beta) / (1 + C * ln(S))``.

    Args:
        data_points: Array-like of shape (N, 2) (a single ``[P, S]`` row is
            also accepted). Column 0 is ``P`` and column 1 is ``S``.
            NOTE(review): ``P`` is presumably a parameter count, given the
            ``_P0`` "parameter scale" comment -- confirm with the caller.
        params: Either one parameter vector ``[L0, A, beta, C]`` or a 2-D
            array with one such vector per row. Vectors shorter than four
            entries are padded with zeros; entries beyond the fourth are
            ignored.

    Returns:
        A 1-D array of N predictions when a single parameter set is given,
        otherwise an (N, T) array with one column per parameter set.
    """
    X = np.atleast_2d(np.asarray(data_points, dtype=float))
    # Clip the inputs so the power and logarithm stay finite: the scaled P is
    # kept strictly positive and S >= 1 makes ln(S) >= 0 (denominator >= 1).
    P = np.clip(X[:, 0] / _P0, 1e-12, None)[:, None]
    S = np.clip(X[:, 1], 1.0, None)[:, None]

    p = np.asarray(params, dtype=float)
    if p.ndim == 1:
        p = p[None, :]

    # Normalise to exactly four columns. Previously a vector with more than
    # four entries failed with a broadcasting error on the assignment below.
    used = min(p.shape[1], 4)
    tmp = np.zeros((p.shape[0], 4), dtype=float)
    tmp[:, :used] = p[:, :used]

    L0 = tmp[:, 0][None, :]
    # A, beta and C are constrained to be non-negative.
    A = np.clip(tmp[:, 1][None, :], 0.0, None)
    beta = np.clip(tmp[:, 2][None, :], 0.0, None)
    C = np.clip(tmp[:, 3][None, :], 0.0, None)

    # (N, 1) inputs broadcast against (1, T) parameters -> (N, T).
    pred = L0 + A * (P ** (-beta)) / (1.0 + C * np.log(S))
    return pred[:, 0] if pred.shape[1] == 1 else pred
def fit_scaling_law(data_points, loss_values):
    """Fit ``[L0, A, beta, C]`` of the scaling law by exhaustive grid search.

    The model is ``L0 + A * (P / _P0) ** (-beta) / (1 + C * ln(S))``. For each
    target column, ``L0`` is scanned over 60 values strictly below the
    smallest observed loss, and ``beta`` and ``C`` over fixed 7-value grids.
    For every (L0, beta, C) combination the amplitude ``A`` is obtained in
    closed form as the least-squares solution, and the combination with the
    lowest mean squared error is kept.

    Args:
        data_points: Array-like of shape (N, 2); column 0 is ``P`` and
            column 1 is ``S`` (a single ``[P, S]`` row is also accepted).
        loss_values: Array-like of shape (N,) or (N, T) with the observed
            losses, one column per target.

    Returns:
        A length-4 array ``[L0, A, beta, C]`` when there is one target,
        otherwise a (T, 4) array with one row per target.
    """
    X = np.atleast_2d(np.asarray(data_points, dtype=float))
    y = np.asarray(loss_values, dtype=float)
    y2d = y[:, None] if y.ndim == 1 else y

    # Same input clipping as the prediction function, so the fitted
    # parameters are evaluated on identical features.
    P = np.clip(X[:, 0] / _P0, 1e-12, None)
    ls = np.log(np.clip(X[:, 1], 1.0, None))

    _, T = y2d.shape  # unpacking also rejects loss arrays that are not 2-D
    out = np.zeros((T, 4), dtype=float)

    betas = np.array([0.05, 0.08, 0.1, 0.15, 0.2, 0.3, 0.5], dtype=float)
    Cs = np.array([0.0, 0.03, 0.05, 0.1, 0.15, 0.2, 0.3], dtype=float)
    Pw_all = P[:, None] ** (-betas[None, :])   # (N, B)
    Den_all = 1.0 + ls[:, None] * Cs[None, :]  # (N, C)

    # Basis values for every (beta, C) pair and their squared column sums.
    # Neither depends on L0 or on the target column, so they are built once
    # here instead of on every iteration of the L0 scan.
    w = Pw_all[:, :, None] / Den_all[:, None, :]  # (N, B, C)
    den = (w * w).sum(axis=0) + 1e-18             # (B, C); epsilon avoids 0/0

    for t, yt in enumerate(y2d.T):
        ymin = float(np.min(yt))
        yrng = float(np.ptp(yt)) or 0.1  # constant losses: use a nominal range
        # Candidate offsets lie strictly below the smallest loss so that the
        # residual target z = y - L0 stays positive.
        L0s = np.linspace(ymin - max(0.5, 0.8 * yrng), ymin - 1e-4, 60)

        best_mse = np.inf
        best = None
        for L0 in L0s:
            z = yt - L0
            if np.any(z <= 0):
                continue
            # Closed-form least-squares amplitude for each (beta, C) pair:
            # A = sum(z * w) / sum(w * w), kept strictly positive.
            num = (z[:, None, None] * w).sum(axis=0)  # (B, C)
            A = np.clip(num / den, 1e-12, None)       # (B, C)
            resid = A[None, :, :] * w - z[:, None, None]  # (N, B, C)
            mse = np.mean(resid * resid, axis=0)      # (B, C)
            i, j = np.unravel_index(np.argmin(mse), mse.shape)
            cur_mse = mse[i, j]
            if cur_mse < best_mse:
                best_mse = cur_mse
                best = (L0, float(A[i, j]), float(betas[i]), float(Cs[j]))

        if best is None:
            # Fallback when no grid point gave an MSE below infinity
            # (NOTE(review): this appears reachable only with non-finite
            # loss values). Fit the pure power law (C = 0) at a fixed offset.
            L0 = ymin - 0.1
            z = np.clip(yt - L0, 1e-12, None)
            w_b = Pw_all
            A_b = (z[:, None] * w_b).sum(axis=0) / (w_b * w_b).sum(axis=0).clip(min=1e-18)
            A_b = np.clip(A_b, 1e-12, None)
            mse_b = np.mean((A_b[None, :] * w_b - z[:, None]) ** 2, axis=0)
            i = int(np.argmin(mse_b))
            best = (L0, float(A_b[i]), float(betas[i]), 0.0)

        out[t] = best

    return out[0] if T == 1 else out
# EVOLVE-BLOCK-END