# EVOLVE-BLOCK-START
"""
Refined scaling law with SVD initialization and focused optimization
Key improvements over current:
- SVD-based coefficient initialization for numerical robustness
- Log-space exponent estimation with physical constraints
- Focused 5-trial multi-start with strategic perturbations
- Balanced regularization tuned for generalization
- Enhanced numerical stability
Total: 35 parameters (25 coeffs + 5 exponents + 5 biases)
"""
import numpy as np
from scipy.optimize import minimize
def scaling_law_func(data_points, params):
"""
Structured multi-output scaling law: y_i = sum_j(c_ij * x_j^e_j) + b_i
Parameters (35 total):
- coeffs: 25 params (5 outputs × 5 domains)
- exponents: 5 params (shared domain transformations)
- biases: 5 params (output-specific offsets)
"""
X = np.atleast_2d(np.asarray(data_points))
N, F = X.shape
params = np.asarray(params)
if params.ndim == 1:
params = params[None, :]
# Multi-output case with 35 parameters
if params.shape[0] == 1 and F == 5 and params.shape[1] == 35:
coeffs = params[0, :25].reshape(5, 5)
exponents = params[0, 25:30]
biases = params[0, 30:35]
        # Clip inputs into (0, 1] before exponentiation; assumes features are
        # pre-normalized and avoids 0**e and overflow issues with fractional exponents
        X_safe = np.clip(X, 1e-10, 1.0)
X_transformed = X_safe ** exponents[None, :]
pred = X_transformed @ coeffs.T + biases[None, :]
else:
# Fallback for non-standard cases
T = params.shape[0]
P = params.shape[1]
if P >= 2 * F + 1:
coeffs = params[:, :F]
exponents = params[:, F:2*F]
bias = params[:, -1]
X_safe = np.clip(X, 1e-10, 1.0)
pred = (coeffs[None, :, :] * (X_safe[:, None, :] ** exponents[None, :, :])).sum(axis=2) + bias[None, :]
pred = pred[:, 0] if pred.shape[1] == 1 else pred
else:
pred = np.zeros((N, T))
return pred
def fit_scaling_law(data_points, loss_values):
"""
Enhanced optimization with SVD initialization and focused multi-start
"""
X = np.atleast_2d(np.asarray(data_points))
y = np.asarray(loss_values)
N, F = X.shape
if y.ndim == 1:
y = y[:, None]
T = y.shape[1]
if F == 5 and T == 5:
X_safe = np.clip(X, 1e-10, 1.0)
        # SVD-based coefficient initialization for numerical stability.
        # The pseudo-inverse is built from a truncated SVD: singular values below
        # 1% of the largest are dropped rather than inverted, which avoids both
        # noise amplification and divide-by-zero warnings.
        init_coeffs = np.zeros((5, 5))
        try:
            # X does not depend on the output index, so factor it once
            U, s, Vt = np.linalg.svd(X, full_matrices=False)
            s_inv = np.zeros_like(s)
            keep = s > 0.01 * s[0]
            s_inv[keep] = 1.0 / s[keep]
            X_pinv = Vt.T @ np.diag(s_inv) @ U.T
            init_coeffs = (X_pinv @ y).T
        except Exception:
            # Fallback to ridge regression, one output at a time
            for t in range(T):
                try:
                    XtX = X.T @ X + 0.008 * np.eye(F)
                    Xty = X.T @ y[:, t]
                    init_coeffs[t] = np.linalg.solve(XtX, Xty)
                except Exception:
                    init_coeffs[t] = np.ones(F) * (np.mean(y[:, t]) / F)
# Log-space exponent estimation for physically grounded initialization
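        # Under a power law y ~ c * x**e, log(y) ~ e * log(x) + log(c), so the
        # slope of a log-log regression directly estimates the exponent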
init_exponents = np.ones(5) * 0.72
for f in range(F):
if np.std(X[:, f]) > 1e-6:
sorted_idx = np.argsort(X[:, f])
x_sorted = X[sorted_idx, f]
y_mean_sorted = np.mean(y[sorted_idx], axis=1)
# Log-space regression for power law estimation
valid_mask = x_sorted > 0.05
if np.sum(valid_mask) >= 5:
                    x_log = np.log(x_sorted[valid_mask])
                    y_log = np.log(np.maximum(y_mean_sorted[valid_mask], 1e-10))
try:
# Fit log(y) ~ exp * log(x) + const
x_mean = np.mean(x_log)
y_mean = np.mean(y_log)
cov = np.sum((x_log - x_mean) * (y_log - y_mean))
var = np.sum((x_log - x_mean) ** 2)
if abs(var) > 1e-8:
exp_est = cov / var
init_exponents[f] = np.clip(exp_est, 0.3, 1.4)
else:
init_exponents[f] = 0.72
                    except Exception:
init_exponents[f] = 0.72
# Additional curvature refinement
if len(x_sorted) >= 6:
n = len(x_sorted)
q1, q2, q3 = n // 4, n // 2, 3 * n // 4
s1 = (y_mean_sorted[q2] - y_mean_sorted[q1]) / max(x_sorted[q2] - x_sorted[q1], 1e-6)
s2 = (y_mean_sorted[q3] - y_mean_sorted[q2]) / max(x_sorted[q3] - x_sorted[q2], 1e-6)
# Adjust based on curvature pattern
if s1 > s2 * 1.25:
init_exponents[f] = min(init_exponents[f], 0.65)
elif s2 > s1 * 1.25:
init_exponents[f] = max(init_exponents[f], 1.0)
# Robust bias initialization using trimmed mean
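        # Only a fraction (0.45) of the trimmed-mean residual is used, which keeps
        # the starting bias conservative and leaves room for L-BFGS-B to refine it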
init_biases = np.zeros(5)
for t in range(T):
pred_linear = X @ init_coeffs[t]
residuals = y[:, t] - pred_linear
            # Trimmed mean for outlier resistance; fall back to the full set
            # when there are too few points to trim
            sorted_res = np.sort(residuals)
            trim = max(1, N // 10)
            trimmed = sorted_res[trim:-trim] if N > 2 * trim else sorted_res
            init_biases[t] = np.mean(trimmed) * 0.45
init = np.concatenate([init_coeffs.ravel(), init_exponents, init_biases])
def objective(params_flat):
pred = scaling_law_func(X, params_flat)
mse = np.mean((pred - y) ** 2)
coeffs_part = params_flat[:25]
exponents_part = params_flat[25:30]
# Balanced regularization tuned for generalization
reg_coeffs = 0.00045 * np.sum(coeffs_part ** 2)
# Domain-specific target exponents
target_exp = np.array([0.68, 0.72, 0.75, 0.70, 0.73])
reg_exponents = 0.0020 * np.sum((exponents_part - target_exp) ** 2)
return mse + reg_coeffs + reg_exponents
        # Bound the exponents to a plausible range; coefficients and biases stay free
        bounds = [(None, None)] * 25 + [(0.25, 1.8)] * 5 + [(None, None)] * 5
# Focused 5-trial multi-start with strategic exploration
best_result = None
best_loss = float('inf')
for trial in range(5):
if trial == 0:
# Use smart initialization
init_trial = init.copy()
elif trial == 1:
# Perturb coefficients with moderate noise
init_trial = init.copy()
init_trial[:25] += np.random.randn(25) * 0.075
elif trial == 2:
# Explore different exponent values
init_trial = init.copy()
init_trial[25:30] += np.random.randn(5) * 0.10
init_trial[25:30] = np.clip(init_trial[25:30], 0.25, 1.8)
elif trial == 3:
# Balanced global perturbation
init_trial = init + np.random.randn(35) * 0.09
init_trial[25:30] = np.clip(init_trial[25:30], 0.25, 1.8)
else:
# Conservative exploration with bias focus
init_trial = init.copy()
init_trial[:25] += np.random.randn(25) * 0.05
init_trial[30:35] += np.random.randn(5) * 0.15
try:
result = minimize(
objective,
init_trial,
method='L-BFGS-B',
bounds=bounds,
options={'maxiter': 1700, 'ftol': 1e-11, 'gtol': 1e-9}
)
if result.fun < best_loss:
best_loss = result.fun
best_result = result
            except Exception:
continue
params_opt = best_result.x if (best_result is not None and best_result.success) else init
else:
# Fallback for non-standard dimensions
P = 2 * F + 1
init = np.ones((T, P))
init[:, :F] = np.mean(y) / F
init[:, F:2*F] = 0.72
        def objective(flat_params):
            params = flat_params.reshape(T, P)
            pred = scaling_law_func(X, params)
            # Force an (N, T) shape: for T == 1 the forward pass returns a 1-D
            # array, which would otherwise broadcast against y to (N, N)
            pred = pred.reshape(N, T)
            mse = np.mean((pred - y) ** 2)
            reg = 0.0007 * np.sum(params ** 2)
            return mse + reg
result = minimize(objective, init.ravel(), method='L-BFGS-B')
params_opt = result.x.reshape(T, P) if result.success else init
params_opt = params_opt[0] if T == 1 else params_opt
return params_opt
# EVOLVE-BLOCK-END
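

# Usage sketch (illustration only; kept outside the evolve block).
# Assumes 5 input features scaled into (0, 1] and 5 observed loss values per
# point. The synthetic data and parameter ranges below are hypothetical.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X_demo = rng.uniform(0.05, 1.0, size=(64, 5))       # 64 points, 5 feature domains
    true_params = np.concatenate([
        rng.uniform(0.5, 1.5, 25),                       # coefficients (5 outputs x 5 domains)
        rng.uniform(0.5, 1.0, 5),                        # shared exponents
        rng.uniform(-0.1, 0.1, 5),                       # per-output biases
    ])
    y_demo = scaling_law_func(X_demo, true_params) + rng.normal(0.0, 0.01, (64, 5))
    fitted = fit_scaling_law(X_demo, y_demo)
    resid = scaling_law_func(X_demo, fitted) - y_demo
    print("fitted 35-parameter vector, RMSE:", float(np.sqrt(np.mean(resid ** 2))))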