# EVOLVE-BLOCK-START
"""
Enhanced scaling law: L(N) = L_inf + A / (N^alpha + B)
Key improvements:
- Multi-scale alpha estimation using quantile-based regression
- Adaptive parameter bounds based on data statistics
- Three-stage optimization with improved convergence
- Robust fallback with strategic perturbations
"""
import numpy as np
from scipy.optimize import minimize, differential_evolution
def scaling_law_func(data_points, params):
"""
Scaling law: L(N) = L_inf + A / (N^alpha + B)
params: [L_inf, A, alpha, B]
- L_inf: asymptotic minimum loss
- A: scale coefficient (positive)
- alpha: power law exponent (positive)
- B: stabilization offset (positive)
"""
data_points = np.atleast_2d(np.asarray(data_points))
N = data_points[:, 0]
params = np.asarray(params)
if params.ndim == 1:
L_inf, A, alpha, B = params
A, alpha, B = np.abs(A), np.abs(alpha), np.abs(B)
return L_inf + A / (np.power(N, alpha) + B + 1e-12)
else:
L_inf = params[:, 0]
A = np.abs(params[:, 1])
alpha = np.abs(params[:, 2])
B = np.abs(params[:, 3])
denom = np.power(N[None, :], alpha[:, None]) + B[:, None] + 1e-12
return (L_inf[:, None] + A[:, None] / denom).T
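# Illustrative usage note (an editorial sketch, not part of the original pipeline):
# for n data points and a single 4-vector of params the function returns shape (n,);
# for a (k, 4) batch of parameter vectors it returns shape (n, k), one column per
# parameter set, e.g.
#   scaling_law_func([[1e6], [1e9]], [0.5, 10.0, 0.3, 1.0])   # -> shape (2,)
#   scaling_law_func([[1e6], [1e9]], np.ones((5, 4)))         # -> shape (2, 5)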
def fit_scaling_law(data_points, loss_values):
"""
Optimized fitting with multi-scale initialization and hybrid optimization
"""
data_points = np.atleast_2d(np.asarray(data_points))
loss_values = np.asarray(loss_values)
N, y = data_points[:, 0], loss_values
# Robust statistics
y_min, y_max = np.min(y), np.max(y)
y_range = y_max - y_min
y_q25, y_q50, y_q75 = np.percentile(y, [25, 50, 75])
N_min, N_max = np.min(N), np.max(N)
N_med = np.median(N)
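    # Rationale (editorial note): once N^alpha dominates B, the model gives
    # log(L - L_inf) ~ log(A) - alpha * log(N), so the slope of a log-log fit on
    # losses shifted toward their minimum estimates -alpha. The upper half of the
    # N range is least contaminated by the +B offset, hence the two-scale blend.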
    # Multi-scale alpha estimation
    if len(N) > 5:
        log_N = np.log(N)
        # Estimate on upper portion (more stable region)
        upper_mask = N >= N_med
        if np.sum(upper_mask) >= 4:
            log_N_upper = log_N[upper_mask]
            y_upper = y[upper_mask]
            y_shift_upper = np.maximum(y_upper - y_min + 0.005 * y_range, 0.005)
            log_y_upper = np.log(y_shift_upper)
            alpha_upper = -np.polyfit(log_N_upper, log_y_upper, 1)[0]
        else:
            alpha_upper = 0.35
        # Estimate on full data
        y_shift_full = np.maximum(y - y_min + 0.01 * y_range, 0.01)
        log_y_full = np.log(y_shift_full)
        alpha_full = -np.polyfit(log_N, log_y_full, 1)[0]
        # Weighted combination
        alpha_est = 0.65 * alpha_upper + 0.35 * alpha_full
        alpha_est = np.clip(alpha_est, 0.12, 0.75)
    else:
        alpha_est = 0.35
    # Enhanced initialization using quantiles
    init_L_inf = y_q25 - 0.35 * (y_q75 - y_q25)
    init_A = (y_q75 - y_q25) * np.power(N_med, alpha_est) * 1.3
    init_alpha = alpha_est
    init_B = N_max * 0.007
    init_params = np.array([init_L_inf, init_A, init_alpha, init_B])
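    # Reading the heuristics above (editorial note): L_inf starts a bit below the
    # lower quartile of observed losses; A is sized so the power-law term at the
    # median N roughly matches the interquartile spread of the losses; B starts
    # as a small fraction of the largest N so it only matters at the low-N end.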
    def objective(params):
        try:
            pred = scaling_law_func(data_points, params)
            mse = np.mean((pred - y) ** 2)
            # Guard against overflow/NaN predictions confusing the optimizers
            if not np.isfinite(mse):
                return 1e10
            # Light regularization to keep A and B from running away
            reg = 1e-9 * (params[1] ** 2 + params[3] ** 2)
            return mse + reg
        except Exception:
            return 1e10
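    # The tiny quadratic penalty above (an editorial gloss) breaks near-ties
    # between fits where a very large A is offset by a very large B; such pairs
    # predict almost identically on the observed range but extrapolate poorly.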
    # Adaptive bounds
    bounds = [
        (y_min - 1.6 * y_range, y_q25 + 0.1 * y_range),  # L_inf
        (1e-6 * y_range, 550 * y_range),                 # A
        (0.07, 1.05),                                    # alpha
        (0.15, 0.18 * N_max),                            # B
    ]
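    # Three-stage strategy: a global differential-evolution search handles the
    # multimodal landscape, then two bounded local refinements (L-BFGS-B, TNC)
    # polish the incumbent. Stages 2-3 only replace the incumbent when they
    # actually lower the objective.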
    try:
        # Stage 1: Global search with enhanced settings
        result_de = differential_evolution(
            objective,
            bounds=bounds,
            maxiter=210,
            popsize=15,
            seed=42,
            atol=1e-9,
            tol=1e-9,
            mutation=(0.55, 1.15),
            recombination=0.8,
            strategy='best1bin',
            polish=True,
            workers=1,
            updating='deferred'
        )
        # Stage 2: Local refinement with L-BFGS-B
        result_local1 = minimize(
            objective,
            result_de.x,
            method='L-BFGS-B',
            bounds=bounds,
            options={'maxiter': 1000, 'ftol': 1e-11, 'gtol': 1e-9}
        )
        # Keep the refinement only if it actually improved on the DE result
        best_result = (result_local1
                       if result_local1.success and result_local1.fun < result_de.fun
                       else result_de)
        # Stage 3: Fine-tune with TNC if beneficial
        try:
            result_local2 = minimize(
                objective,
                best_result.x,
                method='TNC',
                bounds=bounds,
                options={'maxfun': 500, 'ftol': 1e-12}
            )
            if result_local2.success and result_local2.fun < best_result.fun:
                best_result = result_local2
        except Exception:
            pass
        params_opt = best_result.x
    except Exception:
        # Enhanced fallback with strategic perturbations
        best_params = init_params
        best_loss = objective(init_params)
        # Try multiple initialization strategies
        for seed_val, scale in [(0, 0.10), (17, 0.15), (42, 0.22), (99, 0.08), (123, 0.18)]:
            try:
                rng = np.random.RandomState(seed_val)
                perturb = init_params * (1 + scale * rng.randn(4))
                # Clip to bounds
                for i, (lb, ub) in enumerate(bounds):
                    perturb[i] = np.clip(perturb[i], lb, ub)
                result = minimize(
                    objective,
                    perturb,
                    method='L-BFGS-B',
                    bounds=bounds,
                    options={'maxiter': 1500, 'ftol': 1e-11}
                )
                if result.success and result.fun < best_loss:
                    best_params = result.x
                    best_loss = result.fun
            except Exception:
                continue
        params_opt = best_params
    return params_opt
# EVOLVE-BLOCK-END
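
# Minimal smoke test (an editorial sketch, kept outside the evolve block so it
# is never mutated): fit synthetic data drawn from the model itself. The
# ground-truth parameters, noise level, and N grid are illustrative assumptions.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    N_grid = np.logspace(5, 10, 24)                     # hypothetical model sizes
    true_params = np.array([0.5, 12.0, 0.30, 2.0])      # [L_inf, A, alpha, B]
    losses = scaling_law_func(N_grid[:, None], true_params)
    losses = losses + 0.002 * rng.standard_normal(len(N_grid))  # mild noise
    fitted = fit_scaling_law(N_grid[:, None], losses)
    print("true:  ", true_params)
    print("fitted:", fitted)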