# EVOLVE-BLOCK-START
"""
Simplified U-shaped scaling law for the double-descent pattern.
Uses a shifted quadratic with exponential modulation, chosen for numerical
stability and fit quality.
"""
import numpy as np
from scipy.optimize import minimize, differential_evolution
def scaling_law_func(data_points, params):
"""
    U-shaped form: y = a*(x - c)^2 + b*(x - c) + d*exp(-|e|*|x - c|) + f

    Parameters (6):
    - a: quadratic strength (U-shape curvature)
    - b: linear term (asymmetry)
    - c: horizontal shift (minimum location)
    - d: exponential amplitude (initial descent)
    - e: exponential decay rate (applied as |e|, so its sign is irrelevant)
    - f: vertical offset (baseline)
"""
X = np.atleast_2d(np.asarray(data_points))
log_flops = X[:, 0]
params = np.asarray(params)
if params.ndim == 1:
params = params[None, :]
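    # Zero-pad short parameter vectors so the unpack below always sees six values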
if params.shape[1] < 6:
params = np.pad(params, ((0, 0), (0, 6 - params.shape[1])), constant_values=0)
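    # Only the first parameter vector is used, even if a batch of rows is passed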
a, b, c, d, e, f = params[0, :6]
# Shifted coordinate for centering
x_shift = log_flops - c
# Quadratic base for U-shape
quadratic = a * x_shift**2 + b * x_shift
# Exponential modulation with numerical stability
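    # The argument -|e|*|x - c| is always <= 0, so only the lower clip can bind;
    # exp(-50) ~ 2e-22 is effectively zero, and overflow is impossible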
exp_arg = np.clip(-np.abs(e) * np.abs(x_shift), -50, 50)
exponential = d * np.exp(exp_arg)
return quadratic + exponential + f
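# A quick sanity check of the form above (hypothetical numbers): at x = c the
# quadratic and linear terms vanish and the exponential peaks, so the model
# reduces to d + f:
#
#   >>> scaling_law_func(np.array([[2.0]]), [1.0, 0.0, 2.0, -0.5, 1.0, 3.0])
#   array([2.5])  # d + f = -0.5 + 3.0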
def fit_scaling_law(data_points, loss_values):
"""
    Fit the six parameters via multi-start local optimization, with a
    global-search fallback when every local start fails or fits poorly.
"""
X = np.atleast_2d(np.asarray(data_points))
y = np.asarray(loss_values)
log_flops = X[:, 0]
# Data statistics
y_mean = np.mean(y)
y_std = np.std(y)
y_min = np.min(y)
y_max = np.max(y)
y_range = y_max - y_min
x_min = np.min(log_flops)
x_max = np.max(log_flops)
x_range = x_max - x_min
x_mean = np.mean(log_flops)
# Find empirical minimum for smart initialization
min_idx = np.argmin(y)
x_at_min = log_flops[min_idx]
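    # The empirical argmin seeds the shift parameter c in most starts below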
def objective(params):
try:
pred = scaling_law_func(X, params)
mse = np.mean((pred - y)**2)
            # Tiny L2 penalty (1e-8) for numerical stability; negligible at the optimum
reg = 1e-8 * np.sum(params**2)
return mse + reg
        except Exception:
            # Treat any numerical failure as a very poor fit
            return 1e10
# Parameter bounds [a, b, c, d, e, f]
bounds = [
(0, 4*y_range), # a: positive for U-shape
(-3*y_range, 3*y_range), # b: linear asymmetry
(x_min - 0.6, x_max + 0.6), # c: shift parameter
(-4*y_range, y_range), # d: exponential amplitude
(0.1, 10.0), # e: decay rate
(y_min - 1.5*y_std, y_max + 1.5*y_std) # f: baseline offset
]
# Smart initialization strategies based on data
init_attempts = [
# Strategy 1: Conservative centered at empirical min
[y_range*0.35, 0, x_at_min, -y_std*0.8, 1.0, y_mean],
# Strategy 2: Stronger U-shape with moderate exponential
[y_range*0.6, -y_std*0.3, x_at_min, -1.5*y_std, 1.3, y_mean],
# Strategy 3: Gentle U with strong initial descent
[y_range*0.25, y_std*0.2, x_at_min, -2*y_std, 0.9, y_mean],
# Strategy 4: Early minimum bias
[y_range*0.4, -y_std*0.4, x_min + 0.35*x_range, -y_std*1.2, 1.1, y_mean],
# Strategy 5: Late minimum bias
[y_range*0.4, y_std*0.3, x_max - 0.35*x_range, -y_std*1.2, 1.1, y_mean],
# Strategy 6: Sharp curvature
[y_range*0.8, 0, x_at_min, -y_std*0.6, 1.8, y_mean],
# Strategy 7: Centered on data mean
[y_range*0.45, -y_std*0.15, x_mean, -y_std, 1.15, y_mean],
]
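    # Seven starts spread over curvature strength, asymmetry sign, and minimum
    # location, so at least one should land in the basin of the global optimum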
best_result = None
best_loss = float('inf')
# Multi-start local optimization
for init in init_attempts:
try:
res = minimize(
objective,
init,
method='L-BFGS-B',
bounds=bounds,
options={'maxiter': 1000, 'ftol': 1e-10}
)
if res.fun < best_loss:
best_loss = res.fun
best_result = res
        except Exception:
            continue
    # Global-search fallback when all local starts failed or the best MSE is still high
if best_result is None or best_loss > 0.25:
try:
res_de = differential_evolution(
objective,
bounds,
maxiter=180,
popsize=15,
seed=42,
atol=1e-9,
tol=1e-9,
polish=True,
workers=1
)
            if res_de.fun < best_loss:
                best_loss = res_de.fun
                best_result = res_de
        except Exception:
            pass
# Return best result or robust fallback
if best_result is not None and hasattr(best_result, 'x'):
return best_result.x
    # Data-driven fallback mirroring initialization strategy 1
return np.array([y_range*0.35, 0, x_at_min, -y_std*0.8, 1.0, y_mean])
# EVOLVE-BLOCK-END
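# --- Usage sketch (outside the evolve block) ---
# A minimal end-to-end example on synthetic data. The ground-truth parameters
# and noise level below are hypothetical, chosen only to exercise the API.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    x = np.linspace(0.0, 6.0, 40)[:, None]          # synthetic log-FLOPs grid
    true_params = [0.3, -0.1, 3.0, -1.0, 1.2, 2.0]  # [a, b, c, d, e, f]
    y = scaling_law_func(x, true_params) + rng.normal(0.0, 0.02, size=40)
    fitted = fit_scaling_law(x, y)
    pred = scaling_law_func(x, fitted)
    print("fitted params:", np.round(fitted, 3))
    print("RMSE:", float(np.sqrt(np.mean((pred - y) ** 2))))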