
Data-Constrained Scaling Law

Agent: SLDAgent
Model: Claude Sonnet 4.5
Best R²: 0.919475
Mean R²: 0.894453
Min R²: 0.852179
Runs: 5

All Runs (sorted by R²)

Best Run 5 R² = 0.919475
Python
# EVOLVE-BLOCK-START
"""
Refined scaling law with data efficiency modeling
Key innovations:
- Standard power law base: A/P^alpha + B/D^beta + C/U^gamma
- Data efficiency term: F/(D/U)^delta to model repetition effects
- The D/U ratio captures how much data is "recycled" vs unique
- Simpler and more interpretable than log-based corrections
- Directly models the intuition that loss depends on the fraction of unique content
Uses 7 fitted parameters: [A, alpha, B, beta, C, gamma, F]; delta is fixed at 0.15,
so the efficiency term F*(U/D)^delta adds only one free coefficient.
"""
import numpy as np
from scipy.optimize import minimize, differential_evolution

def scaling_law_func(data_points, params):
    """
    Scaling law with data efficiency:
    L = A/P^alpha + B/D^beta + C/U^gamma + F*(U/D)^delta
    The (U/D)^delta term captures data repetition effects
    """
    X = np.atleast_2d(np.asarray(data_points, dtype=np.float64))
    params = np.asarray(params, dtype=np.float64)
    
    if params.ndim == 1:
        params = params[None, :]
    
    U = X[:, 0]  # unique_tokens
    P = X[:, 1]  # params
    D = X[:, 2]  # tokens
    
    eps = 1e-12
    U = np.maximum(U, eps)
    P = np.maximum(P, eps)
    D = np.maximum(D, eps)
    
    A, alpha, B, beta, C, gamma, F = params[0]
    
    # Standard power law terms
    term1 = A / (P ** alpha)
    term2 = B / (D ** beta)
    term3 = C / (U ** gamma)
    
    # Data efficiency term: models unique content fraction
    # When U/D is small (high repetition), this term is small
    # delta fixed at 0.15 to save a parameter (empirically good value)
    delta = 0.15
    efficiency_ratio = U / D
    efficiency_term = F * (efficiency_ratio ** delta)
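    # Illustrative example (values not from the benchmark): with delta = 0.15, a corpus
    # where each unique token is seen ~4 times (U/D = 0.25) gives (U/D)**delta ≈ 0.81,
    # versus 1.0 for fully unique data; the fitted sign of F determines whether
    # repetition raises or lowers the predicted loss.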
    
    pred = term1 + term2 + term3 + efficiency_term
    
    return pred


def fit_scaling_law(data_points, loss_values):
    """
    Streamlined two-stage optimization
    """
    X = np.atleast_2d(np.asarray(data_points, dtype=np.float64))
    y = np.asarray(loss_values, dtype=np.float64)
    
    U = X[:, 0]
    P = X[:, 1]
    D = X[:, 2]
    
    # Normalize U, P, D by their medians (computed in log space) and the loss by its median
    U_scale = np.exp(np.median(np.log(U + 1e-12)))
    P_scale = np.exp(np.median(np.log(P + 1e-12)))
    D_scale = np.exp(np.median(np.log(D + 1e-12)))
    y_scale = np.median(y)
    
    U_norm = U / U_scale
    P_norm = P / P_scale
    D_norm = D / D_scale
    y_norm = y / y_scale
    
    delta = 0.15
    
    def objective(params):
        A, alpha, B, beta, C, gamma, F = params
        
        eps = 1e-12
        P_safe = np.maximum(P_norm, eps)
        D_safe = np.maximum(D_norm, eps)
        U_safe = np.maximum(U_norm, eps)
        
        term1 = A / (P_safe ** alpha)
        term2 = B / (D_safe ** beta)
        term3 = C / (U_safe ** gamma)
        
        efficiency_ratio = U_safe / D_safe
        efficiency_term = F * (efficiency_ratio ** delta)
        
        pred = term1 + term2 + term3 + efficiency_term
        
        residuals = pred - y_norm
        mse = np.mean(residuals ** 2)
        
        # Balanced regularization
        reg = 0.007 * (np.abs(alpha - 0.37) + 
                       np.abs(beta - 0.37) + 
                       np.abs(gamma - 0.28))
        reg += 0.004 * np.abs(F)
        
        return mse + reg
    
    bounds = [
        (0.001, 135),   # A
        (0.07, 1.05),   # alpha
        (0.001, 135),   # B
        (0.07, 1.05),   # beta
        (0.001, 135),   # C
        (0.07, 0.88),   # gamma
        (-8, 8)         # F
    ]
    
    # Data-driven initialization
    y_min = np.min(y_norm)
    y_range = np.max(y_norm) - y_min
    
    init_guess = [
        y_range * 0.34,
        0.37,
        y_range * 0.32,
        0.37,
        y_range * 0.25,
        0.28,
        y_range * 0.06
    ]
    
    # Global search
    result = differential_evolution(
        objective, 
        bounds, 
        seed=42,
        maxiter=400,
        popsize=16,
        atol=1e-8,
        tol=1e-8,
        strategy='best1bin',
        mutation=(0.5, 1.2),
        recombination=0.75
    )
    
    params_opt = result.x if result.success else np.array(init_guess)
    
    # Local refinement
    result_local = minimize(
        objective, 
        params_opt, 
        method='L-BFGS-B', 
        bounds=bounds,
        options={'maxiter': 550, 'ftol': 1e-10, 'gtol': 1e-9}
    )
    
    if result_local.success and result_local.fun < objective(params_opt):
        params_opt = result_local.x
    
    # Scale back to original space
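    # Derivation: the fit uses y_norm = y / y_scale and P_norm = P / P_scale, so
    #   y_scale * A_n / (P / P_scale)^alpha = (A_n * y_scale * P_scale^alpha) / P^alpha,
    # i.e. each coefficient picks up y_scale times its variable's scale raised to the
    # fitted exponent (analogously B with D_scale, C with U_scale).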
    params_scaled = params_opt.copy()
    params_scaled[0] *= y_scale * (P_scale ** params_opt[1])
    params_scaled[2] *= y_scale * (D_scale ** params_opt[3])
    params_scaled[4] *= y_scale * (U_scale ** params_opt[5])
    
    # Scale F: (U_norm/D_norm)^delta = (U/D)^delta * (D_scale/U_scale)^delta
    params_scaled[6] = params_opt[6] * y_scale * ((D_scale / U_scale) ** delta)
    
    return params_scaled
# EVOLVE-BLOCK-END
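
A minimal usage sketch (not part of the submitted run): it fits the law on a small synthetic grid and reports R² via the usual 1 − SS_res/SS_tot formula. The column order [unique_tokens, params, tokens] matches scaling_law_func; the data and loss values below are illustrative only, not from the benchmark.

Python
import numpy as np

# Hypothetical evaluation sketch using the functions defined above.
# Columns: [unique_tokens, params, tokens].
X = np.array([
    [1e9, 1e8, 2e9],
    [1e9, 4e8, 8e9],
    [4e9, 1e9, 1.6e10],
    [8e9, 4e9, 3.2e10],
], dtype=np.float64)
y = np.array([3.6, 3.1, 2.7, 2.4])  # illustrative loss values only

params = fit_scaling_law(X, y)
pred = scaling_law_func(X, params)

# Coefficient of determination on the toy grid.
ss_res = np.sum((y - pred) ** 2)
ss_tot = np.sum((y - np.mean(y)) ** 2)
r2 = 1.0 - ss_res / ss_tot
print(f"R^2 on the toy grid: {r2:.4f}")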
#2 Run 4 R² = 0.916660
#3 Run 1 R² = 0.906971
#4 Run 2 R² = 0.876982
#5 Run 3 R² = 0.852179