← Back to Leaderboard

SFT Scaling Law

Agent: SLDAgent
Model: Claude Haiku 4.5
Best R²: 0.997360
Mean R²: 0.993169
Min R²: 0.991148
Runs: 5

All Runs (sorted by R²)

Best Run 5 R² = 0.997360
Python
# EVOLVE-BLOCK-START
"""
Scaling law for LLM finetuning: L = a + b/(d + N^c)
Theoretically grounded with optimized hybrid strategy: DE global search + targeted grid refinement
"""
import numpy as np
from scipy.optimize import minimize, differential_evolution

def scaling_law_func(data_points, params):
    """
    Evaluate the finetuning scaling law L = a + b / (d + N^c).

    Parameters
    ----------
    data_points : array-like
        Dataset sizes N; flattened and floored at 1.0 before use.
    params : sequence
        [a, b, c, d] where a is the asymptotic loss floor, b the scaling
        coefficient, c the power exponent (clipped to [0.01, 2.0]) and d a
        stability offset (forced strictly positive via abs(d) + 1e-6).

    Returns
    -------
    numpy.ndarray
        Predicted loss for each input size.
    """
    a, b, c, d = params[:4]

    # Flatten and floor sizes at 1 so N^c stays well-defined.
    sizes = np.maximum(np.asarray(data_points).ravel(), 1.0)
    exponent = np.clip(c, 0.01, 2.0)
    offset = np.abs(d) + 1e-6  # keeps the denominator strictly positive

    return a + b / (offset + sizes ** exponent)


def fit_scaling_law(data_points, loss_values):
    """
    Fit L = a + b/(d + N^c) to observed (size, loss) pairs.

    Strategy: global differential-evolution search first, then a small grid
    of L-BFGS-B restarts around the DE solution, then one final tightly
    converged L-BFGS-B polish.

    Parameters
    ----------
    data_points : array-like
        Dataset sizes N (flattened).
    loss_values : array-like
        Observed losses, same length as data_points (flattened).

    Returns
    -------
    numpy.ndarray
        Fitted [a, b, c, d]; a heuristic default when fewer than four points
        are supplied or when every optimizer attempt fails.
    """
    X = np.asarray(data_points).ravel()
    y = np.asarray(loss_values).ravel()

    # Four free parameters require at least four points; otherwise return a
    # heuristic guess instead of attempting an under-determined fit.
    if len(X) < 4:
        return np.array([np.mean(y), 0.1, 0.5, 1.0])

    y_min, y_max = np.min(y), np.max(y)
    y_range = y_max - y_min

    # Smart initialization: floor slightly below the best observed loss,
    # coefficient proportional to the observed loss spread.
    a_init = y_min * 0.9
    b_init = y_range * 5.0

    def objective(params):
        """MSE with penalties for unphysical (b <= 0 or d < 0) parameters."""
        try:
            pred = scaling_law_func(X, params)
            mse = np.mean((pred - y) ** 2)

            if params[1] <= 0 or params[3] < 0:
                mse += 1e8
            if np.isnan(mse) or np.isinf(mse):
                return 1e10
            return mse
        except Exception:
            # Numerical failure inside the model: report a huge loss so the
            # optimizer steers away rather than crashing the whole fit.
            return 1e10

    # Tight parameter bounds (see scaling_law_func for parameter meanings).
    bounds = [
        (y_min * 0.3, y_max * 1.2),     # a: asymptotic loss floor
        (y_range * 0.1, y_range * 100), # b: positive scaling coefficient
        (0.01, 2.0),                     # c: power exponent
        (1e-6, 1000.0)                   # d: positive stability offset
    ]

    best_params = None
    best_loss = np.inf

    # Strategy 1: global differential evolution optimization (primary).
    # Fixed seed keeps the fit deterministic across calls.
    try:
        result_de = differential_evolution(
            objective, bounds, seed=42, maxiter=150,
            popsize=15, atol=1e-10, tol=1e-10,
            workers=1, updating='deferred'
        )
        if result_de.fun < best_loss:
            best_loss = result_de.fun
            best_params = result_de.x
    except Exception:
        pass  # fall through to the fallback initialization below

    # Strategy 2: selective grid of L-BFGS-B restarts around the DE solution
    # in the (c, d) plane.
    if best_params is not None:
        c_center = best_params[2]
        d_center = best_params[3]

        c_grid = [max(0.01, c_center * 0.5), c_center, min(2.0, c_center * 2.0)]
        d_grid = [max(1e-6, d_center * 0.3), d_center, min(1000.0, d_center * 3.0)]

        for c_val in c_grid:
            for d_val in d_grid:
                x0 = np.array([a_init, b_init, c_val, d_val])
                try:
                    result = minimize(
                        objective, x0, method='L-BFGS-B',
                        bounds=bounds,
                        options={'maxiter': 400, 'ftol': 1e-11}
                    )
                    if result.fun < best_loss:
                        best_loss = result.fun
                        best_params = result.x
                except Exception:
                    pass  # a bad start point is not fatal; try the next one

    # Strategy 3: final L-BFGS-B refinement with tight tolerances.
    if best_params is not None:
        try:
            result = minimize(
                objective, best_params, method='L-BFGS-B',
                bounds=bounds,
                options={'maxiter': 500, 'ftol': 1e-12, 'gtol': 1e-11}
            )
            if result.fun < best_loss:
                # Bug fix: keep best_loss in sync with best_params so the
                # pair remains consistent for any future comparison.
                best_loss = result.fun
                best_params = result.x
        except Exception:
            pass  # keep the best result found so far

    # Fallback when every optimizer attempt failed outright.
    if best_params is None:
        best_params = np.array([a_init, b_init, 0.5, 1.0])

    # Ensure the strictly-positive parameters respect their bounds.
    best_params[1] = np.clip(best_params[1], bounds[1][0], bounds[1][1])
    best_params[3] = np.clip(best_params[3], bounds[3][0], bounds[3][1])

    return best_params[:4]

# EVOLVE-BLOCK-END
#2 Run 4 R² = 0.993348
#3 Run 1 R² = 0.992090
#4 Run 2 R² = 0.991899
#5 Run 3 R² = 0.991148