
Parallel Scaling Law

Agent: SLDAgent
Model: Claude Sonnet 4.5
Best R²: 0.999957
Mean R²: 0.999907
Min R²: 0.999885
Runs: 5
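
The R² values above are presumably the standard coefficient of determination between predicted and measured losses; the evaluation harness is not shown on this page, but a minimal sketch of that metric is:

Python
import numpy as np

def r_squared(y_true, y_pred):
    """Coefficient of determination: 1 - SS_res / SS_tot."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1.0 - ss_res / ss_tot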

All Runs (sorted by R²)

#1 Run 4 R² = 0.999957 (Best)
Python
# EVOLVE-BLOCK-START
"""
Enhanced parallel scaling law with a multiplicative interaction between
model size N and parallelism P:
L = C * N^(-alpha) / (1 + beta * log(P)) + gamma
The logarithmic denominator captures the diminishing returns of parallelization.
4 parameters: alpha, beta, gamma, C
"""
import numpy as np
from scipy.optimize import differential_evolution, minimize

def scaling_law_func(data_points, params):
    """
    Scaling law with logarithmic parallel benefit:
    L = C * N^(-alpha) / (1 + beta * log(P)) + gamma
    params = [alpha, beta, gamma, C]
    """
    X = np.atleast_2d(np.asarray(data_points))
    params = np.asarray(params)
    
    if params.ndim == 1:
        params = params[None, :]
    
    N, P = X[:, 0], X[:, 1]
    
    results = []
    for p in params:
        alpha, beta, gamma, C = p
        # Clamp inputs before the power/log transforms (N is a model-size
        # count, P a parallelism degree) so both stay well defined
        N_safe = np.maximum(N, 1e6)
        P_safe = np.maximum(P, 1.0)
        
        # Logarithmic parallel scaling with diminishing returns
        log_P = np.log(P_safe)
        parallel_factor = 1.0 + beta * log_P
        parallel_factor = np.maximum(parallel_factor, 0.1)  # Guard against a near-zero denominator
        
        pred = C * np.power(N_safe, -alpha) / parallel_factor + gamma
        results.append(pred)
    
    result = np.array(results).T
    return result[:, 0] if result.shape[1] == 1 else result
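
# Shape contract (illustrative): for X of shape (n, 2) holding (N, P) rows, a
# single parameter vector [alpha, beta, gamma, C] yields an (n,) prediction
# array, while a (k, 4) batch of parameter vectors yields an (n, k) array.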


def fit_scaling_law(data_points, loss_values):
    """
    Robust optimization with expanded search space for new formulation
    """
    X = np.atleast_2d(np.asarray(data_points))
    y = np.asarray(loss_values)
    
    if y.ndim == 1:
        y = y[:, None]
    
    N_samples, N_outputs = y.shape
    all_params = []
    
    for i in range(N_outputs):
        y_i = y[:, i]
        y_std = np.std(y_i)
        
        def objective(params):
            pred = scaling_law_func(X, params)
            if pred.ndim > 1:
                pred = pred[:, i]
            mse = np.mean((pred - y_i) ** 2)
            # Light L2 penalty on alpha and beta, scaled with the data's spread
            reg = (1e-4 * y_std) * (params[0]**2 + params[1]**2)
            return mse + reg
        
        # Adjusted bounds for logarithmic formulation
        # beta now scales log(P) instead of P directly
        bounds = [
            (0.01, 0.50),    # alpha: parameter scaling exponent
            (0.01, 0.40),    # beta: log-parallel scaling coefficient
            (0.50, 2.50),    # gamma: asymptotic loss floor
            (1e-4, 1e5)      # C: scaling constant
        ]
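        # Interpretation: gamma is the predicted loss floor as N -> inf, and
        # with beta > 0 extra parallelism strictly shrinks the reducible term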
        
        # Multi-restart global optimization
        best_score = float('inf')
        best_x = None
        
        # Restart from several seeds for robustness with the new formulation
        for seed in [42, 123, 256, 789]:
            result = differential_evolution(
                objective,
                bounds,
                maxiter=350,
                popsize=16,
                seed=seed,
                strategy='best1bin',
                atol=1e-9,
                tol=1e-9,
                mutation=(0.5, 1.5),
                recombination=0.8,
                polish=False,    # local refinement is done explicitly below
                workers=1
            )
            
            if result.fun < best_score:
                best_score = result.fun
                best_x = result.x
        
        # Multi-method local refinement
        methods = ['L-BFGS-B', 'TNC']
        best_local_score = best_score
        best_local_x = best_x
        
        for method in methods:
            try:
                result_local = minimize(
                    objective,
                    best_x,
                    method=method,
                    bounds=bounds,
                    options={'maxiter': 2000, 'ftol': 1e-11}
                )
                
                if result_local.success and result_local.fun < best_local_score:
                    best_local_score = result_local.fun
                    best_local_x = result_local.x
            except Exception:
                # A method may reject its options or fail to converge; keep
                # the differential-evolution result in that case
                continue
        
        final_params = best_local_x if best_local_score < best_score else best_x
        all_params.append(final_params)
    
    params_array = np.array(all_params)
    return params_array[0] if N_outputs == 1 else params_array
# EVOLVE-BLOCK-END
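
Usage sketch (assumptions: the two functions above are in scope, data_points is an (n, 2) array with columns (N = model size, P = parallelism), loss_values is the matching (n,) vector, and the synthetic parameters below are illustrative rather than fitted values):

Python
import numpy as np

rng = np.random.default_rng(0)
N = rng.uniform(1e7, 1e10, size=60)             # model sizes
P = rng.integers(1, 65, size=60).astype(float)  # degrees of parallelism
X = np.column_stack([N, P])

# Generate losses from the same functional form, plus small noise
true_params = [0.12, 0.15, 1.8, 50.0]           # [alpha, beta, gamma, C]
y = scaling_law_func(X, true_params) + rng.normal(0.0, 1e-3, size=60)

fitted = fit_scaling_law(X, y)
pred = scaling_law_func(X, fitted)
r2 = 1.0 - np.sum((y - pred) ** 2) / np.sum((y - np.mean(y)) ** 2)
print("fitted params:", fitted, "R^2:", r2)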
#2 Run 1 R² = 0.999911
#3 Run 2 R² = 0.999894
#4 Run 3 R² = 0.999889
#5 Run 5 R² = 0.999885