import numpy as np
def law(input_data: list[dict[str, float]], group: str) -> list[dict[str, float]]:
    """Predict the Brier score for each data point using a double-exponential law.

    The functional form is shared by every group; only the coefficients
    ``(c0, c1, c2)`` vary per group::

        brier_score = c0 + c1 * exp(ALPHA * x) + c2 * exp(GAMMA * x)

    where ``x`` is the ``'log_flops'`` value of the data point and ``ALPHA`` /
    ``GAMMA`` are exponents shared across all groups.

    Args:
        input_data: A list of dictionaries, one per data point, mapping input
            variable names to values. Only the ``'log_flops'`` key is read;
            a point without it is evaluated at ``x = 0.0``.
        group: The name of the experimental group whose coefficients are
            used. A group that is not in the coefficient table falls back to
            all-zero coefficients, so every prediction for it is ``0.0``.

    Returns:
        A list with one dictionary per entry of ``input_data`` (same order),
        each of the form ``{'brier_score': <float>}``.
    """
    # Exponents shared by every group.
    ALPHA = -3.9034
    GAMMA = -0.1707

    # Group-specific coefficients (c0, c1, c2) for
    #   y = c0 + c1 * exp(ALPHA * x) + c2 * exp(GAMMA * x)
    COEFFS = {
        'mmlu': (-0.837198, -0.000345, 0.362144),
        'parsinlu_qa_mc': (-0.551979, -0.007340, 0.156137),
        'arithmetic': (-0.300130, -0.018207, 0.140879),
        'hindu_knowledge': (-0.873439, -0.003579, 0.474323),
        'analogical_similarity': (-0.630591, -0.003660, 0.110499),
        'conceptual_combinations': (-0.351057, -0.005183, -0.048191),
        'hellaswag': (0.117707, -0.004592, -0.159038),
        'arc': (0.161359, -0.005110, -0.239299),
        'abstract_narrative_understanding': (0.739952, 0.002573, -1.297015),
    }

    # Best-effort fallback: an unknown group gets zero coefficients (and
    # therefore a 0.0 prediction) instead of raising.
    c0, c1, c2 = COEFFS.get(group, (0.0, 0.0, 0.0))

    def _predict(x: float) -> float:
        # float() converts the NumPy scalar into a plain Python float.
        return float(c0 + c1 * np.exp(ALPHA * x) + c2 * np.exp(GAMMA * x))

    return [
        {'brier_score': _predict(point.get('log_flops', 0.0))}
        for point in input_data
    ]