import itertools
from numbers import Number
import numpy as np
from mot.lib.cl_function import SimpleCLFunction, SimpleCLCodeObject
from mot.configuration import CLRuntimeInfo, config_context, CLRuntimeAction
from mot.lib.kernel_data import Array, Zeros, LocalMemory
from scipy import linalg
__author__ = 'Robbert Harms'
__date__ = '2017-10-16'
__maintainer__ = 'Robbert Harms'
__email__ = 'robbert.harms@maastrichtuniversity.nl'
__licence__ = 'LGPL v3'
[docs]def numerical_hessian(objective_func, parameters,
lower_bounds=None, upper_bounds=None,
step_ratio=2, nmr_steps=15, data=None,
max_step_sizes=None, scaling_factors=None,
step_offset=None, parameter_transform_func=None,
cl_runtime_info=None):
"""Calculate and return the Hessian of the given function at the given parameters.
This calculates the Hessian using central difference (using a 2nd order Taylor expansion) with a Richardson
extrapolation over the proposed sequence of steps.
The Hessian is evaluated at the steps:
.. math::
\quad ((f(x + d_j e_j + d_k e_k) - f(x + d_j e_j - d_k e_k)) -
(f(x - d_j e_j + d_k e_k) - f(x - d_j e_j - d_k e_k)) /
(4 d_j d_k)
where :math:`e_j` is a vector where element :math:`j` is one and the rest are zero
and :math:`d_j` is a scalar spacing :math:`steps_j`.
Steps are generated according to a exponentially diminishing ratio defined as:
steps = max_step * step_ratio**-(i+offset), i=0, 1,.., nmr_steps-1.
Where the max step can be provided. For example, a maximum step of 2 with a step ratio of 2 and with
4 steps gives: [2.0, 1.0, 0.5, 0.25]. If offset would be 2, we would instead get: [0.5, 0.25, 0.125, 0.0625].
If number of steps is 1, we use not Richardson extrapolation and return the results of the first step. If the
number of steps is 2 we use a first order Richardson extrapolation step. For all higher number of steps
we use a second order Richardson extrapolation.
The derivative calculation method uses an adaptive step size to determine the step with the best trade-off between
numerical errors and localization of the derivative.
Args:
objective_func (mot.lib.cl_function.CLFunction): The function we want to differentiate.
A CL function with the signature:
.. code-block:: c
double <func_name>(local const mot_float_type* const x,
void* data,
local mot_float_type* objective_list);
The objective function has the same signature as the minimization function in MOT. For the numerical
hessian, the ``objective_list`` parameter is ignored.
parameters (ndarray): The parameters at which to evaluate the gradient. A (d, p) matrix with d problems,
and p parameters
lower_bounds (list or None): a list of length (p,) for p parameters with the lower bounds.
Each element of the list can be a scalar, a vector (of the same length as the number of problem instances),
or None. For infinity use np.inf, for boundless use None.
upper_bounds (list or None): a list of length (p,) for p parameters with the upper bounds.
Each element of the list can be a scalar, a vector (of the same length as the number of problem instances),
or None. For infinity use np.inf, for boundless use None.
step_ratio (float): the ratio at which the steps diminish.
nmr_steps (int): the number of steps we will generate. We will calculate the derivative for each of these
step sizes and extrapolate the best step size from among them. The minimum number of steps is 2.
data (mot.lib.kernel_data.KernelData): the user provided data for the ``void* data`` pointer.
step_offset (int): the offset in the steps, if set we start the steps from the given offset.
max_step_sizes (float or ndarray or None): the maximum step size, or the maximum step size per parameter.
If None is given, we use 0.1 for all parameters. If a float is given, we use that for all parameters.
If a list is given, it should be of the same length as the number of parameters.
scaling_factors (List[float] or ndarray): per estimable parameter a single float with the parameter scaling
for that parameter. Use 1 as identity.
Since numerical differentiation is sensitive to differences in step sizes, it is better to rescale
the parameters to a unitary range instead of changing the step sizes for the parameters.
This vector should contain scaling factors such that when the parameter is multiplied with this value,
the order of magnitude of the parameter is about one.
parameter_transform_func (mot.lib.cl_function.CLFunction or None): A transformation that can prepare the
parameter plus/minus the proposed step before evaluation.
As an example, suppose we are taking the derivative of a a polar coordinate :math:`\theta` defined on
:math:`[0, 2\pi]`. While taking the derivative, the function might propose positions outside of the range
of :math:`\theta`. This function allows changing the parameter vector before it is put into the model.
Please note that this function should return a parameter vector that is equivalent (but not necessarily
equal) to the provided proposal.
Signature:
.. code-block:: c
void <func_name>(void* data, local mot_float_type* x);
cl_runtime_info (mot.configuration.CLRuntimeInfo): the runtime information
Returns:
ndarray: the gradients for each of the parameters for each of the problems
"""
if len(parameters.shape) == 1:
parameters = parameters[None, :]
nmr_params = parameters.shape[1]
if max_step_sizes is None:
max_step_sizes = 0.1
if isinstance(max_step_sizes, Number):
max_step_sizes = [max_step_sizes] * nmr_params
max_step_sizes = np.array(max_step_sizes)
if scaling_factors is None:
scaling_factors = 1
if isinstance(scaling_factors, Number):
scaling_factors = [scaling_factors] * nmr_params
scaling_factors = np.array(scaling_factors)
def finalize_derivatives(derivatives):
"""Transforms the derivatives from vector to matrix and apply the parameter scalings."""
return _results_vector_to_matrix(derivatives, nmr_params) * np.outer(scaling_factors, scaling_factors)
with config_context(CLRuntimeAction(cl_runtime_info or CLRuntimeInfo())):
derivatives = _compute_derivatives(objective_func, parameters, step_ratio, step_offset, nmr_steps,
lower_bounds, upper_bounds, max_step_sizes, scaling_factors, data=data,
parameter_transform_func=parameter_transform_func)
if nmr_steps == 1:
return finalize_derivatives(derivatives[..., 0])
derivatives, errors = _richardson_extrapolation(derivatives, step_ratio)
if nmr_steps <= 3:
return finalize_derivatives(derivatives[..., 0])
if derivatives.shape[2] > 2:
derivatives, errors = _wynn_extrapolate(derivatives)
if derivatives.shape[2] == 1:
return finalize_derivatives(derivatives[..., 0])
derivatives, errors = _median_outlier_extrapolation(derivatives, errors)
return finalize_derivatives(derivatives)
def _compute_derivatives(objective_func, parameters, step_ratio, step_offset, nmr_steps,
lower_bounds, upper_bounds, max_step_sizes, scaling_factors, data=None,
parameter_transform_func=None):
"""Compute the lower triangular elements of the Hessian using the central difference method.
This will compute the elements of the Hessian multiple times with decreasing step sizes.
Args:
model: the log likelihood model we are trying to differentiate
parameters (ndarray): a (n, p) matrix with for for every problem n, p parameters. These are the points
at which we want to calculate the derivative
step_ratio (float): the ratio at which the steps exponentially diminish
step_offset (int): ignore the first few step sizes by this offset
nmr_steps (int): the number of steps to compute and return (after the step offset)
lower_bounds (list): lower bounds
upper_bounds (list): upper bounds
max_step_sizes (ndarray): the maximum step sizes per parameter
scaling_factors (ndarray): per estimable parameter a single float with the parameter scaling for that parameter.
Use 1 as identity.
data (mot.lib.kernel_data.KernelData): the user provided data for the ``void* data`` pointer.
parameter_transform_func (mot.lib.cl_function.CLFunction): A transformation that can prepare the parameter plus/
minus the proposed step before evaluation.
Signature:
.. code-block:: c
void <func_name>(void* data, local mot_float_type* x);
"""
nmr_params = parameters.shape[1]
nmr_derivatives = (nmr_params ** 2 - nmr_params) // 2 + nmr_params
if parameter_transform_func is None:
parameter_transform_func = SimpleCLFunction.from_string(
'void voidTransform(void* data, local mot_float_type* x){}')
initial_step = _get_initial_step_size(parameters, lower_bounds, upper_bounds, max_step_sizes, scaling_factors)
if step_offset:
initial_step *= float(step_ratio) ** -step_offset
kernel_data = {
'data': data,
'parameters': Array(parameters, ctype='mot_float_type'),
'parameter_scalings_inv': Array(1. / scaling_factors, ctype='float', offset_str='0'),
'initial_step': Array(initial_step, ctype='float'),
'step_evaluates': Zeros((parameters.shape[0], nmr_derivatives, nmr_steps), 'double'),
'x_tmp': LocalMemory('mot_float_type', nmr_params)
}
_derivation_kernel(objective_func, nmr_params, nmr_steps, step_ratio, parameter_transform_func).evaluate(
kernel_data, parameters.shape[0], use_local_reduction=True)
return kernel_data['step_evaluates'].get_data()
def _richardson_extrapolation(derivatives, step_ratio):
"""Apply the Richardson extrapolation to the derivatives computed with different steps.
Having for every problem instance and every Hessian element multiple derivatives computed with decreasing steps,
we can now apply Richardson extrapolation to reduce the error term from :math:`\mathcal{O}(h^{2})` to
:math:`\mathcal{O}(h^{4})` or :math:`\mathcal{O}(h^{6})` depending on how many steps we have calculated.
This method only considers extrapolation up to the sixth error order. For a set of two derivatives we compute
a single fourth order approximation, for three derivatives and up we compute ``n-2`` sixth order approximations.
Expected errors for approximation ``i`` are computed using the ``i+1`` derivative plus a statistical error
based on the machine precision.
Args:
derivatives (ndarray): (n, p, s), a matrix with for n problems and p parameters, s step sizes.
step_ratio (ndarray): the diminishing ratio of the steps used to compute the derivatives.
"""
nmr_problems, nmr_derivatives, nmr_steps = derivatives.shape
richardson_coefficients = _get_richardson_coefficients(step_ratio, min(nmr_steps, 3) - 1)
nmr_convolutions_needed = nmr_steps - (len(richardson_coefficients) - 2)
final_nmr_convolutions = nmr_convolutions_needed - 1
kernel_data = {
'derivatives': Array(derivatives, 'double', offset_str='{problem_id} * ' + str(nmr_steps)),
'richardson_extrapolations': Zeros(
(nmr_problems * nmr_derivatives, nmr_convolutions_needed), 'double', mode='rw'),
'errors': Zeros(
(nmr_problems * nmr_derivatives, final_nmr_convolutions), 'double', mode='rw'),
}
richardson_func = _richardson_error_kernel(nmr_steps, nmr_convolutions_needed, richardson_coefficients)
richardson_func.evaluate(kernel_data, nmr_problems * nmr_derivatives,
use_local_reduction=False)
richardson_extrapolations = np.reshape(kernel_data['richardson_extrapolations'].get_data(),
(nmr_problems, nmr_derivatives, nmr_convolutions_needed))
errors = np.reshape(kernel_data['errors'].get_data(),
(nmr_problems, nmr_derivatives, final_nmr_convolutions))
return richardson_extrapolations[..., :final_nmr_convolutions], errors
def _wynn_extrapolate(derivatives):
nmr_problems, nmr_derivatives, nmr_steps = derivatives.shape
nmr_extrapolations = nmr_steps - 2
kernel_data = {
'derivatives': Array(derivatives, 'double', offset_str='{problem_id} * ' + str(nmr_steps)),
'extrapolations': Zeros((nmr_problems * nmr_derivatives, nmr_extrapolations), 'double', mode='rw'),
'errors': Zeros((nmr_problems * nmr_derivatives, nmr_extrapolations), 'double', mode='rw'),
}
wynn_func = _wynn_extrapolation_kernel(nmr_steps)
wynn_func.evaluate(kernel_data, nmr_problems * nmr_derivatives,
use_local_reduction=False)
extrapolations = np.reshape(kernel_data['extrapolations'].get_data(),
(nmr_problems, nmr_derivatives, nmr_extrapolations))
errors = np.reshape(kernel_data['errors'].get_data(), (nmr_problems, nmr_derivatives, nmr_extrapolations))
return extrapolations, errors
def _median_outlier_extrapolation(derivatives, errors):
"""Add an error to outliers and afterwards return the derivatives with the lowest errors.
This seems to be the slowest function in the library. Perhaps one day update this to OpenCL as well.
Some ideas are in: http://krstn.eu/np.nanpercentile()-there-has-to-be-a-faster-way/
"""
def _get_median_outliers_errors(der, trim_fact=10):
"""Discards any estimate that differs wildly from the median of the estimates (of that derivative).
A factor of 10 to 1 in either direction . The actual trimming factor is
defined as a parameter.
"""
p25, median, p75 = np.nanpercentile(der, q=[25, 50, 75], axis=2)[..., None]
iqr = np.abs(p75 - p25)
a_median = np.abs(median)
outliers = (((abs(der) < (a_median / trim_fact)) +
(abs(der) > (a_median * trim_fact))) * (a_median > 1e-8) +
((der < p25 - 1.5 * iqr) + (p75 + 1.5 * iqr < der)))
return outliers * np.abs(der - median)
all_nan = np.where(np.sum(np.isnan(errors), axis=2) == errors.shape[2])
errors[all_nan[0], all_nan[1], 0] = 0
derivatives[all_nan[0], all_nan[1], 0] = 0
errors += _get_median_outliers_errors(derivatives)
minpos = np.nanargmin(errors, axis=2)
indices = np.indices(minpos.shape)
derivatives_final = derivatives[indices[0], indices[1], minpos]
errors_final = errors[indices[0], indices[1], minpos]
derivatives_final[all_nan[0], all_nan[1]] = np.nan
errors_final[all_nan[0], all_nan[1]] = np.nan
return derivatives_final, errors_final
def _results_vector_to_matrix(vectors, nmr_params):
"""Transform for every problem (and optionally every step size) the vector results to a square matrix.
Since we only process the lower triangular items, we have to convert the 1d vectors into 2d matrices for every
problem. This function does that.
Args:
vectors (ndarray): for every problem (and step size) the 1d vector with results
nmr_params (int): the number of parameters
"""
matrices = np.zeros(vectors.shape[:-1] + (nmr_params, nmr_params), dtype=vectors.dtype)
ltr_ind = 0
for px in range(nmr_params):
matrices[..., px, px] = vectors[..., ltr_ind]
ltr_ind += 1
for py in range(px + 1, nmr_params):
matrices[..., px, py] = vectors[..., ltr_ind]
matrices[..., py, px] = vectors[..., ltr_ind]
ltr_ind += 1
return matrices
def _get_initial_step_size(parameters, lower_bounds, upper_bounds, max_step_sizes, scaling_factors):
"""Get an initial step size to use for every parameter.
This chooses the step sizes based on the maximum step size and the lower and upper bounds.
Args:
parameters (ndarray): The parameters at which to evaluate the gradient. A (d, p) matrix with d problems,
p parameters and n samples.
lower_bounds (list): lower bounds
upper_bounds (list): upper bounds
max_step_sizes (list): the maximum step size, or the maximum step size per parameter.
scaling_factors (ndarray): per estimable parameter a single float with the parameter scaling for that parameter.
Use 1 as identity.
Returns:
ndarray: for every problem instance the vector with the initial step size for each parameter.
"""
if lower_bounds is None and upper_bounds is None:
return np.repeat(max_step_sizes[None, :], parameters.shape[0], axis=0)
initial_step = np.zeros_like(parameters)
for ind in range(parameters.shape[1]):
if lower_bounds[ind] is None and upper_bounds[ind] is None:
initial_step[:, ind] = max_step_sizes[ind]
else:
use_lower = lower_bounds[ind] is not None
use_upper = upper_bounds[ind] is not None
if use_upper and not use_lower:
minimum_allowed_step = np.abs(upper_bounds[ind] - parameters[:, ind]) * scaling_factors[ind]
elif use_lower and not use_upper:
minimum_allowed_step = np.abs(parameters[:, ind] - lower_bounds[ind]) * scaling_factors[ind]
else:
minimum_allowed_step = np.minimum(np.abs(parameters[:, ind] - lower_bounds[ind]),
np.abs(upper_bounds[ind] - parameters[:, ind])) * scaling_factors[ind]
initial_step[:, ind] = np.minimum(minimum_allowed_step, max_step_sizes[ind])
return initial_step
def _get_richardson_coefficients(step_ratio, nmr_extrapolations):
"""Get the matrix with the convolution rules.
In the kernel we will extrapolate the sequence based on the Richardsons method.
This method assumes we have a series expansion like:
L = f(h) + a0 * h^p_0 + a1 * h^p_1+ a2 * h^p_2 + ...
where p_i = order + step * i and f(h) -> L as h -> 0, but f(0) != L.
If we evaluate the right hand side for different stepsizes h we can fit a polynomial to that sequence
of approximations.
Instead of using all the generated points at the same time, we convolute the Richardson method over the
acquired steps to approximate the higher order error terms.
Args:
step_ratio (float): the ratio at which the steps diminish.
nmr_extrapolations (int): the number of extrapolations we want to do. Each extrapolation requires
an evaluation at an exponentially decreasing step size.
Returns:
ndarray: a vector with the extrapolation coefficients.
"""
if nmr_extrapolations == 0:
return np.array([1])
error_diminishing_per_step = 2
taylor_expansion_order = 2
def r_matrix(num_terms):
i, j = np.ogrid[0:num_terms + 1, 0:num_terms]
r_mat = np.ones((num_terms + 1, num_terms + 1))
r_mat[:, 1:] = (1.0 / step_ratio) ** (i * (error_diminishing_per_step * j + taylor_expansion_order))
return r_mat
return linalg.pinv(r_matrix(nmr_extrapolations))[0]
def _get_compute_functions_cl(objective_func, nmr_params, nmr_steps, step_ratio, parameter_transform_func):
func = objective_func.get_cl_code()
func += parameter_transform_func.get_cl_code()
return func + '''
double _calculate_function(void* data, local mot_float_type* x){
return ''' + objective_func.get_cl_function_name() + '''(x, data, 0);
}
/**
* Evaluate the model with a perturbation in two dimensions.
*
* Args:
* data: the data container
* x_input: the array with the input parameters
* perturb_dim_0: the index (into the x_input parameters) of the first parameter to perturbate
* perturb_0: the added perturbation of the index corresponding to ``perturb_dim_0``
* perturb_dim_1: the index (into the x_input parameters) of the second parameter to perturbate
* perturb_1: the added perturbation of the index corresponding to ``perturb_dim_1``
*
* Returns:
* the function evaluated at the parameters plus their perturbation.
*/
double _eval_step(void* data, local mot_float_type* x_input,
uint perturb_dim_0, mot_float_type perturb_0,
uint perturb_dim_1, mot_float_type perturb_1,
local mot_float_type* x_tmp){
if(get_local_id(0) == 0){
for(uint i = 0; i < ''' + str(nmr_params) + '''; i++){
x_tmp[i] = x_input[i];
}
x_tmp[perturb_dim_0] += perturb_0;
x_tmp[perturb_dim_1] += perturb_1;
}
barrier(CLK_LOCAL_MEM_FENCE);
''' + parameter_transform_func.get_cl_function_name() + '''(data, x_tmp);
return _calculate_function(data, x_tmp);
}
/**
* Compute one element of the Hessian for a number of steps.
*
* This uses the initial steps in the data structure, indexed by the parameters to change (px, py).
*/
void _compute_steps(void* data, local mot_float_type* x_input, mot_float_type f_x_input,
uint px, uint py, global double* step_evaluates,
global float* parameter_scalings_inv,
global float* initial_step,
local mot_float_type* x_tmp){
double step_x;
double step_y;
double tmp;
bool is_first_workitem = get_local_id(0) == 0;
if(px == py){
for(uint step_ind = 0; step_ind < ''' + str(nmr_steps) + '''; step_ind++){
step_x = initial_step[px] / pown(''' + str(float(step_ratio)) + ''', step_ind);
tmp = (
_eval_step(data, x_input,
px, 2 * (step_x * parameter_scalings_inv[px]),
0, 0, x_tmp)
+ _eval_step(data, x_input,
px, -2 * (step_x * parameter_scalings_inv[px]),
0, 0, x_tmp)
- 2 * f_x_input
) / (4 * step_x * step_x);
if(is_first_workitem){
step_evaluates[step_ind] = tmp;
}
}
}
else{
for(uint step_ind = 0; step_ind < ''' + str(nmr_steps) + '''; step_ind++){
step_x = initial_step[px] / pown(''' + str(float(step_ratio)) + ''', step_ind);
step_y = initial_step[py] / pown(''' + str(float(step_ratio)) + ''', step_ind);
tmp = (
_eval_step(data, x_input,
px, step_x * parameter_scalings_inv[px],
py, step_y * parameter_scalings_inv[py], x_tmp)
- _eval_step(data, x_input,
px, step_x * parameter_scalings_inv[px],
py, -step_y * parameter_scalings_inv[py], x_tmp)
- _eval_step(data, x_input,
px, -step_x * parameter_scalings_inv[px],
py, step_y * parameter_scalings_inv[py], x_tmp)
+ _eval_step(data, x_input,
px, -step_x * parameter_scalings_inv[px],
py, -step_y * parameter_scalings_inv[py], x_tmp)
) / (4 * step_x * step_y);
if(is_first_workitem){
step_evaluates[step_ind] = tmp;
}
}
}
}
'''
def _get_error_estimate_functions_cl(nmr_steps, nmr_convolutions,
richardson_coefficients):
func = '''
/**
* Apply a simple kernel convolution over the results from each row of steps.
*
* This applies a convolution starting from the starting step index that contained a valid move.
* It uses the mode 'reflect' to deal with outside points.
*
* Please note that this kernel is hard coded to work with 2nd order Taylor expansions derivatives only.
* Args:
* step_evaluates: the step evaluates for each row of the step sizes
* richardson_extrapolations: the array to place the convoluted results in
*/
void _apply_richardson_convolution(
global double* derivatives,
global double* richardson_extrapolations){
double convolution_kernel[''' + str(len(richardson_coefficients)) + '''] = {''' + \
', '.join(map(str, richardson_coefficients)) + '''};
for(uint step_ind = 0; step_ind < ''' + str(nmr_convolutions) + '''; step_ind++){
// convolute the Richardson coefficients
for(uint kernel_ind = 0; kernel_ind < ''' + str(len(richardson_coefficients)) + '''; kernel_ind++){
uint kernel_step_ind = step_ind + kernel_ind;
// reflect
if(kernel_step_ind >= ''' + str(nmr_steps) + '''){
kernel_step_ind -= 2 * (kernel_step_ind - ''' + str(nmr_steps) + ''') + 1;
}
richardson_extrapolations[step_ind] +=
derivatives[kernel_step_ind] * convolution_kernel[kernel_ind];
}
}
}
/**
* Compute the errors from using the Richardson extrapolation.
*
* "A neat trick to compute the statistical uncertainty in the estimate of our desired derivative is to use
* statistical methodology for that error estimate. While I do appreciate that there is nothing truly
* statistical or stochastic in this estimate, the approach still works nicely, providing a very
* reasonable estimate in practice. A three term Richardson-like extrapolant, then evaluated at four
* distinct values for \delta, will yield an estimate of the standard error of the constant term, with one
* spare degree of freedom. The uncertainty is then derived by multiplying that standard error by the
* appropriate percentile from the Students-t distribution."
*
* Cited from https://numdifftools.readthedocs.io/en/latest/src/numerical/derivest.html
*
* In addition to the error derived from the various estimates, we also approximate the numerical round-off
* errors in this method. All in all, the resulting errors should reflect the absolute error of the
* estimates.
*/
void _compute_richardson_errors(
global double* derivatives,
global double* richardson_extrapolations,
global double* errors){
// The magic number 12.7062... follows from the student T distribution with one dof.
// >>> import scipy.stats as ss
// >>> allclose(ss.t.cdf(12.7062047361747, 1), 0.975) # True
double fact = max(
(mot_float_type)''' + str(12.7062047361747 * np.sqrt(np.sum(richardson_coefficients**2))) + ''',
(mot_float_type)MOT_EPSILON * 10);
double tolerance;
double error;
for(uint conv_ind = 0; conv_ind < ''' + str(nmr_convolutions - 1) + '''; conv_ind++){
tolerance = max(fabs(richardson_extrapolations[conv_ind + 1]),
fabs(richardson_extrapolations[conv_ind])
) * MOT_EPSILON * fact;
error = fabs(richardson_extrapolations[conv_ind] - richardson_extrapolations[conv_ind + 1]) * fact;
if(error <= tolerance){
error += tolerance * 10;
}
else{
error += fabs(richardson_extrapolations[conv_ind] -
derivatives[''' + str(nmr_steps - nmr_convolutions + 1) + ''' + conv_ind]) * fact;
}
errors[conv_ind] = error;
}
}
'''
return func
def _derivation_kernel(objective_func, nmr_params, nmr_steps, step_ratio, parameter_transform_func):
coords = [(x, y) for x, y in itertools.combinations_with_replacement(range(nmr_params), 2)]
func = _get_compute_functions_cl(objective_func, nmr_params, nmr_steps, step_ratio, parameter_transform_func)
return SimpleCLFunction.from_string('''
void compute(local mot_float_type* parameters,
global float* parameter_scalings_inv,
global float* initial_step,
global double* step_evaluates,
local mot_float_type* x_tmp,
void* data){
double f_x_input = _calculate_function(data, parameters);
uint coords[''' + str(len(coords)) + '''][2] = {
''' + ', '.join('{{{}, {}}}'.format(*c) for c in coords) + '''
};
for(uint coord_ind = 0; coord_ind < ''' + str(len(coords)) + '''; coord_ind++){
_compute_steps(data, parameters, f_x_input, coords[coord_ind][0], coords[coord_ind][1],
step_evaluates + coord_ind * ''' + str(nmr_steps) + ''', parameter_scalings_inv,
initial_step, x_tmp);
}
}
''', dependencies=[SimpleCLCodeObject(func)])
def _richardson_error_kernel(nmr_steps, nmr_convolutions, richardson_coefficients):
func = _get_error_estimate_functions_cl(nmr_steps, nmr_convolutions, richardson_coefficients)
return SimpleCLFunction.from_string('''
void convolute(global double* derivatives, global double* richardson_extrapolations, global double* errors){
_apply_richardson_convolution(derivatives, richardson_extrapolations);
_compute_richardson_errors(derivatives, richardson_extrapolations, errors);
}
''', dependencies=[SimpleCLCodeObject(func)])
def _wynn_extrapolation_kernel(nmr_steps):
"""OpenCL kernel for extrapolating a slowly convergent sequence.
This algorithm, known in the Python Numdifftools as DEA3, attempts to extrapolate nonlinearly to a better
estimate of the sequence's limiting value, thus improving the rate of convergence. The routine is based on the
epsilon algorithm of P. Wynn [1].
References:
- [1] C. Brezinski and M. Redivo Zaglia (1991)
"Extrapolation Methods. Theory and Practice", North-Holland.
- [2] C. Brezinski (1977)
"Acceleration de la convergence en analyse numerique",
"Lecture Notes in Math.", vol. 584,
Springer-Verlag, New York, 1977.
- [3] E. J. Weniger (1989)
"Nonlinear sequence transformations for the acceleration of
convergence and the summation of divergent series"
Computer Physics Reports Vol. 10, 189 - 371
http://arxiv.org/abs/math/0306302v1
"""
return SimpleCLFunction.from_string('''
void compute(global double* derivatives, global double* extrapolations, global double* errors){
double v0, v1, v2;
double delta0, delta1;
double err0, err1;
double tol0, tol1;
double ss;
bool converged;
double result;
for(uint i = 0; i < ''' + str(nmr_steps - 2) + '''; i++){
v0 = derivatives[i];
v1 = derivatives[i + 1];
v2 = derivatives[i + 2];
delta0 = v1 - v0;
delta1 = v2 - v1;
err0 = fabs(delta0);
err1 = fabs(delta1);
tol0 = max(fabs(v0), fabs(v1)) * MOT_EPSILON;
tol1 = max(fabs(v1), fabs(v2)) * MOT_EPSILON;
// avoid division by zero and overflow
if(err0 < MOT_MIN){
delta0 = MOT_MIN;
}
if(err1 < MOT_MIN){
delta1 = MOT_MIN;
}
ss = 1.0 / delta1 - 1.0 / delta0 + MOT_MIN;
converged = ((err0 <= tol0) && (err1 <= tol1)) || (fabs(ss * v1) <= 1e-3);
result = (converged ? v2 : v1 + 1.0/ss);
extrapolations[i] = result;
errors[i] = err0 + err1 + (converged ? tol1 * 10 : fabs(result - v2));
}
}
''')