Source code for larray.core.ufuncs

# numpy ufuncs
# http://docs.scipy.org/doc/numpy/reference/routines.math.html
import numpy as np

from larray.core.array import Array, make_args_broadcastable


[docs]def wrap_elementwise_array_func(func, doc=None): r""" Wrap a function using numpy arrays to work with LArray arrays instead. Parameters ---------- func : function A function taking numpy arrays as arguments and returning numpy arrays of the same shape. If the function takes several arguments, this wrapping code assumes the result will have the combination of all axes present. In numpy talk, arguments will be broadcasted to each other. doc : str, optional The documentation (docstring) for the new function. Defaults to the documentation of the original function, if any. Returns ------- function A function taking larray.Array arguments and returning larray.Arrays. Examples -------- For example, if we want to apply the Hodrick-Prescott filter from statsmodels we can use this: >>> from statsmodels.tsa.filters.hp_filter import hpfilter # doctest: +SKIP >>> hpfilter = wrap_elementwise_array_func(hpfilter) # doctest: +SKIP hpfilter is now a function taking a one dimensional Array as input and returning a one dimensional Array as output Now let us suppose we have a ND array such as: >>> from larray.random import normal >>> arr = normal(axes="sex=M,F;year=2016..2018") # doctest: +SKIP >>> arr # doctest: +SKIP sex\year 2016 2017 2018 M -1.15 0.56 -1.06 F -0.48 -0.39 -0.98 We can apply an Hodrick-Prescott filter to it by using: >>> # 6.25 is the recommended smoothing value for annual data >>> cycle, trend = arr.apply(hpfilter, 6.25, axes="year") # doctest: +SKIP >>> trend # doctest: +SKIP sex\year 2016 2017 2018 M -0.61 -0.52 -0.52 F -0.37 -0.61 -0.87 """ def wrapper(*args, **kwargs): raw_bcast_args, raw_bcast_kwargs, res_axes = make_args_broadcastable(args, kwargs) # We pass only raw numpy arrays to the ufuncs even though numpy is normally meant to handle those cases itself # via __array_wrap__ # There is a problem with np.clip though (and possibly other ufuncs): np.clip is roughly equivalent to # np.maximum(np.minimum(np.asarray(la), high), low) # the np.asarray(la) is problematic because it lose original labels # and then tries to get them back from high, where they are possibly # incomplete if broadcasting happened # It fails on "np.minimum(ndarray, Array)" because it calls __array_wrap__(high, result) which cannot work if # there was broadcasting involved (high has potentially less labels than result). # it does this because numpy calls __array_wrap__ on the argument with the highest __array_priority__ res_data = func(*raw_bcast_args, **raw_bcast_kwargs) if res_axes: if isinstance(res_data, tuple): return tuple(Array(res_arr, res_axes) for res_arr in res_data) else: return Array(res_data, res_axes) else: return res_data # copy function name. We are intentionally not using functools.wraps, because it does not work for wrapping a # function from another module wrapper.__name__ = func.__name__ wrapper.__doc__ = func.__doc__ if doc is None else doc return wrapper
def wrap_numpy_func(func, doc=None): # update documentation by inserting a warning message after the short description of the numpy function # (otherwise the description of ufuncs given in the corresponding API 'autosummary' tables will always # start with 'larray specific variant of ...' without giving a meaningful description of what does the ufunc) if doc is None: if func.__doc__.startswith('\n'): # docstring starts with short description end_signature = 1 end_short_desc = func.__doc__.find('\n\n') else: # docstring starts with signature end_signature = func.__doc__.find('\n\n') + 2 end_short_desc = func.__doc__.find('\n\n', end_signature) short_desc = func.__doc__[:end_short_desc] numpy_doc = func.__doc__[end_short_desc:] ident = ' ' * (len(short_desc[end_signature:]) - len(short_desc[end_signature:].lstrip())) doc = f'{short_desc}\n\n{ident}larray specific variant of ``numpy.{func.__name__}``.\n\n' \ f'{ident}Documentation from numpy:{numpy_doc}' wrapper = wrap_elementwise_array_func(func, doc) # set __qualname__ explicitly (all these functions are supposed to be top-level function in the ufuncs module) wrapper.__qualname__ = func.__name__ # we should not copy __module__ return wrapper where = wrap_numpy_func(np.where, r""" where(condition, x, y) Return elements, either from `x` or `y`, depending on `condition`. Parameters ---------- condition : boolean Array When True, yield `x`, otherwise yield `y`. x, y : Array Values from which to choose. Returns ------- out : Array If both `x` and `y` are specified, the output array contains elements of `x` where `condition` is True, and elements from `y` elsewhere. Examples -------- >>> from larray import Array >>> arr = Array([[10, 7, 5, 9], ... [5, 8, 3, 7], ... [6, 2, 0, 9], ... [9, 10, 5, 6]], "a=a0..a3;b=b0..b3") >>> arr a\b b0 b1 b2 b3 a0 10 7 5 9 a1 5 8 3 7 a2 6 2 0 9 a3 9 10 5 6 Simple use >>> where(arr <= 5, 0, arr) a\b b0 b1 b2 b3 a0 10 7 0 9 a1 0 8 0 7 a2 6 0 0 9 a3 9 10 0 6 With broadcasting >>> mean_by_col = arr.mean('a') >>> mean_by_col b b0 b1 b2 b3 7.5 6.75 3.25 7.75 >>> # for each column, set values below the mean value to the mean value >>> where(arr < mean_by_col, mean_by_col, arr) a\b b0 b1 b2 b3 a0 10.0 7.0 5.0 9.0 a1 7.5 8.0 3.25 7.75 a2 7.5 6.75 3.25 9.0 a3 9.0 10.0 5.0 7.75 """) maximum = wrap_numpy_func(np.maximum, r""" maximum(x1, x2, out=None, dtype=None) Element-wise maximum of array elements. Compare two arrays and returns a new array containing the element-wise maxima. If one of the elements being compared is a NaN, then that element is returned. If both elements are NaNs then the first is returned. The latter distinction is important for complex NaNs, which are defined as at least one of the real or imaginary parts being a NaN. The net effect is that NaNs are propagated. Parameters ---------- x1, x2 : Array The arrays holding the elements to be compared. out : Array, optional An array into which the result is stored. dtype : data-type, optional Overrides the dtype of the output array. Returns ------- y : Array or scalar The maximum of `x1` and `x2`, element-wise. This is a scalar if both `x1` and `x2` are scalars. See Also -------- minimum : Element-wise minimum of two arrays, propagates NaNs. Notes ----- The maximum is equivalent to ``where(x1 >= x2, x1, x2)`` when neither x1 nor x2 are NaNs, but it is faster. Examples -------- >>> from larray import Array >>> arr1 = Array([[10, 7, 5, 9], ... [5, 8, 3, 7]], "a=a0,a1;b=b0..b3") >>> arr2 = Array([[6, 2, 9, 0], ... [9, 10, 5, 6]], "a=a0,a1;b=b0..b3") >>> arr1 a\b b0 b1 b2 b3 a0 10 7 5 9 a1 5 8 3 7 >>> arr2 a\b b0 b1 b2 b3 a0 6 2 9 0 a1 9 10 5 6 >>> maximum(arr1, arr2) a\b b0 b1 b2 b3 a0 10 7 9 9 a1 9 10 5 7 With broadcasting >>> arr2['a0'] b b0 b1 b2 b3 6 2 9 0 >>> maximum(arr1, arr2['a0']) a\b b0 b1 b2 b3 a0 10 7 9 9 a1 6 8 9 7 """) minimum = wrap_numpy_func(np.minimum, r""" minimum(x1, x2, out=None, dtype=None) Element-wise minimum of array elements. Compare two arrays and returns a new array containing the element-wise minima. If one of the elements being compared is a NaN, then that element is returned. If both elements are NaNs then the first is returned. The latter distinction is important for complex NaNs, which are defined as at least one of the real or imaginary parts being a NaN. The net effect is that NaNs are propagated. Parameters ---------- x1, x2 : Array The arrays holding the elements to be compared. out : Array, optional An array into which the result is stored. dtype : data-type, optional Overrides the dtype of the output array. Returns ------- y : Array or scalar The minimum of `x1` and `x2`, element-wise. This is a scalar if both `x1` and `x2` are scalars. See Also -------- maximum : Element-wise maximum of two arrays, propagates NaNs. Notes ----- The minimum is equivalent to ``where(x1 <= x2, x1, x2)`` when neither x1 nor x2 are NaNs, but it is faster. Examples -------- >>> from larray import Array >>> arr1 = Array([[10, 7, 5, 9], ... [5, 8, 3, 7]], "a=a0,a1;b=b0..b3") >>> arr2 = Array([[6, 2, 9, 0], ... [9, 10, 5, 6]], "a=a0,a1;b=b0..b3") >>> arr1 a\b b0 b1 b2 b3 a0 10 7 5 9 a1 5 8 3 7 >>> arr2 a\b b0 b1 b2 b3 a0 6 2 9 0 a1 9 10 5 6 >>> minimum(arr1, arr2) a\b b0 b1 b2 b3 a0 6 2 5 0 a1 5 8 3 6 With broadcasting >>> arr2['a0'] b b0 b1 b2 b3 6 2 9 0 >>> minimum(arr1, arr2['a0']) a\b b0 b1 b2 b3 a0 6 2 5 0 a1 5 2 3 0 """) def _generalized_isnan(arr, out=None, where=True, **kwargs): if isinstance(arr, np.ndarray) and arr.dtype.kind == 'O': if out is not None or where is not True or kwargs: raise ValueError("The 'out', 'where' and other keyword arguments " "are not supported for object arrays.") return arr != arr else: return np.isnan(arr, out=out, where=where, **kwargs) isnan = wrap_elementwise_array_func(_generalized_isnan, r""" Test element-wise for NaN and return result as a boolean array. Parameters ---------- x : array_like Input array. out : ndarray, None, or tuple of ndarray and None, optional A location into which the result is stored. If provided, it must have a shape that the inputs broadcast to. If not provided or None, a freshly-allocated array is returned. A tuple (possible only as a keyword argument) must have length equal to the number of outputs. where : array_like, optional This condition is broadcast over the input. At locations where the condition is True, the `out` array will be set to the ufunc result. Elsewhere, the `out` array will retain its original value. Note that if an uninitialized `out` array is created via the default ``out=None``, locations within it where the condition is False will remain uninitialized. **kwargs For other keyword-only arguments, see the :ref:`ufunc docs <ufuncs.kwargs>`. Returns ------- y : ndarray or bool True where ``x`` is NaN, false otherwise. This is a scalar if `x` is a scalar. See Also -------- isinf, isneginf, isposinf, isfinite, isnat Notes ----- Contrary to the numpy implementation, this function support object arrays. NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). This means that Not a Number is not equivalent to infinity. Examples -------- >>> import larray as la >>> la.isnan(la.nan) True >>> la.isnan(la.inf) False >>> arr = la.Array([la.nan, 1, la.inf], ... la.Axis(3, 'values')) >>> la.isnan(arr) values* 0 1 2 True False False >>> arr = la.Array(['abc', 1, la.nan], ... la.Axis(3, 'values'), dtype=object) >>> la.isnan(arr) values* 0 1 2 False False True """) def _generalized_nan_to_num(arr, copy=True, nan=0, posinf=None, neginf=None): if isinstance(arr, np.ndarray) and arr.dtype.kind == 'O': import sys if posinf is None: posinf = sys.float_info.max if neginf is None: neginf = -sys.float_info.max res = arr.copy() if copy else arr is_nan_value = arr != arr is_pos_inf_value = arr == np.inf is_neg_inf_value = arr == -np.inf if isinstance(nan, np.ndarray): # each array argument is reshaped to a compatible shape for # broadcasting by larray machinery but not actually broadcasted yet nan = np.broadcast_to(nan, arr.shape)[is_nan_value] res[is_nan_value] = nan if isinstance(posinf, np.ndarray): posinf = np.broadcast_to(posinf, arr.shape)[is_pos_inf_value] res[is_pos_inf_value] = posinf if isinstance(neginf, np.ndarray): neginf = np.broadcast_to(neginf, arr.shape)[is_neg_inf_value] res[is_neg_inf_value] = neginf return res else: return np.nan_to_num(arr, copy=copy, nan=nan, posinf=posinf, neginf=neginf) nan_to_num = wrap_elementwise_array_func(_generalized_nan_to_num,r""" Replace NaN with zero and infinity with large finite numbers (default behaviour) or with the numbers defined by the user using the `nan`, `posinf` and/or `neginf` keywords. If `x` is inexact or an object array, NaN is replaced by zero or by the user defined value in `nan` keyword, infinity is replaced by the largest finite floating point value representable by ``x.dtype`` or by the user defined value in `posinf` keyword and -infinity is replaced by the most negative finite floating point value representable by ``x.dtype`` or by the user defined value in `neginf` keyword. For complex dtypes, the above is applied to each of the real and imaginary components of `x` separately. If `x` is not inexact or object, then no replacements are made. Parameters ---------- x : scalar or array_like Input data. copy : bool, optional Whether to create a copy of `x` (True) or to replace values in-place (False). The in-place operation only occurs if casting to an array does not require a copy. Default is True. nan : int, float or array_like, optional Value to be used to fill NaN values. If no value is passed then NaN values will be replaced with 0.0. posinf : int, float, optional Value to be used to fill positive infinity values. If no value is passed then positive infinity values will be replaced with the largest finite floating point value representable by ``x.dtype``. neginf : int, float, optional Value to be used to fill negative infinity values. If no value is passed then negative infinity values will be replaced with the most negative finite floating point value representable by ``x.dtype``. Returns ------- out : Array or scalar `x`, with the non-finite values replaced. If `copy` is False, this may be `x` itself. See Also -------- isinf : Shows which elements are positive or negative infinity. isneginf : Shows which elements are negative infinity. isposinf : Shows which elements are positive infinity. isnan : Shows which elements are Not a Number (NaN). isfinite : Shows which elements are finite (not NaN, not infinity) Notes ----- Contrary to the numpy implementation, this function support object arrays. NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). This means that Not a Number is not equivalent to infinity. Examples -------- >>> import larray as la >>> la.nan_to_num(la.inf) 1.7976931348623157e+308 >>> la.nan_to_num(-la.inf) -1.7976931348623157e+308 >>> la.nan_to_num(np.nan) 0.0 >>> x = la.Array([-la.inf, 1, la.nan, 2, la.inf], la.Axis(5, 'values')) >>> la.nan_to_num(x) values* 0 1 2 3 4 -1.7976931348623157e+308 1.0 0.0 2.0 1.7976931348623157e+308 >>> la.nan_to_num(x, nan=-1, posinf=999, neginf=-999) values* 0 1 2 3 4 -999.0 1.0 -1.0 2.0 999.0 >>> x = la.Array([1, 'abc', la.nan, 2], la.Axis(4, 'values'), dtype=object) >>> la.nan_to_num(x) values* 0 1 2 3 1 abc 0 2 >>> y = la.Array([complex(la.inf, la.nan), la.nan, complex(la.nan, la.inf)], ... la.Axis(3, 'values')) >>> la.nan_to_num(y) values* 0 1 2 (1.7976931348623157e+308+0j) 0j 1.7976931348623157e+308j """ )