Source code for larray.core.array

"""
Array class.
"""

# ? implement multi group in one axis getitem: lipro['P01,P02;P05'] <=> (lipro['P01,P02'], lipro['P05'])

# * we need an API to get to the "next" label. Sometimes, we want to use label+1, but that is problematic when labels
#   are not numeric, or do not have a step of 1.
#       X.agegroup[X.agegroup.after(25):]
#       X.agegroup[X.agegroup[25].next():]

# * implement keepaxes=True for _group_aggregate instead of/in addition to group tuples

# ? implement newaxis

# * Axis.sequence? geo.seq('A31', 'A38') (equivalent to geo['A31..A38'])

# ? re-implement row_totals/col_totals? or what do we do with them?

# * time specific API so that we know if we go for a subclass or not

# * data alignment in arithmetic methods

# * test structured arrays

# * use larray "utils" in LIAM2 (to avoid duplicated code)

from itertools import product, chain, groupby
from collections.abc import Iterable, Sequence
from pathlib import Path
import builtins
import functools
import warnings

from typing import Any, Union, Tuple, List

import numpy as np
import pandas as pd

try:
    import xlwings as xw
except ImportError:
    xw = None

try:
    from numpy import nanprod as np_nanprod
except ImportError:
    np_nanprod = None

from larray.core.abstractbases import ABCArray
from larray.core.constants import nan, inf
from larray.core.metadata import Metadata
from larray.core.expr import ExprNode
from larray.core.group import (Group, IGroup, LGroup, _to_key, _to_keys,
                               _translate_sheet_name, _translate_group_key_hdf)
from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis         # noqa: F401
from larray.core.plot import PlotObject
from larray.util.misc import (table2str, size2str, ReprString,
                              float_error_handler_factory, light_product, common_dtype,
                              renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip,
                              Repeater, Product, ensure_no_numpy_type, exactly_one, concatenate_ndarrays)
from larray.util.options import _OPTIONS, DISPLAY_MAXLINES, DISPLAY_EDGEITEMS, DISPLAY_WIDTH, DISPLAY_PRECISION
from larray.util.types import Scalar


def all(values, axis=None) -> Union['Array', Scalar]:
    r"""
    Check whether every element evaluates to True, optionally along a given axis.

    Array inputs are handled by :meth:`Array.all`. Any other input is passed to the builtin ``all``, in which case
    `axis` is not used.

    See Also
    --------
    Array.all
    """
    if not isinstance(values, Array):
        # plain iterables (lists, generators, ...) keep the builtin semantics
        return builtins.all(values)
    return values.all(axis)


def any(values, axis=None) -> Union['Array', Scalar]:
    r"""
    Check whether at least one element evaluates to True, optionally along a given axis.

    Array inputs are handled by :meth:`Array.any`. Any other input is passed to the builtin ``any``, in which case
    `axis` is not used.

    See Also
    --------
    Array.any
    """
    if not isinstance(values, Array):
        # plain iterables (lists, generators, ...) keep the builtin semantics
        return builtins.any(values)
    return values.any(axis)


# commutative modulo float precision errors
def sum(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Sum the elements of an array.

    NumPy arrays and lists are first wrapped in an Array and summed via :meth:`Array.sum`. Other non-Array inputs
    are forwarded, with all extra arguments, to the builtin ``sum``.

    See Also
    --------
    Array.sum
    """
    # XXX: we might want to be more aggressive here (more types to convert), however, generators should still be
    #      computed via the builtin.
    needs_wrapping = isinstance(array, (np.ndarray, list))
    if needs_wrapping:
        array = Array(array)
    if not isinstance(array, Array):
        return builtins.sum(array, *args, **kwargs)
    return array.sum(*args, **kwargs)


def prod(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Multiply the elements of an array together.

    Thin functional wrapper: all extra arguments are forwarded to the ``prod`` method of `array`.

    See Also
    --------
    Array.prod
    """
    method = array.prod
    return method(*args, **kwargs)


def cumsum(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the cumulative sum of the elements of an array.

    Thin functional wrapper: all extra arguments are forwarded to the ``cumsum`` method of `array`.

    See Also
    --------
    Array.cumsum
    """
    method = array.cumsum
    return method(*args, **kwargs)


def cumprod(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the cumulative product of the elements of an array.

    Thin functional wrapper: all extra arguments are forwarded to the ``cumprod`` method of `array`.

    See Also
    --------
    Array.cumprod
    """
    method = array.cumprod
    return method(*args, **kwargs)


def min(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Return the smallest element of an array.

    Array inputs are handled by :meth:`Array.min`. Any other input is forwarded, with all extra arguments, to the
    builtin ``min``.

    See Also
    --------
    Array.min
    """
    if not isinstance(array, Array):
        return builtins.min(array, *args, **kwargs)
    return array.min(*args, **kwargs)


def max(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Return the largest element of an array.

    Array inputs are handled by :meth:`Array.max`. Any other input is forwarded, with all extra arguments, to the
    builtin ``max``.

    See Also
    --------
    Array.max
    """
    if not isinstance(array, Array):
        return builtins.max(array, *args, **kwargs)
    return array.max(*args, **kwargs)


def mean(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the arithmetic mean of an array.

    Thin functional wrapper: all extra arguments are forwarded to the ``mean`` method of `array`.

    See Also
    --------
    Array.mean
    """
    method = array.mean
    return method(*args, **kwargs)


def median(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the median of an array.

    Thin functional wrapper: all extra arguments are forwarded to the ``median`` method of `array`.

    See Also
    --------
    Array.median
    """
    method = array.median
    return method(*args, **kwargs)


def percentile(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the qth percentile of the data along the specified axis.

    Thin functional wrapper: all extra arguments are forwarded to the ``percentile`` method of `array`.

    See Also
    --------
    Array.percentile
    """
    method = array.percentile
    return method(*args, **kwargs)


# not commutative
def ptp(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the range of values (maximum - minimum) of an array.

    Thin functional wrapper: all extra arguments are forwarded to the ``ptp`` method of `array`.

    See Also
    --------
    Array.ptp
    """
    method = array.ptp
    return method(*args, **kwargs)


def var(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the variance of an array.

    Thin functional wrapper: all extra arguments are forwarded to the ``var`` method of `array`.

    See Also
    --------
    Array.var
    """
    method = array.var
    return method(*args, **kwargs)


def std(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the standard deviation of an array.

    Thin functional wrapper: all extra arguments are forwarded to the ``std`` method of `array`.

    See Also
    --------
    Array.std
    """
    method = array.std
    return method(*args, **kwargs)


def concat(arrays, axis=0, dtype=None):
    r"""Join several arrays end to end along one axis.

    Parameters
    ----------
    arrays : tuple of Array
        Arrays to concatenate.
    axis : axis reference (int, str or Axis), optional
        Axis along which to concatenate. All arrays must have that axis. Defaults to the first axis.
    dtype : dtype, optional
        Result data type. Defaults to the "closest" type which can hold all arrays types without loss of information.

    Returns
    -------
    Array

    Examples
    --------
    >>> arr1 = ndtest((2, 3))
    >>> arr1
    a\b  b0  b1  b2
     a0   0   1   2
     a1   3   4   5
    >>> arr2 = ndtest('a=a0,a1;b=b3')
    >>> arr2
    a\b  b3
     a0   0
     a1   1
    >>> arr3 = ndtest('b=b4,b5')
    >>> arr3
    b  b4  b5
        0   1
    >>> concat((arr1, arr2, arr3), 'b')
    a\b  b0  b1  b2  b3  b4  b5
     a0   0   1   2   0   0   1
     a1   3   4   5   1   0   1
    """
    # The concatenation axis is fetched from the first array only, so that the axes of the different arrays are
    # *NOT* checked for compatibility: appending axes of different lengths is the whole point.
    first_axes = arrays[0].axes
    axis_name = first_axes[axis].name
    labels_per_array = [arr.axes[axis].labels for arr in arrays]

    # Labels of incompatible types are combined using an object dtype, so that numeric labels are not implicitly
    # converted to strings (numpy should not do this in the first place but that is another story). This can happen
    # for example when we want to add a "total" tick to a numeric axis (eg age).
    joined_axis = Axis(concatenate_ndarrays(labels_per_array), axis_name)

    # result axes = first array axes (with the joined axis in place of the original one), completed by
    # the other axes of all the remaining arrays
    axes_with_joined = first_axes.replace(axis, joined_axis)
    remaining_axes = [arr.axes - axis for arr in arrays[1:]]
    result_axes = axes_with_joined.union(*remaining_axes)

    if dtype is None:
        dtype = common_dtype(arrays)

    result = empty(result_axes, dtype=dtype)
    # copy each array into its own contiguous range of positions along the joined axis
    offset = 0
    for labels, arr in zip(labels_per_array, arrays):
        end = offset + len(labels)
        result[joined_axis.i[offset:end]] = arr
        offset = end
    return result


class ArrayIterator:
    r"""
    Iterator over the items of ``array.data``, each wrapped in an Array using all axes of `array` but the first.

    Parameters
    ----------
    array : Array
        Array to iterate on. It must have more than one axis (checked with an assert).
    """
    # the only per-instance storage is the '__next__' slot, which __init__ fills with a closure
    # (presumably a speed optimisation to avoid attribute lookups on each step -- TODO confirm)
    __slots__ = ('__next__',)

    def __init__(self, array):
        data_iter = iter(array.data)
        # bind the bound method once so that each step is a plain function call
        next_data_func = data_iter.__next__
        # axes of each yielded item: all axes of the array except the first one
        res_axes = array.axes[1:]
        # this case should not happen (handled by the fastpath in Array.__iter__)
        assert len(res_axes) > 0  # noqa: S101

        def next_func():
            # StopIteration raised by the underlying data iterator propagates unchanged
            return Array(next_data_func(), res_axes)

        self.__next__ = next_func

    def __iter__(self):
        # an iterator is its own iterable
        return self


# TODO: rename to ArrayIndexIndexer or something like that
# TODO: the first slice in the example below should be documented
class ArrayPositionalIndexer:
    r"""
    Select or modify a subset of an array using indices (positions) of labels instead of labels.

    Notes
    -----
    Using .i[] is equivalent to numpy indexing when indexing along a single axis. However, when indexing along multiple
    axes this indexes the cross product instead of points.

    Examples
    --------
    >>> arr = ndtest((2, 3, 4))
    >>> arr
     a  b\c  c0  c1  c2  c3
    a0   b0   0   1   2   3
    a0   b1   4   5   6   7
    a0   b2   8   9  10  11
    a1   b0  12  13  14  15
    a1   b1  16  17  18  19
    a1   b2  20  21  22  23

    >>> arr.i[:, 0:2, [0, 2]]
     a  b\c  c0  c2
    a0   b0   0   2
    a0   b1   4   6
    a1   b0  12  14
    a1   b1  16  18
    """

    __slots__ = ('array',)

    def __init__(self, array):
        self.array = array

    def __getitem__(self, key):
        target = self.array
        ndim = target.ndim
        # a key addresses a single cell when it provides one integer per axis
        if isinstance(key, tuple):
            single_cell = len(key) == ndim and all(isinstance(idx, (int, np.integer)) for idx in key)
        else:
            single_cell = ndim == 1 and isinstance(key, (int, np.integer))
        if not single_cell:
            return target.__getitem__(key, translate_key=False)
        # fast path when the result is a scalar: index the underlying data directly
        return target.data[key]

    def __setitem__(self, key, value):
        target = self.array
        ndim = target.ndim
        # a key addresses a single cell when it provides one integer per axis
        if isinstance(key, tuple):
            single_cell = len(key) == ndim and all(isinstance(idx, (int, np.integer)) for idx in key)
        else:
            single_cell = ndim == 1 and isinstance(key, (int, np.integer))
        if not single_cell:
            target.__setitem__(key, value, translate_key=False)
            return
        # fast path when setting a single cell: write to the underlying data directly
        target.data[key] = value

    def __len__(self):
        return len(self.array)

    def __iter__(self):
        target = self.array
        if target.ndim > 1:
            return ArrayIterator(target)
        # fast path for 1D arrays (where we return scalars)
        return iter(target.data)


class ArrayPointsIndexer:
    r"""
    Select or modify arbitrary items of the array, each item being identified by its N-dimensional label index.

    Examples
    --------
    >>> arr = ndtest((2, 3, 4))
    >>> arr
     a  b\c  c0  c1  c2  c3
    a0   b0   0   1   2   3
    a0   b1   4   5   6   7
    a0   b2   8   9  10  11
    a1   b0  12  13  14  15
    a1   b1  16  17  18  19
    a1   b2  20  21  22  23

    To select the two points with label coordinates
    [a0, b0, c0] and [a1, b2, c2], you must do:

    >>> arr.points[['a0', 'a1'], ['b0', 'b2'], ['c0', 'c2']]
    a_b_c  a0_b0_c0  a1_b2_c2
                  0        22
    >>> arr.points['a0,a1', 'b0,b2', 'c0,c2']
    a_b_c  a0_b0_c0  a1_b2_c2
                  0        22

    The number of label(s) on each dimension must be equal:

    >>> arr.points['a0,a1', 'b0,b2', 'c0,c1,c2']  # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        ...
    ValueError: all combined keys should have the same length
    """

    __slots__ = ('array',)

    def __init__(self, array):
        self.array = array

    def __getitem__(self, key):
        # delegate to the wrapped array, asking for "points" selection
        target = self.array
        return target.__getitem__(key, points=True)

    def __setitem__(self, key, value):
        # delegate to the wrapped array, asking for "points" assignment
        target = self.array
        target.__setitem__(key, value, points=True)


# TODO: add support for slices
#     To select the first 4 values across all axes:
#
#     >>> arr.iflat[:4]
#     a_b  a0_b0  a0_b1  a0_b2  a1_b0
#              0     10     20     30
class ArrayFlatIndicesIndexer:
    r"""
    Select or modify items of the array by index, as if the array was flat (one dimensional) with all its axes
    combined.

    Notes
    -----
    In general arr.iflat[key] should be equivalent to (but much faster than) arr.combine_axes().i[key]

    Examples
    --------
    >>> arr = ndtest((2, 3)) * 10
    >>> arr
    a\b  b0  b1  b2
     a0   0  10  20
     a1  30  40  50

    To select the first, second, fourth and fifth values across all axes:

    >>> arr.combine_axes().i[[0, 1, 3, 4]]
    a_b  a0_b0  a0_b1  a1_b0  a1_b1
             0     10     30     40
    >>> arr.iflat[[0, 1, 3, 4]]
    a_b  a0_b0  a0_b1  a1_b0  a1_b1
             0     10     30     40

    Set the first and sixth values to 42

    >>> arr.iflat[[0, 5]] = 42
    >>> arr
    a\b  b0  b1  b2
     a0  42  10  20
     a1  30  40  42

    When the key is an Array, the result will have the axes of the key

    >>> key = Array([0, 3], 'c=c0,c1')
    >>> key
    c  c0  c1
        0   3
    >>> arr.iflat[key]
    c  c0  c1
       42  30
    """

    __slots__ = ('array',)

    def __init__(self, array):
        self.array = array

    def __getitem__(self, flat_key, sep='_'):
        source = self.array
        if not isinstance(flat_key, ABCArray):
            # any other key: the result axes are derived from the flat indices themselves
            np_key = np.asarray(flat_key)
            result_axes = source.axes._combined_iflat(np_key, sep=sep)
        else:
            # Array key: the result takes the axes of the key
            np_key = flat_key.data
            result_axes = flat_key.axes
        selected = source.data.flat[np_key]
        return Array(selected, result_axes)

    def __setitem__(self, flat_key, value):
        # np.ndarray.flat is a flatiter object but it is indexable despite the name
        flat_view = self.array.data.flat
        flat_view[flat_key] = value

    def __len__(self):
        # number of addressable flat positions == total number of cells
        return self.array.size


# TODO: rename to ArrayIndexPointsIndexer or something like that
# TODO: show that we need to use a "full slice" for leaving the dimension alone
# TODO: document explicitly that axes should be in the correct order and missing axes should be slice None
# (except at the end)
class ArrayPositionalPointsIndexer:
    r"""
    Select or modify arbitrary items of the array, each item being identified by its N-dimensional index (position).

    Examples
    --------
    >>> arr = ndtest((2, 3, 4))
    >>> arr
     a  b\c  c0  c1  c2  c3
    a0   b0   0   1   2   3
    a0   b1   4   5   6   7
    a0   b2   8   9  10  11
    a1   b0  12  13  14  15
    a1   b1  16  17  18  19
    a1   b2  20  21  22  23

    To select the two points with index coordinates
    [0, 0, 0] and [1, 2, 2], you must do:

    >>> arr.ipoints[[0, 1], [0, 2], [0, 2]]
    a_b_c  a0_b0_c0  a1_b2_c2
                  0        22

    The number of index(es) on each dimension must be equal:

    >>> arr.ipoints[[0, 1], [0, 2], [0, 1, 2]]  # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        ...
    ValueError: all combined keys should have the same length

    >>> arr.ipoints[[0, 1], [0, 2]]
    a_b\c  c0  c1  c2  c3
    a0_b0   0   1   2   3
    a1_b2  20  21  22  23
    """

    __slots__ = ('array',)

    def __init__(self, array):
        self.array = array

    def __getitem__(self, key):
        # delegate to the wrapped array: "points" selection using positions (no label translation)
        target = self.array
        return target.__getitem__(key, translate_key=False, points=True)

    def __setitem__(self, key, value):
        # delegate to the wrapped array: "points" assignment using positions (no label translation)
        target = self.array
        target.__setitem__(key, value, translate_key=False, points=True)


def get_axis(obj, i):
    r"""
    Fetch the axis located at a given position.

    Parameters
    ----------
    obj : Array or other array
        Input Array or any array object which has a shape attribute (NumPy or Pandas array).
    i : int
        index of the axis.

    Returns
    -------
    Axis
        Axis corresponding to the given index if input `obj` is an Array. A new anonymous Axis with the length of
        the ith dimension of the input `obj` otherwise.

    Examples
    --------
    >>> arr = ndtest((2, 2, 2))
    >>> arr
     a  b\c  c0  c1
    a0   b0   0   1
    a0   b1   2   3
    a1   b0   4   5
    a1   b1   6   7
    >>> get_axis(arr, 1)
    Axis(['b0', 'b1'], 'b')
    >>> np_arr = np.zeros((2, 2, 2))
    >>> get_axis(np_arr, 1)
    Axis(2, None)
    """
    if isinstance(obj, Array):
        return obj.axes[i]
    # not an Array: build an anonymous axis from the length of the requested dimension
    return Axis(obj.shape[i])


# Documentation snippets for the extra positional arguments of aggregation methods, indexed by argument name.
# _doc_agg_method concatenates the snippets of the requested `extra_args` into the generated "Parameters" section.
_arg_agg = {
    'q': """
        q : int in range of [0,100] (or sequence of floats)
            Percentile to compute, which must be between 0 and 100 inclusive."""
}

# Description of the keyword arguments of aggregation methods, indexed by argument name. For each argument:
# * 'value' is the default value displayed (via its repr) in the signature generated by _doc_agg_method
# * 'doc' is the snippet appended to the generated "Parameters" section
_kwarg_agg = {
    'dtype': {'value': None, 'doc': """
        dtype : dtype, optional
            The data type of the returned array. Defaults to None (the dtype of the input array)."""},
    'out': {'value': None, 'doc': """
        out : Array, optional
            Alternate output array in which to place the result. It must have the same shape as the expected output and
            its type is preserved (e.g., if dtype(out) is float, the result will consist of 0.0's and 1.0's).
            Axes and labels can be different, only the shape matters. Defaults to None (create a new array)."""},
    'ddof': {'value': 1, 'doc': """
        ddof : int, optional
            "Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``, where ``N`` represents
            the number of elements. Defaults to 1."""},
    'skipna': {'value': None, 'doc': """
        skipna : bool, optional
            Whether to skip NaN (null) values. If False, resulting cells will be NaN if any of the aggregated
            cells is NaN. Defaults to True."""},
    'keepaxes': {'value': False, 'doc': """
        keepaxes : bool or label-like, optional
            Whether reduced axes are left in the result as dimensions with size one.
            If True, reduced axes will contain a unique label representing the applied aggregation
            (e.g. 'sum', 'prod', ...). It is possible to override this label by passing a specific value
            (e.g. keepaxes='summation'). Defaults to False."""},
    'method': {'value': 'linear', 'doc': """
        method : str, optional
            This parameter specifies the method to use for estimating the
            percentile when the desired percentile lies between two indexes.
            The different methods supported are described in the Notes section. The options are:
                * 'inverted_cdf'
                * 'averaged_inverted_cdf'
                * 'closest_observation'
                * 'interpolated_inverted_cdf'
                * 'hazen'
                * 'weibull'
                * 'linear'  (default)
                * 'median_unbiased'
                * 'normal_unbiased'
                * 'lower'
                * 'higher'
                * 'midpoint'
                * 'nearest'
            The first three and last four methods are discontinuous. Defaults to 'linear'."""}
}

# "Notes" docstring section describing the percentile estimation methods. _doc_agg_method passes it as the
# `percentile_notes` value when formatting the docstring template of an aggregation method.
PERCENTILE_NOTES = """Notes
        -----
        Given a vector ``V`` of length ``n``, the q-th percentile of ``V`` is
        the value ``q/100`` of the way from the minimum to the maximum in a
        sorted copy of ``V``. The values and distances of the two nearest
        neighbors as well as the `method` parameter will determine the
        percentile if the normalized ranking does not match the location of
        ``q`` exactly. This function is the same as the median if ``q=50``, the
        same as the minimum if ``q=0`` and the same as the maximum if
        ``q=100``.
    
        The optional `method` parameter specifies the method to use when the
        desired percentile lies between two indexes ``i`` and ``j = i + 1``.
        In that case, we first determine ``i + g``, a virtual index that lies
        between ``i`` and ``j``, where  ``i`` is the floor and ``g`` is the
        fractional part of the index. The final result is, then, an interpolation
        of ``a[i]`` and ``a[j]`` based on ``g``. During the computation of ``g``,
        ``i`` and ``j`` are modified using correction constants ``alpha`` and
        ``beta`` whose choices depend on the ``method`` used. Finally, note that
        since Python uses 0-based indexing, the code subtracts another 1 from the
        index internally.
    
        The following formula determines the virtual index ``i + g``, the location
        of the percentile in the sorted sample:
    
        .. math::
            i + g = (q / 100) * ( n - alpha - beta + 1 ) + alpha
    
        The different methods then work as follows
    
        inverted_cdf:
            method 1 of H&F [1]_.
            This method gives discontinuous results:
    
            * if g > 0 ; then take j
            * if g = 0 ; then take i
    
        averaged_inverted_cdf:
            method 2 of H&F [1]_.
            This method give discontinuous results:
    
            * if g > 0 ; then take j
            * if g = 0 ; then average between bounds
    
        closest_observation:
            method 3 of H&F [1]_.
            This method give discontinuous results:
    
            * if g > 0 ; then take j
            * if g = 0 and index is odd ; then take j
            * if g = 0 and index is even ; then take i
    
        interpolated_inverted_cdf:
            method 4 of H&F [1]_.
            This method give continuous results using:
    
            * alpha = 0
            * beta = 1
    
        hazen:
            method 5 of H&F [1]_.
            This method give continuous results using:
    
            * alpha = 1/2
            * beta = 1/2
    
        weibull:
            method 6 of H&F [1]_.
            This method give continuous results using:
    
            * alpha = 0
            * beta = 0
    
        linear:
            method 7 of H&F [1]_.
            This method give continuous results using:
    
            * alpha = 1
            * beta = 1
    
        median_unbiased:
            method 8 of H&F [1]_.
            This method is probably the best method if the sample
            distribution function is unknown (see reference).
            This method give continuous results using:
    
            * alpha = 1/3
            * beta = 1/3
    
        normal_unbiased:
            method 9 of H&F [1]_.
            This method is probably the best method if the sample
            distribution function is known to be normal.
            This method give continuous results using:
    
            * alpha = 3/8
            * beta = 3/8
    
        lower:
            NumPy method kept for backwards compatibility.
            Takes ``i`` as the interpolation point.
    
        higher:
            NumPy method kept for backwards compatibility.
            Takes ``j`` as the interpolation point.
    
        nearest:
            NumPy method kept for backwards compatibility.
            Takes ``i`` or ``j``, whichever is nearest.
    
        midpoint:
            NumPy method kept for backwards compatibility.
            Uses ``(i + j) / 2``."""


def _doc_agg_method(func, by=False, long_name='', action_verb='perform', extra_args=(), kwargs=()):
    r"""
    Fill in (in-place) the docstring template of an aggregation method.

    The docstring of `func` is used as a ``str.format`` template which receives three values: ``signature``,
    ``parameters`` and ``percentile_notes``.

    Parameters
    ----------
    func : function
        Aggregation method whose ``__doc__`` is a template to fill in. If it has no docstring (for example because
        docstrings were stripped by running Python with ``-OO``), it is left untouched.
    by : bool, optional
        Whether to describe a "by" aggregation (along all axes except the given ones). Defaults to False.
    long_name : str, optional
        Name of the aggregation used in the generated text. Defaults to the name of `func`.
    action_verb : str, optional
        Verb used in the generated text ('ed' is appended where a past participle is needed).
        Defaults to 'perform'.
    extra_args : sequence of str, optional
        Names of extra positional arguments. Each must be a key of `_arg_agg`. Defaults to no extra argument.
    kwargs : sequence of str, optional
        Names of keyword arguments. Each must be a key of `_kwarg_agg`. Defaults to no keyword argument.
    """
    doc_template = func.__doc__
    if doc_template is None:
        # nothing to fill in: calling .format on None would raise an AttributeError
        return

    if not long_name:
        long_name = func.__name__

    # signature displayed at the top of the generated documentation
    args_sig = ','.join(extra_args) + ', ' if extra_args else ''
    kwargs_sig = ', '.join([f"{k}={_kwarg_agg[k]['value']!r}" for k in kwargs]) + ', ' if kwargs else ''
    signature = f'{func.__name__}({args_sig}*axes_and_groups, {kwargs_sig}**explicit_axes)'

    if by:
        specific_template = """The {long_name} is {action_verb}ed along all axes except the given one(s).
            For groups, {long_name} is {action_verb}ed along groups and non associated axes."""
    else:
        specific_template = "Axis(es) or group(s) along which the {long_name} is {action_verb}ed."
    doc_specific = specific_template.format(long_name=long_name, action_verb=action_verb)

    # "Parameters" section = extra positional arguments + *axes_and_groups + keyword arguments
    doc_args = "".join(_arg_agg[arg] for arg in extra_args)
    doc_kwargs = "".join(_kwarg_agg[kw]['doc'] for kw in kwargs)
    doc_varargs = fr"""
        \*axes_and_groups : None or int or str or Axis or Group or any combination of those
            {doc_specific}
            The default (no axis or group) is to {action_verb} the {long_name} over all the dimensions of the input
            array.

            An axis can be referred by:

            * its index (integer). Index can be a negative integer, in which case it counts from the last to the
              first axis.
            * its name (str or AxisReference). You can use either a simple string ('axis_name') or the special
              variable X (X.axis_name).
            * a variable (Axis). If the axis has been defined previously and assigned to a variable, you can pass it as
              argument.

            You may not want to {action_verb} the {long_name} over a whole axis but over a selection of specific
            labels. To do so, you have several possibilities:

            * (['a1', 'a3', 'a5'], 'b1, b3, b5') : labels separated by commas in a list or a string
            * ('a1:a5:2') : select labels using a slice (general syntax is 'start:end:step' where is 'step' is
              optional and 1 by default).
            * (a='a1, a2, a3', X.b['b1, b2, b3']) : in case of possible ambiguity, i.e. if labels can belong to more
              than one axis, you must precise the axis.
            * ('a1:a3; a5:a7', b='b0,b2; b1,b3') : create several groups with semicolons.
              Names are simply given by the concatenation of labels (here: 'a1,a2,a3', 'a5,a6,a7', 'b0,b2' and 'b1,b3')
            * ('a1:a3 >> a123', 'b[b0,b2] >> b12') : operator ' >> ' allows to rename groups."""
    parameters = f"""Parameters
        ----------{doc_args}{doc_varargs}{doc_kwargs}"""
    func.__doc__ = doc_template.format(signature=signature, parameters=parameters, percentile_notes=PERCENTILE_NOTES)


# NumPy aggregation functions grouped under the "always return float" name (each one with its nan-aware variant)
_always_return_float = {
    np.mean, np.nanmean,
    np.median, np.nanmedian,
    np.percentile, np.nanpercentile,
    np.std, np.nanstd,
    np.var, np.nanvar,
}

# element-wise test returning True for the values which are not equal to themselves (like NaN does)
obj_isnan = np.vectorize(lambda value: value != value, otypes=[bool])


def element_equal(a1, a2, rtol=0, atol=0, nan_equals=False):
    r"""
    Deprecated element-wise comparison of two arrays.

    Emits a FutureWarning, converts `a1` using asarray and returns the result of its ``eq`` method called with
    `a2`, `rtol`, `atol` and `nan_equals` (in that order).
    """
    message = "element_equal() is deprecated. Use array1.eq(array2, rtol, atol, nan_equals) instead."
    warnings.warn(message, FutureWarning, stacklevel=2)
    left = asarray(a1)
    return left.eq(a2, rtol, atol, nan_equals)


def nan_equal(a1, a2):
    r"""
    Deprecated element-wise comparison of two arrays where NaNs compare equal.

    Emits a FutureWarning, converts `a1` using asarray and returns the result of ``a1.eq(a2, nans_equal=True)``.

    Parameters
    ----------
    a1 : Array or array-like
        First operand. It is converted using asarray, like element_equal does for its first operand.
    a2 : Array or array-like
        Second operand, passed as-is to the ``eq`` method of the converted `a1`.
    """
    warnings.warn("nan_equal() is deprecated. Use array1.eq(array2, nans_equal=True) instead.",
                  FutureWarning, stacklevel=2)
    # convert first (consistently with element_equal) so that a1 is guaranteed to provide the .eq method
    a1 = asarray(a1)
    return a1.eq(a2, nans_equal=True)


def _handle_meta(meta, title):
    """
    Make sure meta is either None or a Metadata instance.

    Parameters
    ----------
    meta : None, list of pairs, dict or Metadata
        Metadata to normalize. A Metadata instance is used as-is. A list or dict is converted using ``Metadata(meta)``.
    title : str or None
        Deprecated. When not None, a FutureWarning is emitted and the value is stored under the 'title' key of
        the returned metadata (which is created if `meta` is None).

    Returns
    -------
    None or Metadata

    Raises
    ------
    TypeError
        If `meta` is not None, a list, a dict or a Metadata instance.
    """
    # Validate and convert meta *before* touching it, so that the title can be stored whatever the original
    # container was (a list of pairs does not support meta['title'] = ...) and so that a dict given by the
    # caller is not modified.
    if meta is not None and not isinstance(meta, Metadata):
        # XXX: move this test in Metadata.__init__?
        if not isinstance(meta, (list, dict)):
            raise TypeError(f"Expected None, list of pairs, dict or Metadata object "
                            f"instead of {type(meta).__name__}")
        meta = Metadata(meta)
    if title is not None:
        warnings.warn("title argument is deprecated. Please use meta argument instead", FutureWarning, stacklevel=2)
        if meta is None:
            meta = Metadata()
        meta['title'] = title
    return meta

# Pandas 1.4 <= version < 2.0 emits a warning for arrays with object dtype which
# contain only numeric values. Explicitly requesting the object dtype (i.e. the
# Pandas 2.0 behavior, instead of letting Pandas infer a dtype) avoids it.
# See issue #1061.
def np_array_to_pd_index(array, name=None, tupleize_cols=True):
    """
    Build a Pandas Index from a NumPy array.

    Arrays with object dtype produce an Index with object dtype. For any other dtype, no dtype is passed
    to ``pd.Index``. `name` and `tupleize_cols` are forwarded unchanged.
    """
    index_dtype = object if array.dtype.kind == 'O' else None
    return pd.Index(array, dtype=index_dtype, name=name, tupleize_cols=tupleize_cols)


[docs]class Array(ABCArray):
    r"""
    An Array object represents a multidimensional, homogeneous array of fixed-size items with labeled axes.

    The function :func:`asarray` can be used to convert a NumPy array or Pandas DataFrame into an Array.

    Parameters
    ----------
    data : scalar, tuple, list or NumPy ndarray
        Input data.
    axes : collection (tuple, list or AxisCollection) of axes (int, str or Axis), optional
        Axes.
    title : str, optional
        Deprecated. See 'meta' below.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.
    dtype : type, optional
        Datatype for the array. Defaults to None (inferred from the data).

    Attributes
    ----------
    data : NumPy ndarray
        Data.
    axes : AxisCollection
        Axes.
    meta : Metadata
        Metadata (title, description, author, creation_date, ...) associated with the array.

    See Also
    --------
    sequence : Create an Array by sequentially applying modifications to the array along axis.
    ndtest : Create a test Array with increasing elements.
    zeros : Create an Array, each element of which is zero.
    ones : Create an Array, each element of which is 1.
    full : Create an Array filled with a given value.
    empty : Create an Array, but leave its allocated memory unchanged (i.e., it contains “garbage”).

    Warnings
    --------
    Metadata is not kept when actions or methods are applied on an array
    except for operations modifying the object in-place, such as: `pop[age < 10] = 0`.
    Do not add metadata to an array if you know you will apply actions or methods
    on it before dumping it.

    Examples
    --------
    >>> age = Axis([10, 11, 12], 'age')
    >>> sex = Axis('sex=M,F')
    >>> time = Axis([2007, 2008, 2009], 'time')
    >>> axes = [age, sex, time]
    >>> data = np.zeros((len(age), len(sex), len(time)))

    >>> Array(data, axes)
    age  sex\time  2007  2008  2009
     10         M   0.0   0.0   0.0
     10         F   0.0   0.0   0.0
     11         M   0.0   0.0   0.0
     11         F   0.0   0.0   0.0
     12         M   0.0   0.0   0.0
     12         F   0.0   0.0   0.0
    >>> # with metadata
    >>> arr = Array(data, axes, meta=Metadata(title='my title', author='John Smith'))

    Array creation functions

    >>> full(axes, 10.0)
    age  sex\time  2007  2008  2009
     10         M  10.0  10.0  10.0
     10         F  10.0  10.0  10.0
     11         M  10.0  10.0  10.0
     11         F  10.0  10.0  10.0
     12         M  10.0  10.0  10.0
     12         F  10.0  10.0  10.0
    >>> arr = empty(axes)
    >>> arr['F'] = 1.0
    >>> arr['M'] = -1.0
    >>> arr
    age  sex\time  2007  2008  2009
     10         M  -1.0  -1.0  -1.0
     10         F   1.0   1.0   1.0
     11         M  -1.0  -1.0  -1.0
     11         F   1.0   1.0   1.0
     12         M  -1.0  -1.0  -1.0
     12         F   1.0   1.0   1.0
    >>> bysex = sequence(sex, initial=-1, inc=2)
    >>> bysex
    sex   M  F
         -1  1
    >>> sequence(age, initial=10, inc=bysex)
    sex\age  10  11  12
          M  10   9   8
          F  10  11  12
    """

    # Attributes stored on each instance: the data, the axes and the metadata (which stays None until
    # it is provided or first requested through the `meta` property).
    __slots__ = ('data', 'axes', '_meta')

[docs]    def __init__(self, data, axes=None, title=None, meta=None, dtype=None):
        data = np.asarray(data, dtype=dtype)
        ndim = data.ndim
        if axes is None:
            axes = AxisCollection(data.shape)
        else:
            if not isinstance(axes, AxisCollection):
                axes = AxisCollection(axes)
            if axes.ndim != ndim:
                raise ValueError(f"number of axes ({axes.ndim}) does not match "
                                 f"number of dimensions of data ({ndim})")
            if axes.shape != data.shape:
                raise ValueError(f"length of axes {axes.shape} does not match "
                                 f"data shape {data.shape}")

        self.data = data
        self.axes = axes

        if meta is not None or title is not None:
            meta = _handle_meta(meta, title)
        self._meta = meta

    @property
    def title(self) -> Union[str, None]:
        """
        Title of the array (deprecated, use `meta.title` instead).

        Accessing this attribute emits a FutureWarning.

        Returns
        -------
        str or None
            The title stored in the metadata, or None if the array has no metadata or no title.
        """
        warnings.warn("title attribute is deprecated. Please use meta.title instead", FutureWarning, stacklevel=2)
        metadata = self._meta
        if metadata is None or 'title' not in metadata:
            return None
        return metadata.title

    @title.setter
    def title(self, title):
        """
        Set the title of the array (deprecated, use `meta.title` instead).

        Raises
        ------
        TypeError
            If `title` is not a string.
        """
        warnings.warn("title attribute is deprecated. Please use meta.title instead", FutureWarning, stacklevel=2)
        if not isinstance(title, str):
            raise TypeError(f"Expected string value, got {type(title).__name__}")
        # Go through the `meta` property rather than `_meta`: `_meta` is None for arrays created without
        # metadata and the property creates the Metadata object on demand.
        self.meta.title = title

    @property
    def meta(self) -> Metadata:
        r"""Return metadata of the array.

        An empty Metadata object is created (and stored) the first time the metadata is requested
        for an array which has none.

        Returns
        -------
        Metadata:
            Metadata of the array.
        """
        metadata = self._meta
        if metadata is None:
            metadata = Metadata()
            self._meta = metadata
        return metadata

    @meta.setter
    def meta(self, meta):
        # normalize to either None or a Metadata instance
        normalized = _handle_meta(meta, None)
        self._meta = normalized

    # TODO: rename to inonzero and implement a label version of nonzero
    # TODO: implement wildcard argument to avoid producing the combined labels
[docs]    def nonzero(self) -> Tuple[IGroup, ...]:
        r"""
        Return the indices of the elements that are non-zero.

        Specifically, it returns a tuple of arrays (one for each dimension)
        containing the indices of the non-zero elements in that dimension.

        Returns
        -------
        tuple of arrays : tuple
            Indices of elements that are non-zero.

        Examples
        --------
        >>> arr = ndtest((2, 3))
        >>> arr
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
        >>> cond = arr > 1
        >>> cond
        a\b     b0     b1    b2
         a0  False  False  True
         a1   True   True  True
        >>> a, b = cond.nonzero()
        >>> a
        a.i[a_b  a0_b2  a1_b0  a1_b1  a1_b2
                 0      1      1      1]
        >>> b
        b.i[a_b  a0_b2  a1_b0  a1_b1  a1_b2
                 2      0      1      2]
        >>> # equivalent to arr[cond]
        >>> arr[cond.nonzero()]
        a_b  a0_b2  a1_b0  a1_b1  a1_b2
                 2      3      4      5
        """
        # the next step will be to return a Grid instead so that cond.nonzero() *displays*
        # (however it is stored!) as something like:

        # option a)

        # a_b  a0_b2  a1_b0  a1_b1  a1_b2
        #      a0,b2  a1,b0  a1,b1  a1,b2

        # PRO: * result axes are the same as grid axes
        # CON: * does not support getting the indexing for one axis (or at least it does not make it obvious that it
        #        is supported)
        #      * in the case of ambiguous labels (same label on several axes), this is not explicit enough

        # OR

        # option b)

        # source_axis\a_b  a0_b2  a1_b0  a1_b1  a1_b2
        #               a     a0     a1     a1     a1
        #               b     b2     b0     b1     b2

        # in the presence of duplicate labels on the same axis (e.g. assuming we replace 'b2' by a duplicate 'b1' label)

        # source_axis\a_b  a0_b1#1  a1_b0  a1_b1#0  a1_b1#1
        #               a       a0     a1       a1       a1
        #               b     b1#1     b0     b1#0     b1#1

        # OR

        # option c)

        # a_b\source_axis   a   b
        #           a0_b2  a0  b2
        #           a1_b0  a1  b0
        #           a1_b1  a1  b1
        #           a1_b2  a1  b2

        # Notes
        # -----
        # dtypes of a and b column can be different but since we probably only store indices, we will not even need
        # an LFrame so this shouldn't be a problem.
        ikey = self.data.nonzero()
        la_key = self.axes._adv_keys_to_combined_axis_la_keys(ikey)
        return tuple(IGroup(axis_key, axis=axis) for axis_key, axis in zip(la_key, self.axes))

[docs]    def set_axes(self, axes_to_replace=None, new_axis=None, inplace=False, **kwargs) -> 'Array':
        r"""
        Replace one, several or all axes of the array.

        Parameters
        ----------
        axes_to_replace : axis ref or dict {axis ref: axis} or list of (tuple or Axis) or AxisCollection
            Axes to replace. If a single axis reference is given, the `new_axis` argument must be provided.
            If a list of Axis or an AxisCollection is given, all axes will be replaced by the new ones.
            In that case, the number of new axes must match the number of the old ones.
            If a list of tuple is given, it must be pairs of (reference to old axis, new axis).
        new_axis : Axis, optional
            New axis if `axes_to_replace` contains a single axis reference.
        inplace : bool, optional
            Whether to modify the original object or return a new array and leave the original intact.
            Defaults to False.
        **kwargs : Axis
            New axis for each axis to replace given as a keyword argument.

        Returns
        -------
        Array
            Array with axes replaced.

        See Also
        --------
        rename : rename one of several axes

        Examples
        --------
        >>> arr = ndtest((2, 3))
        >>> arr
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
        >>> row = Axis(['r0', 'r1'], 'row')
        >>> column = Axis(['c0', 'c1', 'c2'], 'column')

        Replace one axis (second argument `new_axis` must be provided)

        >>> arr.set_axes('a', row)
        row\b  b0  b1  b2
           r0   0   1   2
           r1   3   4   5

        Replace several axes (keywords, list of tuple or dictionary)

        >>> arr.set_axes(a=row, b=column) # doctest: +SKIP
        >>> # or
        >>> arr.set_axes([('a', row), ('b', column)]) # doctest: +SKIP
        >>> # or
        >>> arr.set_axes({'a': row, 'b': column})
        row\column  c0  c1  c2
                r0   0   1   2
                r1   3   4   5

        Replace all axes (list of axes or AxisCollection)

        >>> arr.set_axes([row, column])
        row\column  c0  c1  c2
                r0   0   1   2
                r1   3   4   5
        >>> arr2 = ndtest([row, column])
        >>> arr.set_axes(arr2.axes)
        row\column  c0  c1  c2
                r0   0   1   2
                r1   3   4   5
        """
        new_axes = self.axes.replace(axes_to_replace, new_axis, **kwargs)
        if inplace:
            if new_axes.ndim != self.ndim:
                raise ValueError(f"number of axes ({new_axes.ndim}) does not match number of dimensions "
                                 f"of data ({self.ndim})")
            if new_axes.shape != self.data.shape:
                raise ValueError(f"length of axes {new_axes.shape} does not match data shape {self.data.shape}")
            self.axes = new_axes
            return self
        else:
            return Array(self.data, new_axes)

    # Former name of `set_axes`.
    # NOTE(review): with raise_error=True, `renamed_to` presumably makes calls to `with_axes` fail with a message
    # pointing to `set_axes` -- confirm in larray.util.misc.
    with_axes = renamed_to(set_axes, 'with_axes', raise_error=True)

    def __getattr__(self, key) -> Axis:
        if key in self.axes:
            return self.axes[key]
        else:
            class_name = self.__class__.__name__
            raise AttributeError(f"'{class_name}' object has no attribute '{key}'")

    # needed to make *un*pickling work (because otherwise, __getattr__ is called before .axes exists, which leads to
    # an infinite recursion)
    def __getstate__(self):
        return self.data, self.axes, self._meta

    def __setstate__(self, d):
        self.data, self.axes, self._meta = d

    def __dir__(self):
        axis_names = set(axis.name for axis in self.axes if axis.name is not None)
        attributes = self.__slots__
        return list(set(dir(self.__class__)) | set(attributes) | axis_names)

    def _ipython_key_completions_(self):
        return list(chain(*[list(labels) for labels in self.axes.labels]))

    # Indexer accessors: each attribute below wraps the array in the indexer class named in its body and exposes
    # the docstring of that class as its own documentation.
    # NOTE(review): `lazy_attribute` presumably computes the value on first access and caches it -- confirm in
    # larray.util.misc.

    # arr.i -> ArrayPositionalIndexer
    @lazy_attribute
    def i(self) -> ArrayPositionalIndexer:
        return ArrayPositionalIndexer(self)
    i.__doc__ = ArrayPositionalIndexer.__doc__

    # arr.points -> ArrayPointsIndexer
    @lazy_attribute
    def points(self) -> ArrayPointsIndexer:
        return ArrayPointsIndexer(self)
    points.__doc__ = ArrayPointsIndexer.__doc__

    # arr.ipoints -> ArrayPositionalPointsIndexer
    @lazy_attribute
    def ipoints(self) -> ArrayPositionalPointsIndexer:
        return ArrayPositionalPointsIndexer(self)
    ipoints.__doc__ = ArrayPositionalPointsIndexer.__doc__

[docs]    def to_frame(self, fold_last_axis_name=False, dropna=None) -> pd.DataFrame:
        r"""
        Convert an Array into a Pandas DataFrame.

        Parameters
        ----------
        fold_last_axis_name : bool, optional
            Defaults to False.
        dropna : {'any', 'all', None}, optional

            * any : if any NA values are present, drop that label
            * all : if all values are NA, drop that label
            * None by default.

        Returns
        -------
        Pandas DataFrame

        Notes
        -----
        Since pandas does not provide a way to handle metadata (yet), all metadata associated with
        the array will be lost.

        Examples
        --------
        >>> arr = ndtest((2, 2, 2))
        >>> arr
         a  b\c  c0  c1
        a0   b0   0   1
        a0   b1   2   3
        a1   b0   4   5
        a1   b1   6   7
        >>> arr.to_frame()                                                             # doctest: +NORMALIZE_WHITESPACE
        c      c0  c1
        a  b
        a0 b0   0   1
           b1   2   3
        a1 b0   4   5
           b1   6   7
        >>> arr.to_frame(fold_last_axis_name=True)                                     # doctest: +NORMALIZE_WHITESPACE
                c0  c1
        a  b\c
        a0 b0    0   1
           b1    2   3
        a1 b0    4   5
           b1    6   7
        """
        last_name = self.axes[-1].name
        columns_name = None if fold_last_axis_name else last_name
        columns = np_array_to_pd_index(self.axes[-1].labels, name=columns_name)
        if self.ndim > 1:
            axes_names = self.axes.names[:-1]
            if fold_last_axis_name:
                tmp = axes_names[-1] if axes_names[-1] is not None else ''
                if last_name:
                    axes_names[-1] = f"{tmp}\\{last_name}"
            if self.ndim == 2:
                index = np_array_to_pd_index(self.axes[0].labels, name=axes_names[0])
            else:
                index = pd.MultiIndex.from_product(self.axes.labels[:-1], names=axes_names)
        else:
            index = pd.Index([''])
            if fold_last_axis_name:
                index.name = self.axes.names[-1]
        data = np.asarray(self).reshape((len(index), len(columns)))
        df = pd.DataFrame(data, index, columns)
        if dropna is not None:
            dropna = dropna if dropna is not True else 'all'
            df.dropna(inplace=True, how=dropna)
        return df
    # Property shortcut: `arr.df` returns the result of `to_frame` called with its default arguments.
    df = property(to_frame)

[docs]    def to_series(self, name=None, dropna=False) -> pd.Series:
        r"""
        Convert an Array into a Pandas Series.

        Parameters
        ----------
        name : str, optional
            Name of the series. Defaults to None.
        dropna : bool, optional.
            False by default.

        Returns
        -------
        Pandas Series

        Notes
        -----
        Since pandas does not provide a way to handle metadata (yet), all metadata associated with
        the array will be lost.

        Examples
        --------
        >>> arr = ndtest((2, 3), dtype=float)
        >>> arr
        a\b   b0   b1   b2
         a0  0.0  1.0  2.0
         a1  3.0  4.0  5.0
        >>> arr.to_series() # doctest: +NORMALIZE_WHITESPACE
        a   b
        a0  b0    0.0
            b1    1.0
            b2    2.0
        a1  b0    3.0
            b1    4.0
            b2    5.0
        dtype: float64

        Set a name

        >>> arr.to_series('my_name') # doctest: +NORMALIZE_WHITESPACE
                a   b
        a0  b0    0.0
            b1    1.0
            b2    2.0
        a1  b0    3.0
            b1    4.0
            b2    5.0
        Name: my_name, dtype: float64

        Drop NaN values

        >>> arr['b1'] = nan
        >>> arr
        a\b   b0   b1   b2
         a0  0.0  nan  2.0
         a1  3.0  nan  5.0
        >>> arr.to_series(dropna=True) # doctest: +NORMALIZE_WHITESPACE
        a   b
        a0  b0    0.0
            b2    2.0
        a1  b0    3.0
            b2    5.0
        dtype: float64
        """
        if self.ndim == 0:
            raise ValueError('cannot convert 0D array to Series')
        elif self.ndim == 1:
            axis = self.axes[0]
            # Note that string labels will be converted to object dtype in the process
            # and label arrays with object dtype containing only numeric values will keep
            # the object dtype.
            index = np_array_to_pd_index(axis.labels, name=axis.name, tupleize_cols=False)
        else:
            index = pd.MultiIndex.from_product(self.axes.labels, names=self.axes.names)
        series = pd.Series(self.data.reshape(-1), index, name=name)
        if dropna:
            series.dropna(inplace=True)
        return series
    # Property shortcut: `arr.series` returns the result of `to_series` called with its default arguments.
    series = property(to_series)

[docs]    def describe(self, *args, percentiles=None) -> 'Array':
        r"""
        Descriptive summary statistics, excluding NaN values.

        By default, it includes the number of non-NaN values, the mean, standard deviation, minimum, maximum and
        the 25, 50 and 75 percentiles.

        Parameters
        ----------
        *args : int or str or Axis or Group or any combination of those, optional
            Axes or groups along which to compute the aggregates. Defaults to aggregate over the whole array.
        percentiles : array-like, optional.
            List of integer percentiles to include. Defaults to [25, 50, 75].

        Returns
        -------
        Array

        See Also
        --------
        Array.describe_by

        Examples
        --------
        >>> arr = Array([0, 6, 2, 5, 4, 3, 1, 3], 'year=2013..2020')
        >>> arr
        year  2013  2014  2015  2016  2017  2018  2019  2020
                 0     6     2     5     4     3     1     3
        >>> arr.describe()
        statistic  count  mean  std  min   25%  50%   75%  max
                     8.0   3.0  2.0  0.0  1.75  3.0  4.25  6.0
        >>> arr.describe(percentiles=[50, 90])
        statistic  count  mean  std  min  50%  90%  max
                     8.0   3.0  2.0  0.0  3.0  5.3  6.0
        """
        if percentiles is None:
            percentiles = [25, 50, 75]

        # TODO: we should use the commented code below to compute all percentiles in one shot but this does not work
        #       when *args is not empty (see https://github.com/larray-project/larray/issues/192)
        # return stack({
        #     ...,
        #     **arr.percentile(percentiles, *args).set_labels({p: f'{p}%' for p in percentiles}),
        #     ...
        # }, 'statistic')
        return stack({
            # Note that np.isnan works as well as la.isnan thanks to __array_wrap__
            # and using it avoids having a cyclic import
            'count': (~np.isnan(self)).sum(*args),
            'mean': self.mean(*args),
            'std': self.std(*args),
            'min': self.min(*args),
            **{f'{p}%': self.percentile(p, *args) for p in percentiles},
            'max': self.max(*args)
        }, 'statistic')

[docs]    def describe_by(self, *args, percentiles=None) -> 'Array':
        r"""
        Descriptive summary statistics, excluding NaN values, along axes or for groups.

        By default, it includes the number of non-NaN values, the mean, standard deviation, minimum, maximum and
        the 25, 50 and 75 percentiles.

        Parameters
        ----------
        *args : int or str or Axis or Group or any combination of those, optional
            Axes or groups to include in the result after aggregating. Defaults to aggregate over the whole array.
        percentiles : array-like, optional.
            list of integer percentiles to include. Defaults to [25, 50, 75].

        Returns
        -------
        Array

        See Also
        --------
        Array.describe

        Examples
        --------
        >>> data = [[0, 6, 3, 5, 4, 2, 1, 3], [7, 5, 3, 2, 8, 5, 6, 4]]
        >>> arr = Array(data, 'gender=Male,Female;year=2013..2020').astype(float)
        >>> arr
        gender\year  2013  2014  2015  2016  2017  2018  2019  2020
               Male   0.0   6.0   3.0   5.0   4.0   2.0   1.0   3.0
             Female   7.0   5.0   3.0   2.0   8.0   5.0   6.0   4.0
        >>> arr.describe_by('gender')
        gender\statistic  count  mean  std  min   25%  50%   75%  max
                    Male    8.0   3.0  2.0  0.0  1.75  3.0  4.25  6.0
                  Female    8.0   5.0  2.0  2.0  3.75  5.0  6.25  8.0
        >>> arr.describe_by('gender', (X.year[:2015], X.year[2018:]))
        gender  year\statistic  count  mean  std  min  25%  50%  75%  max
          Male           :2015    3.0   3.0  3.0  0.0  1.5  3.0  4.5  6.0
          Male           2018:    3.0   2.0  1.0  1.0  1.5  2.0  2.5  3.0
        Female           :2015    3.0   5.0  2.0  3.0  4.0  5.0  6.0  7.0
        Female           2018:    3.0   5.0  1.0  4.0  4.5  5.0  5.5  6.0
        >>> arr.describe_by('gender', percentiles=[50, 90])
        gender\statistic  count  mean  std  min  50%  90%  max
                    Male    8.0   3.0  2.0  0.0  3.0  5.3  6.0
                  Female    8.0   5.0  2.0  2.0  5.0  7.3  8.0
        """
        args = self._prepare_aggregate(None, args)
        args = self._by_args_to_normal_agg_args(args)
        return self.describe(*args, percentiles=percentiles)

[docs]    def value_counts(self):
        """
        Count number of occurrences of each unique value in array.

        Returns
        -------
        Array of ints
            The number of occurrences of each unique value in the input array.

        See Also
        --------
        Array.unique

        Examples
        --------
        >>> arr = Array([5, 2, 5, 5, 2, 3, 7], "a=a0..a6")
        >>> arr
        a  a0  a1  a2  a3  a4  a5  a6
            5   2   5   5   2   3   7
        >>> arr.value_counts()
        value  2  3  5  7
               2  1  3  1
        """
        unq, counts = np.unique(self.data, return_counts=True)
        return Array(counts, Axis(unq, 'value'))

    # noinspection PyAttributeOutsideInit
    # def __array_finalize__(self, obj):
    #     """
    #     used when arrays are allocated from subclasses of ndarrays
    #     """
    #     return np.ndarray.__array_finalize__(self.data, obj)

    # def __array_prepare__(self, arr, context=None):
    #     """
    #     called before ufuncs (must return an ndarray)
    #     """
    #     return np.ndarray.__array_prepare__(self.data, arr, context)

    def __array_wrap__(self, out_arr, context=None, return_scalar=False) -> 'Array':
        r"""
        Called after numpy ufuncs. This is never called during our wrapped
        ufuncs, but if somebody uses raw numpy function, this works in some
        cases.

        Parameters
        ----------
        out_arr : ndarray
            Result computed by NumPy.
        context : optional
            Context passed by NumPy. Forwarded unchanged to ``ndarray.__array_wrap__``.
        return_scalar : bool, optional
            Third argument passed by NumPy >= 2.0. It is accepted so that NumPy does not have to fall back
            to the deprecated two-argument call, but it is otherwise ignored: the result is always wrapped
            in an Array having the axes of this array.

        Returns
        -------
        Array
        """
        # return_scalar is deliberately not forwarded: ndarray.__array_wrap__ does not accept it before NumPy 2.0
        data = np.ndarray.__array_wrap__(self.data, out_arr, context)
        return Array(data, self.axes)

    def __bool__(self):
        return bool(self.data)

    # TODO: either support a list (of axes names) as first argument here (and set_labels)
    #       or don't support that in set_axes
[docs]    def rename(self, renames=None, to=None, inplace=False, **kwargs) -> 'Array':
        r"""Rename axes of the array.

        Parameters
        ----------
        renames : axis ref or dict {axis ref: str} or list of tuple (axis ref, str)
            Rename to apply. If a single axis reference is given, the `to` argument must be used.
        to : str or Axis
            New name if `renames` contains a single axis reference.
        **kwargs : str or Axis
            New name for each axis given as a keyword argument.

        Returns
        -------
        Array
            Array with axes renamed.

        See Also
        --------
        set_axes : replace one or several axes

        Examples
        --------
        >>> nat = Axis('nat=BE,FO')
        >>> sex = Axis('sex=M,F')
        >>> arr = ndtest([nat, sex])
        >>> arr
        nat\sex  M  F
             BE  0  1
             FO  2  3
        >>> arr.rename(nat, 'nat2')
        nat2\sex  M  F
              BE  0  1
              FO  2  3
        >>> arr.rename(nat='nat2', sex='sex2')
        nat2\sex2  M  F
               BE  0  1
               FO  2  3
        >>> arr.rename([('nat', 'nat2'), ('sex', 'sex2')])
        nat2\sex2  M  F
               BE  0  1
               FO  2  3
        >>> arr.rename({'nat': 'nat2', 'sex': 'sex2'})
        nat2\sex2  M  F
               BE  0  1
               FO  2  3
        """
        axes = self.axes.rename(renames, to, **kwargs)
        if inplace:
            self.axes = axes
            return self
        else:
            return Array(self.data, axes)

[docs]    def reindex(self, axes_to_reindex=None, new_axis=None, fill_value=nan, inplace=False, **kwargs) -> 'Array':
        r"""Reorder and/or add new labels in axes.

        Place NaN or given `fill_value` in locations having no value previously.

        Parameters
        ----------
        axes_to_reindex : axis ref or dict {axis ref: axis} or list of (axis ref, axis) or sequence of Axis
            Axis(es) to reindex. If a single axis reference is given, the `new_axis` argument must be provided.
            If string, Group or Axis object, the corresponding axis is reindexed if found among existing,
            otherwise a new axis is added.
            If a list of Axis or an AxisCollection is given, existing axes are reindexed while missing ones are added.
        new_axis : int, str, list/tuple/array of str, Group or Axis, optional
            List of new labels or new axis if `axes_to_reindex` contains a single axis reference.
        fill_value : scalar or Array, optional
            Value used to fill cells corresponding to label combinations which were not present before reindexing.
            Defaults to NaN.
        inplace : bool, optional
            Whether to modify the original object or return a new array and leave the original intact.
            Defaults to False.
        **kwargs : Axis
            New axis for each axis to reindex given as a keyword argument.

        Returns
        -------
        Array
            Array with reindexed axes.

        Notes
        -----
        When introducing NaNs into an array containing integers via reindex,
        all data will be promoted to float in order to store the NaNs.

        Examples
        --------
        >>> arr = ndtest((2, 2))
        >>> arr
        a\b  b0  b1
         a0   0   1
         a1   2   3
        >>> arr2 = ndtest('a=a1,a2;c=c0;b=b2..b0')
        >>> arr2
         a  c\b  b2  b1  b0
        a1   c0   0   1   2
        a2   c0   3   4   5

        Reindex an axis by passing labels (list or string)

        >>> arr.reindex('b', ['b1', 'b2', 'b0'])
        a\b   b1   b2   b0
         a0  1.0  nan  0.0
         a1  3.0  nan  2.0
        >>> arr.reindex('b', 'b0..b2', fill_value=-1)
        a\b  b0  b1  b2
         a0   0   1  -1
         a1   2   3  -1
        >>> arr.reindex(b='b=b0..b2', fill_value=-1)
        a\b  b0  b1  b2
         a0   0   1  -1
         a1   2   3  -1

        Reindex using an axis from another array

        >>> arr.reindex('b', arr2.b, fill_value=-1)
        a\b  b2  b1  b0
         a0  -1   1   0
         a1  -1   3   2

        Reindex using a subset of an axis

        >>> arr.reindex('b', arr2.b['b1':], fill_value=-1)
        a\b  b1  b0
         a0   1   0
         a1   3   2

        Reindex by passing an axis or a group

        >>> arr.reindex('b=b2..b0', fill_value=-1)
        a\b  b2  b1  b0
         a0  -1   1   0
         a1  -1   3   2
        >>> arr.reindex(arr2.b, fill_value=-1)
        a\b  b2  b1  b0
         a0  -1   1   0
         a1  -1   3   2
        >>> arr.reindex(arr2.b['b1':], fill_value=-1)
        a\b  b1  b0
         a0   1   0
         a1   3   2

        Reindex several axes

        >>> arr.reindex({'a': arr2.a, 'b': arr2.b}, fill_value=-1)
        a\b  b2  b1  b0
         a1  -1   3   2
         a2  -1  -1  -1
        >>> arr.reindex({'a': arr2.a, 'b': arr2.b['b1':]}, fill_value=-1)
        a\b  b1  b0
         a1   3   2
         a2  -1  -1
        >>> arr.reindex(a=arr2.a, b=arr2.b, fill_value=-1)
        a\b  b2  b1  b0
         a1  -1   3   2
         a2  -1  -1  -1

        Reindex by passing a collection of axes

        >>> arr.reindex(arr2.axes, fill_value=-1)
         a  b\c  c0
        a1   b2  -1
        a1   b1   3
        a1   b0   2
        a2   b2  -1
        a2   b1  -1
        a2   b0  -1
        >>> arr2.reindex(arr.axes, fill_value=-1)
         a  c\b  b0  b1
        a0   c0  -1  -1
        a1   c0   2   1
        """
        def labels_def_and_name_to_axis(labels_def, axis_name=None):
            # TODO: the rename functionality seems weird to me.
            #       I think we should either raise an error if the axis name
            #       is different (force using new_axis=other_axis.labels instead
            #       of new_axis=other_axis) OR do not do use the old name
            #       (and make sure this effectively does a rename).
            #       it might have been the unintended consequence of supporting a
            #       list of labels as new_axis
            axis = labels_def if isinstance(labels_def, Axis) else Axis(labels_def)
            return axis.rename(axis_name) if axis_name is not None else axis

        def axis_ref_to_axis(axes, axis_ref):
            if isinstance(axis_ref, Axis) or is_axis_ref(axis_ref):
                return axes[axis_ref]
            else:
                raise TypeError(
                    "In Array.reindex, source axes must be Axis objects or axis references ('axis name', "
                    "X.axis_name or axis_integer_position) but got object of "
                    f"type {type(axis_ref).__name__} instead."
                )

        def is_axis_ref(axis_ref):
            return isinstance(axis_ref, (int, str, AxisReference))

        def is_axis_def(axis_def):
            return ((isinstance(axis_def, str) and '=' in axis_def)
                    or isinstance(axis_def, Group))

        if new_axis is None:
            if isinstance(axes_to_reindex, Axis) and not isinstance(axes_to_reindex, AxisReference):
                axes_to_reindex = {axes_to_reindex: axes_to_reindex}
            elif is_axis_def(axes_to_reindex):
                axis = Axis(axes_to_reindex)
                axes_to_reindex = {axis: axis}
            elif is_axis_ref(axes_to_reindex):
                raise TypeError("In Array.reindex, when using an axis reference ('axis name', X.axis_name or "
                                "axis_integer_position) as axes_to_reindex, you must provide a value for `new_axis`.")
            # otherwise axes_to_reindex should be None (when kwargs are used),
            # a dict or a sequence of axes
            # axes_to_reindex can be None when kwargs are used
            assert (axes_to_reindex is None or
                    isinstance(axes_to_reindex, (tuple, list, dict, AxisCollection)))
        else:
            if not (isinstance(axes_to_reindex, Axis) or is_axis_ref(axes_to_reindex)):
                raise TypeError(
                    "In Array.reindex, when `new_axis` is used, `axes_to_reindex` "
                    "must be an Axis object or an axis reference ('axis name', "
                    f"X.axis_name or axis_integer_position) but got {axes_to_reindex} "
                    f"(which is of type {type(axes_to_reindex).__name__}) instead."
                )
            axes_to_reindex = {axes_to_reindex: new_axis}
            new_axis = None

        if isinstance(axes_to_reindex, (list, tuple)):
            axes_to_reindex = AxisCollection(axes_to_reindex)

        assert new_axis is None
        assert axes_to_reindex is None or isinstance(axes_to_reindex, (dict, AxisCollection))

        if isinstance(axes_to_reindex, AxisCollection):
            # | axes_to_reindex is needed because axes_to_reindex can contain more axes than self.axes
            res_axes = AxisCollection([axes_to_reindex.get(axis, axis) for axis in self.axes]) | axes_to_reindex
        else:
            # TODO: move this to AxisCollection.replace
            if isinstance(axes_to_reindex, dict):
                new_axes_to_reindex = {}
                for k, v in axes_to_reindex.items():
                    src_axis = axis_ref_to_axis(self.axes, k)
                    dst_axis = labels_def_and_name_to_axis(v, src_axis.name)
                    new_axes_to_reindex[src_axis] = dst_axis
                axes_to_reindex = new_axes_to_reindex

            res_axes = self.axes.replace(axes_to_reindex, **kwargs)
        res = full(res_axes, fill_value, dtype=common_dtype((self.data, fill_value)))

        def get_group(res_axes, self_axis):
            res_axis = res_axes[self_axis]
            if res_axis.equals(self_axis):
                return self_axis[:]
            else:
                return self_axis[self_axis.intersection(res_axis).labels]
        self_groups = tuple(get_group(res_axes, axis) for axis in self.axes)
        res_groups = tuple(res_axes[group.axis][group] for group in self_groups)
        res[res_groups] = self[self_groups]
        if inplace:
            self.axes = res.axes
            self.data = res.data
            return self
        else:
            return res

[docs]    def align(self, other, join='outer', fill_value=nan, axes=None) -> Tuple['Array', 'Array']:
        r"""Align two arrays on their axes with the specified join method.

        In other words, it ensure all common axes are compatible. Those arrays can then be used in binary operations.

        Parameters
        ----------
        other : Array-like
        join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
            Join method. For each axis common to both arrays:
              - outer: will use a label if it is in either arrays axis (ordered like the first array).
                       This is the default as it results in no information loss.
              - inner: will use a label if it is in both arrays axis (ordered like the first array).
              - left: will use the first array axis labels.
              - right: will use the other array axis labels.
              - exact: instead of aligning, raise an error when axes to be aligned are not equal.
        fill_value : scalar or Array, optional
            Value used to fill cells corresponding to label combinations which are not common to both arrays.
            Defaults to NaN.
        axes : AxisReference or sequence of them, optional
            Axes to align. Need to be valid in both arrays. Defaults to None (all common axes). This must be specified
            when mixing anonymous and non-anonymous axes.

        Returns
        -------
        (left, right) : (Array, Array)
            Aligned objects

        Notes
        -----
            Arrays with anonymous axes are currently not supported.

        Examples
        --------
        >>> arr1 = ndtest((2, 3))
        >>> arr1
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
        >>> arr2 = -ndtest((3, 2))
        >>> # reorder array to make the test more interesting
        >>> arr2 = arr2[['b1', 'b0']]
        >>> arr2
        a\b  b1  b0
         a0  -1   0
         a1  -3  -2
         a2  -5  -4

        Align arr1 and arr2

        >>> aligned1, aligned2 = arr1.align(arr2)
        >>> aligned1
        a\b   b0   b1   b2
         a0  0.0  1.0  2.0
         a1  3.0  4.0  5.0
         a2  nan  nan  nan
        >>> aligned2
        a\b    b0    b1   b2
         a0   0.0  -1.0  nan
         a1  -2.0  -3.0  nan
         a2  -4.0  -5.0  nan

        After aligning all common axes, one can then do operations between the two arrays

        >>> aligned1 + aligned2
        a\b   b0   b1   b2
         a0  0.0  0.0  nan
         a1  1.0  1.0  nan
         a2  nan  nan  nan

        Other kinds of joins are supported

        >>> aligned1, aligned2 = arr1.align(arr2, join='inner')
        >>> aligned1
        a\b   b0   b1
         a0  0.0  1.0
         a1  3.0  4.0
        >>> aligned2
        a\b    b0    b1
         a0   0.0  -1.0
         a1  -2.0  -3.0
        >>> aligned1, aligned2 = arr1.align(arr2, join='left')
        >>> aligned1
        a\b   b0   b1   b2
         a0  0.0  1.0  2.0
         a1  3.0  4.0  5.0
        >>> aligned2
        a\b    b0    b1   b2
         a0   0.0  -1.0  nan
         a1  -2.0  -3.0  nan
        >>> aligned1, aligned2 = arr1.align(arr2, join='right')
        >>> aligned1
        a\b   b1   b0
         a0  1.0  0.0
         a1  4.0  3.0
         a2  nan  nan
        >>> aligned2
        a\b    b1    b0
         a0  -1.0   0.0
         a1  -3.0  -2.0
         a2  -5.0  -4.0

        The fill value for missing labels defaults to nan but can be changed to any compatible value.

        >>> aligned1, aligned2 = arr1.align(arr2, fill_value=0)
        >>> aligned1
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
         a2   0   0   0
        >>> aligned2
        a\b  b0  b1  b2
         a0   0  -1   0
         a1  -2  -3   0
         a2  -4  -5   0
        >>> aligned1 + aligned2
        a\b  b0  b1  b2
         a0   0   0   2
         a1   1   1   5
         a2  -4  -5   0

        It also works when either arrays (or both) have extra axes

        >>> arr3 = ndtest((3, 2, 2))
        >>> arr1
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
        >>> arr3
         a  b\c  c0  c1
        a0   b0   0   1
        a0   b1   2   3
        a1   b0   4   5
        a1   b1   6   7
        a2   b0   8   9
        a2   b1  10  11
        >>> aligned1, aligned2 = arr1.align(arr3, join='inner')
        >>> aligned1
        a\b   b0   b1
         a0  0.0  1.0
         a1  3.0  4.0
        >>> aligned2
         a  b\c   c0   c1
        a0   b0  0.0  1.0
        a0   b1  2.0  3.0
        a1   b0  4.0  5.0
        a1   b1  6.0  7.0
        >>> aligned1 + aligned2
         a  b\c    c0    c1
        a0   b0   0.0   1.0
        a0   b1   3.0   4.0
        a1   b0   7.0   8.0
        a1   b1  10.0  11.0

        One can also align only some specific axes (but in that case arrays might not be compatible)

        >>> aligned1, aligned2 = arr1.align(arr2, axes='b')
        >>> aligned1
        a\b   b0   b1   b2
         a0  0.0  1.0  2.0
         a1  3.0  4.0  5.0
        >>> aligned2
        a\b    b0    b1   b2
         a0   0.0  -1.0  nan
         a1  -2.0  -3.0  nan
         a2  -4.0  -5.0  nan

        Test if two arrays are aligned

        >>> arr1.align(arr2, join='exact')   # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
        ...
        ValueError: Both arrays are not aligned because align method with join='exact'
        expected Axis(['a0', 'a1'], 'a') to be equal to Axis(['a0', 'a1', 'a2'], 'a')
        """
        other = asarray(other)
        # reindex does not currently support anonymous axes
        if any(name is None for name in self.axes.names) or any(name is None for name in other.axes.names):
            raise ValueError("arrays with anonymous axes are currently not supported by Array.align")
        try:
            left_axes, right_axes = self.axes.align(other.axes, join=join, axes=axes)
        except ValueError as e:
            raise ValueError(f"Both arrays are not aligned because {e}")
        return self.reindex(left_axes, fill_value=fill_value), other.reindex(right_axes, fill_value=fill_value)

[docs]    @deprecate_kwarg('reverse', 'ascending', {True: False, False: True})
    def sort_values(self, key=None, axis=None, ascending=True) -> 'Array':
        r"""Sort values of the array.

        Parameters
        ----------
        key : scalar or tuple or Group
            Key along which to sort. Must have exactly one dimension less than ndim.
            Cannot be used in combination with `axis` argument.
            If both `key` and `axis` are None, sort array with all axes combined.
            Defaults to None.
        axis : int or str or Axis
            Axis along which to sort. Cannot be used in combination with `key` argument.
            Defaults to None.
        ascending : bool, optional
            Sort values in ascending order. Defaults to True.

        Returns
        -------
        Array
            Array with sorted values.

        Examples
        --------
        sort the whole array (no key or axis given)

        >>> arr_1D = Array([10, 2, 4], 'a=a0..a2')
        >>> arr_1D
        a  a0  a1  a2
           10   2   4
        >>> arr_1D.sort_values()
        a  a1  a2  a0
            2   4  10
        >>> arr_2D = Array([[10, 2, 4], [3, 7, 1]], 'a=a0,a1; b=b0..b2')
        >>> arr_2D
        a\b  b0  b1  b2
         a0  10   2   4
         a1   3   7   1
        >>> # if the array has more than one dimension, sort array with all axes combined
        >>> arr_2D.sort_values()
        a_b  a1_b2  a0_b1  a1_b0  a0_b2  a1_b1  a0_b0
                 1      2      3      4      7     10

        Sort along a given key

        >>> # sort columns according to the values of the row associated with the label 'a1'
        >>> arr_2D.sort_values('a1')
        a\b  b2  b0  b1
         a0   4  10   2
         a1   1   3   7
        >>> arr_2D.sort_values('a1', ascending=False)
        a\b  b1  b0  b2
         a0   2  10   4
         a1   7   3   1
        >>> arr_3D = Array([[[10, 2, 4], [3, 7, 1]], [[5, 1, 6], [2, 8, 9]]],
        ...            'a=a0,a1; b=b0,b1; c=c0..c2')
        >>> arr_3D
         a  b\c  c0  c1  c2
        a0   b0  10   2   4
        a0   b1   3   7   1
        a1   b0   5   1   6
        a1   b1   2   8   9
        >>> # sort columns according to the values of the row associated with the labels 'a0' and 'b1'
        >>> arr_3D.sort_values(('a0', 'b1'))
         a  b\c  c2  c0  c1
        a0   b0   4  10   2
        a0   b1   1   3   7
        a1   b0   6   5   1
        a1   b1   9   2   8

        Sort along an axis

        >>> arr_2D
        a\b  b0  b1  b2
         a0  10   2   4
         a1   3   7   1
        >>> # sort values along axis 'a'
        >>> # equivalent to sorting the values of each column of the array
        >>> arr_2D.sort_values(axis='a')
        a*\b  b0  b1  b2
           0   3   2   1
           1  10   7   4
        >>> # sort values along axis 'b'
        >>> # equivalent to sorting the values of each row of the array
        >>> arr_2D.sort_values(axis='b')
        a\b*  0  1   2
          a0  2  4  10
          a1  1  3   7
        """
        if key is not None and axis is not None:
            raise ValueError("Arguments key and axis are exclusive and cannot be used in combination")
        if axis is not None:
            axis = self.axes[axis]
            axis_idx = self.axes.index(axis)
            data = np.sort(self.data, axis_idx)
            new_axes = self.axes.replace(axis_idx, Axis(len(axis), axis.name))
            res = Array(data, new_axes)
        elif key is not None:
            subset = self[key]
            if subset.ndim > 1:
                raise NotImplementedError("sort_values key must have one dimension less than array.ndim")
            assert subset.ndim == 1
            axis = subset.axes[0]
            indicesofsorted = subset.indicesofsorted()

            # FIXME: .data shouldn't be necessary, but currently, if we do not do it, we get
            # IGroup(nat  EU  FO  BE
            #              1   2   0, axis='nat')
            # which sorts the *data* correctly, but the labels on the nat axis are not sorted
            # (because the __getitem__ in that case reuse the key axis as-is -- like it should).
            # Both use cases have value, but I think reordering the ticks should be the default.
            # Now, I am unsure where to change this. Probably in IGroupMaker.__getitem__,
            # but then how do I get the "not reordering labels" behavior that I have now?
            # FWIW, using .data, I get IGroup([1, 2, 0], axis='nat'), which works.
            sorter = axis.i[indicesofsorted.data]
            res = self[sorter]
        else:
            res = self.combine_axes()
            indicesofsorted = np.argsort(res.data)
            res = res.i[indicesofsorted]
            axis = res.axes[0]
        return res[axis[::-1]] if not ascending else res

[docs]    @deprecate_kwarg('reverse', 'ascending', {True: False, False: True})
    def sort_labels(self, axes=None, ascending=True) -> 'Array':
        r"""Sort labels of axes of the array.

        Parameters
        ----------
        axes : axis reference (Axis, str, int) or list of them, optional
            Axes to sort the labels of. Defaults None (all axes).
        ascending : bool, optional
            Sort labels in ascending order. Defaults to True.

        Returns
        -------
        Array
            Array with sorted labels.

        Examples
        --------
        >>> a = ndtest("nat=EU,FO,BE; sex=M,F")
        >>> a
        nat\sex  M  F
             EU  0  1
             FO  2  3
             BE  4  5
        >>> a.sort_labels('sex')
        nat\sex  F  M
             EU  1  0
             FO  3  2
             BE  5  4
        >>> a.sort_labels()
        nat\sex  F  M
             BE  5  4
             EU  1  0
             FO  3  2
        >>> a.sort_labels(('sex', 'nat'))
        nat\sex  F  M
             BE  5  4
             EU  1  0
             FO  3  2
        >>> a.sort_labels(ascending=False)
        nat\sex  M  F
             FO  2  3
             EU  0  1
             BE  4  5
        """
        if axes is None:
            axes = self.axes
        elif not isinstance(axes, (tuple, list, AxisCollection)):
            axes = [axes]

        if not isinstance(axes, AxisCollection):
            axes = self.axes[axes]

        def sort_key(axis):
            key = np.argsort(axis.labels)
            if not ascending:
                key = key[::-1]
            return axis.i[key]

        return self[tuple(sort_key(axis) for axis in axes)]

    sort_axis = renamed_to(sort_labels, 'sort_axis', raise_error=True)
    sort_axes = renamed_to(sort_labels, 'sort_axes')

    # TODO: set returned type to Union['Array', np.ndarray, Scalar] ?
    def __getitem__(self, key, collapse_slices=False, translate_key=True, points=False) -> Union['Array', Scalar]:
        """Return the subset of the array selected by key.

        The key is converted by the axes collection to a raw key usable on the underlying data, along with
        the axes of the result. When there are no result axes, the raw selected data is returned as-is,
        otherwise the result is wrapped in a new Array.
        """
        raw_key, out_axes, transposition = self.axes._key_to_raw_and_axes(
            key, collapse_slices, translate_key, points, wildcard=False
        )
        values = self.data[raw_key]
        if not out_axes:
            return values
        # Advanced indexing can move some axes to the front; put them back to their original position.
        # Array.transpose is avoided on purpose because creating an extra Array object is costly.
        if transposition is not None:
            values = values.transpose(transposition)
            out_axes = out_axes[transposition]
        return Array(values, out_axes)

    def __setitem__(self, key, value, collapse_slices=True, translate_key=True, points=False) -> None:
        """Assign value to the subset of the array selected by key.

        When value is an Array, it is first broadcast to the axes of the targeted subset (checking that
        common axes are compatible).

        Raises
        ------
        ValueError
            If value is an Array with axes of length > 1 which are not present in the targeted subset.
        """
        # TODO: if key or value has more axes than self, we could use
        # total_axes = self.axes + key.axes + value.axes
        # expanded = self.expand(total_axes)
        # data = np.asarray(expanded.data)
        raw_key, subset_axes, _ = self.axes._key_to_raw_and_axes(
            key, collapse_slices, translate_key, points, wildcard=True
        )
        if isinstance(value, Array):
            if subset_axes is None:
                # this can happen when setting a single "cell"/value with an Array (of size 1)
                subset_axes = []
            else:
                value = value.broadcast_with(subset_axes, check_compatible=True)
            # give users an "incompatible axes" error instead of the incomprehensible numpy message
            # "could not broadcast input array from shape XX into shape YY"
            unexpected = [axis for axis in value.axes - subset_axes if len(axis) > 1]
            if unexpected:
                unexpected = AxisCollection(unexpected)
                subset_axes = AxisCollection(subset_axes)
                noun = 'axis is' if len(unexpected) <= 1 else 'axes are'
                raise ValueError(f"Value {unexpected!s} {noun} not present in target subset {subset_axes!s}. A value can only "
                                 f"have the same axes or fewer axes than the subset being targeted")
        self.data[raw_key] = value

        # concerning keys this can make sense in several cases:
        # single bool Array key with extra axes.
        # tuple of bool Array keys (eg one for each axis). each could have extra axes. Common axes between keys are
        # not a problem, we can simply "and" them. Though we should avoid explicitly "and"ing them if there is no
        # common axis because that is less efficient than the implicit "and" that is done by numpy __getitem__ (and
        # the fact we need to combine dimensions when any key has more than 1 dim).

        # the bool value represents whether the axis label is taken or not. If any bool key (part) has more than one
        # axis, we get combined dimensions out of it.

[docs]    def set(self, value, **kwargs) -> None:
        r"""
        Set a subset of array to value.

        * all common axes must be either of length 1 or the same length
        * extra axes in value must be of length 1
        * extra axes in current array can have any length

        Parameters
        ----------
        value : scalar or Array

        Examples
        --------
        >>> arr = ndtest((3, 3))
        >>> arr
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
         a2   6   7   8
        >>> arr['a1:', 'b1:'].set(10)
        >>> arr
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3  10  10
         a2   6  10  10
        >>> arr['a1:', 'b1:'].set(ndtest("a=a1,a2;b=b1,b2"))
        >>> arr
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   0   1
         a2   6   2   3
        """
        self.__setitem__(kwargs, value)

    # TODO: this should be a private method
[docs]    def reshape(self, target_axes) -> 'Array':
        r"""
        Given a list of new axes, changes the shape of the array.
        The size of the array (= number of elements) must be equal
        to the product of length of target axes.

        Parameters
        ----------
        target_axes : iterable of Axis
            New axes. The size of the array (= number of stored data)
            must be equal to the product of length of target axes.

        Returns
        -------
        Array
            New array with new axes but same data.

        Examples
        --------
        >>> arr = ndtest((2, 2, 2))
        >>> arr
         a  b\c  c0  c1
        a0   b0   0   1
        a0   b1   2   3
        a1   b0   4   5
        a1   b1   6   7
        >>> new_arr = arr.reshape([Axis('a=a0,a1'),
        ...                        Axis(['b0c0', 'b0c1', 'b1c0', 'b1c1'], 'bc')])
        >>> new_arr
        a\bc  b0c0  b0c1  b1c0  b1c1
          a0     0     1     2     3
          a1     4     5     6     7
        """
        # this is a dangerous operation, because except for adding length 1 axes (which is safe), it potentially
        # modifies data
        # TODO: add a check/flag? for "unsafe" reshapes (but allow merging
        # several axes & "splitting" axes) etc.
        # eg 4, 3, 2 -> 2, 3, 4 is wrong (even if size is respected)
        #    4, 3, 2 -> 12, 2 is potentially ok (merging adjacent dimensions)
        #            -> 4, 6 is potentially ok (merging adjacent dimensions)
        #            -> 24 is potentially ok (merging adjacent dimensions)
        #            -> 3, 8 WRONG (non adjacent dimensions)
        #            -> 8, 3 WRONG
        #    4, 3, 2 -> 2, 2, 3, 2 is potentially ok (splitting dim)
        if not isinstance(target_axes, AxisCollection):
            target_axes = AxisCollection(target_axes)
        data = self.data.reshape(target_axes.shape)
        return Array(data, target_axes)

    # TODO: this should be a private method
[docs]    def reshape_like(self, target) -> 'Array':
        r"""
        Same as reshape but with an array as input.
        Total size (= number of stored data) of the two arrays must be equal.

        See Also
        --------
        reshape : returns an Array with a new shape given a list of axes.

        Examples
        --------
        >>> arr = zeros((2, 2, 2), dtype=int)
        >>> arr
        {0}*  {1}*\{2}*  0  1
           0          0  0  0
           0          1  0  0
           1          0  0  0
           1          1  0  0
        >>> new_arr = arr.reshape_like(ndtest((2, 4)))
        >>> new_arr
        a\b  b0  b1  b2  b3
         a0   0   0   0   0
         a1   0   0   0   0
        """
        return self.reshape(target.axes)

[docs]    def broadcast_with(self, target, check_compatible=False) -> 'Array':
        r"""
        Return an array that is (NumPy) broadcastable with target.

        * all common axes must be either of length 1 or the same length
        * extra axes in source can have any length and will be moved to the
          front
        * extra axes in target can have any length and the result will have axes
          of length 1 for those axes

        This is different from reshape which ensures the result has exactly the
        shape of the target.

        Parameters
        ----------
        target : Array or collection of Axis

        check_compatible : bool, optional
            Whether to check that common axes are compatible. Defaults to False.

        Returns
        -------
        Array
        """
        if isinstance(target, Array):
            target_axes = target.axes
        else:
            target_axes = target
            if not isinstance(target_axes, (tuple, list, AxisCollection)):
                target_axes = AxisCollection(target_axes)
        if self.axes == target_axes:
            return self
        # determine real target order (= left_only then target_axes)
        # (we will add length one axes to the left like numpy just below)
        target_axes = (self.axes - target_axes) | target_axes

        # XXX: this breaks la['1,5,9'] = la['2,7,3']
        # but that use case should use ignore_labels
        # self.axes.check_compatible(target_axes)

        # 1) reorder axes to target order
        array = self.transpose(target_axes & self.axes)

        # 2) add length one axes
        res_axes = array.axes.get_all(target_axes)
        if check_compatible:
            res_axes.check_compatible(target_axes)
        return array.reshape(res_axes)

    # XXX: I wonder if effectively dropping the labels is necessary or not
    # we could perfectly only mark the axis as being a wildcard axis and keep
    # the labels intact. These wildcard axes with labels
    # could be useful in a few situations. For example, Excel sheets could
    # have such behavior: you can slice columns using letters, but that
    # wouldn't prevent doing computation between arrays using different
    # columns. On the other hand, it makes wild axes less obvious and I
    # wonder if there would be a risk of wildcard axes inadvertently leaking.
    # plus it might be confusing if incompatible labels "work".
[docs]    def ignore_labels(self, axes=None) -> 'Array':
        r"""Ignore labels from axes (replace those axes by "wildcard" axes).

        Useful when you want to apply operations between two arrays
        or subarrays with same shape but incompatible axes
        (different labels).

        Parameters
        ----------
        axes : Axis or list/tuple/AxisCollection of Axis, optional
            Axis(es) on which you want to drop the labels.

        Returns
        -------
        Array

        Notes
        -----
        Use it at your own risk.

        Examples
        --------
        >>> a = Axis('a=a1,a2')
        >>> b = Axis('b=b1,b2')
        >>> b2 = Axis('b=b2,b3')
        >>> arr1 = ndtest([a, b])
        >>> arr1
        a\b  b1  b2
         a1   0   1
         a2   2   3
        >>> arr1.ignore_labels(b)
        a\b*  0  1
          a1  0  1
          a2  2  3
        >>> arr1.ignore_labels([a, b])
        a*\b*  0  1
            0  0  1
            1  2  3
        >>> arr2 = ndtest([a, b2])
        >>> arr2
        a\b  b2  b3
         a1   0   1
         a2   2   3
        >>> arr1 * arr2
        Traceback (most recent call last):
        ...
        ValueError: incompatible axes:
        Axis(['b2', 'b3'], 'b')
        vs
        Axis(['b1', 'b2'], 'b')
        >>> arr1 * arr2.ignore_labels()
        a\b  b1  b2
         a1   0   1
         a2   4   9
        >>> arr1.ignore_labels() * arr2
        a\b  b2  b3
         a1   0   1
         a2   4   9
        >>> arr1.ignore_labels('a') * arr2.ignore_labels('b')
        a\b  b1  b2
         a1   0   1
         a2   4   9
        """
        if axes is None:
            axes = self.axes
        elif not isinstance(axes, (tuple, list, AxisCollection)):
            axes = self.axes[[axes]]
        else:
            axes = self.axes[axes]
        res_axes = self.axes.replace({axis: axis.ignore_labels() for axis in axes})
        return Array(self.data, res_axes)
    drop_labels = renamed_to(ignore_labels, 'drop_labels', raise_error=True)

    def __str__(self) -> str:
        """Return the textual representation of the array.

        0-dimensional arrays are displayed as their single value and empty arrays as 'Array([])'.
        Otherwise, the array is dumped as a table (using axes display names) and formatted according to
        the display options (maximum number of lines, edge items, width and precision).
        """
        if not self.ndim:
            # np.asscalar was deprecated in NumPy 1.16 and removed in NumPy 1.23;
            # ndarray.item() is its documented replacement
            return str(self.data.item())
        if not len(self):
            return 'Array([])'
        table = self.dump(maxlines=_OPTIONS[DISPLAY_MAXLINES], edgeitems=_OPTIONS[DISPLAY_EDGEITEMS],
                          _axes_display_names=True)
        # keepcols: the first ndim - 1 columns hold the labels of all axes but the last
        return table2str(table, 'nan', maxwidth=_OPTIONS[DISPLAY_WIDTH], keepcols=self.ndim - 1,
                         precision=_OPTIONS[DISPLAY_PRECISION])
    __repr__ = __str__

    def __iter__(self):
        """Return an iterator over the array.

        Arrays with more than one dimension are iterated using an ArrayIterator, otherwise the
        iteration is delegated to the underlying data.
        """
        if self.ndim > 1:
            return ArrayIterator(self)
        # fast path for 1D arrays where we return elements
        return iter(self.data)

    def __contains__(self, key) -> bool:
        """Return whether key is contained in any axis of the array."""
        for axis in self.axes:
            if key in axis:
                return True
        return False

    # XXX: dump as a 2D Array with row & col dims?
[docs]    def dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is',
             maxlines=-1, edgeitems=5, _axes_display_names=False) -> List[List[str]]:
        r"""dump(self, header=True, wide=True, value_name='value', light=False, axes_names=True, na_repr='as_is',
             maxlines=-1, edgeitems=5)

        Dump array as a 2D nested list. This is especially useful when writing to an Excel sheet via open_excel().

        Parameters
        ----------
        header : bool
            Whether to output axes names and labels.
        wide : boolean, optional
            Whether to write arrays in "wide" format. If True, arrays are exported with the last axis
            represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one
            value column. Not used if header=False. Defaults to True.
        value_name : str, optional
            Name of the column containing the values (last column) when `wide=False` (see above).
            Not used if header=False. Defaults to 'value'.
        light : bool, optional
            Whether to hide repeated labels. In other words, only show a label if it is different from the
            previous one. Defaults to False.
        axes_names : bool or 'except_last', optional
            Assuming header is True, whether to include axes names. If axes_names is 'except_last',
            all axes names will be included except the last. Defaults to True.
        na_repr : any scalar, optional
            Replace missing values (NaN floats) by this value. Defaults to 'as_is' (do not do any replacement).
        maxlines : int, optional
            Maximum number of lines to show. Defaults to -1 (all lines are shown).
        edgeitems : int, optional
            If number of lines to display is greater than `maxlines`, only the first and last `edgeitems` lines are
            displayed. Only active if `maxlines` is not -1. Defaults to 5.

        Returns
        -------
        2D nested list of builtin Python values or None for 0d arrays

        Examples
        --------
        >>> arr = ndtest((2, 2, 2))
        >>> arr.dump()                               # doctest: +NORMALIZE_WHITESPACE
        [['a',  'b\\c', 'c0', 'c1'],
         ['a0',   'b0',    0,    1],
         ['a0',   'b1',    2,    3],
         ['a1',   'b0',    4,    5],
         ['a1',   'b1',    6,    7]]
        >>> arr.dump(axes_names=False)               # doctest: +NORMALIZE_WHITESPACE
        [['',       '', 'c0', 'c1'],
         ['a0',   'b0',    0,    1],
         ['a0',   'b1',    2,    3],
         ['a1',   'b0',    4,    5],
         ['a1',   'b1',    6,    7]]
        >>> arr.dump(axes_names='except_last')       # doctest: +NORMALIZE_WHITESPACE
        [['a',     'b', 'c0', 'c1'],
         ['a0',   'b0',    0,    1],
         ['a0',   'b1',    2,    3],
         ['a1',   'b0',    4,    5],
         ['a1',   'b1',    6,    7]]
        >>> arr.dump(light=True)                     # doctest: +NORMALIZE_WHITESPACE
        [['a',  'b\\c', 'c0', 'c1'],
         ['a0',   'b0',    0,    1],
         ['',     'b1',    2,    3],
         ['a1',   'b0',    4,    5],
         ['',     'b1',    6,    7]]
        >>> arr.dump(wide=False, value_name='data')  # doctest: +NORMALIZE_WHITESPACE
        [['a',   'b',  'c', 'data'],
         ['a0', 'b0', 'c0',      0],
         ['a0', 'b0', 'c1',      1],
         ['a0', 'b1', 'c0',      2],
         ['a0', 'b1', 'c1',      3],
         ['a1', 'b0', 'c0',      4],
         ['a1', 'b0', 'c1',      5],
         ['a1', 'b1', 'c0',      6],
         ['a1', 'b1', 'c1',      7]]
        >>> arr.dump(maxlines=3, edgeitems=1)        # doctest: +NORMALIZE_WHITESPACE
        [['a',   'b\\c',  'c0',  'c1'],
         ['a0',    'b0',     0,     1],
         ['...',  '...', '...', '...'],
         ['a1',    'b1',     6,     7]]
        """
        # _axes_display_names : bool, optional
        #    Whether to get axes names using AxisCollection.display_names instead of
        #    AxisCollection.names. Defaults to False.

        dump_axes_names = axes_names

        if not header:
            # ensure_no_numpy_type is there mostly to avoid problems with xlwings, but I am unsure where that problem
            # should be fixed: in np.array.tolist, in xlwings, here or in xw_excel.Sheet.__setitem__. Doing it here
            # is uglier than in xw_excel but is faster because nothing (extra) needs to be done when the
            # array is not of object dtype (the usual case).

            # flatten all dimensions except the last one
            res2d = ensure_no_numpy_type(self.data.reshape((-1, self.shape[-1])))
        else:
            if not self.ndim:
                return None

            if wide:
                width = self.shape[-1]
                height = int(np.prod(self.shape[:-1]))
            else:
                width = 1
                height = int(np.prod(self.shape))
            data = self.data.reshape((height, width))

            # get list of names of axes
            if _axes_display_names:
                axes_names = self.axes.display_names[:]
            else:
                axes_names = self.axes.names

            # transforms ['a', 'b', 'c', 'd'] into ['a', 'b', 'c\\d']
            if wide and len(axes_names) > 1:
                if dump_axes_names is True:
                    # combine two last names
                    last_name = axes_names.pop()
                    prev_name = axes_names[-1]
                    # do not combine if last_name is None or ''
                    if last_name:
                        prev_name = prev_name if prev_name is not None else ''
                        combined_name = prev_name + '\\' + last_name
                    else:
                        # whether it is a string or None !
                        combined_name = prev_name
                    axes_names[-1] = combined_name
                elif dump_axes_names == 'except_last':
                    axes_names = axes_names[:-1]
                else:
                    axes_names = [''] * (len(axes_names) - 1)

            axes = self.axes[:-1] if wide else self.axes

            # get list of labels for each axis (except the last one if wide=True)
            labels = [ensure_no_numpy_type(axis.labels) for axis in axes]

            # creates vertical lines (ticks is a list of list)
            if self.ndim == 1 and wide:
                # There is no vertical axis, so the axis name should not have
                # any "tick" below it and we add an empty "tick".
                ticks = [['']]
            elif light:
                ticks = light_product(*labels)
            else:
                ticks = Product(labels)

            # computes the first line
            other_colnames = ensure_no_numpy_type(self.axes[-1].labels) if wide else [value_name]
            res2d = [axes_names + other_colnames]

            # summary if needed
            if maxlines != -1 and height > maxlines:
                # replace middle lines of the table by '...'.
                # We show only the first and last edgeitems lines.
                res2d.extend([list(tick) + dataline
                              for tick, dataline in zip(ticks[:edgeitems], ensure_no_numpy_type(data[:edgeitems]))])
                res2d.append(["..."] * (self.ndim - 1 + width))
                res2d.extend([list(tick) + dataline
                              for tick, dataline in zip(ticks[-edgeitems:], ensure_no_numpy_type(data[-edgeitems:]))])
            else:
                # all other lines (labels of N-1 first axes + data)
                res2d.extend([list(tick) + ensure_no_numpy_type(dataline) for tick, dataline in zip(ticks, data)])

        if na_repr != 'as_is':
            res2d = [[na_repr if value != value else value
                      for value in line]
                     for line in res2d]
        return res2d
    # This alias is not 100% equivalent to the old method (the names of the displayed axes are different) but it
    # has been deprecated long enough (since 0.30) that we can afford slightly breaking backward compatibility.
    as_table = renamed_to(dump, 'as_table')

    # XXX: should filter(geo=['W']) return a view by default? (collapse=True)
    # I think it would be dangerous to make it the default
    # behavior, because that would introduce a subtle difference between
    # filter(dim=[a, b]) and filter(dim=[a]) even though it would be faster
    # and uses less memory. Maybe I should have a "view" argument which
    # defaults to 'auto' (ie collapse by default), can be set to False to
    # force a copy and to True to raise an exception if a view is not possible.
[docs]    def filter(self, collapse=False, **kwargs) -> 'Array':
        r"""Filter the array along the axes given as keyword arguments.

        The *collapse* argument determines whether consecutive ranges should
        be collapsed to slices, which is more efficient and returns a view
        (and not a copy) if possible (if all ranges are consecutive).
        Only use this argument if you do not intent to modify the resulting
        array, or if you know what you are doing.

        It is similar to np.take but works with several axes at once.
        """
        return self.__getitem__(kwargs, collapse)

    def _axis_aggregate(self, op, axes=(), keepaxes=False, out=None, **kwargs) -> Union['Array', Scalar]:
        r"""Aggregate the array along whole axes using a numpy-style aggregate function.

        Parameters
        ----------
        op : function
            An aggregate function with this signature: func(a, axis=None, dtype=None, out=None, keepdims=False)
        axes : tuple of axes, optional
            Each axis can be an Axis object, str or int. Defaults to () which means all axes of the array.
        keepaxes : bool or scalar, optional
            If this is set to True, the axes which are reduced are left in the result as dimensions with size one,
            labelled with the name of `op` (with any 'nan' removed from it). Any other true value is used as the
            label of those axes instead. Defaults to False.
        out : Array, optional
            Alternative output array in which to place the result. It must have the same shape as the expected output.
        \**kwargs
            Extra keyword arguments passed to `op`.

        Returns
        -------
        Array or scalar
            The raw result of `op` when the result has no axis left, an Array otherwise.

        Raises
        ------
        ValueError
            If `op` is named 'ptp' and several (but not all) axes are requested.
        """
        src_data = np.asarray(self)
        # an empty `axes` argument means "aggregate over all axes"
        axes = self.axes[list(axes)] if axes else self.axes
        # when all axes are aggregated, op is called with axis=None
        axes_indices = tuple(self.axes.index(a) for a in axes) if axes != self.axes else None
        if op.__name__ == 'ptp':
            if axes_indices is not None and len(axes) > 1:
                raise ValueError('ptp can only be applied along a single axis or all axes, not multiple arbitrary axes')
            elif axes_indices is not None:
                # ptp is given a single int instead of a 1-tuple
                axes_indices = axes_indices[0]
            # NOTE(review): 'keepdims' is not passed to ptp, so combining keepaxes with ptp looks unsupported
            #               -- to be confirmed
        else:
            kwargs['keepdims'] = bool(keepaxes)
        if out is not None:
            assert isinstance(out, Array)
            # op works on raw data, so it receives the underlying data of `out`
            kwargs['out'] = out.data
        res_data = op(src_data, axis=axes_indices, **kwargs)
        if keepaxes:
            # each aggregated axis is replaced by an axis with the same name and a single label
            label = op.__name__.replace('nan', '') if keepaxes is True else keepaxes
            new_axes = [Axis([label], axis.name) for axis in axes]
            res_axes = self.axes[:]
            res_axes[axes] = new_axes
        else:
            res_axes = self.axes - axes
        if not res_axes:
            # scalars don't need to be wrapped in Array
            return res_data
        else:
            return Array(res_data, res_axes)

    def _cum_aggregate(self, op, axis) -> 'Array':
        r"""Apply a numpy cumulative aggregate function along a single axis.

        `op` must have this signature: func(arr, axis=0). `axis` is an Axis object, a str or an int. Contrary to
        other aggregate functions, only one axis at a time is supported. The result uses the axes of this array.
        """
        # TODO: accept a single group in axis, to filter & aggregate in one shot
        raw_data = np.asarray(self)
        axis_idx = self.axes.index(axis)
        cumulated = op(raw_data, axis=axis_idx)
        return Array(cumulated, self.axes)

    # TODO: now that items is never a (k, v), it should be renamed to something else: args?
    #       (groups would be misleading because each "item" can contain several groups)
    # TODO: experiment implementing this using ufunc.reduceat
    # http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.ufunc.reduceat.html
    # XXX: rename keepaxes to label=value? For group_aggregates we might want to keep the LGroup label if any
    def _group_aggregate(self, op, items, keepaxes=False, out=None, **kwargs) -> 'Array':
        r"""Aggregate the array over groups of labels, processing one item after the other.

        Parameters
        ----------
        op : function
            Aggregate function. It is called as op(data, axis=axis_idx, out=out, **kwargs).
        items : iterable of Group or tuple of Group
            Each item is either a single Group, in which case the axis of the group is removed from the result, or a
            tuple of Groups, in which case that axis is replaced by an axis with one label per group.
        keepaxes : bool, optional
            Not used in this method (_aggregate passes the same keyword arguments to _axis_aggregate and to this
            method).
        out : None
            Must be None (checked by an assert).
        \**kwargs
            Extra keyword arguments passed to `op`.

        Returns
        -------
        Array or scalar
            The result of the last aggregation is returned as is (not wrapped in an Array) when it is not a numpy
            array.
        """
        assert out is None
        res = self
        # TODO: when working with several "axes" at the same times, we should not produce the intermediary result at
        #       all. It should be faster and consume a bit less memory.
        for item in items:
            # each item is applied to the result of the previous one
            res_axes = res.axes[:]
            res_shape = list(res.shape)

            if isinstance(item, tuple):
                assert all(isinstance(g, Group) for g in item)
                groups = item
                axis = groups[0].axis
                # they should all have the same axis (this is already checked
                # in _prepare_aggregate though)
                assert all(g.axis.equals(axis) for g in groups[1:])
                killaxis = False
            else:
                # item is in fact a single group
                assert isinstance(item, Group), type(item)
                groups = (item,)
                axis = item.axis
                # it is easier to kill the axis after the fact
                killaxis = True

            axis, axis_idx = res.axes[axis], res.axes.index(axis)
            # potentially translate axis reference to real axes
            groups = tuple(g.with_axis(axis) for g in groups)
            # the aggregated axis gets one position per group
            res_shape[axis_idx] = len(groups)

            # XXX: this code is fragile. I wonder if there isn't a way to ask the function what kind of dtype/shape it
            #      will return given the input we are going to give it. My first search for this found nothing. One
            #      way to do this would be to create one big mapping: {(op, input dtype): res dtype}
            res_dtype = float if op in _always_return_float else res.dtype
            if op in (np.sum, np.nansum) and res.dtype in (bool, np.bool_):
                res_dtype = int
            # result buffer, filled one group at a time by passing views on it as the `out` argument of op
            res_data = np.empty(res_shape, dtype=res_dtype)

            group_idx = [slice(None) for _ in res_shape]
            for i, group in enumerate(groups):
                # idx selects the part of res_data corresponding to the i-th group
                group_idx[axis_idx] = i
                # this is only useful for ndim == 1 because a[(0,)] (equivalent to a[0] which kills the axis)
                # is different from a[[0]] (which does not kill the axis)
                idx = tuple(group_idx)

                # we need only lists of ticks, not single ticks, otherwise the dimension is discarded too early
                # (in __getitem__ instead of in the aggregate func)
                if isinstance(group, IGroup) and np.isscalar(group.key):
                    group = IGroup([group.key], axis=group.axis)
                elif isinstance(group, LGroup):
                    key = _to_key(group.key)
                    assert not isinstance(key, Group)
                    if np.isscalar(key):
                        key = [key]
                    # we do not care about the name at this point
                    group = LGroup(key, axis=group.axis)

                # select the data of the group (the aggregated axis is kept because the key is never a scalar)
                arr = res.__getitem__(group, collapse_slices=True)
                if res_data.ndim == 1:
                    assert len(idx) == 1 and idx[0] == i

                    # res_data[idx] but instead of returning a scalar (eg np.int32), it returns a 0d array which is a
                    # view on res_data, which can thus be used as out
                    out = res_data[i:i + 1].reshape(())
                else:
                    out = res_data[idx]

                arr = np.asarray(arr)
                # the result of op is written directly in res_data via `out`
                op(arr, axis=axis_idx, out=out, **kwargs)
                del arr

            if killaxis:
                # there was a single group, so idx (from the only loop iteration) points to position 0 of the axis
                assert group_idx[axis_idx] == 0
                res_data = res_data[idx]
                del res_axes[axis_idx]
            else:
                # We do NOT modify the axis name (eg append "_agg" or "*") even though this creates a new axis that is
                # independent from the original one because the original name is what users will want to use to access
                # that axis (eg in .filter kwargs)
                res_axes[axis_idx] = Axis(groups, axis.name)

            if isinstance(res_data, np.ndarray):
                res = Array(res_data, res_axes)
            else:
                # not a numpy array anymore (e.g. a scalar): do not wrap it in an Array
                res = res_data
        return res

    # TODO: not sure about the returned type
    def _prepare_aggregate(self, op, args, kwargs=None, commutative=False, stack_depth=1) \
            -> Union[List[Union[LGroup, Axis]], AxisCollection]:
        r"""Convert args to keys & LGroup and kwargs to LGroup.

        Parameters
        ----------
        op : function
            Not used in this method.
        args : tuple
            Positional aggregate arguments (axes, groups, keys or strings). None values are ignored.
        kwargs : dict, optional
            Keyword aggregate arguments (axis_name=key). The special 'axis' keyword is removed from this dict
            (the dict passed by the caller is modified) and the corresponding axis or axes are handled like
            extra positional arguments. Defaults to None (no keyword argument).
        commutative : bool, optional
            Not used in this method.
        stack_depth : int, optional
            Value from which the depth passed to _to_keys/_to_key is computed.
            NOTE(review): presumably the number of stack frames between this method and the user code, used to
            evaluate string keys -- to be confirmed in larray.core.group.

        Returns
        -------
        list or AxisCollection
            One operation per argument: an Axis, a Group or a tuple of Groups. Positional arguments come first
            (in their original order), then keyword arguments sorted by axis name. When there is no operation
            at all, the axes of the array are returned.
        """
        if kwargs is None:
            kwargs_items = []
        else:
            # axis=... is equivalent to passing the corresponding axis (or axes) as positional arguments
            explicit_axis = kwargs.pop('axis', None)
            if explicit_axis is not None:
                explicit_axis = self.axes[explicit_axis]
                if isinstance(explicit_axis, Axis):
                    args += (explicit_axis,)
                else:
                    assert isinstance(explicit_axis, AxisCollection)
                    args += tuple(explicit_axis)
            kwargs_items = kwargs.items()

        # Sort kwargs by axis name so that we have consistent results between runs because otherwise rounding errors
        # could lead to slightly different results even for commutative operations.
        sorted_kwargs = sorted(kwargs_items)

        # convert kwargs to LGroup so that we can only use args afterwards but still keep the axis information
        def standardise_kw_arg(axis_name, key, stack_depth=1):
            if isinstance(key, str):
                key = _to_keys(key, stack_depth + 1)
            if isinstance(key, tuple):
                # several groups on the same axis: standardise each of them
                # XXX +2?
                return tuple(standardise_kw_arg(axis_name, k, stack_depth + 1) for k in key)
            if isinstance(key, LGroup):
                return key
            # any other key is interpreted as a key on the axis named axis_name
            return self.axes[axis_name][key]

        def to_labelgroup(key, stack_depth=1):
            if isinstance(key, str):
                key = _to_keys(key, stack_depth + 1)
            if isinstance(key, tuple):
                # a tuple is supposed to be several groups on the same axis
                # TODO: it would be better to use self._translate_axis_key directly (so that we do not need to do the
                # label -> position translation twice) but this fails because the groups are also used as ticks on the
                # new axis, and igroups are not the same that LGroups in this regard (I wonder if ideally it shouldn't
                # be the same???)
                # groups = tuple(self._translate_axis_key(k) for k in key)
                groups = tuple(self.axes._guess_axis(_to_key(k, stack_depth + 1)) for k in key)
                first_group_axis = groups[0].axis
                if not all(g.axis.equals(first_group_axis) for g in groups[1:]):
                    raise ValueError(f"group with different axes: {key}")
                return groups
            elif isinstance(key, (Group, int, str, list, slice)):
                return self.axes._guess_axis(key)
            else:
                key_type = type(key).__name__
                raise NotImplementedError(f"{key} has invalid type ({key_type}) for a group aggregate key")

        def standardise_arg(arg, stack_depth=1):
            # a positional argument is either a whole axis or a key/group on an axis
            if self.axes.isaxis(arg):
                return self.axes[arg]
            else:
                return to_labelgroup(arg, stack_depth + 1)

        operations = [standardise_arg(a, stack_depth=stack_depth + 2) for a in args if a is not None] + \
                     [standardise_kw_arg(k, v, stack_depth=stack_depth + 2) for k, v in sorted_kwargs]
        if not operations:
            # op() without args is equal to op(all_axes)
            operations = self.axes
        return operations

    def _by_args_to_normal_agg_args(self, operations) -> List[Union[Axis, Group]]:
        r"""Transform "by" operations into normal aggregate operations.

        The result contains the axes of the array which are not mentioned in `operations` (neither directly nor via
        the axis of a group), followed by the operations which are a group or a tuple of groups.
        """
        def is_or_contains_group(o):
            if isinstance(o, Group):
                return True
            return isinstance(o, tuple) and isinstance(o[0], Group)

        # axes mentioned in operations, either directly or as the axis of a group
        mentioned_axes = []
        for operation in operations:
            members = (operation,) if isinstance(operation, (Group, Axis)) else operation
            for member in members:
                mentioned_axes.append(member.axis if isinstance(member, Group) else member)

        # all other axes must be aggregated...
        normal_args = list(self.axes - mentioned_axes)
        # ... as well as the groups themselves
        normal_args.extend(operation for operation in operations if is_or_contains_group(operation))
        return normal_args

    def _aggregate(self, op, args, kwargs=None, keepaxes=False, by_agg=False, commutative=False,
                   out=None, extra_kwargs=None) -> Union['Array', Scalar]:
        r"""Aggregate the array along the axes and/or groups given in args and kwargs.

        Parameters
        ----------
        op : function
            Aggregate function, passed to _axis_aggregate and _group_aggregate.
        args : tuple
            Positional aggregate arguments (see _prepare_aggregate).
        kwargs : dict, optional
            Keyword aggregate arguments (see _prepare_aggregate). Defaults to None.
        keepaxes : bool or scalar, optional
            Passed to _axis_aggregate and _group_aggregate. Defaults to False.
        by_agg : bool, optional
            Whether the arguments must be converted using _by_args_to_normal_agg_args before aggregating. This
            conversion only happens when at least one argument was given. Defaults to False.
        commutative : bool, optional
            Passed to _prepare_aggregate. Defaults to False.
        out : Array, optional
            Passed to _axis_aggregate and _group_aggregate. Defaults to None.
        extra_kwargs : dict, optional
            Extra keyword arguments passed to _axis_aggregate and _group_aggregate. Defaults to None (no extra
            argument).

        Returns
        -------
        Array or scalar
        """
        # None instead of {} as default value avoids sharing a mutable dict between calls
        if extra_kwargs is None:
            extra_kwargs = {}

        operations = self._prepare_aggregate(op, args, kwargs, commutative, stack_depth=3)

        # the parentheses matter: without them, the conditional expression applies to the whole sum and
        # positional arguments are not counted when kwargs is None
        # NOTE(review): _prepare_aggregate removes 'axis' from kwargs before this line, so an explicit axis=...
        #               is not counted here -- to be confirmed that this is intended
        total_len_args = len(args) + (len(kwargs) if kwargs is not None else 0)
        if by_agg and total_len_args:
            operations = self._by_args_to_normal_agg_args(operations)

        res = self
        # group *consecutive* same-type (group vs axis aggregates) operations
        # we do not change the order of operations since we only group consecutive operations.
        for are_axes, axes in groupby(operations, self.axes.isaxis):
            func = res._axis_aggregate if are_axes else res._group_aggregate
            res = func(op, axes, keepaxes=keepaxes, out=out, **extra_kwargs)
        return res

[docs]    def with_total(self, *args, op=sum, label='total', **kwargs) -> 'Array':
        r"""Add aggregated values (sum by default) along each axis.

        A user defined label can be given to specified the computed values.

        Parameters
        ----------
        *args : int or str or Axis or Group or any combination of those, optional
            Axes or groups along which to compute the aggregates. Passed groups should be named.
            Defaults to aggregate over the whole array.
        op : aggregate function, optional
            Available aggregate functions are: `sum`, `prod`, `min`, `max`, `mean`, `ptp`, `var`, `std`,
            `median` and `percentile`. Defaults to `sum`.
        label : scalar value, optional
            Label to use for the total. Applies only to aggregated axes, not groups. Defaults to "total".
        \**kwargs : int or str or Group or any combination of those, optional
            Axes or groups along which to compute the aggregates.

        Returns
        -------
        Array

        Examples
        --------
        >>> arr = ndtest("gender=M,F;time=2013..2016")
        >>> arr
        gender\time  2013  2014  2015  2016
                  M     0     1     2     3
                  F     4     5     6     7
        >>> arr.with_total()
        gender\time  2013  2014  2015  2016  total
                  M     0     1     2     3      6
                  F     4     5     6     7     22
              total     4     6     8    10     28

        Using another function and label

        >>> arr.with_total(op=mean, label='mean')
        gender\time  2013  2014  2015  2016  mean
                  M   0.0   1.0   2.0   3.0   1.5
                  F   4.0   5.0   6.0   7.0   5.5
               mean   2.0   3.0   4.0   5.0   3.5

        Specifying an axis and a label

        >>> arr.with_total('gender', label='U')
        gender\time  2013  2014  2015  2016
                  M     0     1     2     3
                  F     4     5     6     7
                  U     4     6     8    10

        Using groups

        >>> time_groups = (arr.time[:2014] >> 'before_2015',
        ...                arr.time[2015:] >> 'after_2015')
        >>> arr.with_total(time_groups)
        gender\time  2013  2014  2015  2016  before_2015  after_2015
                  M     0     1     2     3            1           5
                  F     4     5     6     7            9          13
        >>> # or equivalently
        >>> # arr.with_total('time[:2014] >> before_2015; time[2015:] >> after_2015')
        """
        # TODO: make label default to op.__name__
        npop = {
            sum: np.sum,
            prod: np.prod,
            min: np.min,
            max: np.max,
            mean: np.mean,
            ptp: np.ptp,
            var: np.var,
            std: np.std,
            median: np.median,
            percentile: np.percentile,
        }
        # TODO: commutative should be known for usual ops
        operations = self._prepare_aggregate(op, args, kwargs, False, stack_depth=2)
        res = self
        # TODO: we should allocate the final result directly and fill it progressively, so that the original array is
        #       only copied once
        for axis in operations:
            # TODO: append/extend first with an empty array then _aggregate with out=
            if self.axes.isaxis(axis):
                value = res._axis_aggregate(npop[op], (axis,), keepaxes=label)
            else:
                # groups
                if not isinstance(axis, tuple):
                    # assume a single group
                    axis = (axis,)
                lgkey = axis
                axis = lgkey[0].axis
                value = res._aggregate(npop[op], (lgkey,))
            res = res.append(axis, value)
        return res

    # TODO: make sure we can do
    # arr[X.sex.i[arr.indexofmin(X.sex)]] <- fails
    # and
    # arr[arr.labelofmin(X.sex)] <- fails
    # should both be equal to arr.min(X.sex)
    # the versions where axis is None already work as expected in the simple
    # case (no ambiguous labels):
    # arr.i[arr.indexofmin()]
    # arr[arr.labelofmin()]
    # for the case where axis is None, we should return an NDGroup
    # so that arr[arr.labelofmin()] works even if the minimum is on ambiguous labels
[docs]    def labelofmin(self, axis=None) -> Union['Array', Tuple[Scalar, ...]]:
        r"""Return labels of the minimum values along a given axis.

        Parameters
        ----------
        axis : int or str or Axis, optional
            Axis along which to work. If not specified, works on the full array.

        Returns
        -------
        Array

        Notes
        -----
        In case of multiple occurrences of the minimum values, the indices corresponding to the first occurrence are
        returned.

        Examples
        --------
        >>> nat = Axis('nat=BE,FR,IT')
        >>> sex = Axis('sex=M,F')
        >>> arr = Array([[0, 1], [3, 2], [2, 5]], [nat, sex])
        >>> arr
        nat\sex  M  F
             BE  0  1
             FR  3  2
             IT  2  5
        >>> arr.labelofmin('sex')
        nat  BE  FR  IT
              M   F   M
        >>> arr.labelofmin()
        ('BE', 'M')
        """
        if axis is not None:
            axis, axis_idx = self.axes[axis], self.axes.index(axis)
            data = axis.labels[self.data.argmin(axis_idx)]
            return Array(data, self.axes - axis)
        else:
            return self.axes._iflat(self.data.argmin())

    # Old name of labelofmin, declared through renamed_to with raise_error=True.
    # NOTE(review): presumably using the old name now raises an error instead of a warning -- to be confirmed
    #               in larray.util.misc.renamed_to
    argmin = renamed_to(labelofmin, 'argmin', raise_error=True)

[docs]    def indexofmin(self, axis=None) -> Union['Array', Tuple[int, ...]]:
        r"""Return indices of the minimum values along a given axis.

        Parameters
        ----------
        axis : int or str or Axis, optional
            Axis along which to work. If not specified, works on the full array.

        Returns
        -------
        Array

        Notes
        -----
        In case of multiple occurrences of the minimum values, the indices corresponding to the first occurrence are
        returned.

        Examples
        --------
        >>> nat = Axis('nat=BE,FR,IT')
        >>> sex = Axis('sex=M,F')
        >>> arr = Array([[0, 1], [3, 2], [2, 5]], [nat, sex])
        >>> arr
        nat\sex  M  F
             BE  0  1
             FR  3  2
             IT  2  5
        >>> arr.indexofmin('sex')
        nat  BE  FR  IT
              0   1   0
        >>> arr.indexofmin()
        (0, 0)
        """
        if axis is not None:
            axis, axis_idx = self.axes[axis], self.axes.index(axis)
            return Array(self.data.argmin(axis_idx), self.axes - axis)
        else:
            return np.unravel_index(self.data.argmin(), self.shape)

    # Old name of indexofmin, declared through renamed_to with raise_error=True.
    # NOTE(review): presumably using the old name now raises an error instead of a warning -- to be confirmed
    #               in larray.util.misc.renamed_to
    posargmin = renamed_to(indexofmin, 'posargmin', raise_error=True)

[docs]    def labelofmax(self, axis=None) -> Union['Array', Tuple[Scalar, ...]]:
        r"""Return labels of the maximum values along a given axis.

        Parameters
        ----------
        axis : int or str or Axis, optional
            Axis along which to work. If not specified, works on the full array.

        Returns
        -------
        Array

        Notes
        -----
        In case of multiple occurrences of the maximum values, the labels corresponding to the first occurrence are
        returned.

        Examples
        --------
        >>> nat = Axis('nat=BE,FR,IT')
        >>> sex = Axis('sex=M,F')
        >>> arr = Array([[0, 1], [3, 2], [2, 5]], [nat, sex])
        >>> arr
        nat\sex  M  F
             BE  0  1
             FR  3  2
             IT  2  5
        >>> arr.labelofmax('sex')
        nat  BE  FR  IT
              F   M   F
        >>> arr.labelofmax()
        ('IT', 'F')
        """
        if axis is not None:
            axis, axis_idx = self.axes[axis], self.axes.index(axis)
            data = axis.labels[self.data.argmax(axis_idx)]
            return Array(data, self.axes - axis)
        else:
            return self.axes._iflat(self.data.argmax())

    # Old name of labelofmax, declared through renamed_to with raise_error=True.
    # NOTE(review): presumably using the old name now raises an error instead of a warning -- to be confirmed
    #               in larray.util.misc.renamed_to
    argmax = renamed_to(labelofmax, 'argmax', raise_error=True)

[docs]    def indexofmax(self, axis=None) -> Union['Array', Tuple[int, ...]]:
        r"""Return indices of the maximum values along a given axis.

        Parameters
        ----------
        axis : int or str or Axis, optional
            Axis along which to work. If not specified, works on the full array.

        Returns
        -------
        Array

        Notes
        -----
        In case of multiple occurrences of the maximum values, the labels corresponding to the first occurrence are
        returned.

        Examples
        --------
        >>> nat = Axis('nat=BE,FR,IT')
        >>> sex = Axis('sex=M,F')
        >>> arr = Array([[0, 1], [3, 2], [2, 5]], [nat, sex])
        >>> arr
        nat\sex  M  F
             BE  0  1
             FR  3  2
             IT  2  5
        >>> arr.indexofmax('sex')
        nat  BE  FR  IT
              1   0   1
        >>> arr.indexofmax()
        (2, 1)
        """
        if axis is not None:
            axis, axis_idx = self.axes[axis], self.axes.index(axis)
            return Array(self.data.argmax(axis_idx), self.axes - axis)
        else:
            return np.unravel_index(self.data.argmax(), self.shape)

    # Old name of indexofmax, declared through renamed_to with raise_error=True.
    # NOTE(review): presumably using the old name now raises an error instead of a warning -- to be confirmed
    #               in larray.util.misc.renamed_to
    posargmax = renamed_to(indexofmax, 'posargmax', raise_error=True)

[docs]    def labelsofsorted(self, axis=None, ascending=True, kind='quicksort') -> 'Array':
        r"""Return the labels that would sort this array.

        Performs an indirect sort along the given axis using the algorithm specified by the `kind` keyword. It returns
        an array of labels of the same shape as `a` that index data along the given axis in sorted order.

        Parameters
        ----------
        axis : int or str or Axis, optional
            Axis along which to sort. This can be omitted if array has only one axis.
        ascending : bool, optional
            Sort values in ascending order. Defaults to True.
        kind : {'quicksort', 'mergesort', 'heapsort'}, optional
            Sorting algorithm. Defaults to 'quicksort'.

        Returns
        -------
        Array

        Examples
        --------
        >>> arr = Array([[0, 1], [3, 2], [2, 5]], "nat=BE,FR,IT; sex=M,F")
        >>> arr
        nat\sex  M  F
             BE  0  1
             FR  3  2
             IT  2  5
        >>> arr.labelsofsorted('sex')
        nat\sex  0  1
             BE  M  F
             FR  F  M
             IT  M  F
        >>> arr.labelsofsorted('sex', ascending=False)
        nat\sex  0  1
             BE  F  M
             FR  M  F
             IT  F  M
        """
        if axis is None:
            if self.ndim > 1:
                raise ValueError("array has ndim > 1 and no axis specified for labelsofsorted")
            axis = self.axes[0]
        axis = self.axes[axis]
        pos = self.indicesofsorted(axis, ascending=ascending, kind=kind)
        return Array(axis.labels[pos.data], pos.axes)

    # Old name of labelsofsorted, declared through renamed_to with raise_error=True.
    # NOTE(review): presumably using the old name now raises an error instead of a warning -- to be confirmed
    #               in larray.util.misc.renamed_to
    argsort = renamed_to(labelsofsorted, 'argsort', raise_error=True)

[docs]    def indicesofsorted(self, axis=None, ascending=True, kind='quicksort') -> 'Array':
        r"""Return the indices that would sort this array.

        Performs an indirect sort along the given axis using the algorithm specified by the `kind` keyword. It returns
        an array of indices with the same axes as `a` that index data along the given axis in sorted order.

        Parameters
        ----------
        axis : int or str or Axis, optional
            Axis along which to sort. This can be omitted if array has only one axis.
        ascending : bool, optional
            Sort values in ascending order. Defaults to True.
        kind : {'quicksort', 'mergesort', 'heapsort'}, optional
            Sorting algorithm. Defaults to 'quicksort'.

        Returns
        -------
        Array

        Examples
        --------
        >>> arr = Array([[1, 5], [3, 2], [0, 4]], "nat=BE,FR,IT; sex=M,F")
        >>> arr
        nat\sex  M  F
             BE  1  5
             FR  3  2
             IT  0  4
        >>> arr.indicesofsorted('nat')
        nat\sex  M  F
              0  2  1
              1  0  2
              2  1  0
        >>> arr.indicesofsorted('nat', ascending=False)
        nat\sex  M  F
              0  1  0
              1  0  2
              2  2  1
        """
        if axis is None:
            if self.ndim > 1:
                raise ValueError("array has ndim > 1 and no axis specified for indicesofsorted")
            axis = self.axes[0]
        axis, axis_idx = self.axes[axis], self.axes.index(axis)
        data = self.data.argsort(axis_idx, kind=kind)
        if not ascending:
            reverser = tuple(slice(None, None, -1) if i == axis_idx else slice(None)
                             for i in range(self.ndim))
            data = data[reverser]
        new_axis = Axis(np.arange(len(axis)), axis.name)
        return Array(data, self.axes.replace(axis, new_axis))

    # Old name of indicesofsorted, declared through renamed_to with raise_error=True.
    # NOTE(review): presumably using the old name now raises an error instead of a warning -- to be confirmed
    #               in larray.util.misc.renamed_to
    posargsort = renamed_to(indicesofsorted, 'posargsort', raise_error=True)

    # TODO: implement keys_by
    # XXX: implement expand=True? Unsure it is necessary now that we have zip_array_*
    # TODO: add support for groups in addition to entire axes
[docs]    def keys(self, axes=None, ascending=True) -> Product:
        r"""Return a view on the array labels along axes.

        Parameters
        ----------
        axes : int, str or Axis or tuple of them, optional
            Axis or axes along which to iterate and in which order. Defaults to None (all axes in the order they are
            in the array).
        ascending : bool, optional
            Whether to iterate the axes in ascending order (from start to end). Defaults to True.

        Returns
        -------
        Sequence
            An object you can iterate (loop) on and index by position to get the Nth label along axes.

        Examples
        --------
        First, define a small helper function to make the following examples more readable.

        >>> def str_key(key):
        ...     return tuple(str(k) for k in key)

        Then create a test array:

        >>> arr = ndtest((2, 2))
        >>> arr
        a\b  b0  b1
         a0   0   1
         a1   2   3

        By default it iterates on all axes, in the order they are in the array.

        >>> for key in arr.keys():
        ...     # print both the actual key object, and a (nicer) string representation
        ...     print(key, "->", str_key(key))
        (a.i[0], b.i[0]) -> ('a0', 'b0')
        (a.i[0], b.i[1]) -> ('a0', 'b1')
        (a.i[1], b.i[0]) -> ('a1', 'b0')
        (a.i[1], b.i[1]) -> ('a1', 'b1')
        >>> for key in arr.keys(ascending=False):
        ...     print(str_key(key))
        ('a1', 'b1')
        ('a1', 'b0')
        ('a0', 'b1')
        ('a0', 'b0')

        but you can specify another axis order:

        >>> for key in arr.keys(('b', 'a')):
        ...     print(str_key(key))
        ('b0', 'a0')
        ('b0', 'a1')
        ('b1', 'a0')
        ('b1', 'a1')

        One can specify less axes than the array has:

        >>> # iterate on the "b" axis, that is return each label along the "b" axis
        ... for key in arr.keys('b'):
        ...     print(str_key(key))
        ('b0',)
        ('b1',)

        One can also access elements of the key sequence directly, instead of iterating over it. Say we want to
        retrieve the first and last keys of our array, we could write:

        >>> keys = arr.keys()
        >>> first_key = keys[0]
        >>> str_key(first_key)
        ('a0', 'b0')
        >>> last_key = keys[-1]
        >>> str_key(last_key)
        ('a1', 'b1')
        """
        return self.axes.iter_labels(axes, ascending=ascending)

    # TODO: implement values_by
    # TODO: add support for groups in addition to entire axes
    # TODO : not sure about the returned type
[docs]    def values(self, axes=None, ascending=True) -> Union[np.ndarray, List['Array'], ArrayPositionalIndexer]:
        r"""Return a view on the values of the array along axes.

        Parameters
        ----------
        axes : int, str or Axis or tuple of them, optional
            Axis or axes along which to iterate and in which order. Defaults to None (all axes in the order they are
            in the array).
        ascending : bool, optional
            Whether to iterate the axes in ascending order (from start to end). Defaults to True.

        Returns
        -------
        Sequence
            An object you can iterate (loop) on and index by position.

        Examples
        --------
        >>> arr = ndtest((2, 2))
        >>> arr
        a\b  b0  b1
         a0   0   1
         a1   2   3

        By default it iterates on all axes, in the order they are in the array.

        >>> for value in arr.values():
        ...     print(value)
        0
        1
        2
        3
        >>> for value in arr.values(ascending=False):
        ...     print(value)
        3
        2
        1
        0

        but you can specify another axis order:

        >>> for value in arr.values(('b', 'a')):
        ...     print(value)
        0
        2
        1
        3

        When you specify less axes than the array has, you get arrays back:

        >>> # iterate on the "b" axis, that is return the (sub)array for each label along the "b" axis
        ... for value in arr.values('b'):
        ...     print(value)
        a  a0  a1
            0   2
        a  a0  a1
            1   3
        >>> # iterate on the "b" axis, that is return the (sub)array for each label along the "b" axis
        ... for value in arr.values('b', ascending=False):
        ...     print(value)
        a  a0  a1
            1   3
        a  a0  a1
            0   2

        One can also access elements of the value sequence directly, instead of iterating over it. Say we want to
        retrieve the first and last values of our array, we could write:

        >>> values = arr.values()
        >>> values[0]
        0
        >>> values[-1]
        3
        """
        if axes is None:
            combined = np.ravel(self.data)
            # combined[::-1] *is* indexable
            return combined if ascending else combined[::-1]
        elif not axes:
            # empty axes list
            return [self]

        if not isinstance(axes, (tuple, list, AxisCollection)):
            axes = (axes,)

        axes = self.axes[axes]
        # move axes in front
        transposed = self.transpose(axes)
        # combine axes if necessary
        combined = transposed.combine_axes(axes, wildcard=True) if len(axes) > 1 else transposed
        # trailing .i is to support the case where axis < self.axes (ie the elements of the result are arrays)
        return combined.i if ascending else combined.i[::-1].i

    # TODO: we currently return a tuple of groups even for 1D arrays, which can be both a bad or a good thing.
    #       if we returned an NDGroup in all cases, it would solve the problem
[docs]    def items(self, axes=None, ascending=True) -> SequenceZip:
        r"""Return a (label, value) view of the array along axes.

        Parameters
        ----------
        axes : int, str or Axis or tuple of them, optional
            Axis or axes along which to iterate and in which order. Defaults to None (all axes in the order they are
            in the array).
        ascending : bool, optional
            Whether to iterate the axes in ascending order (from start to end). Defaults to True.

        Returns
        -------
        Sequence
            An object you can iterate (loop) on and index by position to get the Nth (label, value) couple along axes.

        Examples
        --------
        First, define a small helper function to make the following examples more readable.

        >>> def str_key(key):
        ...     return tuple(str(k) for k in key)

        Then create a test array:

        >>> arr = ndtest((2, 2))
        >>> arr
        a\b  b0  b1
         a0   0   1
         a1   2   3

        By default it iterates on all axes, in the order they are in the array.

        >>> for key, value in arr.items():
        ...     print(str_key(key), "->", value)
        ('a0', 'b0') -> 0
        ('a0', 'b1') -> 1
        ('a1', 'b0') -> 2
        ('a1', 'b1') -> 3
        >>> for key, value in arr.items(ascending=False):
        ...     print(str_key(key), "->", value)
        ('a1', 'b1') -> 3
        ('a1', 'b0') -> 2
        ('a0', 'b1') -> 1
        ('a0', 'b0') -> 0

        but you can specify another axis order:

        >>> for key, value in arr.items(('b', 'a')):
        ...     print(str_key(key), "->", value)
        ('b0', 'a0') -> 0
        ('b0', 'a1') -> 2
        ('b1', 'a0') -> 1
        ('b1', 'a1') -> 3

        When you specify less axes than the array has, you get arrays back:

        >>> # iterate on the "b" axis, that is return the (sub)array for each label along the "b" axis
        ... for key, value in arr.items('b'):
        ...     print(str_key(key), value, sep="\n")
        ('b0',)
        a  a0  a1
            0   2
        ('b1',)
        a  a0  a1
            1   3

        One can also access elements of the items sequence directly, instead of iterating over it. Say we want to
        retrieve the first and last key-value pairs of our array, we could write:

        >>> items = arr.items()
        >>> first_key, first_value = items[0]
        >>> str_key(first_key)
        ('a0', 'b0')
        >>> first_value
        0
        >>> last_key, last_value = items[-1]
        >>> str_key(last_key)
        ('a1', 'b1')
        >>> last_value
        3
        """
        return SequenceZip((self.keys(axes, ascending=ascending), self.values(axes, ascending=ascending)))

    # Indexer object built around this array (an ArrayFlatIndicesIndexer instance).
    # NOTE(review): lazy_attribute presumably builds the indexer on first access and caches it on the
    # instance -- confirm against its definition in larray.util.misc.
    @lazy_attribute
    def iflat(self) -> ArrayFlatIndicesIndexer:
        return ArrayFlatIndicesIndexer(self)
    # reuse the documentation of the indexer class as the documentation of the attribute
    iflat.__doc__ = ArrayFlatIndicesIndexer.__doc__

[docs]    def copy(self) -> 'Array':
        r"""Return a copy of the array."""
        return Array(self.data.copy(), axes=self.axes[:], meta=self.meta)

    # XXX: we might want to implement this using .groupby().first()
[docs]    def unique(self, axes=None, sort=False, sep='_') -> 'Array':
        r"""Return unique values (optionally along axes).

        Parameters
        ----------
        axes : axis reference (int, str, Axis) or sequence of them, optional
            Axis or axes along which to compute unique values. Defaults to None (all axes).
        sort : bool, optional
            Whether to sort unique values. Defaults to False. Sorting is not implemented yet for unique() along
            multiple axes.
        sep : str, optional
            Separator when several labels need to be combined. Defaults to '_'.

        Returns
        -------
        Array
            array with unique values

        Examples
        --------
        >>> arr = Array([[0, 2, 0, 0],
        ...              [1, 1, 1, 0]], 'a=a0,a1;b=b0..b3')
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   2   0   0
         a1   1   1   1   0

        By default unique() returns the first occurrence of each unique value in the order it appears:

        >>> arr.unique()
        a_b  a0_b0  a0_b1  a1_b0
                 0      2      1

        To sort the unique values, use the sort argument:

        >>> arr.unique(sort=True)
        a_b  a0_b0  a1_b0  a0_b1
                 0      1      2

        One can also compute unique sub-arrays (i.e. combination of values) along axes. In our example the a0=0, a1=1
        combination appears twice along the 'b' axis, so 'b2' is not returned:

        >>> arr.unique('b')
        a\b  b0  b1  b3
         a0   0   2   0
         a1   1   1   0
        >>> arr.unique('b', sort=True)
        a\b  b3  b0  b1
         a0   0   0   2
         a1   0   1   1
        """
        if axes is not None:
            axes = self.axes[axes]

        assert axes is None or isinstance(axes, (Axis, AxisCollection))

        if not isinstance(axes, AxisCollection):
            axis_idx = self.axes.index(axes) if axes is not None else None
            # axis needs np >= 1.13
            _, unq_index = np.unique(self, axis=axis_idx, return_index=True)
            if not sort:
                unq_index = np.sort(unq_index)
            if axes is None:
                return self.iflat.__getitem__(unq_index, sep=sep)
            else:
                return self[axes.i[unq_index]]
        else:
            if sort:
                raise NotImplementedError('sort=True is not implemented for unique along multiple axes')
            unq_list = []
            seen = set()
            list_append = unq_list.append
            seen_add = seen.add
            sep_join = sep.join
            axis_name = sep_join(a.name for a in axes)
            first_axis_idx = self.axes.index(axes[0])
            # XXX: use combine_axes(axes).items() instead?
            for labels, value in self.items(axes):
                hashable_value = value.data.tobytes() if isinstance(value, Array) else value
                if hashable_value not in seen:
                    list_append((sep_join(str(label) for label in labels), value))
                    seen_add(hashable_value)
            res_arr = stack(unq_list, axis_name)
            # transpose the combined axis at the position where the first of the combined axes was
            # TODO: use res_arr.transpose(res_arr.axes.move_axis(-1, first_axis_idx)) once #564 is implemented:
            #       https://github.com/larray-project/larray/issues/564
            # stack adds the stacked axes at the end
            combined_axis = res_arr.axes[-1]
            assert combined_axis.name == axis_name
            new_axes_order = res_arr.axes - combined_axis
            new_axes_order.insert(first_axis_idx, combined_axis)
            return res_arr.transpose(new_axes_order)

    @property
    def info(self) -> str:
        r"""Describe an Array (metadata + shape and labels for each axis).

        The metadata part is only present when the array has at least one metadata item. It is followed by the
        description of the axes, the dtype name and the memory used.

        Returns
        -------
        str
            Description of the array (metadata + shape and labels for each axis).

        Examples
        --------
        >>> mat0 = Array([[2.0, 5.0], [8.0, 6.0]], "nat=BE,FO; sex=F,M")
        >>> mat0.info
        2 x 2
         nat [2]: 'BE' 'FO'
         sex [2]: 'F' 'M'
        dtype: float64
        memory used: 32 bytes
        >>> mat0.meta.title = 'test matrix'
        >>> mat0.info
        title: test matrix
        2 x 2
         nat [2]: 'BE' 'FO'
         sex [2]: 'F' 'M'
        dtype: float64
        memory used: 32 bytes
        """
        # optional metadata header, on its own line(s)
        meta_part = f'{self.meta}\n' if len(self.meta) else ''
        main_part = f'{self.axes.info}\ndtype: {self.dtype.name}\nmemory used: {self.memory_used}'
        return ReprString(meta_part + main_part)

[docs]    def ratio(self, *axes) -> 'Array':
        r"""Return an array with all values divided by the sum of values along given axes.

        Parameters
        ----------
        *axes

        Returns
        -------
        Array
            array / array.sum(axes)

        Examples
        --------
        >>> nat = Axis('nat=BE,FO')
        >>> sex = Axis('sex=M,F')
        >>> a = Array([[4, 6], [2, 8]], [nat, sex])
        >>> a
        nat\sex  M  F
             BE  4  6
             FO  2  8
        >>> a.sum()
        20
        >>> a.ratio()
        nat\sex    M    F
             BE  0.2  0.3
             FO  0.1  0.4
        >>> a.ratio('sex')
        nat\sex    M    F
             BE  0.4  0.6
             FO  0.2  0.8
        >>> a.ratio('M')
        nat\sex    M    F
             BE  1.0  1.5
             FO  1.0  4.0
        """
        # # this does not work, but I am unsure it should
        # # >>> a.sum(age[[0, 1]], age[2]) / a.sum(age)
        # >>> a.sum(([0, 1], 2)) / a.sum(age)
        # # >>> a / a.sum(([0, 1], 2))
        # >>> a.sum(X.sex)
        # >>> a.sum(X.age)
        # >>> a.sum(X.sex) / a.sum(X.age)
        # >>> a.ratio('F')
        # could mean
        # >>> a.sum('F') / a.sum(a.get_axis('F'))
        # >>> a.sum('F') / a.sum(X.sex)
        # age    0    1               2
        #      1.0  0.6  0.555555555556
        # OR (current meaning)
        # >>> a / a.sum('F')
        # age\sex               M    F
        #       0             0.0  1.0
        #       1  0.666666666667  1.0
        #       2             0.8  1.0
        # One solution is to add an argument
        # >>> a.ratio(what='F', by=X.sex)
        # age    0    1               2
        #      1.0  0.6  0.555555555556
        # >>> a.sum('F') / a.sum(X.sex)

        # >>> a.sum((age[[0, 1]], age[[1, 2]])) / a.sum(age)
        # >>> a.ratio((age[[0, 1]], age[[1, 2]]), by=age)

        # >>> a.sum((X.age[[0, 1]], X.age[[1, 2]])) / a.sum(X.age)
        # >>> a.ratio((X.age[[0, 1]], X.age[[1, 2]], by=X.age)

        # >>> lalala.sum(([0, 1], [1, 2])) / lalala.sum(X.age)
        # >>> lalala.ratio(([0, 1], [1, 2]), by=X.age)

        # >>> b = a.sum((age[[0, 1]], age[[1, 2]]))
        # >>> b
        # age\sex  M  F
        #   [0 1]  2  4
        #   [1 2]  6  8
        # >>> b / b.sum(X.age)
        # age\sex     M               F
        #   [0 1]  0.25  0.333333333333
        #   [1 2]  0.75  0.666666666667
        # >>> b / a.sum(X.age)
        # age\sex               M               F
        #   [0 1]  0.333333333333  0.444444444444
        #   [1 2]             1.0  0.888888888889
        # # >>> a.ratio([0, 1], [2])
        # # >>> a.ratio(X.age[[0, 1]], X.age[2])
        # >>> a.ratio((X.age[[0, 1]], X.age[2]))
        # nat\sex             M    F
        #      BE           0.0  1.0
        #      FO  0.6666666666  1.0
        return self / self.sum(*axes)

[docs]    def rationot0(self, *axes) -> 'Array':
        # part of the doctest is skipped because it produces a warning we do not want to have to handle within the
        # doctest and cannot properly ignore
        r"""Return an Array with values array / array.sum(axes) where the sum is not 0, 0 otherwise.

        Parameters
        ----------
        *axes

        Returns
        -------
        Array
            array / array.sum(axes)

        Examples
        --------
        >>> a = Axis('a=a0,a1')
        >>> b = Axis('b=b0,b1,b2')
        >>> arr = Array([[6, 0, 2],
        ...              [4, 0, 8]], [a, b])
        >>> arr
        a\b  b0  b1  b2
         a0   6   0   2
         a1   4   0   8
        >>> arr.sum()
        20
        >>> arr.rationot0()
        a\b   b0   b1   b2
         a0  0.3  0.0  0.1
         a1  0.2  0.0  0.4
        >>> arr.rationot0('a')
        a\b   b0   b1   b2
         a0  0.6  0.0  0.2
         a1  0.4  0.0  0.8

        for reference, the normal ratio method would produce a warning message and return:

        >>> arr.ratio('a')                                          # doctest: +SKIP
        a\b   b0   b1   b2
         a0  0.6  nan  0.2
         a1  0.4  nan  0.8
        """
        return self.divnot0(self.sum(*axes))

[docs]    def percent(self, *axes) -> 'Array':
        r"""Return an array with values given as percent of the total of all values along given axes.

        Parameters
        ----------
        *axes

        Returns
        -------
        Array
            array / array.sum(axes) * 100

        Examples
        --------
        >>> nat = Axis('nat=BE,FO')
        >>> sex = Axis('sex=M,F')
        >>> a = Array([[4, 6], [2, 8]], [nat, sex])
        >>> a
        nat\sex  M  F
             BE  4  6
             FO  2  8
        >>> a.percent()
        nat\sex     M     F
             BE  20.0  30.0
             FO  10.0  40.0
        >>> a.percent('sex')
        nat\sex     M     F
             BE  40.0  60.0
             FO  20.0  80.0
        """
        return self * 100.0 / self.sum(*axes)

    # aggregate method decorator
    def _decorate_agg_method(npfunc, nanfunc=None, commutative=False, by_agg=False, extra_kwargs=None,
                             long_name='', action_verb='perform'):
        r"""Build a decorator turning a documentation-only method into an aggregate method.

        The decorated function is never called: only its name and docstring are used. The method which replaces
        it forwards everything to ``self._aggregate``.

        Parameters
        ----------
        npfunc : callable
            Function given to ``self._aggregate`` when skipna is False.
        nanfunc : callable, optional
            Function given to ``self._aggregate`` when skipna is True. Defaults to None (skipna not available).
        commutative : bool, optional
            Passed as-is to ``self._aggregate``. Defaults to False.
        by_agg : bool, optional
            Passed as-is to ``_doc_agg_method`` and ``self._aggregate``. Defaults to False.
        extra_kwargs : sequence of str, optional
            Names of the extra keyword arguments accepted by the method. Their default value is taken from
            ``_kwarg_agg``. Defaults to None (no extra keyword argument).
        long_name : str, optional
            Passed as-is to ``_doc_agg_method``. Defaults to ''.
        action_verb : str, optional
            Passed as-is to ``_doc_agg_method``. Defaults to 'perform'.

        Returns
        -------
        function
            Decorator taking the documentation-only method and returning the actual aggregate method.
        """
        # None instead of a mutable [] default; work on a private list so that the caller's sequence is never shared
        extra_kwargs = [] if extra_kwargs is None else list(extra_kwargs)

        def decorated(func):
            _doc_agg_method(func, by_agg, long_name, action_verb, kwargs=extra_kwargs + ['out', 'skipna', 'keepaxes'])

            @functools.wraps(func)
            def wrapper(self, *args, keepaxes=_kwarg_agg['keepaxes']['value'], skipna=_kwarg_agg['skipna']['value'],
                        out=_kwarg_agg['out']['value'], **kwargs):
                # by default, skip missing values whenever a nan-aware function is available
                if skipna is None:
                    skipna = nanfunc is not None
                if skipna and nanfunc is None:
                    raise ValueError(f"skipna is not available for {func.__name__}")
                _npfunc = nanfunc if skipna else npfunc
                # extract the extra keyword arguments from kwargs so that only groups remain in it
                _extra_kwargs = {k: kwargs.pop(k, _kwarg_agg[k]['value']) for k in extra_kwargs}
                return self._aggregate(_npfunc, args, kwargs, by_agg=by_agg, keepaxes=keepaxes,
                                       commutative=commutative, out=out, extra_kwargs=_extra_kwargs)
            return wrapper
        return decorated

[docs]    @_decorate_agg_method(np.all, commutative=True, long_name="AND reduction")
    def all(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Test whether all selected elements evaluate to True.

        {parameters}

        Returns
        -------
        Array of bool or bool

        See Also
        --------
        Array.all_by, Array.any, Array.any_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> barr = arr < 6
        >>> barr
        a\b     b0     b1     b2     b3
         a0   True   True   True   True
         a1   True   True  False  False
         a2  False  False  False  False
         a3  False  False  False  False
        >>> barr.all()
        False
        >>> # along axis 'a'
        >>> barr.all('a')
        b     b0     b1     b2     b3
           False  False  False  False
        >>> # along axis 'b'
        >>> barr.all('b')
        a    a0     a1     a2     a3
           True  False  False  False

        Select some rows only

        >>> barr.all(['a0', 'a1'])
        b    b0    b1     b2     b3
           True  True  False  False
        >>> # or equivalently
        >>> # barr.all('a0,a1')

        Split an axis in several parts

        >>> barr.all((['a0', 'a1'], ['a2', 'a3']))
          a\b     b0     b1     b2     b3
        a0,a1   True   True  False  False
        a2,a3  False  False  False  False
        >>> # or equivalently
        >>> # barr.all('a0,a1;a2,a3')

        Same with renaming

        >>> barr.all((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b     b0     b1     b2     b3
        a01   True   True  False  False
        a23  False  False  False  False
        >>> # or equivalently
        >>> # barr.all('a0,a1>>a01;a2,a3>>a23')
        """
        # Intentionally empty: the wrapper built by _decorate_agg_method never calls this function. It only
        # reuses its name and docstring, and forwards the call to self._aggregate.
        pass

[docs]    @_decorate_agg_method(np.all, commutative=True, by_agg=True, long_name="AND reduction")
    def all_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Test whether all selected elements evaluate to True.

        {parameters}

        Returns
        -------
        Array of bool or bool

        See Also
        --------
        Array.all, Array.any, Array.any_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> barr = arr < 6
        >>> barr
        a\b     b0     b1     b2     b3
         a0   True   True   True   True
         a1   True   True  False  False
         a2  False  False  False  False
         a3  False  False  False  False
        >>> barr.all_by()
        False
        >>> # by axis 'a'
        >>> barr.all_by('a')
        a    a0     a1     a2     a3
           True  False  False  False
        >>> # by axis 'b'
        >>> barr.all_by('b')
        b     b0     b1     b2     b3
           False  False  False  False

        Select some rows only

        >>> barr.all_by(['a0', 'a1'])
        False
        >>> # or equivalently
        >>> # barr.all_by('a0,a1')

        Split an axis in several parts

        >>> barr.all_by((['a0', 'a1'], ['a2', 'a3']))
        a  a0,a1  a2,a3
           False  False
        >>> # or equivalently
        >>> # barr.all_by('a0,a1;a2,a3')

        Same with renaming

        >>> barr.all_by((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a    a01    a23
           False  False
        >>> # or equivalently
        >>> # barr.all_by('a0,a1>>a01;a2,a3>>a23')
        """
        # Intentionally empty: the wrapper built by _decorate_agg_method never calls this function. It only
        # reuses its name and docstring, and forwards the call to self._aggregate.
        pass

[docs]    @_decorate_agg_method(np.any, commutative=True, long_name="OR reduction")
    def any(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Test whether any selected elements evaluate to True.

        {parameters}

        Returns
        -------
        Array of bool or bool

        See Also
        --------
        Array.any_by, Array.all, Array.all_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> barr = arr < 6
        >>> barr
        a\b     b0     b1     b2     b3
         a0   True   True   True   True
         a1   True   True  False  False
         a2  False  False  False  False
         a3  False  False  False  False
        >>> barr.any()
        True
        >>> # along axis 'a'
        >>> barr.any('a')
        b    b0    b1    b2    b3
           True  True  True  True
        >>> # along axis 'b'
        >>> barr.any('b')
        a    a0    a1     a2     a3
           True  True  False  False

        Select some rows only

        >>> barr.any(['a0', 'a1'])
        b    b0    b1    b2    b3
           True  True  True  True
        >>> # or equivalently
        >>> # barr.any('a0,a1')

        Split an axis in several parts

        >>> barr.any((['a0', 'a1'], ['a2', 'a3']))
          a\b     b0     b1     b2     b3
        a0,a1   True   True   True   True
        a2,a3  False  False  False  False
        >>> # or equivalently
        >>> # barr.any('a0,a1;a2,a3')

        Same with renaming

        >>> barr.any((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b     b0     b1     b2     b3
        a01   True   True   True   True
        a23  False  False  False  False
        >>> # or equivalently
        >>> # barr.any('a0,a1>>a01;a2,a3>>a23')
        """
        # Intentionally empty: the wrapper built by _decorate_agg_method never calls this function. It only
        # reuses its name and docstring, and forwards the call to self._aggregate.
        pass

[docs]    @_decorate_agg_method(np.any, commutative=True, by_agg=True, long_name="OR reduction")
    def any_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Test whether any selected elements evaluate to True.

        {parameters}

        Returns
        -------
        Array of bool or bool

        See Also
        --------
        Array.any, Array.all, Array.all_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> barr = arr < 6
        >>> barr
        a\b     b0     b1     b2     b3
         a0   True   True   True   True
         a1   True   True  False  False
         a2  False  False  False  False
         a3  False  False  False  False
        >>> barr.any_by()
        True
        >>> # by axis 'a'
        >>> barr.any_by('a')
        a    a0    a1     a2     a3
           True  True  False  False
        >>> # by axis 'b'
        >>> barr.any_by('b')
        b    b0    b1    b2    b3
           True  True  True  True

        Select some rows only

        >>> barr.any_by(['a0', 'a1'])
        True
        >>> # or equivalently
        >>> # barr.any_by('a0,a1')

        Split an axis in several parts

        >>> barr.any_by((['a0', 'a1'], ['a2', 'a3']))
        a  a0,a1  a2,a3
            True  False
        >>> # or equivalently
        >>> # barr.any_by('a0,a1;a2,a3')

        Same with renaming

        >>> barr.any_by((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a   a01    a23
           True  False
        >>> # or equivalently
        >>> # barr.any_by('a0,a1>>a01;a2,a3>>a23')
        """
        # Intentionally empty: the wrapper built by _decorate_agg_method never calls this function. It only
        # reuses its name and docstring, and forwards the call to self._aggregate.
        pass

    # commutative modulo float precision errors

[docs]    @_decorate_agg_method(np.sum, np.nansum, commutative=True, extra_kwargs=['dtype'])
    def sum(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the sum of array elements along given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.sum_by, Array.prod, Array.prod_by,
        Array.cumsum, Array.cumprod

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.sum()
        120
        >>> # along axis 'a'
        >>> arr.sum('a')
        b  b0  b1  b2  b3
           24  28  32  36
        >>> # along axis 'b'
        >>> arr.sum('b')
        a  a0  a1  a2  a3
            6  22  38  54

        Select some rows only

        >>> arr.sum(['a0', 'a1'])
        b  b0  b1  b2  b3
            4   6   8  10
        >>> # or equivalently
        >>> # arr.sum('a0,a1')

        Split an axis in several parts

        >>> arr.sum((['a0', 'a1'], ['a2', 'a3']))
          a\b  b0  b1  b2  b3
        a0,a1   4   6   8  10
        a2,a3  20  22  24  26
        >>> # or equivalently
        >>> # arr.sum('a0,a1;a2,a3')

        Same with renaming

        >>> arr.sum((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b  b0  b1  b2  b3
        a01   4   6   8  10
        a23  20  22  24  26
        >>> # or equivalently
        >>> # arr.sum('a0,a1>>a01;a2,a3>>a23')
        """
        # Intentionally empty: the wrapper built by _decorate_agg_method never calls this function. It only
        # reuses its name and docstring, and forwards the call to self._aggregate.
        pass

[docs]    @_decorate_agg_method(np.sum, np.nansum, commutative=True, by_agg=True, extra_kwargs=['dtype'], long_name="sum")
    def sum_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the sum of array elements for the given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.sum, Array.prod, Array.prod_by,
        Array.cumsum, Array.cumprod

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.sum_by()
        120
        >>> # by axis 'a'
        >>> arr.sum_by('a')
        a  a0  a1  a2  a3
            6  22  38  54
        >>> # by axis 'b'
        >>> arr.sum_by('b')
        b  b0  b1  b2  b3
           24  28  32  36

        Select some rows only

        >>> arr.sum_by(['a0', 'a1'])
        28
        >>> # or equivalently
        >>> # arr.sum_by('a0,a1')

        Split an axis in several parts

        >>> arr.sum_by((['a0', 'a1'], ['a2', 'a3']))
        a  a0,a1  a2,a3
              28     92
        >>> # or equivalently
        >>> # arr.sum_by('a0,a1;a2,a3')

        Same with renaming

        >>> arr.sum_by((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a  a01  a23
            28   92
        >>> # or equivalently
        >>> # arr.sum_by('a0,a1>>a01;a2,a3>>a23')
        """
        # Intentionally empty: the wrapper built by _decorate_agg_method never calls this function. It only
        # reuses its name and docstring, and forwards the call to self._aggregate.
        pass

    # nanprod needs numpy 1.10
[docs]    @_decorate_agg_method(np.prod, np_nanprod, commutative=True, extra_kwargs=['dtype'], long_name="product")
    def prod(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the product of array elements along given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.prod_by, Array.sum, Array.sum_by,
        Array.cumsum, Array.cumprod

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.prod()
        0
        >>> # along axis 'a'
        >>> arr.prod('a')
        b  b0   b1    b2    b3
            0  585  1680  3465
        >>> # along axis 'b'
        >>> arr.prod('b')
        a  a0   a1    a2     a3
            0  840  7920  32760

        Select some rows only

        >>> arr.prod(['a0', 'a1'])
        b  b0  b1  b2  b3
            0   5  12  21
        >>> # or equivalently
        >>> # arr.prod('a0,a1')

        Split an axis in several parts

        >>> arr.prod((['a0', 'a1'], ['a2', 'a3']))
          a\b  b0   b1   b2   b3
        a0,a1   0    5   12   21
        a2,a3  96  117  140  165
        >>> # or equivalently
        >>> # arr.prod('a0,a1;a2,a3')

        Same with renaming

        >>> arr.prod((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b  b0   b1   b2   b3
        a01   0    5   12   21
        a23  96  117  140  165
        >>> # or equivalently
        >>> # arr.prod('a0,a1>>a01;a2,a3>>a23')
        """
        # Intentionally empty: the wrapper built by _decorate_agg_method never calls this function. It only
        # reuses its name and docstring, and forwards the call to self._aggregate.
        pass

[docs]    @_decorate_agg_method(np.prod, np_nanprod, commutative=True, by_agg=True, extra_kwargs=['dtype'],
                          long_name="product")
    def prod_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the product of array elements for the given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.prod, Array.sum, Array.sum_by,
        Array.cumsum, Array.cumprod

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.prod_by()
        0
        >>> # by axis 'a'
        >>> arr.prod_by('a')
        a  a0   a1    a2     a3
            0  840  7920  32760
        >>> # by axis 'b'
        >>> arr.prod_by('b')
        b  b0   b1    b2    b3
            0  585  1680  3465

        Select some rows only

        >>> arr.prod_by(['a0', 'a1'])
        0
        >>> # or equivalently
        >>> # arr.prod_by('a0,a1')

        Split an axis in several parts

        >>> arr.prod_by((['a0', 'a1'], ['a2', 'a3']))
        a  a0,a1      a2,a3
               0  259459200
        >>> # or equivalently
        >>> # arr.prod_by('a0,a1;a2,a3')

        Same with renaming

        >>> arr.prod_by((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a  a01        a23
             0  259459200
        >>> # or equivalently
        >>> # arr.prod_by('a0,a1>>a01;a2,a3>>a23')
        """
        # Intentionally empty: the wrapper built by _decorate_agg_method never calls this function. It only
        # reuses its name and docstring, and forwards the call to self._aggregate.
        pass

[docs]    @_decorate_agg_method(np.min, np.nanmin, commutative=True, long_name="minimum", action_verb="search")
    def min(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Get minimum of array elements along given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.min_by, Array.max, Array.max_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.min()
        0
        >>> # along axis 'a'
        >>> arr.min('a')
        b  b0  b1  b2  b3
            0   1   2   3
        >>> # along axis 'b'
        >>> arr.min('b')
        a  a0  a1  a2  a3
            0   4   8  12

        Select some rows only

        >>> arr.min(['a0', 'a1'])
        b  b0  b1  b2  b3
            0   1   2   3
        >>> # or equivalently
        >>> # arr.min('a0,a1')

        Split an axis in several parts

        >>> arr.min((['a0', 'a1'], ['a2', 'a3']))
          a\b  b0  b1  b2  b3
        a0,a1   0   1   2   3
        a2,a3   8   9  10  11
        >>> # or equivalently
        >>> # arr.min('a0,a1;a2,a3')

        Same with renaming

        >>> arr.min((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b  b0  b1  b2  b3
        a01   0   1   2   3
        a23   8   9  10  11
        >>> # or equivalently
        >>> # arr.min('a0,a1>>a01;a2,a3>>a23')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.min / np.nanmin) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.min, np.nanmin, commutative=True, by_agg=True, long_name="minimum", action_verb="search")
    def min_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Get minimum of array elements for the given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.min, Array.max, Array.max_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.min_by()
        0
        >>> # by axis 'a' (aggregating over all other axes)
        >>> arr.min_by('a')
        a  a0  a1  a2  a3
            0   4   8  12
        >>> # by axis 'b' (aggregating over all other axes)
        >>> arr.min_by('b')
        b  b0  b1  b2  b3
            0   1   2   3

        Select some rows only

        >>> arr.min_by(['a0', 'a1'])
        0
        >>> # or equivalently
        >>> # arr.min_by('a0,a1')

        Split an axis in several parts

        >>> arr.min_by((['a0', 'a1'], ['a2', 'a3']))
        a  a0,a1  a2,a3
               0      8
        >>> # or equivalently
        >>> # arr.min_by('a0,a1;a2,a3')

        Same with renaming

        >>> arr.min_by((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a  a01  a23
             0    8
        >>> # or equivalently
        >>> # arr.min_by('a0,a1>>a01;a2,a3>>a23')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.min / np.nanmin) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.max, np.nanmax, commutative=True, long_name="maximum", action_verb="search")
    def max(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Get maximum of array elements along given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.max_by, Array.min, Array.min_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.max()
        15
        >>> # along axis 'a'
        >>> arr.max('a')
        b  b0  b1  b2  b3
           12  13  14  15
        >>> # along axis 'b'
        >>> arr.max('b')
        a  a0  a1  a2  a3
            3   7  11  15

        Select some rows only

        >>> arr.max(['a0', 'a1'])
        b  b0  b1  b2  b3
            4   5   6   7
        >>> # or equivalently
        >>> # arr.max('a0,a1')

        Split an axis in several parts

        >>> arr.max((['a0', 'a1'], ['a2', 'a3']))
          a\b  b0  b1  b2  b3
        a0,a1   4   5   6   7
        a2,a3  12  13  14  15
        >>> # or equivalently
        >>> # arr.max('a0,a1;a2,a3')

        Same with renaming

        >>> arr.max((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b  b0  b1  b2  b3
        a01   4   5   6   7
        a23  12  13  14  15
        >>> # or equivalently
        >>> # arr.max('a0,a1>>a01;a2,a3>>a23')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.max / np.nanmax) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.max, np.nanmax, commutative=True, by_agg=True, long_name="maximum", action_verb="search")
    def max_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Get maximum of array elements for the given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.max, Array.min, Array.min_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.max_by()
        15
        >>> # by axis 'a' (aggregating over all other axes)
        >>> arr.max_by('a')
        a  a0  a1  a2  a3
            3   7  11  15
        >>> # by axis 'b' (aggregating over all other axes)
        >>> arr.max_by('b')
        b  b0  b1  b2  b3
           12  13  14  15

        Select some rows only

        >>> arr.max_by(['a0', 'a1'])
        7
        >>> # or equivalently
        >>> # arr.max_by('a0,a1')

        Split an axis in several parts

        >>> arr.max_by((['a0', 'a1'], ['a2', 'a3']))
        a  a0,a1  a2,a3
               7     15
        >>> # or equivalently
        >>> # arr.max_by('a0,a1;a2,a3')

        Same with renaming

        >>> arr.max_by((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a  a01  a23
             7   15
        >>> # or equivalently
        >>> # arr.max_by('a0,a1>>a01;a2,a3>>a23')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.max / np.nanmax) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.mean, np.nanmean, commutative=True, extra_kwargs=['dtype'])
    def mean(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the arithmetic mean of array elements along given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.mean_by, Array.median, Array.median_by,
        Array.var, Array.var_by, Array.std, Array.std_by,
        Array.percentile, Array.percentile_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.mean()
        7.5
        >>> # along axis 'a'
        >>> arr.mean('a')
        b   b0   b1   b2   b3
           6.0  7.0  8.0  9.0
        >>> # along axis 'b'
        >>> arr.mean('b')
        a   a0   a1   a2    a3
           1.5  5.5  9.5  13.5

        Select some rows only

        >>> arr.mean(['a0', 'a1'])
        b   b0   b1   b2   b3
           2.0  3.0  4.0  5.0
        >>> # or equivalently
        >>> # arr.mean('a0,a1')

        Split an axis in several parts

        >>> arr.mean((['a0', 'a1'], ['a2', 'a3']))
          a\b    b0    b1    b2    b3
        a0,a1   2.0   3.0   4.0   5.0
        a2,a3  10.0  11.0  12.0  13.0
        >>> # or equivalently
        >>> # arr.mean('a0,a1;a2,a3')

        Same with renaming

        >>> arr.mean((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b    b0    b1    b2    b3
        a01   2.0   3.0   4.0   5.0
        a23  10.0  11.0  12.0  13.0
        >>> # or equivalently
        >>> # arr.mean('a0,a1>>a01;a2,a3>>a23')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.mean / np.nanmean) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.mean, np.nanmean, commutative=True, by_agg=True, extra_kwargs=['dtype'], long_name="mean")
    def mean_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the arithmetic mean of array elements for the given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.mean, Array.median, Array.median_by,
        Array.var, Array.var_by, Array.std, Array.std_by,
        Array.percentile, Array.percentile_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.mean_by()
        7.5
        >>> # by axis 'a' (aggregating over all other axes)
        >>> arr.mean_by('a')
        a   a0   a1   a2    a3
           1.5  5.5  9.5  13.5
        >>> # by axis 'b' (aggregating over all other axes)
        >>> arr.mean_by('b')
        b   b0   b1   b2   b3
           6.0  7.0  8.0  9.0

        Select some rows only

        >>> arr.mean_by(['a0', 'a1'])
        3.5
        >>> # or equivalently
        >>> # arr.mean_by('a0,a1')

        Split an axis in several parts

        >>> arr.mean_by((['a0', 'a1'], ['a2', 'a3']))
        a  a0,a1  a2,a3
             3.5   11.5
        >>> # or equivalently
        >>> # arr.mean_by('a0,a1;a2,a3')

        Same with renaming

        >>> arr.mean_by((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a  a01   a23
           3.5  11.5
        >>> # or equivalently
        >>> # arr.mean_by('a0,a1>>a01;a2,a3>>a23')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.mean / np.nanmean) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.median, np.nanmedian, commutative=True)
    def median(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the median of array elements along given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.median_by, Array.mean, Array.mean_by,
        Array.var, Array.var_by, Array.std, Array.std_by,
        Array.percentile, Array.percentile_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr[:,:] = [[10, 7, 5, 9],
        ...             [5, 8, 3, 7],
        ...             [6, 2, 0, 9],
        ...             [9, 10, 5, 6]]
        >>> arr
        a\b  b0  b1  b2  b3
         a0  10   7   5   9
         a1   5   8   3   7
         a2   6   2   0   9
         a3   9  10   5   6
        >>> arr.median()
        6.5
        >>> # along axis 'a'
        >>> arr.median('a')
        b   b0   b1   b2   b3
           7.5  7.5  4.0  8.0
        >>> # along axis 'b'
        >>> arr.median('b')
        a   a0   a1   a2   a3
           8.0  6.0  4.0  7.5

        Select some rows only

        >>> arr.median(['a0', 'a1'])
        b   b0   b1   b2   b3
           7.5  7.5  4.0  8.0
        >>> # or equivalently
        >>> # arr.median('a0,a1')

        Split an axis in several parts

        >>> arr.median((['a0', 'a1'], ['a2', 'a3']))
          a\b   b0   b1   b2   b3
        a0,a1  7.5  7.5  4.0  8.0
        a2,a3  7.5  6.0  2.5  7.5
        >>> # or equivalently
        >>> # arr.median('a0,a1;a2,a3')

        Same with renaming

        >>> arr.median((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b   b0   b1   b2   b3
        a01  7.5  7.5  4.0  8.0
        a23  7.5  6.0  2.5  7.5
        >>> # or equivalently
        >>> # arr.median('a0,a1>>a01;a2,a3>>a23')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.median / np.nanmedian) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.median, np.nanmedian, commutative=True, by_agg=True, long_name="mediane")
    def median_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the arithmetic median.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.median, Array.mean, Array.mean_by,
        Array.var, Array.var_by, Array.std, Array.std_by,
        Array.percentile, Array.percentile_by

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr[:,:] = [[10, 7, 5, 9],
        ...             [5, 8, 3, 7],
        ...             [6, 2, 0, 9],
        ...             [9, 10, 5, 6]]
        >>> arr
        a\b  b0  b1  b2  b3
         a0  10   7   5   9
         a1   5   8   3   7
         a2   6   2   0   9
         a3   9  10   5   6
        >>> arr.median_by()
        6.5
        >>> # along axis 'a'
        >>> arr.median_by('a')
        a   a0   a1   a2   a3
           8.0  6.0  4.0  7.5
        >>> # along axis 'b'
        >>> arr.median_by('b')
        b   b0   b1   b2   b3
           7.5  7.5  4.0  8.0

        Select some rows only

        >>> arr.median_by(['a0', 'a1'])
        7.0
        >>> # or equivalently
        >>> # arr.median_by('a0,a1')

        Split an axis in several parts

        >>> arr.median_by((['a0', 'a1'], ['a2', 'a3']))
        a  a0,a1  a2,a3
             7.0   5.75
        >>> # or equivalently
        >>> # arr.median_by('a0,a1;a2,a3')

        Same with renaming

        >>> arr.median_by((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a  a01   a23
           7.0  5.75
        >>> # or equivalently
        >>> # arr.median_by('a0,a1>>a01;a2,a3>>a23')
        """
        pass

    # XXX: for performance reasons, we should use the fact that the underlying numpy function handles multiple
    #      percentiles in one call. This is easy to implement in _axis_aggregate() but not in _group_aggregate()
    #      since in this case np.percentile() may be called several times.
    # percentile needs an explicit method because it does not have the same
    # signature as the other aggregate functions (extra argument)
[docs]    @deprecate_kwarg('interpolation', 'method')
    def percentile(self, q, *args,
                   out=_kwarg_agg['out']['value'],
                   method=_kwarg_agg['method']['value'],
                   skipna=_kwarg_agg['skipna']['value'],
                   keepaxes=_kwarg_agg['keepaxes']['value'],
                   **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the qth percentile of the data along given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.percentile_by, Array.mean, Array.mean_by,
        Array.median, Array.median_by, Array.var, Array.var_by,
        Array.std, Array.std_by

        {percentile_notes}

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.percentile(25)
        3.75
        >>> # along axis 'a'
        >>> arr.percentile(25, 'a')
        b   b0   b1   b2   b3
           3.0  4.0  5.0  6.0
        >>> # along axis 'b'
        >>> arr.percentile(25, 'b')
        a    a0    a1    a2     a3
           0.75  4.75  8.75  12.75
        >>> # several percentile values
        >>> arr.percentile([25, 50, 75], 'b')
        percentile\a    a0    a1     a2     a3
                  25  0.75  4.75   8.75  12.75
                  50   1.5   5.5    9.5   13.5
                  75  2.25  6.25  10.25  14.25

        Select some rows only

        >>> arr.percentile(25, ['a0', 'a1'])
        b   b0   b1   b2   b3
           1.0  2.0  3.0  4.0
        >>> # or equivalently
        >>> # arr.percentile(25, 'a0,a1')

        Split an axis in several parts

        >>> arr.percentile(25, (['a0', 'a1'], ['a2', 'a3']))
          a\b   b0    b1    b2    b3
        a0,a1  1.0   2.0   3.0   4.0
        a2,a3  9.0  10.0  11.0  12.0
        >>> # or equivalently
        >>> # arr.percentile(25, 'a0,a1;a2,a3')

        Same with renaming

        >>> arr.percentile(25, (X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b   b0    b1    b2    b3
        a01  1.0   2.0   3.0   4.0
        a23  9.0  10.0  11.0  12.0
        >>> # or equivalently
        >>> # arr.percentile(25, 'a0,a1>>a01;a2,a3>>a23')

        References
        ----------
        .. [1] R. J. Hyndman and Y. Fan,
           "Sample quantiles in statistical packages,"
           The American Statistician, 50(4), pp. 361-365, 1996
        """
        # NOTE(review): `out` is accepted but never forwarded to `_aggregate` in this method -- confirm whether
        # that is intended.

        # skipna=None falls back to ignoring NaNs
        ignore_nans = True if skipna is None else skipna
        npfunc = np.nanpercentile if ignore_nans else np.percentile

        # `method` is only forwarded to the numpy function when it differs from 'linear'
        method_kwargs = {'method': method} if method != 'linear' else {}

        def percentile_for(value):
            # a fresh extra_kwargs dict is built for every call
            return self._aggregate(npfunc, args, kwargs, keepaxes=keepaxes, commutative=True,
                                   extra_kwargs={'q': value, **method_kwargs})

        if not isinstance(q, (list, tuple)):
            return percentile_for(q)

        # several percentiles requested: one aggregate per value, combined along a 'percentile' axis
        per_value = {}
        for value in q:
            per_value[value] = percentile_for(value)
        return stack(per_value, 'percentile').transpose()

    # NOTE(review): `_doc_agg_method` is defined elsewhere in this file; it presumably fills in the placeholders
    # of the `percentile` docstring (signature, parameters, ...) -- confirm against its definition.
    _doc_agg_method(percentile, False, "qth percentile", extra_args=['q'],
                    kwargs=['out', 'method', 'skipna', 'keepaxes'])

[docs]    @deprecate_kwarg('interpolation', 'method')
    def percentile_by(self, q, *args,
                      out=_kwarg_agg['out']['value'],
                      method=_kwarg_agg['method']['value'],
                      skipna=_kwarg_agg['skipna']['value'],
                      keepaxes=_kwarg_agg['keepaxes']['value'],
                      **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the qth percentile of the data for the given axes/groups.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.percentile, Array.mean, Array.mean_by,
        Array.median, Array.median_by, Array.var, Array.var_by,
        Array.std, Array.std_by

        {percentile_notes}

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.percentile_by(25)
        3.75
        >>> # by axis 'a' (aggregating over all other axes)
        >>> arr.percentile_by(25, 'a')
        a    a0    a1    a2     a3
           0.75  4.75  8.75  12.75
        >>> # by axis 'b' (aggregating over all other axes)
        >>> arr.percentile_by(25, 'b')
        b   b0   b1   b2   b3
           3.0  4.0  5.0  6.0
        >>> # several percentile values
        >>> arr.percentile_by([25, 50, 75], 'b')
        percentile\b   b0    b1    b2    b3
                  25  3.0   4.0   5.0   6.0
                  50  6.0   7.0   8.0   9.0
                  75  9.0  10.0  11.0  12.0

        Select some rows only

        >>> arr.percentile_by(25, ['a0', 'a1'])
        1.75
        >>> # or equivalently
        >>> # arr.percentile_by(25, 'a0,a1')

        Split an axis in several parts

        >>> arr.percentile_by(25, (['a0', 'a1'], ['a2', 'a3']))
        a  a0,a1  a2,a3
            1.75   9.75
        >>> # or equivalently
        >>> # arr.percentile_by(25, 'a0,a1;a2,a3')

        Same with renaming

        >>> arr.percentile_by(25, (X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a   a01   a23
           1.75  9.75
        >>> # or equivalently
        >>> # arr.percentile_by(25, 'a0,a1>>a01;a2,a3>>a23')

        References
        ----------
        .. [1] R. J. Hyndman and Y. Fan,
           "Sample quantiles in statistical packages,"
           The American Statistician, 50(4), pp. 361-365, 1996
        """
        # NOTE(review): `out` is accepted but never forwarded to `_aggregate` in this method -- confirm whether
        # that is intended.

        # an unspecified skipna (None) means NaNs are ignored
        if skipna is None or skipna:
            reducer = np.nanpercentile
        else:
            reducer = np.percentile

        def one_percentile(level):
            # 'linear' is never passed explicitly to the numpy function; any other method is
            np_kwargs = {'q': level}
            if method != 'linear':
                np_kwargs['method'] = method
            return self._aggregate(reducer, args, kwargs, by_agg=True, keepaxes=keepaxes, commutative=True,
                                   extra_kwargs=np_kwargs)

        if not isinstance(q, (list, tuple)):
            return one_percentile(q)

        # several percentiles requested: compute each one separately and combine them along a 'percentile' axis
        stacked = stack({level: one_percentile(level) for level in q}, 'percentile')
        return stacked.transpose()

    # NOTE(review): `_doc_agg_method` is defined elsewhere in this file; it presumably fills in the placeholders
    # of the `percentile_by` docstring (signature, parameters, ...) -- confirm against its definition.
    _doc_agg_method(percentile_by, True, "qth percentile", extra_args=['q'],
                    kwargs=['out', 'method', 'skipna', 'keepaxes'])

    # not commutative

[docs]    def ptp(self, *args, out=_kwarg_agg['out']['value'], **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Return the range of values (maximum - minimum) along given axes/groups.

        The name of the function comes from the acronym for `peak to peak`.

        {parameters}

        Returns
        -------
        Array or scalar

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.ptp()
        15
        >>> # along axis 'a'
        >>> arr.ptp('a')
        b  b0  b1  b2  b3
           12  12  12  12
        >>> # along axis 'b'
        >>> arr.ptp('b')
        a  a0  a1  a2  a3
            3   3   3   3

        Select some rows only

        >>> arr.ptp(['a0', 'a1'])
        b  b0  b1  b2  b3
            4   4   4   4
        >>> # or equivalently
        >>> # arr.ptp('a0,a1')

        Split an axis in several parts

        >>> arr.ptp((['a0', 'a1'], ['a2', 'a3']))
          a\b  b0  b1  b2  b3
        a0,a1   4   4   4   4
        a2,a3   4   4   4   4
        >>> # or equivalently
        >>> # arr.ptp('a0,a1;a2,a3')

        Same with renaming

        >>> arr.ptp((X.a['a0', 'a1'] >> 'a01', X.a['a2', 'a3'] >> 'a23'))
        a\b  b0  b1  b2  b3
        a01   4   4   4   4
        a23   4   4   4   4
        >>> # or equivalently
        >>> # arr.ptp('a0,a1>>a01;a2,a3>>a23')
        """
        # delegate to the generic aggregation method with np.ptp; `out` is forwarded unchanged and
        # `commutative` is left at its default
        return self._aggregate(np.ptp, args, kwargs, out=out)

    # NOTE(review): `_doc_agg_method` is defined elsewhere in this file; it presumably fills in the placeholders
    # of the `ptp` docstring (signature, parameters, ...) -- confirm against its definition.
    _doc_agg_method(ptp, by=False, kwargs=['out'])

[docs]    @_decorate_agg_method(np.var, np.nanvar, extra_kwargs=['dtype', 'ddof'], long_name="variance")
    def var(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the unbiased variance along given axes/groups.

        Normalized by N-1 by default. This can be changed using the ddof argument.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.var_by, Array.std, Array.std_by,
        Array.mean, Array.mean_by, Array.median, Array.median_by,
        Array.percentile, Array.percentile_by

        Examples
        --------
        >>> arr = ndtest((2, 8), dtype=float)
        >>> arr[:,:] = [[0, 3, 5, 6, 4, 2, 1, 3],
        ...             [7, 3, 2, 5, 8, 5, 6, 4]]
        >>> arr
        a\b   b0   b1   b2   b3   b4   b5   b6   b7
         a0  0.0  3.0  5.0  6.0  4.0  2.0  1.0  3.0
         a1  7.0  3.0  2.0  5.0  8.0  5.0  6.0  4.0
        >>> arr.var()
        4.7999999999999998
        >>> # along axis 'b'
        >>> arr.var('b')
        a   a0   a1
           4.0  4.0

        Select some columns only

        >>> arr.var(['b0', 'b1', 'b3'])
        a   a0   a1
           9.0  4.0
        >>> # or equivalently
        >>> # arr.var('b0,b1,b3')

        Split an axis in several parts

        >>> arr.var((['b0', 'b1', 'b3'], 'b5:'))
        a\b  b0,b1,b3  b5:
         a0       9.0  1.0
         a1       4.0  1.0
        >>> # or equivalently
        >>> # arr.var('b0,b1,b3;b5:')

        Same with renaming

        >>> arr.var((X.b['b0', 'b1', 'b3'] >> 'b013', X.b['b5:'] >> 'b567'))
        a\b  b013  b567
         a0   9.0   1.0
         a1   4.0   1.0
        >>> # or equivalently
        >>> # arr.var('b0,b1,b3>>b013;b5:>>b567')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.var / np.nanvar) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.var, np.nanvar, by_agg=True, extra_kwargs=['dtype', 'ddof'], long_name="variance")
    def var_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the unbiased variance for the given axes/groups.

        Normalized by N-1 by default. This can be changed using the ddof argument.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.var, Array.std, Array.std_by,
        Array.mean, Array.mean_by, Array.median, Array.median_by,
        Array.percentile, Array.percentile_by

        Examples
        --------
        >>> arr = ndtest((2, 8), dtype=float)
        >>> arr[:,:] = [[0, 3, 5, 6, 4, 2, 1, 3],
        ...             [7, 3, 2, 5, 8, 5, 6, 4]]
        >>> arr
        a\b   b0   b1   b2   b3   b4   b5   b6   b7
         a0  0.0  3.0  5.0  6.0  4.0  2.0  1.0  3.0
         a1  7.0  3.0  2.0  5.0  8.0  5.0  6.0  4.0
        >>> arr.var_by()
        4.7999999999999998
        >>> # by axis 'a' (aggregating over all other axes)
        >>> arr.var_by('a')
        a   a0   a1
           4.0  4.0

        Select some columns only

        >>> arr.var_by('a', ['b0','b1','b3'])
        a   a0   a1
           9.0  4.0
        >>> # or equivalently
        >>> # arr.var_by('a','b0,b1,b3')

        Split an axis in several parts

        >>> arr.var_by('a', (['b0', 'b1', 'b3'], 'b5:'))
        a\b  b0,b1,b3  b5:
         a0       9.0  1.0
         a1       4.0  1.0
        >>> # or equivalently
        >>> # arr.var_by('a','b0,b1,b3;b5:')

        Same with renaming

        >>> arr.var_by('a', (X.b['b0', 'b1', 'b3'] >> 'b013', X.b['b5:'] >> 'b567'))
        a\b  b013  b567
         a0   9.0   1.0
         a1   4.0   1.0
        >>> # or equivalently
        >>> # arr.var_by('a','b0,b1,b3>>b013;b5:>>b567')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.var / np.nanvar) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.std, np.nanstd, extra_kwargs=['dtype', 'ddof'], long_name="standard deviation")
    def std(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the sample standard deviation along given axes/groups.

        Normalized by N-1 by default. This can be changed using the ddof argument.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.std_by, Array.var, Array.var_by,
        Array.mean, Array.mean_by, Array.median, Array.median_by,
        Array.percentile, Array.percentile_by

        Examples
        --------
        >>> arr = ndtest((2, 8), dtype=float)
        >>> arr[:,:] = [[0, 3, 5, 6, 4, 2, 1, 3],
        ...             [7, 3, 2, 5, 8, 5, 6, 4]]
        >>> arr
        a\b   b0   b1   b2   b3   b4   b5   b6   b7
         a0  0.0  3.0  5.0  6.0  4.0  2.0  1.0  3.0
         a1  7.0  3.0  2.0  5.0  8.0  5.0  6.0  4.0
        >>> arr.std()
        2.1908902300206643
        >>> # along axis 'b'
        >>> arr.std('b')
        a   a0   a1
           2.0  2.0

        Select some columns only

        >>> arr.std(['b0', 'b1', 'b3'])
        a   a0   a1
           3.0  2.0
        >>> # or equivalently
        >>> # arr.std('b0,b1,b3')

        Split an axis in several parts

        >>> arr.std((['b0', 'b1', 'b3'], 'b5:'))
        a\b  b0,b1,b3  b5:
         a0       3.0  1.0
         a1       2.0  1.0
        >>> # or equivalently
        >>> # arr.std('b0,b1,b3;b5:')

        Same with renaming

        >>> arr.std((X.b['b0', 'b1', 'b3'] >> 'b013', X.b['b5:'] >> 'b567'))
        a\b  b013  b567
         a0   3.0   1.0
         a1   2.0   1.0
        >>> # or equivalently
        >>> # arr.std('b0,b1,b3>>b013;b5:>>b567')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.std / np.nanstd) -- confirm against its definition.
        pass

[docs]    @_decorate_agg_method(np.std, np.nanstd, by_agg=True, extra_kwargs=['dtype', 'ddof'],
                          long_name="standard deviation")
    def std_by(self, *args, **kwargs) -> Union['Array', Scalar]:
        r"""{signature}

        Compute the sample standard deviation for the given axes/groups.

        Normalized by N-1 by default. This can be changed using the ddof argument.

        {parameters}

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.std, Array.var, Array.var_by,
        Array.mean, Array.mean_by, Array.median, Array.median_by,
        Array.percentile, Array.percentile_by

        Examples
        --------
        >>> arr = ndtest((2, 8), dtype=float)
        >>> arr[:,:] = [[0, 3, 5, 6, 4, 2, 1, 3],
        ...             [7, 3, 2, 5, 8, 5, 6, 4]]
        >>> arr
        a\b   b0   b1   b2   b3   b4   b5   b6   b7
         a0  0.0  3.0  5.0  6.0  4.0  2.0  1.0  3.0
         a1  7.0  3.0  2.0  5.0  8.0  5.0  6.0  4.0
        >>> arr.std_by()
        2.1908902300206643
        >>> # by axis 'a' (aggregating over all other axes)
        >>> arr.std_by('a')
        a   a0   a1
           2.0  2.0

        Select some columns only

        >>> arr.std_by('a', ['b0','b1','b3'])
        a   a0   a1
           3.0  2.0
        >>> # or equivalently
        >>> # arr.std_by('a','b0,b1,b3')

        Split an axis in several parts

        >>> arr.std_by('a', (['b0', 'b1', 'b3'], 'b5:'))
        a\b  b0,b1,b3  b5:
         a0       3.0  1.0
         a1       2.0  1.0
        >>> # or equivalently
        >>> # arr.std_by('a','b0,b1,b3;b5:')

        Same with renaming

        >>> arr.std_by('a', (X.b['b0', 'b1', 'b3'] >> 'b013', X.b['b5:'] >> 'b567'))
        a\b  b013  b567
         a0   3.0   1.0
         a1   2.0   1.0
        >>> # or equivalently
        >>> # arr.std_by('a','b0,b1,b3>>b013;b5:>>b567')
        """
        # NOTE(review): the body is intentionally empty; the `_decorate_agg_method` decorator applied to this method
        # presumably supplies the implementation (from np.std / np.nanstd) -- confirm against its definition.
        pass

    # cumulative aggregates
[docs]    def cumsum(self, axis=-1) -> Union['Array', Scalar]:
        r"""
        Return the cumulative sum of array elements along an axis.

        Parameters
        ----------
        axis : int or str or Axis, optional
            Axis along which to perform the cumulative sum.
            If given as position, it can be a negative integer, in which case it counts from the last to the first axis.
            By default, the cumulative sum is performed along the last axis.

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.cumprod, Array.sum, Array.sum_by,
        Array.prod, Array.prod_by

        Notes
        -----
        Cumulative aggregation functions accept only one axis.

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.cumsum()
        a\b  b0  b1  b2  b3
         a0   0   1   3   6
         a1   4   9  15  22
         a2   8  17  27  38
         a3  12  25  39  54
        >>> arr.cumsum('a')
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   6   8  10
         a2  12  15  18  21
         a3  24  28  32  36
        """
        # delegate to the shared cumulative-aggregation helper with numpy's cumsum
        return self._cum_aggregate(np.cumsum, axis)

[docs]    def cumprod(self, axis=-1) -> Union['Array', Scalar]:
        r"""
        Return the cumulative product of array elements along an axis.

        Parameters
        ----------
        axis : int or str or Axis, optional
            Axis along which to perform the cumulative product.
            If given as position, it can be a negative integer, in which case it counts from the last to the first axis.
            By default, the cumulative product is performed along the last axis.

        Returns
        -------
        Array or scalar

        See Also
        --------
        Array.cumsum, Array.sum, Array.sum_by,
        Array.prod, Array.prod_by

        Notes
        -----
        Cumulative aggregation functions accept only one axis.

        Examples
        --------
        >>> arr = ndtest((4, 4))
        >>> arr
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
         a2   8   9  10  11
         a3  12  13  14  15
        >>> arr.cumprod()
        a\b  b0   b1    b2     b3
         a0   0    0     0      0
         a1   4   20   120    840
         a2   8   72   720   7920
         a3  12  156  2184  32760
        >>> arr.cumprod('a')
        a\b  b0   b1    b2    b3
         a0   0    1     2     3
         a1   0    5    12    21
         a2   0   45   120   231
         a3   0  585  1680  3465
        """
        # delegate to the shared cumulative-aggregation helper with numpy's cumprod
        return self._cum_aggregate(np.cumprod, axis)

    # element-wise method factory
    def _binop(opname):
        fullname = f'__{opname}__'
        super_method = getattr(np.ndarray, fullname)

        def opmethod(self, other) -> 'Array':
            if isinstance(other, ExprNode):
                other = other.evaluate(self.axes)

            # XXX: unsure what happens for non scalar Groups.
            #      we might want to be more general than this and .eval all Groups?
            #      or (and I think it's better) define __larray__ on Group
            #      so that a non scalar Group acts like an Axis in this situation.
            if isinstance(other, Group) and np.isscalar(other.key):
                other = other.eval()

            # we could pass scalars through asarray too but it is too costly performance-wise for only suppressing one
            # isscalar test and an if statement.
            # TODO: ndarray should probably be converted to larrays too because that would harmonize broadcasting rules,
            #       but it makes some tests fail for some reason.
            if isinstance(other, (list, Axis)):
                other = asarray(other)

            if isinstance(other, Array):
                # TODO: first test if it is not already broadcastable
                if self.axes == other.axes:
                    self_data = self.data
                    other_data = other.data
                    res_axes = self.axes
                else:
                    (self_data, other_data), res_axes = raw_broadcastable((self, other))
            # We need to check for None explicitly because we consider None as a valid scalar, while numpy does not.
            # i.e. we consider "arr == None" as valid code
            elif isinstance(other, np.ndarray) or np.isscalar(other) or other is None:
                self_data, other_data = self.data, other
                res_axes = self.axes
            else:
                return NotImplemented
            return Array(super_method(self_data, other_data), res_axes)
        opmethod.__name__ = fullname
        return opmethod

    # Install the element-wise binary operators. Each one is generated by _binop from the numpy ndarray special
    # method of the same name.
    # comparison operators
    __lt__ = _binop('lt')
    __le__ = _binop('le')
    __eq__ = _binop('eq')
    __ne__ = _binop('ne')
    __gt__ = _binop('gt')
    __ge__ = _binop('ge')
    # arithmetic operators (and their reflected variants)
    __add__ = _binop('add')
    __radd__ = _binop('radd')
    __sub__ = _binop('sub')
    __rsub__ = _binop('rsub')
    __mul__ = _binop('mul')
    __rmul__ = _binop('rmul')
    # div and rdiv are no longer used on Python3+
    __truediv__ = _binop('truediv')
    __rtruediv__ = _binop('rtruediv')
    __floordiv__ = _binop('floordiv')
    __rfloordiv__ = _binop('rfloordiv')
    __mod__ = _binop('mod')
    __rmod__ = _binop('rmod')
    # NOTE(review): np.ndarray.__divmod__ returns a (quotient, remainder) tuple while the method generated by _binop
    #               wraps the result in a single Array -- confirm divmod() on arrays behaves as intended.
    __divmod__ = _binop('divmod')
    __rdivmod__ = _binop('rdivmod')
    __pow__ = _binop('pow')
    __rpow__ = _binop('rpow')
    # bitwise / logical operators (and their reflected variants)
    __lshift__ = _binop('lshift')
    __rlshift__ = _binop('rlshift')
    __rshift__ = _binop('rshift')
    __rrshift__ = _binop('rrshift')
    __and__ = _binop('and')
    __rand__ = _binop('rand')
    __xor__ = _binop('xor')
    __rxor__ = _binop('rxor')
    __or__ = _binop('or')
    __ror__ = _binop('ror')

    def __matmul__(self, other) -> 'Array':
        r"""
        Implement the @ operator (matrix multiplication).

        A numpy ndarray operand is first wrapped in an Array. Any other non-Array operand is rejected with a
        NotImplementedError.

        Notes
        -----
        Only available with Python >= 3.5

        Examples
        --------
        >>> arr1d = ndtest(3)
        >>> arr1d
        a  a0  a1  a2
            0   1   2
        >>> arr2d = ndtest((3, 3))
        >>> arr2d
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
         a2   6   7   8
        >>> arr1d @ arr1d # doctest: +SKIP
        5
        >>> arr1d @ arr2d # doctest: +SKIP
        b  b0  b1  b2
           15  18  21
        >>> arr2d @ arr1d # doctest: +SKIP
        a  a0  a1  a2
            5  14  23
        >>> arr3d = ndtest('c=c0..c2;d=d0..d2;e=e0..e2')
        >>> arr1d @ arr3d # doctest: +SKIP
        c\e  e0  e1  e2
         c0  15  18  21
         c1  42  45  48
         c2  69  72  75
        >>> arr3d @ arr1d # doctest: +SKIP
        c\d  d0  d1  d2
         c0   5  14  23
         c1  32  41  50
         c2  59  68  77
        >>> arr3d @ arr3d # doctest: +SKIP
         c  d\e    e0    e1    e2
        c0   d0    15    18    21
        c0   d1    42    54    66
        c0   d2    69    90   111
        c1   d0   366   396   426
        c1   d1   474   513   552
        c1   d2   582   630   678
        c2   d0  1203  1260  1317
        c2   d1  1392  1458  1524
        c2   d2  1581  1656  1731
        """
        left = self[:]
        left_axes = self.axes
        if isinstance(other, np.ndarray):
            other = Array(other)
        elif not isinstance(other, Array):
            raise NotImplementedError(f"matrix multiplication not implemented for {type(other)}")
        right_axes = other.axes

        # all axes except the last two of each operand index the "stack" of matrices
        stack_axes = left_axes[:-2] + right_axes[:-2]
        if self.ndim > 2 and other.ndim > 2:
            # when both operands are stacks, give them the same leading axes in the same order
            left = left.expand(stack_axes).transpose(stack_axes)
            other = other.expand(stack_axes).transpose(stack_axes)

        # XXX : The numpy matmul documentation describes its behavior as follows:
        # * If both arguments are 2-D they are multiplied like conventional matrices.
        # * If either argument is N-D, N > 2, it is treated as a stack of matrices residing in the last two indexes
        #   and broadcast accordingly.
        # * If the first argument is 1-D, it is promoted to a matrix by prepending a 1 to its dimensions. After matrix
        #   multiplication the prepended 1 is removed.
        # * If the second argument is 1-D, it is promoted to a matrix by appending a 1 to its dimensions. After matrix
        #   multiplication the appended 1 is removed.
        product_data = left.data.__matmul__(other.data)

        # result axes: stack axes, then the "row" axis of self and the "column" axis of other (when they exist)
        result_axes = list(stack_axes)
        if self.ndim > 1:
            result_axes.append(left_axes[-2])
        if other.ndim > 1:
            result_axes.append(right_axes[-1].copy())
        if not result_axes:
            # 1-D @ 1-D: no axis is left, the raw numpy result is returned as is
            return product_data
        return Array(product_data, result_axes)

    def __rmatmul__(self, other) -> 'Array':
        """Implement the reflected @ operator (``other @ self``) by delegating to ``other.__matmul__(self)``.

        A numpy ndarray `other` is wrapped in an Array first. Any other non-Array operand raises a
        NotImplementedError.
        """
        if isinstance(other, np.ndarray):
            other = Array(other)
        if isinstance(other, Array):
            return other.__matmul__(self)
        raise NotImplementedError(f"matrix multiplication not implemented for {type(other)}")

    # element-wise method factory
    def _unaryop(opname):
        fullname = f'__{opname}__'
        super_method = getattr(np.ndarray, fullname)

        def opmethod(self) -> 'Array':
            return Array(super_method(self.data), self.axes)
        opmethod.__name__ = fullname
        return opmethod

    # unary operators involve a single operand, so no broadcasting is needed: the methods generated by _unaryop
    # apply the numpy operation to the raw data and keep the axes unchanged
    __neg__ = _unaryop('neg')
    __pos__ = _unaryop('pos')
    __abs__ = _unaryop('abs')
    __invert__ = _unaryop('invert')

    def __round__(self, n=0):
        """Support the builtin ``round()`` by rounding this array to `n` decimals using ``np.round``.

        NOTE(review): ``np.round`` receives the Array itself (not ``self.data``), so the type of the result depends on
        how Array interoperates with numpy functions -- presumably an Array; confirm.
        """
        # XXX: use the ufuncs.round instead?
        return np.round(self, decimals=n)

    def __index__(self):
        """Return ``self.data.__index__()`` so that the array can be used where Python expects an integer index.

        Whether the conversion succeeds is entirely decided by the underlying data object.
        """
        return self.data.__index__()

    def __int__(self):
        """Return ``self.data.__int__()`` (support for ``int(array)``).

        Whether the conversion succeeds is entirely decided by the underlying data object.
        """
        return self.data.__int__()

    def __float__(self):
        """Return ``self.data.__float__()`` (support for ``float(array)``).

        Whether the conversion succeeds is entirely decided by the underlying data object.
        """
        return self.data.__float__()

[docs]    @deprecate_kwarg('nan_equals', 'nans_equal')
    def equals(self, other, rtol=0, atol=0, nans_equal=False, check_axes=False) -> bool:
        r"""
        Compare this array with another array and returns True if they have the same axes and elements,
        False otherwise.

        Parameters
        ----------
        other : Array-like
            Input array. asarray() is used on a non-Array input.
        rtol : float or int, optional
            The relative tolerance parameter (see Notes). Defaults to 0.
        atol : float or int, optional
            The absolute tolerance parameter (see Notes). Defaults to 0.
        nans_equal : boolean, optional
            Whether to consider NaN values at the same positions in the two arrays as equal.
            By default, an array containing NaN values is never equal to another array, even if that other array
            also contains NaN values at the same positions. The reason is that a NaN value is different from
            *anything*, including itself. Defaults to False.
        check_axes : boolean, optional
            Whether to check that the set of axes and their order is the same on both sides. Defaults to False.
            If False, two arrays with compatible axes (and the same data) will compare equal, even if some axis is
            missing on either side or if the axes are in a different order.

        Returns
        -------
        bool
            Return True if this array is equal to other.

        See Also
        --------
        Array.eq, Array.allclose

        Notes
        -----
        For finite values, equals uses the following equation to test whether two values are equal:

            absolute(array1 - array2) <= (atol + rtol * absolute(array2))

        The above equation is not symmetric in array1 and array2, so that array1.equals(array2)
        might be different from array2.equals(array1) in some rare cases.

        Examples
        --------
        >>> arr1 = ndtest((2, 3))
        >>> arr1
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
        >>> arr2 = arr1.copy()
        >>> arr2.equals(arr1)
        True
        >>> arr2['b1'] += 1
        >>> arr2.equals(arr1)
        False
        >>> arr3 = arr1.set_labels('a', ['x0', 'x1'])
        >>> arr3.equals(arr1)
        False

        Test equality between two arrays within a given tolerance range.
        Return True if absolute(array1 - array2) <= (atol + rtol * absolute(array2)).

        >>> arr1 = Array([6., 8.], "a=a0,a1")
        >>> arr1
        a   a0   a1
           6.0  8.0
        >>> arr2 = Array([5.999, 8.001], "a=a0,a1")
        >>> arr2
        a     a0     a1
           5.999  8.001
        >>> arr2.equals(arr1)
        False
        >>> arr2.equals(arr1, atol=0.01)
        True
        >>> arr2.equals(arr1, rtol=0.01)
        True

        Arrays with NaN values

        >>> arr1 = ndtest((2, 3), dtype=float)
        >>> arr1['a1', 'b1'] = nan
        >>> arr1
        a\b   b0   b1   b2
         a0  0.0  1.0  2.0
         a1  3.0  nan  5.0
        >>> arr2 = arr1.copy()
        >>> # By default, an array containing NaN values is never equal to another array,
        >>> # even if that other array also contains NaN values at the same positions.
        >>> # The reason is that a NaN value is different from *anything*, including itself.
        >>> arr2.equals(arr1)
        False
        >>> # set flag nans_equal to True to overwrite this behavior
        >>> arr2.equals(arr1, nans_equal=True)
        True

        Arrays with the same data but different axes

        >>> arr1 = ndtest((2, 2))
        >>> arr1
        a\b  b0  b1
         a0   0   1
         a1   2   3
        >>> arr2 = arr1.transpose()
        >>> arr2
        b\a  a0  a1
         b0   0   2
         b1   1   3
        >>> arr2.equals(arr1)
        True
        >>> arr2.equals(arr1, check_axes=True)
        False
        >>> arr2 = arr1.expand('c=c0,c1')
        >>> arr2
         a  b\c  c0  c1
        a0   b0   0   0
        a0   b1   1   1
        a1   b0   2   2
        a1   b1   3   3
        >>> arr2.equals(arr1)
        True
        >>> arr2.equals(arr1, check_axes=True)
        False
        """
        try:
            other = asarray(other)
        except Exception:
            return False
        try:
            axes_equal = self.axes == other.axes if check_axes else True
            return axes_equal and all(self.eq(other, rtol=rtol, atol=atol, nans_equal=nans_equal))
        except ValueError:
            return False

[docs]    def allclose(self, other: Any, rtol: float = 1e-05, atol: float = 1e-08, nans_equal: bool = True,
                 check_axes: bool = False) -> bool:
        """
        Compare this array with another array and return True if they are element-wise equal within a tolerance.

        The tolerance values are positive, typically very small numbers.
        The relative difference (rtol * abs(other)) and the absolute difference atol are added together to compare
        against the absolute difference between this array and other.

        NaN values are treated as equal if they are in the same place and if `nans_equal=True`.

        This method calls `Array.equals` with the given arguments; only the default values of `rtol`, `atol` and
        `nans_equal` differ.

        Parameters
        ----------
        other : Array-like
            Input array. asarray() is used on a non-Array input.
        rtol : float or int, optional
            The relative tolerance parameter (see Notes). Defaults to 1e-05.
        atol : float or int, optional
            The absolute tolerance parameter (see Notes). Defaults to 1e-08.
        nans_equal : boolean, optional
            Whether to consider NaN values at the same positions in the two arrays as equal.
            If False, an array containing NaN values is never equal to another array, even if that other array
            also contains NaN values at the same positions. The reason is that a NaN value is different from
            *anything*, including itself. Defaults to True.
        check_axes : boolean, optional
            Whether to check that the set of axes and their order is the same on both sides. Defaults to False.
            If False, two arrays with compatible axes (and the same data) will compare equal, even if some axis is
            missing on either side or if the axes are in a different order.

        Returns
        -------
        bool
            Return True if the two arrays are equal within the given tolerance; False otherwise.

        See Also
        --------
        Array.equals

        Notes
        -----
        If the following equation is element-wise True, then `allclose` returns True.

            absolute(array1 - array2) <= (atol + rtol * absolute(array2))

        The above equation is not symmetric in array1 and array2, so that array1.allclose(array2) might be different
        from array2.allclose(array1) in some rare cases.

        Examples
        --------
        >>> arr1 = Array([1e10, 1e-7], "a=a0,a1")
        >>> arr2 = Array([1.00001e10, 1e-8], "a=a0,a1")
        >>> arr1.allclose(arr2)
        False

        >>> arr1 = Array([1e10, 1e-8], "a=a0,a1")
        >>> arr2 = Array([1.00001e10, 1e-9], "a=a0,a1")
        >>> arr1.allclose(arr2)
        True

        >>> arr1 = Array([1e10, 1e-8], "a=a0,a1")
        >>> arr2 = Array([1.0001e10, 1e-9], "a=a0,a1")
        >>> arr1.allclose(arr2)
        False

        >>> arr1 = Array([1.0, nan], "a=a0,a1")
        >>> arr2 = Array([1.0, nan], "a=a0,a1")
        >>> arr1.allclose(arr2)
        True
        >>> arr1.allclose(arr2, nans_equal=False)
        False
        """
        # same implementation as equals; the two methods only differ by their default arguments
        return self.equals(other=other, rtol=rtol, atol=atol, nans_equal=nans_equal, check_axes=check_axes)

[docs]    @deprecate_kwarg('nan_equals', 'nans_equal')
    def eq(self, other, rtol=0, atol=0, nans_equal=False) -> 'Array':
        """
        Compare this array with another array element-wise and returns an array of booleans.

        Parameters
        ----------
        other : Array-like
            Input array. asarray() is used on a non-Array input.
        rtol : float or int, optional
            The relative tolerance parameter (see Notes). Defaults to 0.
        atol : float or int, optional
            The absolute tolerance parameter (see Notes). Defaults to 0.
        nans_equal : boolean, optional
            Whether to consider Nan values at the same positions in the two arrays as equal.
            By default, an array containing NaN values is never equal to another array, even if that other array
            also contains NaN values at the same positions. The reason is that a NaN value is different from
            *anything*, including itself. Defaults to False.

        Returns
        -------
        Array
            Boolean array where each cell tells whether corresponding elements of this array and other are equal
            within a tolerance range if given. If nans_equal=True, corresponding elements with NaN values
            will be considered as equal.

        See Also
        --------
        Array.equals, Array.isclose

        Notes
        -----
        For finite values, eq uses the following equation to test whether two values are equal:

            absolute(array1 - array2) <= (atol + rtol * absolute(array2))

        The above equation is not symmetric in array1 and array2, so that array1.eq(array2)
        might be different from array2.eq(array1) in some rare cases.

        Examples
        --------
        >>> arr1 = Array([6., np.nan, 8.], "a=a0..a2")
        >>> arr1
        a   a0   a1   a2
           6.0  nan  8.0

        Default behavior (same as == operator)

        >>> arr1.eq(arr1)
        a    a0     a1    a2
           True  False  True

        Test equality between two arrays within a given tolerance range.
        Return True if absolute(array1 - array2) <= (atol + rtol * absolute(array2)).

        >>> arr2 = Array([5.999, np.nan, 8.001], "a=a0..a2")
        >>> arr2
        a     a0   a1     a2
           5.999  nan  8.001
        >>> arr1.eq(arr2, nans_equal=True)
        a     a0    a1     a2
           False  True  False
        >>> arr1.eq(arr2, atol=0.01, nans_equal=True)
        a    a0    a1    a2
           True  True  True
        >>> arr1.eq(arr2, rtol=0.01, nans_equal=True)
        a    a0    a1    a2
           True  True  True
        """
        other = asarray(other)

        if rtol == 0 and atol == 0:
            if not nans_equal:
                return self == other
            else:
                from larray.core.npufuncs import isnan

                def general_isnan(a):
                    if np.issubclass_(a.dtype.type, np.inexact):
                        return isnan(a)
                    elif a.dtype.type is np.object_:
                        return Array(obj_isnan(a), a.axes)
                    else:
                        return False

                return (self == other) | (general_isnan(self) & general_isnan(other))
        else:
            (a1_data, a2_data), res_axes = raw_broadcastable([self, other])
            return Array(np.isclose(a1_data, a2_data, rtol=rtol, atol=atol, equal_nan=nans_equal), res_axes)

[docs]    def isin(self, test_values, assume_unique=False, invert=False) -> 'Array':
        r"""
        Compute whether each element of this array is in `test_values`. Return a boolean array of the same shape as
        this array that is True where the array element is in `test_values` and False otherwise.

        Parameters
        ----------
        test_values : array_like or set
            The values against which to test each element of this array. If `test_values` is not a 1D array, it will be
            converted to one.
        assume_unique : bool, optional
            If True, this array and `test_values` are both assumed to be unique, which can speed up the calculation.
            Defaults to False.
        invert : bool, optional
            If True, the values in the returned array are inverted, as if calculating `element not in test_values`.
            Defaults to False. ``isin(a, b, invert=True)`` is equivalent to (but faster than) ``~isin(a, b)``.

        Returns
        -------
        Array
            boolean array of the same shape as this array that is True where the array element is in `test_values`
            and False otherwise.

        Examples
        --------
        >>> arr = ndtest((2, 3))
        >>> arr
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
        >>> arr.isin([1, 5, 7])
        a\b     b0     b1     b2
         a0  False   True  False
         a1  False  False   True
        >>> arr[arr.isin([1, 5, 7])]
        a_b  a0_b1  a1_b2
                 1      5
        """
        if isinstance(test_values, set):
            test_values = list(test_values)
        return Array(np.isin(self.data, test_values, assume_unique=assume_unique, invert=invert), self.axes)

[docs]    def divnot0(self, other) -> 'Array':
        # part of the doctest is skipped because it produces a warning we do not want to have to handle within the
        # doctest and cannot properly ignore
        r"""Divide this array by other, but return 0.0 where other is 0.

        Parameters
        ----------
        other : scalar or Array
            What to divide by.

        Returns
        -------
        Array
            Array divided by other, 0.0 where other is 0

        Examples
        --------
        >>> nat = Axis('nat=BE,FO')
        >>> sex = Axis('sex=M,F')
        >>> a = ndtest((nat, sex))
        >>> a
        nat\sex  M  F
             BE  0  1
             FO  2  3
        >>> b = ndtest(sex)
        >>> b
        sex  M  F
             0  1
        >>> a.divnot0(b)
        nat\sex    M    F
             BE  0.0  1.0
             FO  0.0  3.0

        Compare this to:

        >>> a / b                                  # doctest: +SKIP
        nat\sex    M    F
             BE  nan  1.0
             FO  inf  3.0
        """
        if np.isscalar(other):
            if other == 0:
                return zeros_like(self, dtype=float)
            else:
                return self / other
        else:
            (self_data, other_data), res_axes = raw_broadcastable((self, other))
            other_eq0 = other_data == 0
            # numpy array division gets slower the more zeros you have in other, so we change it before the division
            # happens. This is obviously slower than doing nothing if we have very few zeros but I think it's a win
            # on average given that other is likely to contain zeros when using divnot0.
            other_data = np.where(other_eq0, 1, other_data)
            res_data = self_data / other_data
            res_data[np.broadcast_to(other_eq0, res_data.shape)] = 0.0
            return Array(res_data, res_axes)

    # XXX: rename/change to "add_axes" ?
    # TODO: add a flag copy=True to force a new array.
[docs]    def expand(self, target_axes=None, out=None, readonly=False) -> 'Array':
        r"""Expand this array to target_axes.

        Target axes will be added to this array if not present.
        In most cases this function is not needed because LArray can do operations with arrays having different
        (compatible) axes.

        Parameters
        ----------
        target_axes : string, list of Axis or AxisCollection, optional
            This array can contain axes not present in `target_axes`.
            The result axes will be: [self.axes not in target_axes] + target_axes
        out : Array, optional
            Output array, must have more axes than array. Defaults to a new array.
            arr.expand(out=out) is equivalent to out[:] = arr
        readonly : bool, optional
            Whether returning a readonly view is acceptable or not (this is much faster)
            Defaults to False.

        Returns
        -------
        Array
            Original array if possible (and out is None).

        Examples
        --------
        >>> a = Axis('a=a1,a2')
        >>> b = Axis('b=b1,b2')
        >>> arr = ndtest([a, b])
        >>> arr
        a\b  b1  b2
         a1   0   1
         a2   2   3

        Adding one or several axes will append the new axes at the end

        >>> c = Axis('c=c1,c2')
        >>> arr.expand(c)
         a  b\c  c1  c2
        a1   b1   0   0
        a1   b2   1   1
        a2   b1   2   2
        a2   b2   3   3

        If you want the new axes to be inserted in a particular order, you have to give that order

        >>> arr.expand([a, c, b])
         a  c\b  b1  b2
        a1   c1   0   1
        a1   c2   0   1
        a2   c1   2   3
        a2   c2   2   3

        But it is enough to list only the added axes and the axes after them:

        >>> arr.expand([c, b])
         a  c\b  b1  b2
        a1   c1   0   1
        a1   c2   0   1
        a2   c1   2   3
        a2   c2   2   3
        """
        if not exactly_one(target_axes is not None, out is not None):
            raise ValueError("exactly one of either `target_axes` or `out` must be defined (not both)")

        if out is not None:
            out[:] = self
        else:
            # this is not strictly necessary but avoids doing this test twice if it is True
            if self.axes == target_axes:
                return self

            if not isinstance(target_axes, (tuple, list, AxisCollection)):
                target_axes = AxisCollection(target_axes)
            target_axes = (self.axes - target_axes) | target_axes

            broadcasted = self.broadcast_with(target_axes)
            # this can only happen if only the order of axes differed and/or all extra axes have length 1
            if broadcasted.axes == target_axes:
                return broadcasted

            if readonly:
                # requires numpy 1.10
                return Array(np.broadcast_to(broadcasted, target_axes.shape), target_axes)

            out = empty(target_axes, dtype=self.dtype)
            out[:] = broadcasted
        return out

[docs]    def append(self, axis, value, label=None) -> 'Array':
        r"""Add a value to this array along an axis.

        Parameters
        ----------
        axis : axis reference
            Axis along which to append `value`.
        value : scalar or Array
            Scalar or array with compatible axes.
        label : scalar, optional
            Label for the new item in axis. When `axis` is not present in `value`, this argument should be used.
            Defaults to None.

        Returns
        -------
        Array
            Array with `value` appended along `axis`.

        Examples
        --------
        >>> arr = ones('nat=BE,FO;sex=M,F')
        >>> arr["BE", "F"] = 2.0
        >>> arr
        nat\sex    M    F
             BE  1.0  2.0
             FO  1.0  1.0
        >>> sex_total = arr.sum('sex')
        >>> sex_total
        nat   BE   FO
             3.0  2.0
        >>> arr.append('sex', sex_total, label='M+F')
        nat\sex    M    F  M+F
             BE  1.0  2.0  3.0
             FO  1.0  1.0  2.0

        The value can already have the axis along which it is appended:

        >>> sex_total = arr.sum('sex', keepaxes='M+F')
        >>> sex_total
        nat\sex  M+F
             BE  3.0
             FO  2.0
        >>> arr.append('sex', sex_total)
        nat\sex    M    F  M+F
             BE  1.0  2.0  3.0
             FO  1.0  1.0  2.0

        The value can be a scalar or an array with fewer axes than the original array.
        In this case, the appended value is expanded (repeated) as necessary:

        >>> arr.append('nat', 2, 'Other')
        nat\sex    M    F
             BE  1.0  2.0
             FO  1.0  1.0
          Other  2.0  2.0

        The value can also have extra axes (axes not present in the original array),
        in which case, the original array is expanded as necessary:

        >>> other = zeros('type=type1,type2')
        >>> other
        type  type1  type2
                0.0    0.0
        >>> arr.append('nat', other, 'Other')
          nat  sex\type  type1  type2
           BE         M    1.0    1.0
           BE         F    2.0    2.0
           FO         M    1.0    1.0
           FO         F    1.0    1.0
        Other         M    0.0    0.0
        Other         F    0.0    0.0
        """
        axis = self.axes[axis]
        if isinstance(value, Array) and axis in value.axes:
             # This is just an optimization because going via the insert path
             # for this case makes this 10x slower.
             # FIXME: we should fix insert slowness instead
             return concat((self, value), axis)
        else:
            return self.insert(value, before=IGroup(len(axis), axis=axis), label=label)
    # old name of append -- presumably kept as a deprecated alias (see renamed_to for the exact behavior)
    extend = renamed_to(append, 'extend')

[docs]    def prepend(self, axis, value, label=None) -> 'Array':
        r"""Add a value at the start of this array along an axis.

        The value and this array must have compatible axes.

        Parameters
        ----------
        axis : axis reference
            Axis along which to prepend `value`.
        value : scalar or Array
            Scalar or array with compatible axes.
        label : str, optional
            Label for the new item in axis. Defaults to None.

        Returns
        -------
        Array
            Array expanded with 'value' at the start of 'axis'.

        See Also
        --------
        Array.append, Array.insert

        Examples
        --------
        >>> a = ones('nat=BE,FO;sex=M,F')
        >>> a
        nat\sex    M    F
             BE  1.0  1.0
             FO  1.0  1.0
        >>> a.prepend('sex', a.sum('sex'), 'M+F')
        nat\sex  M+F    M    F
             BE  2.0  1.0  1.0
             FO  2.0  1.0  1.0
        >>> a.prepend('nat', 2, 'Other')
        nat\sex    M    F
          Other  2.0  2.0
             BE  1.0  1.0
             FO  1.0  1.0
        >>> b = zeros('type=type1,type2')
        >>> b
        type  type1  type2
                0.0    0.0
        >>> a.prepend('sex', b, 'Other')
        nat  sex\type  type1  type2
         BE     Other    0.0    0.0
         BE         M    1.0    1.0
         BE         F    1.0    1.0
         FO     Other    0.0    0.0
         FO         M    1.0    1.0
         FO         F    1.0    1.0
        """
        # prepending is inserting before position 0 of the axis
        return self.insert(value, before=IGroup(0, axis=axis), label=label)

[docs]    def insert(self, value, before=None, after=None, pos=None, axis=None, label=None) -> 'Array':
        r"""Insert value in array along an axis.

        Parameters
        ----------
        value : scalar or Array
            Value to insert. If an Array, it must have compatible axes. If value already has the axis along which it
            is inserted, `label` should not be used.
        before : scalar or Group
            Label or group before which to insert `value`.
        after : scalar or Group
            Label or group after which to insert `value`.
        label : str, optional
            Label for the new item in axis.

        Returns
        -------
        Array
            Array with `value` inserted along `axis`. The dtype of the returned array will be the "closest" type
            which can hold both the array values and the inserted values without loss of information. For example,
            when mixing numeric and string types, the dtype will be object.

        Examples
        --------
        >>> arr1 = ndtest((2, 3))
        >>> arr1
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
        >>> arr1.insert(42, before='b1', label='b0.5')
        a\b  b0  b0.5  b1  b2
         a0   0    42   1   2
         a1   3    42   4   5

        The inserted value can be an array:

        >>> arr2 = ndtest(2)
        >>> arr2
        a  a0  a1
            0   1
        >>> arr1.insert(arr2, after='b0', label='b0.5')
        a\b  b0  b0.5  b1  b2
         a0   0     0   1   2
         a1   3     1   4   5

        If you want to target positions, you have to somehow specify the axis:

        >>> a, b = arr1.axes
        >>> # arr1.insert(42, before='b.i[1]', label='b0.5')
        >>> arr1.insert(42, before=b.i[1], label='b0.5')
        a\b  b0  b0.5  b1  b2
         a0   0    42   1   2
         a1   3    42   4   5

        Insert an array which already has the axis

        >>> arr3 = ndtest('a=a0,a1;b=b0.1,b0.2') + 42
        >>> arr3
        a\b  b0.1  b0.2
         a0    42    43
         a1    44    45
        >>> arr1.insert(arr3, before='b1')
        a\b  b0  b0.1  b0.2  b1  b2
         a0   0    42    43   1   2
         a1   3    44    45   4   5
        """
        # XXX: unsure we should have arr1.insert(arr3, before='b1,b2') result in (see unit tests):

        # a\b  b0  b0.1  b1  b0.2  b2
        #  a0   0    42   1    43   2
        #  a1   3    44   4    45   5

        # we might want to implement the following instead:

        # a\b  b0  b0.1  b0.2  b1  b0.1  b0.2  b2
        #  a0   0    42    43   1    42    43   2
        #  a1   3    44    45   4    44    45   5

        # The latter looks less useful and could be emulated easily via:
        # arr1.insert([arr3, arr3], before='b1,b2')
        # while the above is a bit harder to achieve manually:
        # arr1.insert([arr3[[b]] for b in arr3.b], before=['b1', 'b2'])
        # but the latter is *probably* more intuitive (and wouldn't suffer from the inefficiency we currently have).

        # XXX: when we have several lists, we implicitly match them by position, which we should avoid for the usual
        # reason, but I am unsure what the best syntax for that would be.

        # the goal is to get this result

        # a\b  b0  b0.5  b1  b1.5  b2
        #  a0   0     8   1     9   2
        #  a1   3     8   4     9   5

        # When the inserted arrays already contain a label, this seems reasonably readable:

        # >>> arr1 = ndtest((2, 3))
        # >>> arr1
        # a\b  b0  b1  b2
        #  a0   0   1   2
        #  a1   3   4   5
        # >>> arr2 = full('b=b0.5', 8)
        # >>> arr2
        # b  b0.5
        #       8
        # >>> arr3 = full('b=b1.5', 9)
        # >>> arr3
        # b  b1.5
        #       9
        # >>> arr1.insert(before={'b1': arr2, 'b2': arr3})
        # a\b  b0  b0.5  b1  b1.5  b2
        #  a0   0     8   1     9   2
        #  a1   3     8   4     9   5

        # When the inserted arrays/values have no label, this does not really convince me and it prevents using after
        # or pos.

        # >>> arr1.insert(value={'b0.5': ('b1', 8), 'b1.5': ('b2', 9)})
        # a\b  b0  b0.5  b1  b1.5  b2
        #  a0   0     8   1     9   2
        #  a1   3     8   4     9   5

        # This works with both after and pos and we could support it along with the above syntax when no label is
        # needed. Problem: label, value is arbitrary and as such potentially hard to remember.

        # >>> arr1.insert(before={'b1': ('b0.5', 8), 'b2': ('b1.5', 9)})
        # a\b  b0  b0.5  b1  b1.5  b2
        #  a0   0     8   1     9   2
        #  a1   3     8   4     9   5

        # This is shorter but not readable enough/even more arbitrary than the previous option.

        # >>> arr1.insert([(8, 'b1', 'b0.5'), (9, 'b2', 'b1.5')])
        # a\b  b0  b0.5  b1  b1.5  b2
        #  a0   0     8   1     9   2
        #  a1   3     8   4     9   5

        # This is readable but odd and not much gained (except efficiency) compared with multiple insert calls

        # >>> arr1.insert([(8, 'before', 'b1', 'label', 'b0.5'),
        #                  (9, 'before', 'b2', 'label', 'b1.5')])
        # >>> arr1.insert(8, before='b1', label='b0.5') \
        #         .insert(9, before='b2', label='b1.5')

        # @alixdamman suggested using a list of dictionaries {'value': XX, 'before': YY, 'label': ZZ}

        # >>> arr1.insert([{'value': 8, 'before': 'b1', 'label': 'b0.5'},
        #                  {'value': 9, 'before': 'b2', 'label': 'b1.5'}])
        # >>> arr1.insert([dict(value=8, before='b1', label='b0.5'),
        #                  dict(value=9, before='b2', label='b1.5')])

        # It would be nice to somehow support easily inserting values defined using an Array

        # >>> toinsert = Array([[8, 'b1', 'b0.5'],
        # >>>                    [9, 'b2', 'b1.5']], "row=2;column=value,before,label")
        # >>> arr1.insert(toinsert)
        # >>> arr1.insert(value=toinsert['value'], before=toinsert['before'], label=toinsert['label'])
        # >>> arr1.insert(**toinsert)
        # >>> arr1.insert(**toinsert.to_dict('column'))
        if not exactly_one(before is not None, after is not None, pos is not None):
            raise ValueError("must specify exactly one of before, after or pos")

        if pos is not None or axis is not None:
            warnings.warn("The 'pos' and 'axis' keyword arguments are deprecated, please use axis.i[pos] instead",
                          FutureWarning, stacklevel=2)
            before = IGroup(pos, axis=axis)

        if before is not None:
            axis, before_pos = self.axes._translate_axis_key(before)
        else:
            axis, after_pos = self.axes._translate_axis_key(after)
            before_pos = after_pos + 1

        def length(v):
            if isinstance(v, Array) and axis in v.axes:
                return len(v.axes[axis])
            else:
                return len(v) if isinstance(v, (tuple, list, np.ndarray)) else 1

        def expand(v, length):
            return v if isinstance(v, (tuple, list, np.ndarray)) else [v] * length

        num_inserts = max(length(before_pos), length(label), length(value))
        stops = expand(before_pos, num_inserts)

        axis_in_value = isinstance(value, Array) and axis in value.axes
        if axis_in_value:
            # FIXME: when length(before_pos) == 1 and length(label) == 1, this is inefficient
            #        in the case of extend, this is awfully inefficient (needlessly splits the value)
            value_axis = value.axes[axis]
            # This odd construction is to get a subset for each individual label of the axis
            # but keep the label AND work with ambiguous labels
            # values = [value[[k]] for k in value_axis]             -> does not work for ambiguous labels
            # values = [value[k] for k in value_axis]               -> does not keep the label
            # values = [value[value_axis[[k]]] for k in value_axis] -> works but is "slow"
            values = [value[IGroup([i], None, value_axis)] for i in range(len(value_axis))]
        else:
            values = expand(value, num_inserts)

        values = [asarray(v) if not isinstance(v, Array) else v
                  for v in values]

        if label is not None:
            labels = expand(label, num_inserts)
            if axis_in_value:
                values = [v.set_labels(axis, [label])
                          for v, label in zip(values, labels)]
            else:
                values = [v.expand(Axis([label], axis.name), readonly=True)
                          for v, label in zip(values, labels)]
        elif not axis_in_value:
            v_axis = Axis([None], axis.name)
            values = [v.expand(v_axis, readonly=True)
                      for v in values]
        else:
            # When label is None and axis is in value.axes, we do not need to do anything
            pass

        start = 0
        chunks = []
        for stop, value in zip(stops, values):
            chunks.append(self[axis.i[start:stop]])
            chunks.append(value)
            start = stop
        if start < len(axis):
            chunks.append(self[axis.i[start:]])
        return concat(chunks, axis)

[docs]    def drop(self, labels=None) -> 'Array':
        r"""Return array without some labels or indices along an axis.

        Parameters
        ----------
        labels : scalar, list or Group
            Label(s) or group to remove. To remove indices, one must pass an IGroup.

        Returns
        -------
        Array
            Array with `labels` removed along their axis.

        Examples
        --------
        >>> arr1 = ndtest((2, 4))
        >>> arr1
        a\b  b0  b1  b2  b3
         a0   0   1   2   3
         a1   4   5   6   7
        >>> a, b = arr1.axes

        dropping a single label

        >>> arr1.drop('b1')
        a\b  b0  b2  b3
         a0   0   2   3
         a1   4   6   7

        dropping multiple labels

        >>> # arr1.drop('b1,b3')
        >>> arr1.drop(['b1', 'b3'])
        a\b  b0  b2
         a0   0   2
         a1   4   6

        dropping a slice

        >>> # arr1.drop('b1:b3')
        >>> arr1.drop(b['b1':'b3'])
        a\b  b0
         a0   0
         a1   4

        when deleting indices instead of labels, one must specify the axis explicitly (using an IGroup):

        >>> # arr1.drop('b.i[1]')
        >>> arr1.drop(b.i[1])
        a\b  b0  b2  b3
         a0   0   2   3
         a1   4   6   7

        as when deleting ambiguous labels (which are present on several axes):

        >>> a = Axis('a=label0..label2')
        >>> b = Axis('b=label0..label2')
        >>> arr2 = ndtest((a, b))
        >>> arr2
           a\b  label0  label1  label2
        label0       0       1       2
        label1       3       4       5
        label2       6       7       8
        >>> # arr2.drop('a[label1]')
        >>> arr2.drop(a['label1'])
           a\b  label0  label1  label2
        label0       0       1       2
        label2       6       7       8
        """
        axis, indices = self.axes._translate_axis_key(labels)
        axis_idx = self.axes.index(axis)
        new_axis = Axis(np.delete(axis.labels, indices), axis.name)
        new_axes = self.axes.replace(axis, new_axis)
        return Array(np.delete(self.data, indices, axis_idx), new_axes)

[docs]    def transpose(self, *args) -> 'Array':
        r"""Reorder axes.

        By default, reverse axes, otherwise permute the axes according to the list given as argument.

        Parameters
        ----------
        *args
            Accepts either a tuple of axes specs or axes specs as `*args`. Omitted axes keep their order.
            Use ... to avoid specifying intermediate axes.

        Returns
        -------
        Array
            Array with reordered axes.

        Examples
        --------
        >>> arr = ndtest((2, 2, 2))
        >>> arr
         a  b\c  c0  c1
        a0   b0   0   1
        a0   b1   2   3
        a1   b0   4   5
        a1   b1   6   7
        >>> arr.transpose('b', 'c', 'a')
         b  c\a  a0  a1
        b0   c0   0   4
        b0   c1   1   5
        b1   c0   2   6
        b1   c1   3   7
        >>> arr.transpose('b')
         b  a\c  c0  c1
        b0   a0   0   1
        b0   a1   4   5
        b1   a0   2   3
        b1   a1   6   7
        >>> arr.transpose(..., 'a')  # doctest: +SKIP
         b  c\a  a0  a1
        b0   c0   0   4
        b0   c1   1   5
        b1   c0   2   6
        b1   c1   3   7
        >>> arr.transpose('c', ..., 'a')  # doctest: +SKIP
         c  b\a  a0  a1
        c0   b0   0   4
        c0   b1   2   6
        c1   b0   1   5
        c1   b1   3   7
        """
        axes = self.axes
        data = self.data
        if len(args) == 0:
            return Array(data.T, axes[::-1])
        elif len(args) == 1 and isinstance(args[0], (tuple, list, AxisCollection)):
            target_axes = args[0]
        else:
            target_axes = args

        # TODO: this shouldn't be necessary in most cases (and is expensive compared to the numpy op itself)
        #       but doing it only when ... is present breaks many tests => in which other cases is it necessary???
        target_axes = axes[target_axes]
        # if ... in target_axes:
        #     target_axes = axes[target_axes]

        # TODO: implement AxisCollection.index(sequence)
        axes_indices = [axes.index(axis) for axis in target_axes]
        # this whole mumbo jumbo is required (for now) for anonymous axes
        indices_present = set(axes_indices)
        missing_indices = [i for i in range(data.ndim) if i not in indices_present]
        axes_indices += missing_indices

        return Array(data.transpose(axes_indices), axes[axes_indices])
    T = property(transpose)

[docs]    def clip(self, minval=None, maxval=None, out=None) -> 'Array':
        r"""Clip (limit) the values in an array.

        Given an interval, values outside the interval are clipped to the interval bounds.
        For example, if an interval of [0, 1] is specified, values smaller than 0 become 0,
        and values larger than 1 become 1.

        Parameters
        ----------
        minval : scalar or array-like, optional
            Minimum value. If None, clipping is not performed on lower bound.
            Defaults to None.
        maxval : scalar or array-like, optional
            Maximum value. If None, clipping is not performed on upper bound.
            Defaults to None.
        out : Array, optional
            The results will be placed in this array.

        Returns
        -------
        Array
            An array with the elements of the current array,
            but where values < `minval` are replaced with `minval`, and those > `maxval` with `maxval`.

        Notes
        -----
        * At least either `minval` or `maxval` must be defined.
        * If `minval` and/or `maxval` are array_like, broadcast will occur between self, `minval` and `maxval`.

        Examples
        --------
        >>> arr = ndtest((3, 3)) - 3
        >>> arr
        a\b  b0  b1  b2
         a0  -3  -2  -1
         a1   0   1   2
         a2   3   4   5
        >>> arr.clip(0, 2)
        a\b  b0  b1  b2
         a0   0   0   0
         a1   0   1   2
         a2   2   2   2

        Clipping on lower bound only

        >>> arr.clip(0)
        a\b  b0  b1  b2
         a0   0   0   0
         a1   0   1   2
         a2   3   4   5

        Clipping on upper bound only

        >>> arr.clip(maxval=2)
        a\b  b0  b1  b2
         a0  -3  -2  -1
         a1   0   1   2
         a2   2   2   2

        clipping using bounds which vary along an axis

        >>> lower_bound = Array([-2, 0, 2], 'b=b0..b2')
        >>> upper_bound = Array([0, 2, 4], 'b=b0..b2')
        >>> arr.clip(lower_bound, upper_bound)
        a\b  b0  b1  b2
         a0  -2   0   2
         a1   0   1   2
         a2   0   2   4
        """
        from larray.core.npufuncs import clip
        return clip(self, minval, maxval, out)

[docs]    @deprecate_kwarg('transpose', 'wide')
    def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dropna=None,
               dialect='default', **kwargs) -> None:
        r"""
        Write array to a csv file.

        Parameters
        ----------
        filepath : str or Path
            path where the csv file has to be written.
        sep : str, optional
            separator for the csv file. Defaults to `,`.
        na_rep : str, optional
            replace NA values with na_rep. Defaults to ''.
        wide : boolean, optional
            Whether writing arrays in "wide" format. If True, arrays are exported with the last axis
            represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one
            value column. Defaults to True.
        value_name : str, optional
            Name of the column containing the values (last column) in the csv file when `wide=False` (see above).
            Defaults to 'value'.
        dialect : 'default' | 'classic', optional
            Whether to write the last axis name (using '\' ). Defaults to 'default'.
        dropna : None, 'all', 'any' or True, optional
            Drop lines if 'all' its values are NA, if 'any' value is NA or do not drop any line (default).
            True is equivalent to 'all'.

        Examples
        --------
        >>> tmp_path = getfixture('tmp_path')
        >>> fname = tmp_path / 'test.csv'
        >>> a = ndtest('nat=BE,FO;sex=M,F')
        >>> a
        nat\sex  M  F
             BE  0  1
             FO  2  3
        >>> a.to_csv(fname)
        >>> with open(fname) as f:
        ...     print(f.read().strip())
        nat\sex,M,F
        BE,0,1
        FO,2,3
        >>> a.to_csv(fname, sep=';', wide=False)
        >>> with open(fname) as f:
        ...     print(f.read().strip())
        nat;sex;value
        BE;M;0
        BE;F;1
        FO;M;2
        FO;F;3
        >>> a.to_csv(fname, sep=';', wide=False, value_name='population')
        >>> with open(fname) as f:
        ...     print(f.read().strip())
        nat;sex;population
        BE;M;0
        BE;F;1
        FO;M;2
        FO;F;3
        >>> a.to_csv(fname, dialect='classic')
        >>> with open(fname) as f:
        ...     print(f.read().strip())
        nat,M,F
        BE,0,1
        FO,2,3
        """
        fold = dialect == 'default'
        if wide:
            frame = self.to_frame(fold, dropna)
            frame.to_csv(filepath, sep=sep, na_rep=na_rep, **kwargs)
        else:
            series = self.to_series(value_name, dropna is not None)
            series.to_csv(filepath, sep=sep, na_rep=na_rep, header=True, **kwargs)

[docs]    def to_hdf(self, filepath, key) -> None:
        r"""
        Write array to a HDF file.

        A HDF file can contain multiple arrays.
        The 'key' parameter is a unique identifier for the array.

        Parameters
        ----------
        filepath : str or Path
            Path where the hdf file has to be written.
        key : str or Group
            Key (path) of the array within the HDF file (see Notes below).

        Notes
        -----
        Objects stored in a HDF file can be grouped together in `HDF groups`.
        If an object 'my_obj' is stored in a HDF group 'my_group',
        the key associated with this object is then 'my_group/my_obj'.
        Be aware that a HDF group can have subgroups.

        Examples
        --------
        >>> a = ndtest((2, 3))

        Save an array

        >>> a.to_hdf('test.h5', 'a')          # doctest: +SKIP

        Save an array in a specific HDF group

        >>> a.to_hdf('test.h5', 'arrays/a')   # doctest: +SKIP
        """
        key = _translate_group_key_hdf(key)
        with LHDFStore(filepath) as store:
            store.put(key, self.to_frame())
            attrs = store.get_storer(key).attrs
            attrs.type = 'Array'
            attrs.writer = 'LArray'
            self.meta.to_hdf(store, key)

[docs]    def to_stata(self, filepath_or_buffer, **kwargs) -> None:
        r"""
        Write array to a Stata .dta file.

        Parameters
        ----------
        filepath_or_buffer : str or file-like object
            Path to .dta file or a file handle.

        See Also
        --------
        read_stata

        Notes
        -----
        The round trip to Stata (Array.to_stata followed by read_stata) loose the name of the "column" axis.

        Examples
        --------
        >>> axes = [Axis(3, 'row'), Axis('column=country,sex')]
        >>> arr = Array([['BE', 'F'],
        ...              ['FR', 'M'],
        ...              ['FR', 'F']], axes=axes)
        >>> arr
        row*\column  country  sex
                  0       BE    F
                  1       FR    M
                  2       FR    F
        >>> arr.to_stata('test.dta')      # doctest: +SKIP
        """
        self.to_frame().to_stata(filepath_or_buffer, **kwargs)

[docs]    @deprecate_kwarg('sheet_name', 'sheet')
    def to_excel(self, filepath=None, sheet=None, position='A1', overwrite_file=False, clear_sheet=False,
                 header=True, transpose=False, wide=True, value_name='value', engine=None, *args, **kwargs) -> None:
        r"""
        Write array in the specified sheet of specified excel workbook.

        Parameters
        ----------
        filepath : str or Path or int or None, optional
            Path where the excel file has to be written. If None (default), creates a new Excel Workbook in a live Excel
            instance (Windows only). Use -1 to use the currently active Excel Workbook. Use a name without extension
            (.xlsx) to use any unsaved* workbook.
        sheet : str or Group or int or None, optional
            Sheet where the data has to be written. Defaults to None, Excel standard name if adding a sheet to an
            existing file, "Sheet1" otherwise. sheet can also refer to the position of the sheet
            (e.g. 0 for the first sheet, -1 for the last one).
        position : str or tuple of integers, optional
            Integer position (row, column) must be 1-based. Used only if engine is 'xlwings'. Defaults to 'A1'.
        overwrite_file : bool, optional
            Whether to overwrite the existing file (or just modify the specified sheet). Defaults to False.
        clear_sheet : bool, optional
            Whether to clear the existing sheet (if any) before writing. Defaults to False.
        header : bool, optional
            Whether to write a header (axes names and labels). Defaults to True.
        transpose : bool, optional
            Whether to transpose the array over last axis.
            This is equivalent to paste with option transpose in Excel. Defaults to False.
        wide : boolean, optional
            Whether writing arrays in "wide" format. If True, arrays are exported with the last axis
            represented horizontally. If False, arrays are exported in "narrow" format: one column per axis plus one
            value column. Defaults to True.
        value_name : str, optional
            Name of the column containing the values (last column) in the Excel sheet when `wide=False` (see above).
            Defaults to 'value'.
        engine : 'xlwings' | 'openpyxl' | 'xlsxwriter' | 'xlwt' | None, optional
            Engine to use to make the output. If None (default), it will use 'xlwings' by default if the module is
            installed and relies on Pandas default writer otherwise.
        *args
        **kwargs

        Examples
        --------
        >>> a = ndtest('nat=BE,FO;sex=M,F')
        >>> # write to a new (unnamed) sheet
        >>> a.to_excel('test.xlsx')  # doctest: +SKIP
        >>> # write to top-left corner of an existing sheet
        >>> a.to_excel('test.xlsx', 'Sheet1')  # doctest: +SKIP
        >>> # add to existing sheet starting at position A15
        >>> a.to_excel('test.xlsx', 'Sheet1', 'A15')  # doctest: +SKIP
        """
        sheet = _translate_sheet_name(sheet)

        if wide:
            pd_obj = self.to_frame(fold_last_axis_name=True)
            if transpose and self.ndim >= 2:
                names = pd_obj.index.names
                pd_obj.index.names = names[:-2] + ['\\'.join(reversed(names[-1].split('\\')))]
        else:
            pd_obj = self.to_series(value_name)

        if engine is None:
            engine = 'xlwings' if xw is not None else None

        if isinstance(filepath, str):
            filepath = Path(filepath)

        if engine == 'xlwings':
            from larray.inout.xw_excel import open_excel

            close = False
            new_workbook = False
            if filepath is None:
                new_workbook = True
            elif isinstance(filepath, Path) and filepath.suffix:
                if not filepath.is_file():
                    new_workbook = True
                close = True
            if new_workbook or overwrite_file:
                new_workbook = overwrite_file = True

            wb = open_excel(filepath, overwrite_file=overwrite_file)

            if new_workbook:
                sheetobj = wb.sheets[0]
                if sheet is not None:
                    sheetobj.name = sheet
            elif sheet is not None and sheet in wb:
                sheetobj = wb.sheets[sheet]
                if clear_sheet:
                    sheetobj.clear()
            else:
                sheetobj = wb.sheets.add(sheet, after=wb.sheets[-1])

            options = dict(header=header, index=header, transpose=transpose)
            sheetobj[position].options(**options).value = pd_obj
            # TODO: implement wide via/in dump
            # sheet[position] = self.dump(header=header, wide=wide)
            if close:
                wb.save()
                wb.close()
        else:
            if sheet is None:
                sheet = 'Sheet1'
            # TODO: implement position in this case
            # startrow, startcol
            pd_obj.to_excel(filepath, sheet, *args, engine=engine, **kwargs)

[docs]    def to_clipboard(self, *args, **kwargs) -> None:
        r"""Send the content of the array to the clipboard.

        Using to_clipboard() makes it possible to paste the content of the array into a file (Excel, ascii file,...).

        Examples
        --------
        >>> a = ndtest('nat=BE,FO;sex=M,F')
        >>> a.to_clipboard()  # doctest: +SKIP
        """
        self.to_frame().to_clipboard(*args, **kwargs)

    # XXX: sep argument does not seem very useful
    # def to_excel(self, filename, sep='_'):
    #     # Why xlsxwriter? Because it is faster than openpyxl and xlwt
    #     # currently does not support .xlsx (only .xls).
    #     # PyExcelerate seem like a decent alternative too
    #     import xlsxwriter as xl
    #
    #     workbook = xl.Workbook(filename)
    #     if self.ndim > 2:
    #         for key in product(*[axis.labels for axis in self.axes[:-2]]):
    #             sheetname = sep.join(str(k) for k in key)
    #             # sheet names must not:
    #             # * contain any of the following characters: : \ / ? * [ ]
    #             # XXX: this will NOT work for unicode strings !
    #             table = string.maketrans('[:]', '(-)')
    #             todelete = r'\/?*'
    #             sheetname = sheetname.translate(table, todelete)
    #             # * exceed 31 characters
    #             # sheetname = sheetname[:31]
    #             # * be blank
    #             assert sheetname, "sheet name cannot be blank"
    #             worksheet = workbook.add_worksheet(sheetname)
    #             worksheet.write_row(0, 1, self.axes[-1].labels)
    #             worksheet.write_column(1, 0, self.axes[-2].labels)
    #             for row, data in enumerate(np.asarray(self[key])):
    #                 worksheet.write_row(1+row, 1, data)
    #
    #     else:
    #         worksheet = workbook.add_worksheet('Sheet1')
    #         worksheet.write_row(0, 1, self.axes[-1].labels)
    #         if self.ndim == 2:
    #             worksheet.write_column(1, 0, self.axes[-2].labels)
    #         for row, data in enumerate(np.asarray(self)):
    #             worksheet.write_row(1+row, 1, data)

    @property
    def plot(self) -> PlotObject:
        r"""Plot the data of the array into a graph (window pop-up).

        The property returns a PlotObject wrapping this array. The parameters below are those
        used when plotting through it (see Examples). The resulting graph can be tweaked to get the desired
        formatting and can be saved to a .png file.

        Parameters
        ----------
        kind : str
            - 'line' : line plot (default)
            - 'bar' : vertical bar plot
            - 'barh' : horizontal bar plot
            - 'hist' : histogram
            - 'box' : boxplot
            - 'kde' : Kernel Density Estimation plot
            - 'density' : same as 'kde'
            - 'area' : area plot
            - 'pie' : pie plot
            - 'scatter' : scatter plot (if array's dimensions >= 2)
            - 'hexbin' : hexbin plot (if array's dimensions >= 2)
        ax : matplotlib axes object, default None
        subplots : boolean, Axis, int, str or tuple, default False
            Make several subplots. If True, will make subplots for each combination of labels for all axes except the
            last. If an Axis, int, str (or tuple of those), it will make subplots for combination of labels of those
            axes.
        sharex : boolean, default True if ax is None else False
            In case subplots=True, share x axis and set some x axis labels to invisible;
            defaults to True if ax is None otherwise False if an ax is passed in;
            Be aware, that passing in both an ax and sharex=True will alter all x axis labels for all axis in a figure!
        sharey : boolean, default False
            In case subplots=True, share y axis and set some y axis labels to invisible
        layout : tuple (optional)
            (rows, columns) for the layout of subplots
        figsize : a tuple (width, height) in inches
        use_index : boolean, default True
            Use index as ticks for x axis
        title : string
            Title to use for the plot
        grid : boolean, default None (matlab style default)
            Axis grid lines
        legend : False/True/'reverse'
            Place legend on axis subplots. Defaults to True.
        style : list or dict
            matplotlib line style per column
        logx : boolean, default False
            Use log scaling on x axis
        logy : boolean, default False
            Use log scaling on y axis
        loglog : boolean, default False
            Use log scaling on both x and y axes
        xticks : sequence
            Values to use for the xticks
        yticks : sequence
            Values to use for the yticks
        xlim : 2-tuple/list
        ylim : 2-tuple/list
        rot : int, default None
            Rotation for ticks (xticks for vertical, yticks for horizontal plots)
        fontsize : int, default None
            Font size for xticks and yticks
        colormap : str or matplotlib colormap object, default None
            Colormap to select colors from. If string, load colormap with that name from matplotlib.
        colorbar : boolean, optional
            If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots)
        position : float
            Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1 (right/top-end).
            Default is 0.5 (center)
        yerr : array-like
            Error bars on y axis
        xerr : array-like
            Error bars on x axis
        stacked : boolean, default False in line and bar plots, and True in area plot.
            If True, create stacked plot.
        **kwargs : keywords
            Options to pass to matplotlib plotting method

        Returns
        -------
        axes : matplotlib.AxesSubplot or np.array of them

        Notes
        -----
        See Pandas documentation of `plot` function for more details on this subject

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> # let us define an array with some made up data
        >>> arr = Array([[5, 20, 5, 10], [6, 16, 8, 11]], 'gender=M,F;year=2018..2021')

        Simple line plot

        >>> arr.plot()
        >>> # show figure (it also resets it after showing it! Do not call it before savefig)
        >>> plt.show()

        Line plot with grid and a title

        >>> arr.plot(grid=True, title='line plot')
        >>> # save figure in a file (see matplotlib.pyplot.savefig documentation for more details)
        >>> plt.savefig('my_file.png')

        2 bar plots (one for each gender) sharing the same y axis, which makes sub plots
        easier to compare. By default sub plots are independent of each other and the axes
        ranges are computed to "fit" just the data for their individual plot.

        >>> arr.plot.bar(subplots='gender', sharey=True)
        >>> plt.show()

        Create a figure containing 2 x 2 graphs

        >>> # see matplotlib.pyplot.subplots documentation for more details
        >>> fig, ax = plt.subplots(2, 2, figsize=(10, 8), tight_layout=True)   # doctest: +SKIP
        >>> # line plot with 2 curves (Males and Females) in the top left corner (0, 0)
        >>> arr.plot(ax=ax[0, 0], title='line plot')                           # doctest: +SKIP
        >>> # bar plot with stacked values in the top right corner (0, 1)
        >>> arr.plot.bar(ax=ax[0, 1], stacked=True, title='stacked bar plot')  # doctest: +SKIP
        >>> # area plot in the bottom left corner (1, 0)
        >>> arr.plot.area(ax=ax[1, 0], title='area plot')                      # doctest: +SKIP
        >>> # scatter plot in the bottom right corner (1, 1), using the year as color
        >>> # index and a specific colormap
        >>> arr.plot.scatter(ax=ax[1, 1], x='M', y='F', c=arr.year, colormap='viridis',
        ...                  title='scatter plot')                             # doctest: +SKIP
        >>> plt.show()                                                         # doctest: +SKIP
        """
        plotter = PlotObject(self)
        return plotter

    @property
    def shape(self) -> Tuple[int, ...]:
        r"""Shape of the array, i.e. the shape of its underlying data.

        Returns
        -------
        tuple
            Tuple holding the length of each dimension.

        Examples
        --------
        >>> a = ndtest('nat=BE,FO;sex=M,F;type=type1,type2,type3')
        >>> a.shape  # doctest: +SKIP
        (2, 2, 3)
        """
        data = self.data
        return data.shape

    @property
    def ndim(self) -> int:
        r"""Number of dimensions of the array, as reported by its underlying data.

        Returns
        -------
        int
            How many dimensions the Array has.

        Examples
        --------
        >>> a = ndtest('nat=BE,FO;sex=M,F')
        >>> a.ndim
        2
        """
        data = self.data
        return data.ndim

    @property
    def size(self) -> int:
        r"""Total number of elements (cells) of the array, as reported by its underlying data.

        Returns
        -------
        int
            How many elements the array holds.

        Examples
        --------
        >>> a = ndtest('sex=M,F;type=type1,type2,type3')
        >>> a.size
        6
        """
        data = self.data
        return data.size

    @property
    def nbytes(self) -> int:
        r"""Number of bytes taken in memory by the data of the array.

        Returns
        -------
        int
            Size of the underlying data, in bytes.

        Examples
        --------
        >>> a = ndtest('sex=M,F;type=type1,type2,type3', dtype=float)
        >>> a.nbytes
        48
        """
        data = self.data
        return data.nbytes

    @property
    def memory_used(self) -> str:
        r"""Memory taken by the data of the array, formatted for human consumption.

        This is the same quantity as :attr:`nbytes`, passed through ``size2str``.

        Returns
        -------
        str
            Human readable size of the array data.

        Examples
        --------
        >>> a = ndtest('sex=M,F;type=type1,type2,type3', dtype=float)
        >>> a.memory_used
        '48 bytes'
        """
        num_bytes = self.data.nbytes
        return size2str(num_bytes)

    @property
    def dtype(self) -> np.dtype:
        r"""Data type of the values stored in the array.

        Returns
        -------
        dtype
            dtype of the underlying data.

        Examples
        --------
        >>> a = zeros('sex=M,F;type=type1,type2,type3')
        >>> a.dtype
        dtype('float64')
        """
        data = self.data
        return data.dtype

    @property
    def item(self) -> Scalar:
        r"""Give access to the ``item`` attribute of the underlying data.

        Notes
        -----
        The property returns the attribute itself (it does not call it). With numpy data this is
        presumably the bound ``ndarray.item`` method, so the usual usage is ``arr.item()``.
        """
        data = self.data
        return data.item

    def __len__(self) -> int:
        r"""Return ``len()`` of the underlying data (i.e. the length of its first dimension for numpy data)."""
        data = self.data
        return len(data)

    def __array__(self, dtype=None, copy=None):
        r"""Return the data of the array as a numpy array (numpy ``__array__`` protocol).

        Parameters
        ----------
        dtype : dtype, optional
            Data type of the returned array. Defaults to None (keep the dtype of the data).
        copy : bool, optional
            Copy behavior requested by numpy (this keyword is part of the protocol since numpy 2).
            None (the default) only copies when needed, True always copies and False is forwarded to
            :func:`numpy.array`, which (with numpy >= 2) raises a ValueError if a copy cannot be avoided.

        Returns
        -------
        numpy.ndarray
        """
        if copy is None:
            # historical behavior: no copy unless the dtype conversion requires one
            return np.asarray(self.data, dtype=dtype)
        return np.array(self.data, dtype=dtype, copy=copy)

    # numpy uses this attribute to arbitrate between operands in mixed operations
    # NOTE(review): the exact value (100) is kept as is; its rationale is not visible here
    __array_priority__ = 100

    # TODO: this should be a thin wrapper around a method in AxisCollection
[docs]    def set_labels(self, axis=None, labels=None, inplace=False, **kwargs) -> 'Array':
        r"""Replace the labels of one or several axes of the array.

        Parameters
        ----------
        axis : string or Axis or dict
            Axis for which we want to replace labels, or mapping {axis: changes} where changes can either be the
            complete list of labels, a mapping {old_label: new_label} or a function to transform labels.
            If there is no ambiguity (two or more axes have the same labels), `axis` can be a direct mapping
            {old_label: new_label}.
        labels : int, str, iterable or mapping or function, optional
            Integer or list of values usable as the collection of labels for an Axis. If this is mapping, it must be
            {old_label: new_label}. If it is a function, it must be a function accepting a single argument (a
            label) and returning a single value. This argument must not be used if axis is a mapping.
        inplace : bool, optional
            Whether to modify the original object or return a new array and leave the original intact.
            Defaults to False.
        **kwargs :
            `axis`=`labels` for each axis you want to set labels.

        Returns
        -------
        Array
            Array with modified labels.

        Warnings
        --------
        Not passing a mapping but the complete list of new labels as the 'labels' argument must be done with caution.
        Make sure that the order of new labels corresponds to the exact same order of previous labels.

        See Also
        --------
        AxisCollection.set_labels

        Examples
        --------
        >>> a = ndtest('nat=BE,FO;sex=M,F')
        >>> a
        nat\sex  M  F
             BE  0  1
             FO  2  3
        >>> a.set_labels('sex', ['Men', 'Women'])
        nat\sex  Men  Women
             BE    0      1
             FO    2      3

        when passing a single string as labels, it will be interpreted to create the list of labels, so that one can
        use the same syntax than during axis creation.

        >>> a.set_labels('sex', 'Men,Women')
        nat\sex  Men  Women
             BE    0      1
             FO    2      3

        to replace only some labels, one must give a mapping giving the new label for each label to replace

        >>> a.set_labels('sex', {'M': 'Men'})
        nat\sex  Men  F
             BE    0  1
             FO    2  3

        to transform labels by a function, use any function accepting and returning a single argument:

        >>> a.set_labels('nat', str.lower)
        nat\sex  M  F
             be  0  1
             fo  2  3

        to replace labels for several axes at the same time, one should give a mapping giving the new labels for each
        changed axis

        >>> a.set_labels({'sex': 'Men,Women', 'nat': 'Belgian,Foreigner'})
          nat\sex  Men  Women
          Belgian    0      1
        Foreigner    2      3

        or use keyword arguments

        >>> a.set_labels(sex='Men,Women', nat='Belgian,Foreigner')
          nat\sex  Men  Women
          Belgian    0      1
        Foreigner    2      3

        one can also replace some labels in several axes by giving a mapping of mappings

        >>> a.set_labels({'sex': {'M': 'Men'}, 'nat': {'BE': 'Belgian'}})
        nat\sex  Men  F
        Belgian    0  1
             FO    2  3

        when there is no ambiguity (two or more axes have the same labels), it is possible to give a mapping
        between old and new labels

        >>> a.set_labels({'M': 'Men', 'BE': 'Belgian'})
        nat\sex  Men  F
        Belgian    0  1
             FO    2  3
        """
        axes = self.axes.set_labels(axis, labels, **kwargs)
        if inplace:
            self.axes = axes
            return self
        else:
            return Array(self.data, axes)

[docs]    def astype(self, dtype, order='K', casting='unsafe', subok=True, copy=True) -> 'Array':
        return Array(self.data.astype(dtype, order, casting, subok, copy), self.axes)
    astype.__doc__ = np.ndarray.astype.__doc__

[docs]    def shift(self, axis, n=1) -> 'Array':
        r"""Shift the cells of the array n-times to the right along axis.

        Parameters
        ----------
        axis : int, str or Axis
            Axis for which we want to perform the shift.
        n : int, optional
            Number of cells to shift. Defaults to 1.

        Returns
        -------
        Array

        See Also
        --------
        Array.roll : cells which are pushed "outside of the axis" are reintroduced on the opposite side of the axis
                      instead of being dropped.

        Examples
        --------
        >>> arr = ndtest('sex=M,F;year=2019..2021')
        >>> arr
        sex\year  2019  2020  2021
               M     0     1     2
               F     3     4     5
        >>> arr.shift('year')
        sex\year  2020  2021
               M     0     1
               F     3     4
        >>> arr.shift('year', n=-1)
        sex\year  2019  2020
               M     1     2
               F     4     5
        """
        axis = self.axes[axis]
        if n > 0:
            return self[axis.i[:-n]].set_labels(axis, axis.labels[n:])
        elif n < 0:
            return self[axis.i[-n:]].set_labels(axis, axis.labels[:n])
        else:
            return self[:]

[docs]    def roll(self, axis=None, n=1) -> 'Array':
        r"""Roll the cells of the array n-times to the right along axis. Cells which would be pushed "outside of the
        axis" are reintroduced on the opposite side of the axis.

        Parameters
        ----------
        axis : int, str or Axis, optional
            Axis along which to roll. Defaults to None (all axes).
        n : int or Array, optional
            Number of positions to roll. Defaults to 1. Use a negative integers to roll left.
            If n is an Array the number of positions rolled can vary along the axes of n.

        Returns
        -------
        Array

        See Also
        --------
        Array.shift : cells which are pushed "outside of the axis" are dropped instead of being reintroduced on the
                       opposite side of the axis.

        Examples
        --------
        >>> arr = ndtest('sex=M,F;year=2019..2021')
        >>> arr
        sex\year  2019  2020  2021
               M     0     1     2
               F     3     4     5
        >>> arr.roll('year')
        sex\year  2019  2020  2021
               M     2     0     1
               F     5     3     4

        One can also roll by a different amount depending on another axis

        >>> # let us roll by 1 for men and by 2 for women
        >>> n = sequence(arr.sex, initial=1)
        >>> n
        sex  M  F
             1  2
        >>> arr.roll('year', n)
        sex\year  2019  2020  2021
               M     2     0     1
               F     4     5     3
        """
        if isinstance(n, (int, np.integer)):
            axis_idx = None if axis is None else self.axes.index(axis)
            return Array(np.roll(self.data, n, axis=axis_idx), self.axes)
        else:
            if not isinstance(n, Array):
                raise TypeError("n should either be an integer or an Array")
            if axis is None:
                raise TypeError("axis may not be None if n is an Array")
            axis = self.axes[axis]
            seq = sequence(axis)
            return self[axis.i[(seq - n) % len(axis)]]

    # TODO: add support for groups as axis (like aggregates)
    # eg a.diff(X.year[2018:]) instead of a[2018:].diff(X.year)
[docs]    def diff(self, axis=-1, d=1, n=1, label='upper') -> 'Array':
        r"""Compute the n-th order discrete difference along a given axis.

        The first order difference is given by out[n] = a[n + 1] - a[n] along the given axis, higher order differences
        are calculated by using diff recursively.

        Parameters
        ----------
        axis : int, str, Group or Axis, optional
            Axis or group along which the difference is taken. Defaults to the last axis.
        d : int, optional
            Periods to shift for forming difference. Defaults to 1.
        n : int, optional
            The number of times values are differenced. Defaults to 1.
        label : {'lower', 'upper'}, optional
            The new labels in `axis` will have the labels of either the array being subtracted ('lower') or the array
            it is subtracted from ('upper'). Defaults to 'upper'.

        Returns
        -------
        Array
            The n-th order differences. The shape of the output is the same as `a` except for `axis` which is smaller
            by `n` * `d`.

        Examples
        --------
        >>> a = ndtest('sex=M,F;type=type1,type2,type3').cumsum('type')
        >>> a
        sex\type  type1  type2  type3
               M      0      1      3
               F      3      7     12
        >>> a.diff()
        sex\type  type2  type3
               M      1      2
               F      4      5
        >>> a.diff(n=2)
        sex\type  type3
               M      1
               F      1
        >>> a.diff('sex')
        sex\type  type1  type2  type3
               F      3      6      9
        >>> a.diff(a.type['type2':])
        sex\type  type3
               M      2
               F      5
        """
        if isinstance(axis, Group):
            array = self[axis]
            axis = array.axes[axis.axis]
        else:
            array = self
        for _ in range(n):
            axis_obj = array.axes[axis]
            left = array[axis_obj.i[d:]]
            right = array[axis_obj.i[:-d]]
            if label == 'upper':
                right = right.ignore_labels(axis)
            else:
                left = left.ignore_labels(axis)
            array = left - right
        return array

    # XXX: this is called pct_change in Pandas (but returns the same results, not results * 100, which I find silly).
    # Maybe change_rate would be better (because growth is not always positive)?
[docs]    def growth_rate(self, axis=-1, d=1, label='upper') -> 'Array':
        r"""Compute the growth along a given axis.

        Roughly equivalent to a.diff(axis, d, label) / a[axis.i[:-d]]

        Parameters
        ----------
        axis : int, str, Group or Axis, optional
            Axis or group along which the difference is taken. Defaults to the last axis.
        d : int, optional
            Periods to shift for forming difference. Defaults to 1.
        label : {'lower', 'upper'}, optional
            The new labels in `axis` will have the labels of either
            the array being subtracted ('lower') or the array it is
            subtracted from ('upper'). Defaults to 'upper'.

        Returns
        -------
        Array

        Examples
        --------
        >>> data = [[4, 5, 4, 6, 9], [2, 4, 3, 0, 0]]
        >>> a = Array(data, "sex=F,M; year=2017..2021")
        >>> a
        sex\year  2017  2018  2019  2020  2021
               F     4     5     4     6     9
               M     2     4     3     0     0
        >>> a.growth_rate()
        sex\year  2018   2019  2020  2021
               F  0.25   -0.2   0.5   0.5
               M   1.0  -0.25  -1.0   0.0
        >>> a.growth_rate(label='lower')
        sex\year  2017   2018  2019  2020
               F  0.25   -0.2   0.5   0.5
               M   1.0  -0.25  -1.0   0.0
        >>> a.growth_rate(d=2)
        sex\year  2019  2020  2021
               F   0.0   0.2  1.25
               M   0.5  -1.0  -1.0

        It works on any axis, not just time-based axes

        >>> a.growth_rate('sex')
        sex\year  2017  2018   2019  2020  2021
               M  -0.5  -0.2  -0.25  -1.0  -1.0

        Or part of axes

        >>> a.growth_rate(a.year[2017:])
        sex\year  2018   2019  2020  2021
               F  0.25   -0.2   0.5   0.5
               M   1.0  -0.25  -1.0   0.0
        """
        if isinstance(axis, Group):
            array = self[axis]
            axis = array.axes[axis.axis]
        else:
            array = self
            axis = array.axes[axis]
        diff = array.diff(axis=axis, d=d, label=label)
        # replace 0/0 by 0/inf to avoid a nan (and a warning)
        shifted_array = np.where(diff.data == 0, inf, array.shift(axis, n=d).data)
        return Array(diff.data / shifted_array, diff.axes)

[docs]    def compact(self, display=False, name='array') -> 'Array':
        r"""Detect and remove "useless" axes (ie axes for which values are constant over the whole axis).

        Parameters
        ----------
        display : bool, optional
            Whether to display a message with the name of constant axes which were discarded. Defaults to False.
        name : str, optional
            Name to use in the message if `display` is True. Defaults to "array".

        Returns
        -------
        Array or scalar
            Array with constant axes removed.

        Examples
        --------
        >>> a = Array([[1, 2],
        ...            [1, 2]], [Axis('sex=M,F'), Axis('nat=BE,FO')])
        >>> a
        sex\nat  BE  FO
              M   1   2
              F   1   2
        >>> a.compact()
        nat  BE  FO
              1   2
        """
        res = self
        compacted_axes = []
        for axis in res.axes:
            axis_first_value = res[axis.i[0]]
            if (res == axis_first_value).all():
                res = axis_first_value
                compacted_axes.append(axis.name)
        if display and compacted_axes:
            print(f"{name} was constant over: {', '.join(compacted_axes)}")
        return res

[docs]    def combine_axes(self, axes=None, sep='_', wildcard=False) -> 'Array':
        r"""Combine several axes into one.

        Parameters
        ----------
        axes : tuple, list, AxisCollection of axes or list of combination of those or dict, optional
            axes to combine. Tuple, list or AxisCollection will combine several axes into one. To chain several axes
            combinations, pass a list of tuple/list/AxisCollection of axes. To set the name(s) of resulting axis(es),
            use a {(axes, to, combine): 'new_axis_name'} dictionary. Defaults to all axes.
        sep : str, optional
            delimiter to use for combining. Defaults to '_'.
        wildcard : bool, optional
            whether to produce a wildcard axis even if the axes to combine are not. This is much faster,
            but loose axes labels.

        Returns
        -------
        Array
            Array with combined axes.

        Examples
        --------
        >>> arr = ndtest((2, 3))
        >>> arr
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
        >>> arr.combine_axes()
        a_b  a0_b0  a0_b1  a0_b2  a1_b0  a1_b1  a1_b2
                 0      1      2      3      4      5
        >>> arr.combine_axes(sep='/')
        a/b  a0/b0  a0/b1  a0/b2  a1/b0  a1/b1  a1/b2
                 0      1      2      3      4      5
        >>> arr = ndtest((2, 2, 2, 2))
        >>> arr
         a   b  c\d  d0  d1
        a0  b0   c0   0   1
        a0  b0   c1   2   3
        a0  b1   c0   4   5
        a0  b1   c1   6   7
        a1  b0   c0   8   9
        a1  b0   c1  10  11
        a1  b1   c0  12  13
        a1  b1   c1  14  15
        >>> arr.combine_axes(('a', 'c'))
          a_c  b\d  d0  d1
        a0_c0   b0   0   1
        a0_c0   b1   4   5
        a0_c1   b0   2   3
        a0_c1   b1   6   7
        a1_c0   b0   8   9
        a1_c0   b1  12  13
        a1_c1   b0  10  11
        a1_c1   b1  14  15
        >>> arr.combine_axes({('a', 'c'): 'ac'})
           ac  b\d  d0  d1
        a0_c0   b0   0   1
        a0_c0   b1   4   5
        a0_c1   b0   2   3
        a0_c1   b1   6   7
        a1_c0   b0   8   9
        a1_c0   b1  12  13
        a1_c1   b0  10  11
        a1_c1   b1  14  15

        # make several combinations at once

        >>> arr.combine_axes([('a', 'c'), ('b', 'd')])
        a_c\b_d  b0_d0  b0_d1  b1_d0  b1_d1
          a0_c0      0      1      4      5
          a0_c1      2      3      6      7
          a1_c0      8      9     12     13
          a1_c1     10     11     14     15
        >>> arr.combine_axes({('a', 'c'): 'ac', ('b', 'd'): 'bd'})
        ac\bd  b0_d0  b0_d1  b1_d0  b1_d1
        a0_c0      0      1      4      5
        a0_c1      2      3      6      7
        a1_c0      8      9     12     13
        a1_c1     10     11     14     15
        """
        if axes is None:
            axes = {tuple(self.axes): None}
        elif isinstance(axes, AxisCollection):
            axes = {tuple(axes): None}
        elif isinstance(axes, (list, tuple)):
            # checks for nested tuple/list
            if all(isinstance(axis, (list, tuple, AxisCollection)) for axis in axes):
                axes = {tuple(axes_to_combine): None for axes_to_combine in axes}
            else:
                axes = {tuple(axes): None}
        # axes should be a dict at this time
        assert isinstance(axes, dict)

        transposed_axes = self.axes[:]
        for axes_to_combine in axes.keys():
            # transpose all axes next to each other, using index of first axis
            axes_to_combine = self.axes[axes_to_combine]
            axes_indices = [transposed_axes.index(axis) for axis in axes_to_combine]
            min_axis_index = min(axes_indices)
            transposed_axes = transposed_axes - axes_to_combine
            transposed_axes = transposed_axes[:min_axis_index] + axes_to_combine + transposed_axes[min_axis_index:]
        transposed = self.transpose(transposed_axes)
        # XXX: I think this might be problematic if axes to combine are given by position instead of by name/object
        new_axes = transposed.axes.combine_axes(axes, sep=sep, wildcard=wildcard)
        return transposed.reshape(new_axes)

[docs]    def split_axes(self, axes=None, sep='_', names=None, regex=None, sort=False, fill_value=nan) -> 'Array':
        r"""Split axes and returns a new array.

        Parameters
        ----------
        axes : int, str, Axis or any combination of those
            axes to split. All labels *must* contain the given delimiter string. To split several axes at once, pass
            a list or tuple of axes to split. To set the names of resulting axes, use a {'axis_to_split': (new, axes)}
            dictionary. Defaults to all axes whose name contains the `sep` delimiter.
        sep : str, optional
            delimiter to use for splitting. Defaults to '_'.
            When `regex` is provided, the delimiter is only used on `names` if given as one string or on axis name if
            `names` is None.
        names : str or list of str, optional
            names of resulting axes. Defaults to None.
        regex : str, optional
            use regex instead of delimiter to split labels. Defaults to None.
        sort : bool, optional
            Whether to sort the combined axis before splitting it. When all combinations of labels are present in
            the combined axis, sorting is faster than not sorting. Defaults to False.
        fill_value : scalar or Array, optional
            Value to use for missing values when the combined axis does not contain all combination of labels.
            Defaults to NaN.

        Returns
        -------
        Array

        Examples
        --------
        >>> arr = ndtest((2, 3))
        >>> arr
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5
        >>> combined = arr.combine_axes()
        >>> combined
        a_b  a0_b0  a0_b1  a0_b2  a1_b0  a1_b1  a1_b2
                 0      1      2      3      4      5
        >>> combined.split_axes()
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5

        Split labels using regex

        >>> combined = ndtest('a_b=a0b0..a1b2')
        >>> combined
        a_b  a0b0  a0b1  a0b2  a1b0  a1b1  a1b2
                0     1     2     3     4     5
        >>> combined.split_axes('a_b', regex=r'(\w{2})(\w{2})')
        a\b  b0  b1  b2
         a0   0   1   2
         a1   3   4   5

        Split several axes at once

        >>> combined = ndtest('a_b=a0_b0..a1_b1; c_d=c0_d0..c1_d1')
        >>> combined
        a_b\c_d  c0_d0  c0_d1  c1_d0  c1_d1
          a0_b0      0      1      2      3
          a0_b1      4      5      6      7
          a1_b0      8      9     10     11
          a1_b1     12     13     14     15
        >>> # equivalent to combined.split_axes() which split all axes whose name contains the `sep` delimiter.
        >>> combined.split_axes(['a_b', 'c_d'])
         a   b  c\d  d0  d1
        a0  b0   c0   0   1
        a0  b0   c1   2   3
        a0  b1   c0   4   5
        a0  b1   c1   6   7
        a1  b0   c0   8   9
        a1  b0   c1  10  11
        a1  b1   c0  12  13
        a1  b1   c1  14  15
        >>> combined.split_axes({'a_b': ('A', 'B'), 'c_d': ('C', 'D')})
         A   B  C\D  d0  d1
        a0  b0   c0   0   1
        a0  b0   c1   2   3
        a0  b1   c0   4   5
        a0  b1   c1   6   7
        a1  b0   c0   8   9
        a1  b0   c1  10  11
        a1  b1   c0  12  13
        a1  b1   c1  14  15
        """
        array = self.sort_labels(axes) if sort else self
        # TODO: do multiple axes split in one go
        axes = array.axes._prepare_split_axes(axes, names, sep)
        for axis, names in axes.items():
            axis = array.axes[axis]
            split_axes, split_labels = axis.split(sep, names, regex, return_labels=True)

            axis_index = array.axes.index(axis)
            new_axes = array.axes[:axis_index] + split_axes + array.axes[axis_index + 1:]
            # fast path when all combinations of labels are present in the combined axis
            all_combinations_present = AxisCollection(split_axes).size == len(np.unique(axis.labels))
            if all_combinations_present and sort:
                array = array.reshape(new_axes)
            else:
                if all_combinations_present:
                    res = empty(new_axes, dtype=array.dtype)
                else:
                    res = full(new_axes, fill_value=fill_value, dtype=common_dtype((array, fill_value)))
                if axis.name is not None:
                    if names is None:
                        names = axis.name.split(sep)
                    # Rename axis to make sure we broadcast correctly. We should NOT use sep here, but rather '_'
                    # must be kept in sync with the default sep of _adv_keys_to_combined_axis_la_keys
                    new_axis_name = '_'.join(names)
                    if new_axis_name != axis.name:
                        array = array.rename(axis, new_axis_name)
                res.points[split_labels] = array
                array = res
        return array
    split_axis = renamed_to(split_axes, 'split_axis', raise_error=True)

[docs]    def reverse(self, axes=None) -> 'Array':
        r"""
        Reverse axes of an array.

        Parameters
        ----------
        axes : int, str, Axis or any combination of those
            axes to reverse. If None, all axes are reversed. Defaults to None.

        Returns
        -------
        Array
            Array with passed `axes` reversed.

        Examples
        --------
        >>> arr = ndtest((2, 2, 2))
        >>> arr
         a  b\c  c0  c1
        a0   b0   0   1
        a0   b1   2   3
        a1   b0   4   5
        a1   b1   6   7

        Reverse one axis

        >>> arr.reverse('c')
         a  b\c  c1  c0
        a0   b0   1   0
        a0   b1   3   2
        a1   b0   5   4
        a1   b1   7   6

        Reverse several axes

        >>> arr.reverse(('a', 'c'))
         a  b\c  c1  c0
        a1   b0   5   4
        a1   b1   7   6
        a0   b0   1   0
        a0   b1   3   2

        Reverse all axes

        >>> arr.reverse()
         a  b\c  c1  c0
        a1   b1   7   6
        a1   b0   5   4
        a0   b1   3   2
        a0   b0   1   0
        """
        if axes is None:
            axes = self.axes
        else:
            axes = self.axes[axes]
        if not isinstance(axes, AxisCollection):
            axes = AxisCollection(axes)
        reversed_axes = tuple(axis[::-1] for axis in axes)
        return self[reversed_axes]

    # TODO: add excluded argument (to pass to vectorize but we must also compute res_axes / broadcasted arguments
    #       accordingly and handle it when axes is not None)
    #     excluded : set, optional
    #         Set of strings or integers representing the positional or keyword arguments for which the function
    #         will not be vectorized. These will be passed directly to the `transform` function unmodified.
[docs]    def apply(self, transform, *args, by=None, axes=None, dtype=None, ascending=True,
              **kwargs) -> Union['Array', Scalar, Tuple['Array', ...]]:
        r"""
        Apply a transformation function to array elements.

        Parameters
        ----------
        transform : function
            Function to apply. This function will be called in turn with each element of the array as the first
            argument and must return an Array, scalar or tuple.
            If returning arrays the axes of those arrays must be the same for all calls to the function.
        *args
            Extra arguments to pass to the function.
        by : str, int or Axis or tuple/list/AxisCollection of them, optional
            Axis or axes along which to iterate. The function will thus be called with arrays having all axes not
            mentioned. Defaults to None (all axes). Mutually exclusive with the `axes` argument.
        axes : str, int or Axis or tuple/list/AxisCollection of them, optional
            Axis or axes the arrays passed to the function will have. Defaults to None (the function is given
            scalars). Mutually exclusive with the `by` argument.
        dtype : type or list of types, optional
            Output(s) data type(s). Defaults to None (inspect all output values to infer it automatically).
        ascending : bool, optional
            Whether to iterate the axes in ascending order (from start to end). Defaults to True.
        **kwargs
            Extra keyword arguments are passed to the function (as keyword arguments).

        Returns
        -------
        Array or scalar, or tuple of them
            Axes will be the union of those in axis and those of values returned by the function.

        Examples
        --------
        First let us define a test array

        >>> arr = Array([[0, 2, 1],
        ...              [3, 1, 5]], 'a=a0,a1;b=b0..b2')
        >>> arr
        a\b  b0  b1  b2
         a0   0   2   1
         a1   3   1   5

        Here is a simple function we would like to apply to each element of the array.
        Note that this particular example should rather be written as: arr ** 2
        as it is both more concise and much faster.

        >>> def square(x):
        ...     return x ** 2
        >>> arr.apply(square)
        a\b  b0  b1  b2
         a0   0   4   1
         a1   9   1  25

        Functions can also be applied along some axes:

        >>> # this is equivalent to (but much slower than): arr.sum('a')
        ... arr.apply(sum, axes='a')
        b  b0  b1  b2
            3   3   6
        >>> # this is equivalent to (but much slower than): arr.sum_by('a')
        ... arr.apply(sum, by='a')
        a  a0  a1
            3   9

        Applying the function along some axes will return an array with the
        union of those axes and the axes of the returned values. For example,
        let us define a function which returns the k highest values of an array.

        >>> def topk(a, k=2):
        ...     return a.sort_values(ascending=False).ignore_labels().i[:k]
        >>> arr.apply(topk, by='a')
        a\b*  0  1
          a0  2  1
          a1  5  3

        Other arguments can be passed to the function:

        >>> arr.apply(topk, 3, by='a')
        a\b*  0  1  2
          a0  2  1  0
          a1  5  3  1

        or by using keyword arguments:

        >>> arr.apply(topk, by='a', k=3)
        a\b*  0  1  2
          a0  2  1  0
          a1  5  3  1

        If the function returns several values (as a tuple), the result will be a tuple of arrays. For example,
        let us define a function which decomposes an array into its mean and the difference to that mean:

        >>> def mean_decompose(a):
        ...     mean = a.mean()
        ...     return mean, a - mean
        >>> mean_by_a, diff_to_mean = arr.apply(mean_decompose, by='a')
        >>> mean_by_a
        a   a0   a1
           1.0  3.0
        >>> diff_to_mean
        a\b    b0    b1   b2
         a0  -1.0   1.0  0.0
         a1   0.0  -2.0  2.0
        """
        if axes is not None:
            if by is not None:
                raise ValueError("cannot specify both `by` and `axes` arguments in Array.apply")
            by = self.axes - axes

        # XXX: we could go one step further than vectorize and support a array of callables which would be broadcasted
        #      with the other arguments. I don't know whether that would actually help because I think it always
        #      possible to emulate that with a single callable with an extra argument (eg type) which dispatches to
        #      potentially different callables. It might be more practical & efficient though.
        if by is None:
            otypes = [dtype] if isinstance(dtype, type) else dtype
            vfunc = np.vectorize(transform, otypes=otypes)
            # XXX: we should probably handle excluded here
            # raw_bcast_args, raw_bcast_kwargs, res_axes = make_args_broadcastable((self,) + args, kwargs)
            raw_bcast_args, raw_bcast_kwargs, res_axes = ((self,) + args, kwargs, self.axes)
            res_data = vfunc(*raw_bcast_args, **raw_bcast_kwargs)
            if isinstance(res_data, tuple):
                return tuple(Array(res_arr, res_axes) for res_arr in res_data)
            else:
                return Array(res_data, res_axes)
        else:
            by = self.axes[by]

            values = (self,) + args + tuple(kwargs.values())
            first_kw = 1 + len(args)
            kwnames = tuple(kwargs.keys())
            key_values = [(k, transform(*a_and_kwa[:first_kw], **dict(zip(kwnames, a_and_kwa[first_kw:]))))
                          for k, a_and_kwa in zip_array_items(values, by, ascending)]
            first_key, first_value = key_values[0]
            if isinstance(first_value, tuple):
                # assume all other values are the same shape
                tuple_length = len(first_value)
                res_arrays = [stack({key: value[i] for key, value in key_values}, axes=by, dtype=dtype,
                                    res_axes=get_axes(first_value[i]).union(by))
                              for i in range(tuple_length)]
                # transpose back axis where it was
                return tuple(res_arr.transpose(self.axes & res_arr.axes) for res_arr in res_arrays)
            else:
                res_axes = get_axes(first_value).union(by)
                res_arr = stack(key_values, axes=by, dtype=dtype, res_axes=res_axes)

                # transpose back axis where it was
                return res_arr.transpose(self.axes & res_arr.axes)

[docs]    def apply_map(self, mapping, dtype=None) -> Union['Array', Scalar, Tuple['Array', ...]]:
        r"""
        Apply a transformation mapping to array elements.

        Parameters
        ----------
        mapping : mapping (dict)
            Mapping to apply to values of the array.
            A mapping (dict) must have the values to transform as keys and the new values as values, that is:
            {<oldvalue1>: <newvalue1>, <oldvalue2>: <newvalue2>, ...}.
        dtype : type, optional
            Output dtype. Defaults to None (inspect all output values to infer it automatically).

        Returns
        -------
        Array
            Axes will be the same as the original array axes.

        Notes
        -----
        To apply a transformation given as an Array (with current values as labels on one axis of
        the array and desired values as the array values), you can use: ``mapping_arr[original_arr]``.

        Examples
        --------
        First let us define a test array

        >>> arr = Array([[0, 2, 1],
        ...              [3, 1, 5]], 'a=a0,a1;b=b0..b2')
        >>> arr
        a\b  b0  b1  b2
         a0   0   2   1
         a1   3   1   5

        Now, assuming for a moment that the values of our test array above were in fact some numeric representation of
        names and we had the correspondence to the actual names stored in a dictionary:

        >>> code_to_names = {0: 'foo', 1: 'bar', 2: 'baz',
        ...                  3: 'boo', 4: 'far', 5: 'faz'}

        We could get back an array with the actual names by using:

        >>> arr.apply_map(code_to_names)
        a\b   b0   b1   b2
         a0  foo  baz  bar
         a1  boo  bar  faz
        """
        def transform(v):
            return mapping.get(v, v)
        return self.apply(transform, dtype=dtype)


class LArray(Array):
    """Deprecated name of :class:`Array`.

    Creating an instance emits a FutureWarning and then initializes the object exactly like Array does.
    """

    def __init__(self, *args, **kwargs):
        message = "LArray has been renamed as Array."
        # stacklevel=2: report the warning at the place where LArray(...) is instantiated
        warnings.warn(message, category=FutureWarning, stacklevel=2)
        super().__init__(*args, **kwargs)


def larray_equal(a1, a2):
    """Deprecated way to compare two arrays; use ``Array.equals()`` instead.

    Emits a FutureWarning, converts `a1` using asarray and returns ``a1.equals(a2)``.
    When `a1` cannot be converted, False is returned instead of propagating the error.
    """
    warnings.warn("larray_equal() is deprecated. Use Array.equals() instead.", FutureWarning, stacklevel=2)
    try:
        left = asarray(a1)
    except Exception:
        # deliberate best effort: something which cannot be converted to an Array is reported as "not equal"
        return False
    else:
        return left.equals(a2)


def larray_nan_equal(a1, a2):
    """Deprecated way to compare two arrays considering NaNs as equal; use ``Array.equals()`` instead.

    Emits a FutureWarning, converts `a1` using asarray and returns ``a1.equals(a2, nans_equal=True)``.
    When `a1` cannot be converted, False is returned instead of propagating the error.
    """
    warnings.warn("larray_nan_equal() is deprecated. Use Array.equals() instead.", FutureWarning, stacklevel=2)
    try:
        left = asarray(a1)
    except Exception:
        # deliberate best effort: something which cannot be converted to an Array is reported as "not equal"
        return False
    else:
        return left.equals(a2, nans_equal=True)


def asarray(a, meta=None) -> Array:
    r"""
    Return `a` as an Array, converting it if necessary.

    An Array is returned as is (or as a copy carrying `meta` when `meta` is given). Objects with a
    ``__larray__`` method are converted by calling it, pandas DataFrames go through ``from_frame`` and
    anything else is passed to the Array constructor.

    Parameters
    ----------
    a : array-like
        Input array to convert into an Array.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    >>> # NumPy array
    >>> np_arr = np.arange(6).reshape((2,3))
    >>> asarray(np_arr)
    {0}*\{1}*  0  1  2
            0  0  1  2
            1  3  4  5
    >>> # Pandas dataframe
    >>> data = {'normal'  : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    ...         'reverse' : pd.Series([3., 2., 1.], index=['a', 'b', 'c'])}
    >>> df = pd.DataFrame(data)
    >>> asarray(df)
    {0}\{1}  normal  reverse
          a     1.0      3.0
          b     2.0      2.0
          c     3.0      1.0
    """
    if isinstance(a, Array):
        # already an Array: only make a copy when different metadata must be attached
        if meta is None:
            return a
        with_meta = a.copy()
        with_meta.meta = meta
        return with_meta

    if hasattr(a, '__larray__'):
        converted = a.__larray__()
        if meta is not None:
            converted.meta = meta
        return converted

    if isinstance(a, pd.DataFrame):
        from larray.inout.pandas import from_frame
        return from_frame(a, meta=meta)

    return Array(a, meta=meta)


# Former name of asarray(), registered through renamed_to() with raise_error=True.
# NOTE(review): presumably calling it raises an error pointing to the new name -- confirm in larray.util.misc.
aslarray = renamed_to(asarray, 'aslarray', raise_error=True)


def _check_axes_argument(func):
    """Decorate `func` so that an int or Axis given as second positional argument is rejected.

    This catches calls like ``func(axis1, axis2)`` or ``func(2, 3)`` where several axes (or lengths) were
    passed as separate arguments instead of a single list or tuple.

    Raises
    ------
    ValueError
        When the wrapped function is called with an int or Axis as its second positional argument.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> Array:
        second_looks_like_axis = len(args) > 1 and isinstance(args[1], (int, Axis))
        if not second_looks_like_axis:
            return func(*args, **kwargs)
        raise ValueError(f"If you want to pass several axes or dimension lengths to {func.__name__}, you must pass "
                         f"them as a list (using []) or tuple (using()).")
    return wrapper


@_check_axes_argument
def zeros(axes, title=None, dtype=float, order='C', meta=None) -> Array:
    r"""Create an array with the given axes where every cell is zero.

    Parameters
    ----------
    axes : int, tuple of int, Axis or tuple/list/AxisCollection of Axis
        Axes of the new array, or a shape.
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : data-type, optional
        Data type of the new array, e.g., `numpy.int8`. Default is `numpy.float64`.
    order : {'C', 'F'}, optional
        Memory layout of multidimensional data: C-contiguous (row-wise, the default) or Fortran-contiguous
        (column-wise).
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    >>> zeros('nat=BE,FO;sex=M,F')
    nat\sex    M    F
         BE  0.0  0.0
         FO  0.0  0.0
    >>> zeros([(['BE', 'FO'], 'nat'),
    ...        (['M', 'F'], 'sex')])
    nat\sex    M    F
         BE  0.0  0.0
         FO  0.0  0.0
    >>> nat = Axis('nat=BE,FO')
    >>> sex = Axis('sex=M,F')
    >>> zeros([nat, sex])
    nat\sex    M    F
         BE  0.0  0.0
         FO  0.0  0.0
    """
    # FIXME: the error message is wrong (stackdepth is wrong) because of _check_axes_argument
    meta = _handle_meta(meta, title)
    axes = AxisCollection(axes)
    data = np.zeros(axes.shape, dtype=dtype, order=order)
    return Array(data, axes, meta=meta)


def zeros_like(array, title=None, dtype=None, order='K', meta=None) -> Array:
    r"""Create an array of zeros with the same axes as another array.

    Parameters
    ----------
    array : Array
         Array providing the axes of the result.
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : data-type, optional
        Overrides the data type of the result.
    order : {'C', 'F', 'A', or 'K'}, optional
        Overrides the memory layout of the result.
        'C' means C-order, 'F' means F-order, 'A' means 'F' if `array` is Fortran contiguous, 'C' otherwise.
        'K' (default) means match the layout of `array` as closely as possible.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    >>> a = ndtest((2, 3))
    >>> zeros_like(a)
    a\b  b0  b1  b2
     a0   0   0   0
     a1   0   0   0
    """
    meta = _handle_meta(meta, title)
    data = np.zeros_like(array, dtype=dtype, order=order)
    return Array(data, array.axes, meta=meta)


@_check_axes_argument
def ones(axes, title=None, dtype=float, order='C', meta=None) -> Array:
    r"""Create an array with the given axes where every cell is one.

    Parameters
    ----------
    axes : int, tuple of int, Axis or tuple/list/AxisCollection of Axis
        Axes of the new array, or a shape.
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : data-type, optional
        Data type of the new array, e.g., `numpy.int8`.  Default is `numpy.float64`.
    order : {'C', 'F'}, optional
        Memory layout of multidimensional data: C-contiguous (row-wise, the default) or Fortran-contiguous
        (column-wise).
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    >>> nat = Axis('nat=BE,FO')
    >>> sex = Axis('sex=M,F')
    >>> ones([nat, sex])
    nat\sex    M    F
         BE  1.0  1.0
         FO  1.0  1.0
    """
    meta = _handle_meta(meta, title)
    axes = AxisCollection(axes)
    data = np.ones(axes.shape, dtype=dtype, order=order)
    return Array(data, axes, meta=meta)


def ones_like(array, title=None, dtype=None, order='K', meta=None) -> Array:
    r"""Create an array of ones with the same axes as another array.

    Parameters
    ----------
    array : Array
        Array providing the axes of the result.
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : data-type, optional
        Overrides the data type of the result.
    order : {'C', 'F', 'A', or 'K'}, optional
        Overrides the memory layout of the result.
        'C' means C-order, 'F' means F-order, 'A' means 'F' if `array` is Fortran contiguous, 'C' otherwise.
        'K' (default) means match the layout of `array` as closely as possible.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    >>> a = ndtest((2, 3))
    >>> ones_like(a)
    a\b  b0  b1  b2
     a0   1   1   1
     a1   1   1   1
    """
    meta = _handle_meta(meta, title)
    same_axes = array.axes
    data = np.ones_like(array, dtype=dtype, order=order)
    return Array(data, same_axes, meta=meta)


@_check_axes_argument
def empty(axes, title=None, dtype=float, order='C', meta=None) -> Array:
    r"""Create an array with the given axes without initializing its data (cells hold arbitrary values).

    Parameters
    ----------
    axes : int, tuple of int, Axis or tuple/list/AxisCollection of Axis
        Axes of the new array, or a shape.
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : data-type, optional
        Data type of the new array, e.g., `numpy.int8`.  Default is `numpy.float64`.
    order : {'C', 'F'}, optional
        Memory layout of multidimensional data: C-contiguous (row-wise, the default) or Fortran-contiguous
        (column-wise).
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    >>> nat = Axis('nat=BE,FO')
    >>> sex = Axis('sex=M,F')
    >>> empty([nat, sex])  # doctest: +SKIP
    nat\sex                   M                   F
         BE  2.47311483356e-315  2.47498446195e-315
         FO                 0.0  6.07684618082e-31
    """
    meta = _handle_meta(meta, title)
    axes = AxisCollection(axes)
    data = np.empty(axes.shape, dtype=dtype, order=order)
    return Array(data, axes, meta=meta)


def empty_like(array, title=None, dtype=None, order='K', meta=None) -> Array:
    r"""Create an uninitialized array (cells hold arbitrary values) with the same axes as another array.

    Parameters
    ----------
    array : Array
        Array providing the axes of the result.
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : data-type, optional
        Overrides the data type of the result. Defaults to the data type of array.
    order : {'C', 'F', 'A', or 'K'}, optional
        Overrides the memory layout of the result.
        'C' means C-order, 'F' means F-order, 'A' means 'F' if `array` is Fortran contiguous, 'C' otherwise.
        'K' (default) means match the layout of `array` as closely as possible.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    >>> a = ndtest((3, 2))
    >>> empty_like(a)   # doctest: +SKIP
    a\b                  b0                  b1
     a0  2.12199579097e-314  6.36598737388e-314
     a1  1.06099789568e-313  1.48539705397e-313
     a2  1.90979621226e-313  2.33419537056e-313
    """
    meta = _handle_meta(meta, title)
    # cannot use empty() because order == 'K' is not understood
    data = np.empty_like(array.data, dtype=dtype, order=order)
    return Array(data, array.axes, meta=meta)


# We cannot use @_check_axes_argument here because an integer fill_value would be considered as an error
def full(axes, fill_value, title=None, dtype=None, order='C', meta=None) -> Array:
    r"""Create an array with the given axes where every cell is set to fill_value.

    Parameters
    ----------
    axes : int, tuple of int, Axis or tuple/list/AxisCollection of Axis
        Axes of the new array, or a shape.
    fill_value : scalar or Array
        Value assigned to the whole array.
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : data-type, optional
        Data type of the new array. Default is the data type of fill_value.
    order : {'C', 'F'}, optional
        Memory layout of multidimensional data: C-contiguous (row-wise, the default) or Fortran-contiguous
        (column-wise).
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Raises
    ------
    ValueError
        If fill_value is an Axis, which indicates that several axes were passed as separate arguments.

    Examples
    --------
    >>> nat = Axis('nat=BE,FO')
    >>> sex = Axis('sex=M,F')
    >>> full([nat, sex], 42.0)
    nat\sex     M     F
         BE  42.0  42.0
         FO  42.0  42.0
    >>> initial_value = ndtest([sex])
    >>> initial_value
    sex  M  F
         0  1
    >>> full([nat, sex], initial_value)
    nat\sex  M  F
         BE  0  1
         FO  0  1
    """
    meta = _handle_meta(meta, title)
    if isinstance(fill_value, Axis):
        raise ValueError("If you want to pass several axes or dimension lengths to full, you must pass them as a "
                         "list (using []) or tuple (using()).")
    # without an explicit dtype, use the one numpy infers for fill_value
    result_dtype = np.asarray(fill_value).dtype if dtype is None else dtype
    result = empty(axes, dtype=result_dtype, order=order, meta=meta)
    result[:] = fill_value
    return result


def full_like(array, fill_value, title=None, dtype=None, order='K', meta=None) -> Array:
    r"""Return an array with the same axes and type as input array and filled with fill_value.

    Parameters
    ----------
    array : Array
        Input array.
    fill_value : scalar or Array
        Value to fill the array
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : data-type, optional
        Overrides the data type of the result. Defaults to the data type of array.
    order : {'C', 'F', 'A', or 'K'}, optional
        Overrides the memory layout of the result.
        'C' means C-order, 'F' means F-order, 'A' means 'F' if `array` is Fortran contiguous, 'C' otherwise.
        'K' (default) means match the layout of `array` as closely as possible.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    >>> a = ndtest((2, 3))
    >>> full_like(a, 5)
    a\b  b0  b1  b2
     a0   5   5   5
     a1   5   5   5
    """
    meta = _handle_meta(meta, title)
    # cannot use full() because order == 'K' is not understood
    # cannot use np.full_like() because it would not handle Array fill_value
    # `order` must be forwarded explicitly, otherwise the requested memory layout is silently ignored
    res = empty_like(array, dtype=dtype, order=order, meta=meta)
    res[:] = fill_value
    return res


# Types treated as integers by the isinstance() checks of sequence(): Python int and numpy integer scalars.
_integer_types = (int, np.integer)


# XXX: would it be possible to generalize to multiple axes?
[docs]def sequence(axis, initial=0, inc=None, mult=None, func=None, axes=None, title=None, meta=None) -> Array:
    r"""
    Create an array by sequentially applying modifications to the array along axis.

    The value for each label in axis will be given by sequentially transforming the value for the previous label.
    This transformation on the previous label value consists of applying the function "func" on that value if provided,
    or of multiplying it by mult and then incrementing it by inc otherwise.

    Parameters
    ----------
    axis : axis definition (Axis, str, int)
        Axis along which the sequence is built. An axis definition can be passed as a string. An int will be
        interpreted as the length for a new anonymous axis.
    initial : scalar or Array, optional
        Value for the first label of axis. Defaults to 0.
    inc : scalar, Array, optional
        Value to increment the previous value by. Defaults to 1 unless mult is provided (in which case it defaults
        to 0).
    mult : scalar, Array, optional
        Value to multiply the previous value by. Defaults to None.
    func : function/callable, optional
        Function to apply to the previous value. Defaults to None.
        Note that this is much slower than using inc and/or mult.
    axes : int, tuple of int or tuple/list/AxisCollection of Axis, optional
        Axes of the result. Defaults to the union of axes present in other arguments.
    title : str, optional
        Deprecated. See 'meta' below.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array
        Array with `axes` as axes when given, otherwise with the axes of initial, inc and mult (excluding axis)
        followed by axis.

    Examples
    --------
    >>> year = Axis('year=2016..2019')
    >>> sex = Axis('sex=M,F')
    >>> sequence(year)
    year  2016  2017  2018  2019
             0     1     2     3
    >>> sequence('year=2016..2019')
    year  2016  2017  2018  2019
             0     1     2     3
    >>> sequence(year, 1.0, 0.5)
    year  2016  2017  2018  2019
           1.0   1.5   2.0   2.5
    >>> sequence(year, 1.0, mult=1.5)
    year  2016  2017  2018   2019
           1.0   1.5  2.25  3.375
    >>> inc = Array([1, 2], [sex])
    >>> inc
    sex  M  F
         1  2
    >>> sequence(year, 1.0, inc)
    sex\year  2016  2017  2018  2019
           M   1.0   2.0   3.0   4.0
           F   1.0   3.0   5.0   7.0
    >>> mult = Array([2, 3], [sex])
    >>> mult
    sex  M  F
         2  3
    >>> sequence(year, 1.0, mult=mult)
    sex\year  2016  2017  2018  2019
           M   1.0   2.0   4.0   8.0
           F   1.0   3.0   9.0  27.0
    >>> initial = Array([3, 4], [sex])
    >>> initial
    sex  M  F
         3  4
    >>> sequence(year, initial, 1)
    sex\year  2016  2017  2018  2019
           M     3     4     5     6
           F     4     5     6     7
    >>> sequence(year, initial, mult=2)
    sex\year  2016  2017  2018  2019
           M     3     6    12    24
           F     4     8    16    32
    >>> sequence(year, initial, inc, mult)
    sex\year  2016  2017  2018  2019
           M     3     7    15    31
           F     4    14    44   134
    >>> def modify(prev_value):
    ...     return prev_value / 2
    >>> sequence(year, 8, func=modify)
    year  2016  2017  2018  2019
             8     4     2     1
    >>> sequence(3)
    {0}*  0  1  2
          0  1  2
    >>> sequence('year', axes=(sex, year))
    sex\year  2016  2017  2018  2019
           M     0     1     2     3
           F     0     1     2     3

    sequence can be used as the inverse of growth_rate:

    >>> a = Array([1.0, 2.0, 3.0, 3.0], year)
    >>> a
    year  2016  2017  2018  2019
           1.0   2.0   3.0   3.0
    >>> g = a.growth_rate() + 1
    >>> g
    year  2017  2018  2019
           2.0   1.5   1.0
    >>> sequence(year, a[2016], mult=g)
    year  2016  2017  2018  2019
           1.0   2.0   3.0   3.0
    """
    meta = _handle_meta(meta, title)

    # resolve the documented defaults: inc is 1 unless mult is given (then 0); mult is the neutral value 1
    if inc is None:
        inc = 1 if mult is None else 0
    if mult is None:
        mult = 1

    # make sure we have an axis object
    # (when axes is given, axis is resolved against axes further below instead)
    if axes is None:
        axis = _make_axis(axis)

    # True when mult is the integer 1, i.e. the multiplication has no effect
    no_mult = isinstance(mult, _integer_types) and mult == 1

    # fast path for the most common case (integer inc and initial value, no mult, no func, no axes)
    if (isinstance(inc, _integer_types)
            and isinstance(initial, _integer_types)
            and no_mult
            and func is None
            and axes is None):
        # stop is not included
        stop = initial + inc * len(axis)
        data = np.arange(initial, stop, inc)
        return Array(data, axis, meta=meta)

    # axes of col (as given by get_axes) without axis
    def strip_axes(col):
        return get_axes(col) - axis

    # whether a is an Array which has axis among its axes
    def has_axis(a, axis):
        return isinstance(a, Array) and axis in a.axes

    # build an array along axis with initial in the first position of axis and a in all the other positions
    def array_or_full(a, axis, initial):
        dt = common_dtype((a, initial))
        r = empty(strip_axes(initial) | strip_axes(a) | axis, dtype=dt)
        r[axis.i[0]] = initial
        if isinstance(a, Array) and axis in a.axes:
            # not using axis.i[1:] because a could have less ticks
            # on axis than axis
            r[axis.i[1:]] = a[axis[axis.labels[1]:]]
        else:
            r[axis.i[1:]] = a
        return r

    if axes is None:
        # we need to remove axis if present, because it might be incompatible
        axes = strip_axes(initial) | strip_axes(inc) | strip_axes(mult) | axis
    else:
        axes = AxisCollection(axes)
        if axis not in axes:
            axis = _make_axis(axis)
        # use the axis object from axes from here on
        axis = axes[axis]

    res_dtype = common_dtype((initial, inc, mult))
    res = empty(axes, dtype=res_dtype, meta=meta)

    # func takes precedence over inc and mult: each value is func applied to the previous one
    if func is not None:
        res[axis.i[0]] = prev_value = initial
        for i in range(1, len(axis)):
            res[axis.i[i]] = prev_value = func(prev_value)
    # inc only (integer) == fastpath but with axes not None
    elif res_dtype.kind == 'i' and np.isscalar(inc) and np.isscalar(initial) and np.isscalar(mult) and mult == 1:
        res[:] = sequence(axis, initial, inc)
    # inc only (non integer scalar)
    elif np.isscalar(inc) and np.isscalar(initial) and np.isscalar(mult) and mult == 1:
        # -1 because stop is included in linspace
        stop = initial + inc * (len(axis) - 1)
        data = np.linspace(initial, stop=stop, num=len(axis))
        res[:] = Array(data, axis)
    # inc only (array)
    elif np.isscalar(mult) and mult == 1:
        inc_array = array_or_full(inc, axis, initial)
        # TODO: when axis is None, this is inefficient (inc_array.cumsum() is the result)
        res[axis.i[0]] = initial
        res[axis.i[1:]] = inc_array.cumsum(axis)[axis.i[1:]]
    # mult only (scalar or array)
    elif np.isscalar(inc) and inc == 0:
        mult_array = array_or_full(mult, axis, initial)
        res[axis.i[0]] = initial
        # TODO: when axis is None, this is inefficient (mult_array.cumprod() is the result)
        res[axis.i[1:]] = mult_array.cumprod(axis)[axis.i[1:]]
    # both inc and mult defined but constant (scalars or axis not present)
    elif not has_axis(inc, axis) and not has_axis(mult, axis):
        # FIXME: the assert is broken (not has_axis is not what we want)
        assert ((np.isscalar(inc) and inc != 0) or not has_axis(inc, axis)) and \
               (np.isscalar(mult) or not has_axis(mult, axis))
        mult_array = array_or_full(mult, axis, 1.0)
        cum_mult = mult_array.cumprod(axis)[axis.i[1:]]
        res[axis.i[0]] = initial

        # a[0] = initial
        # a[1] = initial * mult ** 1                   + inc * mult ** 0
        # a[2] = initial * mult ** 2 + inc * mult ** 1 + inc * mult ** 0
        # ...
        # each term includes the sum of a geometric series:
        # series_sum = inc + inc * mult ** 1 + ... + inc * mult ** (i-1)
        # which can be computed using:
        # series_sum = inc * ((1 - mult ** i) / (1 - mult))
        # but if mult is 1, a different formula is necessary:
        # series_sum = i * inc

        # a[i] = initial * cum_mult[i] + inc * cum_mult[i - 1]

        # the case "mult == 1" was already handled above but we still need to handle the case where mult is
        # an array and *one cell* == 1
        res_where_not_1 = ((1 - cum_mult) / (1 - mult)) * inc + initial * cum_mult
        if isinstance(mult, Array) and any(mult == 1):
            from larray.core.ufuncs import where

            res_where_1 = Array(np.linspace(initial, initial + inc * (len(axis) - 1), len(axis)), axis)
            res[axis.i[1:]] = where(mult == 1, res_where_1, res_where_not_1)
        else:
            res[axis.i[1:]] = res_where_not_1
    else:
        assert has_axis(inc, axis) or has_axis(mult, axis)
        # This case is more complicated to vectorize. It seems
        # doable (probably by adding a fictive axis), but let us wait until
        # someone requests it. The trick is to be able to write this:
        # a[i] =  initial * prod(mult[j])
        #                  j=1..i
        #      +   inc[1] * prod(mult[j])
        #                  j=2..i
        #      + ...
        #      + inc[i-2] * prod(mult[j])
        #                  j=i-1..i
        #      + inc[i-1] * mult[i]
        #      + inc[i]

        # a[0] = initial
        # a[1] = initial * mult[1]
        #      +  inc[1]
        # a[2] = initial * mult[1] * mult[2]
        #      +  inc[1]           * mult[2]
        #      +  inc[2]
        # ...
        # a[4] = initial * mult[1] * mult[2] * mult[3] * mult[4]
        #      +  inc[1]           * mult[2] * mult[3] * mult[4]
        #      +  inc[2]                     * mult[3] * mult[4]
        #      +  inc[3]                               * mult[4]
        #      +  inc[4]

        # a[1:] = initial * cumprod(mult[1:]) + ...
        # value of a for the i-th label of axis if a is an Array with that axis, a itself otherwise
        def index_if_exists(a, axis, i):
            if isinstance(a, Array) and axis in a.axes:
                a_axis = a.axes[axis]
                return a[a_axis[axis.labels[i]]]
            else:
                return a
        # CHECK: try something like:
        # def index_if_exists(a, igroup):
        #     axis = igroup.axis
        #     if isinstance(a, Array) and axis in a.axes:
        #         a_axis = a.axes[axis]
        #         return a[a_axis[axis.labels[i]]]
        #     else:
        #         return a
        # for i in axis.i[1:]:
        #     i_mult = index_if_exists(mult, i)
        #     i_inc = index_if_exists(inc, i)
        #     res[i] = res[i - 1] * i_mult + i_inc
        # non vectorized fallback: compute each position from the previous one
        res[axis.i[0]] = prev_value = initial
        for i in range(1, len(axis)):
            i_mult = index_if_exists(mult, axis, i)
            i_inc = index_if_exists(inc, axis, i)
            res[axis.i[i]] = prev_value = prev_value * i_mult + i_inc
    return res


# Former name of sequence(), registered through renamed_to() with raise_error=True.
# NOTE(review): presumably calling it raises an error pointing to the new name -- confirm in larray.util.misc.
create_sequential = renamed_to(sequence, 'create_sequential', raise_error=True)


@_check_axes_argument
def ndrange(axes, start=0, title=None, dtype=int):
    """Deprecated: use sequence() or ndtest() instead.

    Emits a FutureWarning and returns ``ndtest(axes, start=start, title=title, dtype=dtype)``.
    """
    # stacklevel=3 (not 2) because this function is called through the wrapper created by
    # _check_axes_argument: level 2 would attribute the warning to that wrapper instead of the user code.
    warnings.warn("ndrange() is deprecated. Use sequence() or ndtest() instead.", FutureWarning, stacklevel=3)
    return ndtest(axes, start=start, title=title, dtype=dtype)


@_check_axes_argument
def ndtest(shape_or_axes, start=0, label_start=0, title=None, dtype=int, meta=None) -> Array:
    r"""Create an array for testing purposes, filled with consecutive values.

    When a shape is given, axes are named by single letters starting from 'a' and their labels follow
    the '{axis_name}{label_pos}' pattern (e.g. 'a0').
    Values start from `start` and increase by steps of 1.

    Parameters
    ----------
    shape_or_axes : int, tuple/list of int, str, single axis or tuple/list/AxisCollection of axes
        If int or tuple/list of int, represents the shape of the array to create.
        In that case, default axes are generated.
        If string, it is used to generate axes (see :py:class:`AxisCollection` constructor).
    start : int or float, optional
        First value of the array.
    label_start : int, optional
        Label index for each axis is `label_start + position`. `label_start` defaults to 0.
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : type or np.dtype, optional
        Type of resulting array.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array

    Examples
    --------
    Create test array by passing a shape

    >>> ndtest(6)
    a  a0  a1  a2  a3  a4  a5
        0   1   2   3   4   5
    >>> ndtest((2, 3))
    a\b  b0  b1  b2
     a0   0   1   2
     a1   3   4   5
    >>> ndtest((2, 3), label_start=1)
    a\b  b1  b2  b3
     a1   0   1   2
     a2   3   4   5
    >>> ndtest((2, 3), start=2)
    a\b  b0  b1  b2
     a0   2   3   4
     a1   5   6   7
    >>> ndtest((2, 3), dtype=float)
    a\b   b0   b1   b2
     a0  0.0  1.0  2.0
     a1  3.0  4.0  5.0

    Create test array by passing axes

    >>> ndtest("nat=BE,FO;sex=M,F")
    nat\sex  M  F
         BE  0  1
         FO  2  3
    >>> nat = Axis("nat=BE,FO")
    >>> sex = Axis("sex=M,F")
    >>> ndtest([nat, sex])
    nat\sex  M  F
         BE  0  1
         FO  2  3
    """
    meta = _handle_meta(meta, title)
    # XXX: try to come up with a syntax where start is before "end".
    # For ndim > 1, I cannot think of anything nice.
    if isinstance(shape_or_axes, int):
        # a single length is a 1D shape
        shape_or_axes = (shape_or_axes,)

    is_shape = isinstance(shape_or_axes, (list, tuple)) and all([isinstance(dim, int) for dim in shape_or_axes])
    if is_shape:
        # TODO: move this to a class method on AxisCollection
        # axes are named 'a', 'b', ... so there cannot be more dimensions than letters
        assert len(shape_or_axes) <= 26
        generated_axes = []
        for position, length in enumerate(shape_or_axes):
            name = chr(ord('a') + position)
            labels = [f'{name}{label_idx}' for label_idx in range(label_start, label_start + length)]
            generated_axes.append(Axis(labels, name))
        shape_or_axes = generated_axes

    axes = shape_or_axes if isinstance(shape_or_axes, AxisCollection) else AxisCollection(shape_or_axes)
    values = np.arange(start, start + axes.size, dtype=dtype)
    return Array(values.reshape(axes.shape), axes, meta=meta)


def kth_diag_indices(shape, k):
    r"""
    Return the positional indices of the `k`-th diagonal of an array of the given shape.

    Parameters
    ----------
    shape : tuple of int
        Shape of the array for which diagonal indices are wanted.
    k : int
        Offset of the diagonal from the main diagonal. A positive value refers to an upper diagonal and a negative
        value to a lower diagonal. Values other than 0 are only supported for 2D shapes.

    Returns
    -------
    tuple of numpy.ndarray
        One array of integer positions per dimension. The arrays are empty when the requested diagonal lies
        outside of the shape.

    Raises
    ------
    NotImplementedError
        If `k` is not 0 and `shape` does not have exactly 2 dimensions.
    """
    ndim = len(shape)
    if k == 0:
        return np.diag_indices(min(shape), ndim=ndim)
    if ndim != 2:
        raise NotImplementedError("k != 0 and len(axes) != 2")

    nrows, ncols = shape
    # The length of an off-diagonal depends on both dimensions (and not only on the smallest one), otherwise
    # diagonals of non-square shapes are truncated, e.g. the k=-1 diagonal of a (3, 2) shape has 2 elements.
    if k > 0:
        length = min(nrows, ncols - k)
        first_row, first_col = 0, k
    else:
        length = min(nrows + k, ncols)
        first_row, first_col = -k, 0
    # a diagonal entirely outside of the shape has no element
    offsets = np.arange(max(length, 0))
    return first_row + offsets, first_col + offsets


def diag(a, k=0, axes=(0, 1), ndim=2, split=True) -> Array:
    r"""
    Extract a diagonal or construct a diagonal array.

    Parameters
    ----------
    a : Array
        If `a` has 2 dimensions or more, return a copy of its `k`-th diagonal.
        If `a` has 1 dimension, return an array with `ndim` dimensions on the `k`-th diagonal.
    k : int, optional
        Offset of the diagonal from the main diagonal.  Can be positive or negative.  Defaults to main diagonal (0).
        Only 0 is supported when `a` has 1 dimension or when the diagonal is taken along more than two axes.
    axes : tuple or list or AxisCollection of axes references, optional
        Axes along which the diagonals should be taken.  Use None for all axes. Defaults to the first two axes (0, 1).
    ndim : int, optional
        Target number of dimensions when constructing a diagonal array from an array without axes names/labels.
        Defaults to 2.
    split : bool, optional
        Whether to try to split the axis name and labels. Defaults to True.

    Returns
    -------
    Array
        The extracted diagonal or constructed diagonal array.

    Raises
    ------
    NotImplementedError
        If `k` is not 0 and either `a` has 1 dimension or the diagonal is taken along more than two axes.

    Examples
    --------
    >>> nat = Axis('nat=BE,FO')
    >>> sex = Axis('sex=M,F')
    >>> a = ndtest([nat, sex], start=1)
    >>> a
    nat\sex  M  F
         BE  1  2
         FO  3  4
    >>> d = diag(a)
    >>> d
    nat_sex  BE_M  FO_F
                1     4
    >>> diag(d)
    nat\sex  M  F
         BE  1  0
         FO  0  4
    >>> a = ndtest(sex, start=1)
    >>> a
    sex  M  F
         1  2
    >>> diag(a)
    sex\sex  M  F
          M  1  0
          F  0  2
    """
    if a.ndim == 1:
        # construct a diagonal array from a 1D array
        if k != 0:
            raise NotImplementedError("k != 0 not supported for 1D arrays")
        axis = a.axes[0]
        axis_name = axis.name
        if split and isinstance(axis_name, str) and '_' in axis_name:
            # a combined axis (e.g. nat_sex=BE_M,FO_F) is split back into one axis per name component
            axes_names = axis_name.split('_')
            axes_labels = list(zip(*np.char.split(axis.labels, '_')))
            res_axes = [Axis(labels, name) for labels, name in zip(axes_labels, axes_names)]
            values = a
        else:
            # avoid checking the axis name and labels (it expects a value like sex_sex=M_M,F_F instead of sex=M,F)
            # TODO: in theory, this should work, but something breaks (probably those damn axes matching rules)
            # a = a.rename(0, None).ignore_labels()
            values = a.data
            res_axes = [axis] + [axis.copy() for _ in range(ndim - 1)]
        res = zeros(res_axes, dtype=values.dtype)
        res.ipoints[kth_diag_indices(res.shape, k)] = values
        return res
    else:
        # extract a diagonal. The axes must be resolved *before* counting them, otherwise axes=None cannot be
        # combined with k != 0 (len(None) raises a TypeError)
        diag_axes = a.axes if axes is None else a.axes[axes]
        if k != 0 and len(diag_axes) > 2:
            raise NotImplementedError("k != 0 and len(axes) > 2")
        axes_indices = kth_diag_indices(diag_axes.shape, k)
        indexer = tuple(axis.i[indices] for axis, indices in zip(diag_axes, axes_indices))
        return a.points[indexer]


@_check_axes_argument
def labels_array(axes, title=None, meta=None) -> Array:
    r"""Return an array with specified axes and the combination of
    corresponding labels as values.

    Parameters
    ----------
    axes : Axis or collection of Axis
    title : str, optional
        Deprecated. See 'meta' below.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array
        With a single axis, the values are the labels of that axis. With several axes, an extra axis named 'axis'
        (labeled with the names of the given axes) is appended, and it holds one label per given axis for each
        combination of labels.

    Examples
    --------
    >>> nat = Axis('nat=BE,FO')
    >>> sex = Axis('sex=M,F')
    >>> labels_array(sex)
    sex  M  F
         M  F
    >>> labels_array((nat, sex))
    nat  sex\axis  nat  sex
     BE         M   BE    M
     BE         F   BE    F
     FO         M   FO    M
     FO         F   FO    F
    """
    # Alternative output format which was considered (one combined value per cell):
    # >>> labels_array((nat, sex))
    # nat\sex     M     F
    #      BE  BE,M  BE,F
    #      FO  FO,M  FO,F
    meta = _handle_meta(meta, title)
    axes = AxisCollection(axes)
    if len(axes) > 1:
        # one extra axis, labeled with the axes names, holds the individual labels of each combination
        res_axes = axes + Axis(axes.names, 'axis')
        res_data = np.empty(res_axes.shape, dtype=object)
        res_data.flat[:] = list(product(*axes.labels))
        # XXX: I wonder if it wouldn't be better to return LGroups or a similar object which would display as "a,b" but
        #      where each label is stored separately.
        # flat_data = np.array([p for p in product(*axes.labels)])
        # res_data = flat_data.reshape(axes.shape)
    else:
        res_axes = axes
        res_data = axes[0].labels
    return Array(res_data, res_axes, meta=meta)


def identity(axis):
    r"""Deprecated function kept only to point users to its replacements.

    Parameters
    ----------
    axis : any
        Ignored.

    Raises
    ------
    NotImplementedError
        Always. The message explains what to use instead.
    """
    raise NotImplementedError("identity(axis) is deprecated. In most cases, you can now use the axis directly. "
                              "For example, 'identity(age) < 10' can be replaced by 'age < 10'. "
                              "In other cases, you should use labels_array(axis) instead.")


def eye(rows, columns=None, k=0, title=None, dtype=None, meta=None) -> Array:
    r"""Return a 2-D array with ones on the diagonal and zeros elsewhere.

    Parameters
    ----------
    rows : int or str or Axis or tuple or list or length 2 AxisCollection
        Rows of the output (if int, str or Axis) or rows and columns (if tuple, list or AxisCollection).
    columns : int or str or Axis, optional
        Columns of the output. Defaults to the value of `rows` if it is an int, str or Axis.
        Must be None when `rows` already defines both rows and columns.
    k : int, optional
        Index of the diagonal: 0 (the default) refers to the main diagonal, a positive value refers to an upper
        diagonal, and a negative value to a lower diagonal.
    title : str, optional
        Deprecated. See 'meta' below.
    dtype : data-type, optional
        Data-type of the returned array. Defaults to float.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array of shape (rows, columns)
        An array where all elements are equal to zero, except for the k-th diagonal, whose values are equal to one.

    Examples
    --------
    >>> eye('sex=M,F')
    sex\sex    M    F
          M  1.0  0.0
          F  0.0  1.0
    >>> eye(2, dtype=int)
    {0}*\{1}*  0  1
            0  1  0
            1  0  1
    >>> age = Axis('age=0..2')
    >>> sex = Axis('sex=M,F')
    >>> eye(age, sex)
    age\sex    M    F
          0  1.0  0.0
          1  0.0  1.0
          2  0.0  0.0
    >>> eye(3, k=1)
    {0}*\{1}*    0    1    2
            0  0.0  1.0  0.0
            1  0.0  0.0  1.0
            2  0.0  0.0  0.0
    """
    meta = _handle_meta(meta, title)
    if isinstance(rows, AxisCollection):
        assert columns is None, "columns must be None when rows is an AxisCollection"
        axes = rows
    elif isinstance(rows, (tuple, list)):
        assert columns is None, "columns must be None when rows is a tuple or list"
        axes = AxisCollection(rows)
    else:
        if columns is None:
            # square result: use a copy of the rows axis so that the two axes are distinct objects
            columns = rows.copy() if isinstance(rows, Axis) else rows
        axes = AxisCollection([rows, columns])
    nrows, ncolumns = axes.shape[0], axes.shape[1]
    data = np.eye(nrows, ncolumns, k, dtype)
    return Array(data, axes, meta=meta)


# XXX: we could change the syntax to use *args
#      => less punctuation but forces kwarg
#      => potentially longer
#      => unsure for now. The most important point is that it should be consistent with other functions.
# stack(a1, a2, axis=Axis('M,F', 'sex'))
# stack(('M', a1), ('F', a2), axis='sex')
# stack(a1, a2, axis='sex')

# we could do something like (it would make from_lists obsolete for 1D arrays):
# stack('sex', M=1, F=2)

# which is almost equivalent to:

# stack(M=1, F=2, axis='sex')

# we cannot support both the current syntax (unmodified) AND the first version, but we could support the second one.

# we would only have to explain that they cannot do:

# stack(0=1, 1=2, axis='age')
# stack(0A=1, 1B=2, axis='code')

# but should use this instead:

# stack({0: 1, 1: 2}, 'age=0,1')
# stack({'0A': 1, '1B': 2}, 'code=0A,1B')

# stack({0: 1, 1: 2}, age)
# stack({'0A': 1, '1B': 2}, code)

# or this, if we decide to support *args instead:

# stack((0, 1), (1, 2), axis='age')
# stack(('0A', 1), ('1B', 2), axis='code')

# stack(M=1, F=2, axis='sex')

# is much nicer than:

# from_lists(['sex', 'M', 'F'],
#            [   '',   1,   2])

# for 2D arrays, from_lists and stack would be mostly as ugly and for 3D+ from_lists stays nicer even though I still do
# not like it much.

# stack('nationality',
#       BE=stack('sex', M=0, F=1),
#       FR=stack('sex', M=2, F=3),
#       DE=stack('sex', M=4, F=5))
#
# from_lists([['nationality\\sex', 'M', 'F'],
#             [              'BE',   0,   1],
#             [              'FR',   2,   3],
#             [              'DE',   4,   5]])

# SUPER SLOPPY (I hate this, but I bet users would like it):

# stack(BE_M=0, BE_F=1,
#       FR_M=2, FR_F=3,
#       DE_M=4, DE_F=5, axis='nationality_sex')

# stack(('nationality', 'sex'), {
#       ('BE', 'M'): 0, ('BE', 'F'): 1,
#       ('FR', 'M'): 2, ('FR', 'F'): 3,
#       ('DE', 'M'): 4, ('DE', 'F'): 5})

# for 2D, I think the best compromise is the nested dict (especially for python 3.7+):

# stack({'BE': {'M': 0, 'F': 1},
#        'FR': {'M': 2, 'F': 3},
#        'DE': {'M': 4, 'F': 5}}, axes=('nationality', 'sex'))

# we could make this valid too (combine pos and labels) but I don't think it is worth it unless it comes
# naturally from the implementation:

# stack({'BE': {'M,F': [0, 1]},
#        'FR': {'M,F': [2, 3]},
#        'DE': {'M,F': [4, 5]}}, axes=('nationality', 'sex'))

# It looks especially nice if the labels have been extracted to variables:

# BE, FR, DE = nat['BE,FR,DE']
# M, F = sex['M,F']

# stack({BE: {M: 0, F: 1},
#        FR: {M: 2, F: 3},
#        DE: {M: 4, F: 5}})

# for 3D:

# stack({'a0': {'b0': {'c0':  0, 'c1':  1},
#               'b1': {'c0':  2, 'c1':  3},
#               'b2': {'c0':  4, 'c1':  5}},
#        'a1': {'b0': {'c0':  6, 'c1':  7},
#               'b1': {'c0':  8, 'c1':  9},
#               'b2': {'c0': 10, 'c1': 11}}},
#       axes=('a', 'b', 'c'))

# a0, a1 = a['a0,a1']
# b0, b1, b2 = b['b0,b1,b2']
# c0, c1 = c['c0,c1']

# stack({a0: {b0: {c0:  0, c1:  1},
#             b1: {c0:  2, c1:  3},
#             b2: {c0:  4, c1:  5}},
#        a1: {b0: {c0:  6, c1:  7},
#             b1: {c0:  8, c1:  9},
#             b2: {c0: 10, c1: 11}}},
#       axes=(a, b, c))

# if we implement:
#     arr[key] = {'a0': 0, 'a1': 1}
# where key must not be related to the "a" axis
# it would make it relatively easy to implement the nested dict syntax I think:
# first do a pass at the structure to get axes (if not provided) then:
#     for k, v in d.items():
#         arr[k] = v
# but that syntax could be annoying if we want to have an array of dicts

# alternatives:

# arr['a0'] = 0; arr['a1'] = 1 # <-- this already works
# arr['a0,a1'] = [0, 1]        # <-- unsure if this works, but we should make it work (it is annoying if we
#                              #     have an array of lists)
# arr[:] = {'a0': 0, 'a1': 1}
# arr[:] = stack({'a0': 0, 'a1': 1}) # <-- not equivalent if a has more labels

# FIXME: move this function elsewhere + update returned type to Union[Array, Session]
@deprecate_kwarg('axis', 'axes')
def stack(elements=None, axes=None, title=None, meta=None, dtype=None, res_axes=None, **kwargs) -> 'Array':
    r"""
    Combine several arrays or sessions along an axis.

    Parameters
    ----------
    elements : tuple, list, dict or Session.
        Elements to stack. Elements can be scalars, arrays, sessions, (label, value) pairs or a {label: value} mapping.

        Stacking a single session will stack all its arrays in a single array.
        Stacking several sessions will take the corresponding arrays in all the sessions and stack them, returning a
        new session. An array missing in a session will be replaced by NaN.
    axes : str, Axis, Group or sequence of Axis, optional
        Axes to create. If None, defaults to a range() axis.
    title : str, optional
        Deprecated. See 'meta' below.
    meta : list of pairs or dict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.
    dtype : type, optional
        Output dtype. Defaults to None (inspect all output values to infer it automatically).
    res_axes : AxisCollection, optional
        Axes of the output. Defaults to None (union of axes of all values and the stacking axes).
    **kwargs
        Elements to stack given as label=value keyword arguments. Cannot be combined with `elements`.

    Returns
    -------
    Array or Session
        A single Array combining input values, or a single Session combining input Sessions.
        The new (stacked) axes will be the last axes of the new array.

    Raises
    ------
    TypeError
        If both `elements` and keyword arguments are given, if `elements` has an unsupported type or if
        Session objects are mixed with other kinds of values.

    Examples
    --------
    >>> nat = Axis('nat=BE,FO')
    >>> sex = Axis('sex=M,F')
    >>> arr1 = ones(sex)
    >>> arr1
    sex    M    F
         1.0  1.0
    >>> arr2 = zeros(sex)
    >>> arr2
    sex    M    F
         0.0  0.0

    In case the axis to create has already been defined in a variable (Axis or Group)

    >>> stack({'BE': arr1, 'FO': arr2}, nat)
    sex\nat   BE   FO
          M  1.0  0.0
          F  1.0  0.0

    Otherwise (when one wants to create an axis from scratch), any of these syntaxes works:

    >>> stack([arr1, arr2], 'nat=BE,FO')
    sex\nat   BE   FO
          M  1.0  0.0
          F  1.0  0.0
    >>> stack({'BE': arr1, 'FO': arr2}, 'nat=BE,FO')
    sex\nat   BE   FO
          M  1.0  0.0
          F  1.0  0.0
    >>> stack([('BE', arr1), ('FO', arr2)], 'nat=BE,FO')
    sex\nat   BE   FO
          M  1.0  0.0
          F  1.0  0.0

    When stacking arrays with different axes, the result has the union of all axes present:

    >>> stack({'BE': arr1, 'FO': 0}, nat)
    sex\nat   BE   FO
          M  1.0  0.0
          F  1.0  0.0

    Creating an axis without name nor labels can be done using:

    >>> stack((arr1, arr2))
    sex\{1}*    0    1
           M  1.0  0.0
           F  1.0  0.0

    When labels are "simple" strings (ie no integers, no string starting with integers, etc.), using keyword
    arguments can be an attractive alternative.

    >>> stack(FO=arr2, BE=arr1, axes=nat)
    sex\nat   BE   FO
          M  1.0  0.0
          F  1.0  0.0

    Without passing an explicit order for labels (or an axis object like above)

    >>> stack(BE=arr1, FO=arr2, axes='nat')   # doctest: +SKIP
    sex\nat   BE   FO
          M  1.0  0.0
          F  1.0  0.0

    One can also stack along several axes

    >>> test = Axis('test=T1,T2')
    >>> stack({('BE', 'T1'): arr1,
    ...        ('BE', 'T2'): arr2,
    ...        ('FO', 'T1'): arr2,
    ...        ('FO', 'T2'): arr1},
    ...       (nat, test))
    sex  nat\test   T1   T2
      M        BE  1.0  0.0
      M        FO  0.0  1.0
      F        BE  1.0  0.0
      F        FO  0.0  1.0

    To stack sessions, let us first create two test sessions. For example suppose we have a session storing the results
    of a baseline simulation:

    >>> from larray import Session
    >>> baseline = Session({'arr1': arr1, 'arr2': arr2})

    and another session with a variant (here we simply added 0.5 to each array)

    >>> variant = Session({'arr1': arr1 + 0.5, 'arr2': arr2 + 0.5})

    then we stack them together

    >>> stacked = stack({'baseline': baseline, 'variant': variant}, 'sessions')
    >>> stacked
    Session(arr1, arr2)
    >>> stacked.arr1
    sex\sessions  baseline  variant
               M       1.0      1.5
               F       1.0      1.5
    >>> stacked.arr2
    sex\sessions  baseline  variant
               M       0.0      0.5
               F       0.0      0.5
    """
    # imported here rather than at module level (presumably to avoid a circular import -- TODO confirm)
    from larray import Session

    # temporary named axes which must be made anonymous again in the final result
    axes_to_anonymize = ()

    meta = _handle_meta(meta, title)

    if elements is not None and kwargs:
        raise TypeError("stack() accepts either keyword arguments OR a collection of elements, not both")

    # normalize axes: axis definition strings ('name=labels') and groups become an Axis, then anything which is
    # not a plain axis name (str) becomes an AxisCollection
    if isinstance(axes, str) and '=' in axes:
        axes = Axis(axes)
    elif isinstance(axes, Group):
        axes = Axis(axes)

    if axes is not None and not isinstance(axes, str):
        axes = AxisCollection(axes)

    # normalize elements: keyword arguments and mappings become an iterable of (key, value) pairs
    if kwargs:
        elements = kwargs.items()

    if isinstance(elements, (dict, Session)):
        elements = elements.items()

    if isinstance(elements, Array):
        # stack the sub-arrays of an existing array along one of its axes (its last axis by default)
        if axes is None:
            axes = -1
        axes = elements.axes[axes]
        items = elements.items(axes)
    # NOTE(review): Session instances have already been replaced by their .items() a few lines above, so this
    #               branch (and its 'array' default axis name) looks unreachable -- confirm the intended behavior
    elif isinstance(elements, Session):
        if axes is None:
            axes = 'array'
        items = elements.items()
    elif isinstance(elements, Iterable):
        # elements are traversed several times below, so one-shot iterables must be materialized
        if not isinstance(elements, Sequence):
            elements = list(elements)

        if all(isinstance(e, tuple) for e in elements):
            # elements are (key, value) pairs
            assert all(len(e) == 2 for e in elements)
            if axes is None or isinstance(axes, str):
                # no labels were given explicitly: the keys define the labels of the new axis
                keys = [k for k, v in elements]
                values = [v for k, v in elements]
                # assert that all keys are indexers
                assert all(np.isscalar(k) or isinstance(k, (Group, tuple)) for k in keys)

                # we need a kludge to support stacking along an anonymous axis because AxisCollection.extend
                # (and thus AxisCollection.union) support for anonymous axes is kinda messy. This needs to happen
                # *before* we compute items, otherwise the IGroups will be on the wrong axis, making result[k] = v
                # a lot slower
                stack_axis = Axis(keys, "___anonymous___" if axes is None else axes)
                if axes is None:
                    axes_to_anonymize = (stack_axis,)
                # FIXME: if res_axes is not None, we should make sure it contains "axes" (with keys in the same order!!)
                #        and extract axes from there, before we compute items, otherwise, we do not work on the
                #        result axis objects, which makes results[k] = v a lot slower
                #        and if keys are not in the same order (or maybe do it systematically?) we will need to
                #        pass via dict like below (translate_and_sort_key) but it will break with duplicate labels,
                #        unless keys are IGroups,
                #        there ought to be a way to sort the k, v efficiently without breaking duplicate labels
                # TODO: add support for more than one axis here
                axes = AxisCollection(stack_axis)
                items = list(zip(stack_axis, values))
            else:
                # explicit axes were given: translate each key to a tuple of positional groups (one per stacking
                # axis, in the order of the axes) so that items can be emitted in the order of the axes labels
                def translate_and_sort_key(key, axes):
                    dict_of_indices = axes._key_to_axis_indices_dict(key)
                    return tuple(IGroup(dict_of_indices[axis], axis=axis) for axis in axes)

                # passing only via _key_to_igroup should be enough if we allow for partial axes
                dict_elements = {translate_and_sort_key(key, axes): value for key, value in elements}
                items = [(k, dict_elements[k]) for k in axes.iter_labels()]
        else:
            # elements are plain values (no keys): they are matched positionally with the labels of the axis
            if axes is None or isinstance(axes, str):
                stack_axis = Axis(len(elements), "___anonymous___" if axes is None else axes)
                if axes is None:
                    axes_to_anonymize = (stack_axis,)
                axes = AxisCollection(stack_axis)
            else:
                # TODO: add support for more than one axis here
                assert axes.ndim == 1 and len(axes[0]) == len(elements)
            items = list(zip(axes[0], elements))
    else:
        elements_type = type(elements).__name__
        raise TypeError(f'unsupported type for arrays: {elements_type}')

    if any(isinstance(v, Session) for k, v in items):
        # stacking sessions: stack each array name separately and return a new Session
        if not all(isinstance(v, Session) for k, v in items):
            raise TypeError("stack() only supports stacking Session with other Session objects")

        array_names = unique_multi(sess.keys() for sess_name, sess in items)

        def stack_one(array_name):
            # best effort: an array which cannot be stacked is replaced by nan in the resulting session
            try:
                return stack({sess_name: sess.get(array_name, nan)
                              for sess_name, sess in items}, axes=axes)
            # TypeError for str arrays, ValueError for incompatible axes, ...
            except Exception:
                return nan

        return Session({array_name: stack_one(array_name) for array_name in array_names}, meta=meta)
    else:
        # values only need to be inspected when the result axes and/or dtype must be inferred from them
        if res_axes is None or dtype is None:
            values = [asarray(v) if not np.isscalar(v) else v
                      for k, v in items]

            if res_axes is None:
                # XXX: with the current semantics of stack, we need to compute the union of axes for values but axis
                #      needs to be added unconditionally. We *might* want to change the semantics to mean either stack
                #      or concat depending on whether the axis already exists.
                #      this would be more convenient for users I think, but would mean one class of error we cannot
                #      detect anymore: if a user unintentionally stacks an array with the axis already present.
                #      (this is very similar to the debate about combining Array.append and Array.extend)
                all_axes = [get_axes(v) for v in values] + [axes]
                res_axes = AxisCollection.union(*all_axes)
            elif not isinstance(res_axes, AxisCollection):
                res_axes = AxisCollection(res_axes)

            if dtype is None:
                # dtype = common_type(values + [fill_value])
                dtype = common_dtype(values)

        # if needs_fill:
        #     result = full(res_axes, fill_value, dtype=dtype, meta=meta)
        # else:
        result = empty(res_axes, dtype=dtype, meta=meta)

        # FIXME: this is *much* faster but it only works for scalars and not for stacking arrays
        # keys = tuple(zip(*[k for k, v in items]))
        # result.points[keys] = values
        for k, v in items:
            result[k] = v

        # drop the temporary name given to axes which should be anonymous
        return result if not axes_to_anonymize else result.rename({a: None for a in axes_to_anonymize})


def get_axes(value) -> AxisCollection:
    r"""Return the axes of `value` if it is an Array, and an empty AxisCollection for any other value."""
    if isinstance(value, Array):
        return value.axes
    return AxisCollection([])


def _strip_shape(shape):
    return tuple(s for s in shape if s != 1)


def _equal_modulo_len1(shape1, shape2):
    r"""Return whether the two shapes are equal once their dimensions of length 1 are ignored."""
    stripped1 = _strip_shape(shape1)
    stripped2 = _strip_shape(shape2)
    return stripped1 == stripped2


# assigning a temporary name to anonymous axes before broadcasting and removing it afterwards is not a good idea after
# all because it copies the axes/change the object, and thus "flatten" wouldn't work with index axes:
# a[ones(a.axes[axes], dtype=bool)]
# but if we had assigned axes names from the start (without dropping them) this wouldn't be a problem.
def make_numpy_broadcastable(values, min_axes=None) -> Tuple[List[Array], AxisCollection]:
    r"""
    Make Arrays (NumPy) broadcastable between them.

    This requires all common axes to be compatible (see Axis class documentation), while extra axes (in any array)
    can have any length.

    * the resulting arrays will have the combination of all axes found in the input arrays, the earlier arrays defining
      the order of axes. Axes with labels take priority over wildcard axes.
    * length 1 wildcard axes will be added for axes not present in input

    Parameters
    ----------
    values : iterable of arrays
        Arrays which must be made (NumPy) broadcastable between them. Expressions (ExprNode) are evaluated using
        the resulting axes and any other value is returned unchanged.
    min_axes : AxisCollection, optional
        Minimum axes the result should have. This is useful either to get extra axes in the result or to get
        the resulting axes in a specific order. Defaults to all input axes, ordered by first appearance.

    Returns
    -------
    arrays : list of arrays
        List of arrays broadcastable between them. Arrays will have the combination of all axes found in the input
        arrays, the earlier arrays defining the order of axes.
    res_axes : AxisCollection
        Union of ``min_axes`` and the axes of all input arrays.

    See Also
    --------
    Axis.iscompatible : tests if axes are compatible between them.
    """
    res_axes = AxisCollection.union(*[get_axes(value) for value in values])
    if min_axes is not None:
        required_axes = min_axes if isinstance(min_axes, AxisCollection) else AxisCollection(min_axes)
        # required axes come first so that they define the order of the resulting axes
        res_axes = required_axes | res_axes

    broadcasted = []
    for value in values:
        if isinstance(value, Array):
            value = value.broadcast_with(res_axes)
        elif isinstance(value, ExprNode):
            value = value.evaluate(res_axes)
        broadcasted.append(value)
    return broadcasted, res_axes


def raw_broadcastable(values, min_axes=None) -> Tuple[Tuple[Any, ...], AxisCollection]:
    r"""
    Do the same as make_numpy_broadcastable but return a tuple where Arrays are replaced by their underlying data.
    """
    bcast_values, res_axes = make_numpy_broadcastable(values, min_axes=min_axes)
    raw_values = []
    for value in bcast_values:
        raw_values.append(value.data if isinstance(value, Array) else value)
    return tuple(raw_values), res_axes


def make_args_broadcastable(args, kwargs=None) -> Tuple[Any, Any, Any]:
    """
    Make args and kwargs (NumPy) broadcastable between them.

    Parameters
    ----------
    args : tuple
        Positional values.
    kwargs : dict, optional
        Keyword values. Defaults to None (no keyword values).

    Returns
    -------
    raw_bcast_args : tuple
        Broadcastable version of `args`, where Arrays are replaced by their underlying data.
    raw_bcast_kwargs : dict
        Broadcastable version of `kwargs` (same keys). Empty if `kwargs` is None.
    res_axes : AxisCollection
        Axes of the broadcasted values.
    """
    # kwargs=None is the documented default and must behave like "no keyword values"
    if kwargs is None:
        kwargs = {}
    # positional and keyword values are broadcasted together, then split again at this position
    first_kw = len(args)
    values = args + tuple(kwargs.values())
    raw_bcast_values, res_axes = raw_broadcastable(values)
    raw_bcast_args = raw_bcast_values[:first_kw]
    raw_bcast_kwargs = dict(zip(kwargs.keys(), raw_bcast_values[first_kw:]))
    return raw_bcast_args, raw_bcast_kwargs, res_axes


def zip_array_values(values, axes=None, ascending=True) -> SequenceZip:
    r"""Return a sequence as if simultaneously iterating on several arrays.

    Parameters
    ----------
    values : sequence of (scalar or Array)
        Values to iterate on. Scalars are repeated as many times as necessary.
    axes : int, str or Axis or tuple of them, optional
        Axis or axes along which to iterate and in which order. All those axes must be compatible (if present) between
        the different values. Defaults to None (union of all axes present in all arrays, in the order they are found).
    ascending : bool, optional
        Whether to iterate the axes in ascending order (from start to end). Defaults to True.

    Returns
    -------
    Sequence

    Examples
    --------
    >>> arr1 = ndtest('a=a0,a1;b=b1,b2')
    >>> arr2 = ndtest('a=a0,a1;c=c1,c2')
    >>> arr1
    a\b  b1  b2
     a0   0   1
     a1   2   3
    >>> arr2
    a\c  c1  c2
     a0   0   1
     a1   2   3
    >>> for a1, a2 in zip_array_values((arr1, arr2), 'a'):
    ...     print("==")
    ...     print(a1)
    ...     print(a2)
    ==
    b  b1  b2
        0   1
    c  c1  c2
        0   1
    ==
    b  b1  b2
        2   3
    c  c1  c2
        2   3

    When the axis to iterate on (`c` in this case) is not present in one of the arrays (arr1), that array is repeated
    for each label of that axis:

    >>> for a1, a2 in zip_array_values((arr1, arr2), arr2.c):
    ...     print("==")
    ...     print(a1)
    ...     print(a2)
    ==
    a\b  b1  b2
     a0   0   1
     a1   2   3
    a  a0  a1
        0   2
    ==
    a\b  b1  b2
     a0   0   1
     a1   2   3
    a  a0  a1
        1   3

    When no `axes` are given, it iterates on the union of all compatible axes (a, b, and c in this case):

    >>> for a1, a2 in zip_array_values((arr1, arr2)):
    ...     print(f"arr1: {a1}, arr2: {a2}")
    arr1: 0, arr2: 0
    arr1: 0, arr2: 1
    arr1: 1, arr2: 0
    arr1: 1, arr2: 1
    arr1: 2, arr2: 2
    arr1: 2, arr2: 3
    arr1: 3, arr2: 2
    arr1: 3, arr2: 3
    """
    def values_with_expand(value, axes, readonly=True, ascending=True):
        if isinstance(value, Array):
            # an Axis axis is not necessarily in array.axes
            expanded = value.expand(axes, readonly=readonly)
            return expanded.values(axes, ascending=ascending)
        else:
            # non-Array values are repeated once per iteration step
            size = axes.size if axes.ndim else 0
            return Repeater(value, size)

    # values are traversed twice below (once to collect axes, once to build the result), which would silently
    # produce an empty result for one-shot iterables (e.g. generators) if they were not materialized
    values = tuple(values)
    values_axes = [get_axes(v) for v in values]

    if axes is None:
        all_iter_axes = values_axes
    else:
        if not isinstance(axes, (tuple, list, AxisCollection)):
            axes = (axes,)

        # transform string axes _definitions_ to objects
        axes = [Axis(axis) if isinstance(axis, str) and '=' in axis else axis
                for axis in axes]

        # get iter axes for all values and transform string axes _references_ to objects
        all_iter_axes = [AxisCollection([value_axes[axis] for axis in axes if axis in value_axes])
                         for value_axes in values_axes]

    common_iter_axes = AxisCollection.union(*all_iter_axes)

    # sequence of tuples (of scalar or arrays)
    return SequenceZip([values_with_expand(v, common_iter_axes, ascending=ascending) for v in values])


def zip_array_items(values, axes=None, ascending=True) -> SequenceZip:
    r"""Return a sequence as if simultaneously iterating on several arrays as well as the current iteration "key".

    Broadcasts all values against each other. Scalars are simply repeated.

    Parameters
    ----------
    values : Iterable
        arrays to iterate on.
    axes : int, str or Axis or tuple of them, optional
        Axis or axes along which to iterate and in which order. Defaults to None (union of all axes present in
        all arrays, in the order they are found).
    ascending : bool, optional
        Whether to iterate the axes in ascending order (from start to end). Defaults to True.

    Returns
    -------
    Sequence
        Sequence of (key, values) pairs, where values is a tuple with one element per input value.

    Examples
    --------
    >>> arr1 = ndtest('a=a0,a1;b=b0,b1')
    >>> arr2 = ndtest('a=a0,a1;c=c0,c1')
    >>> arr1
    a\b  b0  b1
     a0   0   1
     a1   2   3
    >>> arr2
    a\c  c0  c1
     a0   0   1
     a1   2   3
    >>> for k, (a1, a2) in zip_array_items((arr1, arr2), 'a'):
    ...     print("==", k[0], "==")
    ...     print(a1)
    ...     print(a2)
    == a0 ==
    b  b0  b1
        0   1
    c  c0  c1
        0   1
    == a1 ==
    b  b0  b1
        2   3
    c  c0  c1
        2   3
    >>> for k, (a1, a2) in zip_array_items((arr1, arr2), arr2.c):
    ...     print("==", k[0], "==")
    ...     print(a1)
    ...     print(a2)
    == c0 ==
    a\b  b0  b1
     a0   0   1
     a1   2   3
    a  a0  a1
        0   2
    == c1 ==
    a\b  b0  b1
     a0   0   1
     a1   2   3
    a  a0  a1
        1   3
    >>> for k, (a1, a2) in zip_array_items((arr1, arr2)):
    ...     print(k, "arr1: {}, arr2: {}".format(a1, a2))
    (a.i[0], b.i[0], c.i[0]) arr1: 0, arr2: 0
    (a.i[0], b.i[0], c.i[1]) arr1: 0, arr2: 1
    (a.i[0], b.i[1], c.i[0]) arr1: 1, arr2: 0
    (a.i[0], b.i[1], c.i[1]) arr1: 1, arr2: 1
    (a.i[1], b.i[0], c.i[0]) arr1: 2, arr2: 2
    (a.i[1], b.i[0], c.i[1]) arr1: 2, arr2: 3
    (a.i[1], b.i[1], c.i[0]) arr1: 3, arr2: 2
    (a.i[1], b.i[1], c.i[1]) arr1: 3, arr2: 3
    """
    # values are traversed here to compute the keys and again by zip_array_values, so one-shot iterables
    # (e.g. generators) must be materialized first
    values = tuple(values)
    res_axes = AxisCollection.union(*[get_axes(v) for v in values])
    keys = res_axes.iter_labels(axes, ascending=ascending)
    return SequenceZip((keys, zip_array_values(values, axes=axes, ascending=ascending)))


# Callback registered below for floating point errors.
# NOTE(review): the meaning of the argument (3) is defined by float_error_handler_factory (presumably a stack
#               depth/level for the reported location) -- confirm
_default_float_error_handler = float_error_handler_factory(3)


# Importing this module changes the global NumPy floating point error handling: "divide" and "invalid" errors
# call the function registered via np.seterrcall. Both np.seterr and np.seterrcall return the previous
# settings/handler, which are kept in the original_* variables (presumably so that they can be restored).
original_float_error_settings = np.seterr(divide='call', invalid='call')
original_float_error_handler = np.seterrcall(_default_float_error_handler)

# excel IO tools in Python
# - openpyxl: the slowest but most-complete package but still lags behind PHPExcel from which it was ported. despite
#             the drawbacks the API is very complete.
#   biggest drawbacks:
#   * you can get either the "cached" value of cells OR their formulas but NOT BOTH and this is a file-wide setting
#     (data_only=True). if you have an excel file and want to add a sheet to it, you either lose all cached values
#     (which is problematic in many cases since you do not necessarily have linked files) or lose all formulas.
#   * it loses "charts" on read. => cannot append/update a sheet to a file with charts, which is precisely what many
#     users asked. => users need to create their charts using code.
# - xlsxwriter: faster and slightly more feature-complete than openpyxl regarding writing but does not read anything
#               => cannot update an existing file. API seems extremely complete.
# - pyexcelerate: yet faster but also write only. Didn't check whether API is more featured than xlsxwriter or not.
# - xlwings: wraps win32com & equivalent on mac, so can potentially do everything (I guess) but this is SLOW and needs
#            a running excel instance, etc.