Interactive online version: Binder badge

Transforming Arrays (Relabeling, Renaming, Reordering, Combining, Extending, Sorting, …)

Import the LArray library:

[1]:
from larray import *
[2]:
# load the 'demography_eurostat' dataset
demo_eurostat = load_example_data('demography_eurostat')
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-2-43cbc3a4a95d> in <module>
      1 # load the 'demography_eurostat' dataset
----> 2 demo_eurostat = load_example_data('demography_eurostat')

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/example.py in load_example_data(name)
     91     if name not in AVAILABLE_EXAMPLE_DATA.keys():
     92         raise ValueError("example_data must be chosen from list {}".format(list(AVAILABLE_EXAMPLE_DATA.keys())))
---> 93     return la.Session(AVAILABLE_EXAMPLE_DATA[name])

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in __init__(self, *args, **kwargs)
     94             if isinstance(a0, str):
     95                 # assume a0 is a filename
---> 96                 self.load(a0)
     97             else:
     98                 # iterable of tuple or dict-like

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in load(self, fname, names, engine, display, **kwargs)
    426         else:
    427             handler = handler_cls(fname)
--> 428         metadata, objects = handler.read(names, display=display, **kwargs)
    429         for k, v in objects.items():
    430             self[k] = v

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/common.py in read(self, keys, *args, **kwargs)
    128                 print("loading", type, "object", key, "...", end=' ')
    129             try:
--> 130                 res[key] = self._read_item(key, type, *args, **kwargs)
    131             except Exception:
    132                 if not ignore_exceptions:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in _read_item(self, key, type, *args, **kwargs)
    137         else:
    138             raise TypeError()
--> 139         return read_hdf(self.handle, hdf_key, *args, **kwargs)
    140
    141     def _dump_item(self, key, value, *args, **kwargs):

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in read_hdf(filepath_or_buffer, key, fill_value, na, sort_rows, sort_columns, name, **kwargs)
     81             cartesian_prod = writer != 'LArray'
     82             res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value,
---> 83                              parse_header=False, cartesian_prod=cartesian_prod)
     84             if _meta is not None:
     85                 res.meta = _meta

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    241             raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
    242                              'Please call the method sort_axes on the returned array to sort rows or columns')
--> 243         axes_labels = index_to_labels(df.index, sort=False)
    244
    245     # Pandas treats column labels as column names (strings) so we need to convert them to values

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'
[3]:
# get a copy of the 'pop' array from the 'demography_eurostat' dataset
pop = demo_eurostat.pop.copy()
pop
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-05b1a30c03e6> in <module>
      1 # get a copy of the 'pop' array from the 'demography_eurostat' dataset
----> 2 pop = demo_eurostat.pop.copy()
      3 pop

NameError: name 'demo_eurostat' is not defined

Manipulating axes

The Array class offers several methods to manipulate the axes and labels of an array:

  • set_labels: to replace all or some labels of one or several axes.

  • rename: to replace one or several axis names.

  • set_axes: to replace one or several axes.

  • transpose: to modify the order of axes.

  • drop: to remove one or several labels.

  • combine_axes: to combine axes.

  • split_axes: to split one or several axes by splitting their labels and names.

  • reindex: to reorder, add and remove labels of one or several axes.

  • insert: to insert a label at a given position.

Relabeling

Replace all or some labels of an axis:

[4]:
# replace all labels of the 'gender' axis by passing the list of all new labels
pop_new_labels = pop.set_labels('gender', ['Men', 'Women'])
pop_new_labels
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-623309d162e8> in <module>
      1 # replace all labels of the 'gender' axis by passing the list of all new labels
----> 2 pop_new_labels = pop.set_labels('gender', ['Men', 'Women'])
      3 pop_new_labels

NameError: name 'pop' is not defined
[5]:
# set all labels of the 'country' axis to uppercase by passing the function str.upper()
pop_new_labels = pop.set_labels('country', str.upper)
pop_new_labels
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-2c6df1fd25e2> in <module>
      1 # set all labels of the 'country' axis to uppercase by passing the function str.upper()
----> 2 pop_new_labels = pop.set_labels('country', str.upper)
      3 pop_new_labels

NameError: name 'pop' is not defined
[6]:
# replace only one label of the 'gender' axis by passing a dict
pop_new_labels = pop.set_labels('gender', {'Male': 'Men'})
pop_new_labels
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-44062fe81d08> in <module>
      1 # replace only one label of the 'gender' axis by passing a dict
----> 2 pop_new_labels = pop.set_labels('gender', {'Male': 'Men'})
      3 pop_new_labels

NameError: name 'pop' is not defined

See set_labels for more details and examples.

Renaming axes

Rename one axis:

[7]:
# 'rename' returns a copy of the array
pop_new_names = pop.rename('time', 'year')
pop_new_names
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-ebe3a3298518> in <module>
      1 # 'rename' returns a copy of the array
----> 2 pop_new_names = pop.rename('time', 'year')
      3 pop_new_names

NameError: name 'pop' is not defined

Rename several axes at once:

[8]:
pop_new_names = pop.rename({'gender': 'sex', 'time': 'year'})
pop_new_names
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-1c5ebbf525cf> in <module>
----> 1 pop_new_names = pop.rename({'gender': 'sex', 'time': 'year'})
      2 pop_new_names

NameError: name 'pop' is not defined

See rename for more details and examples.

Replacing Axes

Replace one axis:

[9]:
new_gender = Axis('sex=Men,Women')
pop_new_axis = pop.set_axes('gender', new_gender)
pop_new_axis
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-9465ac881bab> in <module>
      1 new_gender = Axis('sex=Men,Women')
----> 2 pop_new_axis = pop.set_axes('gender', new_gender)
      3 pop_new_axis

NameError: name 'pop' is not defined

Replace several axes at once:

[10]:
new_country = Axis('country_codes=BE,FR,DE')
pop_new_axes = pop.set_axes({'country': new_country, 'gender': new_gender})
pop_new_axes
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-01acccdf346c> in <module>
      1 new_country = Axis('country_codes=BE,FR,DE')
----> 2 pop_new_axes = pop.set_axes({'country': new_country, 'gender': new_gender})
      3 pop_new_axes

NameError: name 'pop' is not defined

Reordering axes

Axes can be reordered using the transpose method. By default, transpose reverses the order of the axes; otherwise, it permutes the axes according to the list given as argument. Axes not mentioned come after those which are mentioned (and keep their relative order). Finally, transpose returns a copy of the array.

[11]:
# starting order : country, gender, time
pop
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-8ec305916275> in <module>
      1 # starting order : country, gender, time
----> 2 pop

NameError: name 'pop' is not defined
[12]:
# no argument --> reverse all axes
pop_transposed = pop.transpose()

# .T is a shortcut for .transpose()
pop_transposed = pop.T

pop_transposed
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-7225533ef09b> in <module>
      1 # no argument --> reverse all axes
----> 2 pop_transposed = pop.transpose()
      3
      4 # .T is a shortcut for .transpose()
      5 pop_transposed = pop.T

NameError: name 'pop' is not defined
[13]:
# reorder according to list
pop_transposed = pop.transpose('gender', 'country', 'time')
pop_transposed
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-691cd72c0f5b> in <module>
      1 # reorder according to list
----> 2 pop_transposed = pop.transpose('gender', 'country', 'time')
      3 pop_transposed

NameError: name 'pop' is not defined
[14]:
# move 'time' axis at first place
# not mentioned axes come after those which are mentioned (and keep their relative order)
pop_transposed = pop.transpose('time')
pop_transposed
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-14-0a5906d260bc> in <module>
      1 # move 'time' axis at first place
      2 # not mentioned axes come after those which are mentioned (and keep their relative order)
----> 3 pop_transposed = pop.transpose('time')
      4 pop_transposed

NameError: name 'pop' is not defined
[15]:
# move 'gender' axis at last place
# not mentioned axes come before those which are mentioned (and keep their relative order)
pop_transposed = pop.transpose(..., 'gender')
pop_transposed
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-a70bb6241c42> in <module>
      1 # move 'gender' axis at last place
      2 # not mentioned axes come before those which are mentioned (and keep their relative order)
----> 3 pop_transposed = pop.transpose(..., 'gender')
      4 pop_transposed

NameError: name 'pop' is not defined

See transpose for more details and examples.

Dropping Labels

[16]:
pop_labels_dropped = pop.drop([2014, 2016])
pop_labels_dropped
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-1d801a07a183> in <module>
----> 1 pop_labels_dropped = pop.drop([2014, 2016])
      2 pop_labels_dropped

NameError: name 'pop' is not defined

See drop for more details and examples.

Combine And Split Axes

Combine two axes:

[17]:
pop_combined_axes = pop.combine_axes(('country', 'gender'))
pop_combined_axes
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-a8b2b6532b26> in <module>
----> 1 pop_combined_axes = pop.combine_axes(('country', 'gender'))
      2 pop_combined_axes

NameError: name 'pop' is not defined

Split an axis:

[18]:
pop_split_axes = pop_combined_axes.split_axes('country_gender')
pop_split_axes
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-18-448027490424> in <module>
----> 1 pop_split_axes = pop_combined_axes.split_axes('country_gender')
      2 pop_split_axes

NameError: name 'pop_combined_axes' is not defined

See combine_axes and split_axes for more details and examples.

Reordering, adding and removing labels

The reindex method allows you to reorder, add and remove labels along one axis:

[19]:
# reverse years + remove 2013 + add 2018 + copy data for 2017 to 2018
pop_new_time = pop.reindex('time', '2018..2014', fill_value=pop[2017])
pop_new_time
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-861f69ddf561> in <module>
      1 # reverse years + remove 2013 + add 2018 + copy data for 2017 to 2018
----> 2 pop_new_time = pop.reindex('time', '2018..2014', fill_value=pop[2017])
      3 pop_new_time

NameError: name 'pop' is not defined

or several axes:

[20]:
pop_new = pop.reindex({'country': 'country=Luxembourg,Belgium,France,Germany',
                       'time': 'time=2018..2014'}, fill_value=0)
pop_new
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-20-d5de989749ed> in <module>
----> 1 pop_new = pop.reindex({'country': 'country=Luxembourg,Belgium,France,Germany',
      2                        'time': 'time=2018..2014'}, fill_value=0)
      3 pop_new

NameError: name 'pop' is not defined

See reindex for more details and examples.

Another way to insert new labels is to use the insert method:

[21]:
# insert a new country before 'France' with all values set to 0
pop_new_country = pop.insert(0, before='France', label='Luxembourg')
# or equivalently
pop_new_country = pop.insert(0, after='Belgium', label='Luxembourg')

pop_new_country
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-c1e4f8f51f62> in <module>
      1 # insert a new country before 'France' with all values set to 0
----> 2 pop_new_country = pop.insert(0, before='France', label='Luxembourg')
      3 # or equivalently
      4 pop_new_country = pop.insert(0, after='Belgium', label='Luxembourg')
      5

NameError: name 'pop' is not defined

See insert for more details and examples.

Sorting

[22]:
# get a copy of the 'pop_benelux' array
pop_benelux = demo_eurostat.pop_benelux.copy()
pop_benelux
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-22-a960c49ccebf> in <module>
      1 # get a copy of the 'pop_benelux' array
----> 2 pop_benelux = demo_eurostat.pop_benelux.copy()
      3 pop_benelux

NameError: name 'demo_eurostat' is not defined

Sort an axis (alphabetically if labels are strings)

[23]:
pop_sorted = pop_benelux.sort_axes('gender')
pop_sorted
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-23-128d08165772> in <module>
----> 1 pop_sorted = pop_benelux.sort_axes('gender')
      2 pop_sorted

NameError: name 'pop_benelux' is not defined

Give labels which would sort the axis

[24]:
pop_benelux.labelsofsorted('country')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-24-e45d81dcc459> in <module>
----> 1 pop_benelux.labelsofsorted('country')

NameError: name 'pop_benelux' is not defined

Sort according to values

[25]:
pop_sorted = pop_benelux.sort_values(('Male', 2017))
pop_sorted
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-25-c4da1fa82a19> in <module>
----> 1 pop_sorted = pop_benelux.sort_values(('Male', 2017))
      2 pop_sorted

NameError: name 'pop_benelux' is not defined

Combining arrays

The LArray library offers several methods and functions to combine arrays:

  • insert: inserts an array in another array along an axis

  • append: adds an array at the end of an axis.

  • prepend: adds an array at the beginning of an axis.

  • extend: extends an array along an axis.

  • stack: combines several arrays along an axis.

Insert

[26]:
other_countries = zeros((Axis('country=Luxembourg,Netherlands'), pop.gender, pop.time), dtype=int)

# insert new countries before 'France'
pop_new_countries = pop.insert(other_countries, before='France')
pop_new_countries
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-dc5de004068f> in <module>
----> 1 other_countries = zeros((Axis('country=Luxembourg,Netherlands'), pop.gender, pop.time), dtype=int)
      2
      3 # insert new countries before 'France'
      4 pop_new_countries = pop.insert(other_countries, before='France')
      5 pop_new_countries

NameError: name 'pop' is not defined

See insert for more details and examples.

Append

Append one element to an axis of an array:

[27]:
# append data for 'Luxembourg'
pop_new = pop.append('country', pop_benelux['Luxembourg'], 'Luxembourg')
pop_new
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-27-45d84e78fd69> in <module>
      1 # append data for 'Luxembourg'
----> 2 pop_new = pop.append('country', pop_benelux['Luxembourg'], 'Luxembourg')
      3 pop_new

NameError: name 'pop' is not defined

The value being appended can have missing (or even extra) axes as long as common axes are compatible:

[28]:
pop_lux = Array([-1, 1], pop.gender)
pop_lux
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-28-ded64bec24ce> in <module>
----> 1 pop_lux = Array([-1, 1], pop.gender)
      2 pop_lux

NameError: name 'pop' is not defined
[29]:
pop_new = pop.append('country', pop_lux, 'Luxembourg')
pop_new
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-29-d8b88e525bb0> in <module>
----> 1 pop_new = pop.append('country', pop_lux, 'Luxembourg')
      2 pop_new

NameError: name 'pop' is not defined

See append for more details and examples.

Prepend

Prepend one element to an axis of an array:

[30]:
# prepend data for 'Luxembourg'
pop_new = pop.prepend('country', pop_benelux['Luxembourg'], 'Luxembourg')
pop_new
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-30-2d1c7645ce3f> in <module>
      1 # append data for 'Luxembourg'
----> 2 pop_new = pop.prepend('country', pop_benelux['Luxembourg'], 'Luxembourg')
      3 pop_new

NameError: name 'pop' is not defined

See prepend for more details and examples.

Extend

Extend an array along an axis with another array that has the same axis (but different labels):

[31]:
pop_extended = pop.extend('country', pop_benelux[['Luxembourg', 'Netherlands']])
pop_extended
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-31-901cdde89f99> in <module>
----> 1 pop_extended = pop.extend('country', pop_benelux[['Luxembourg', 'Netherlands']])
      2 pop_extended

NameError: name 'pop' is not defined

See extend for more details and examples.

Stack

Stack several arrays together to create an entirely new dimension

[32]:
# imagine you have loaded data for each country in different arrays
# (e.g. loaded from different Excel sheets)
pop_be = pop['Belgium']
pop_fr = pop['France']
pop_de = pop['Germany']

pop_stacked = stack({'Belgium': pop_be, 'France': pop_fr, 'Germany': pop_de}, 'country')
pop_stacked
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-32-b62297997bac> in <module>
      1 # imagine you have loaded data for each country in different arrays
      2 # (e.g. loaded from different Excel sheets)
----> 3 pop_be = pop['Belgium']
      4 pop_fr = pop['France']
      5 pop_de = pop['Germany']

NameError: name 'pop' is not defined

See stack for more details and examples.

Aligning Arrays

The align method aligns two arrays on their axes with a specified join method. In other words, it ensures that all common axes are compatible.

[33]:
# get a copy of the 'births' array
births = demo_eurostat.births.copy()

# align the two arrays with the 'inner' join method
pop_aligned, births_aligned = pop_benelux.align(births, join='inner')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-33-9ac56279d5ed> in <module>
      1 # get a copy of the 'births' array
----> 2 births = demo_eurostat.births.copy()
      3
      4 # align the two arrays with the 'inner' join method
      5 pop_aligned, births_aligned = pop_benelux.align(births, join='inner')

NameError: name 'demo_eurostat' is not defined
[34]:
print('pop_benelux before align:')
print(pop_benelux)
print()
print('pop_benelux after align:')
print(pop_aligned)
pop_benelux before align:
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-34-032609b23e87> in <module>
      1 print('pop_benelux before align:')
----> 2 print(pop_benelux)
      3 print()
      4 print('pop_benelux after align:')
      5 print(pop_aligned)

NameError: name 'pop_benelux' is not defined
[35]:
print('births before align:')
print(births)
print()
print('births after align:')
print(births_aligned)
births before align:
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-35-2a930ae5ee1a> in <module>
      1 print('births before align:')
----> 2 print(births)
      3 print()
      4 print('births after align:')
      5 print(births_aligned)

NameError: name 'births' is not defined

Aligned arrays can then be used in arithmetic operations:

[36]:
pop_aligned - births_aligned
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-36-15f7c12091d3> in <module>
----> 1 pop_aligned - births_aligned

NameError: name 'pop_aligned' is not defined

See align for more details and examples.