Transforming Arrays (Relabeling, Renaming, Reordering, Combining, Extending, Sorting, …)¶
Import the LArray library:
[1]:
from larray import *
[2]:
# load the 'demography_eurostat' dataset
demo_eurostat = load_example_data('demography_eurostat')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-2-43cbc3a4a95d> in <module>
1 # load the 'demography_eurostat' dataset
----> 2 demo_eurostat = load_example_data('demography_eurostat')
~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/example.py in load_example_data(name)
91 if name not in AVAILABLE_EXAMPLE_DATA.keys():
92 raise ValueError("example_data must be chosen from list {}".format(list(AVAILABLE_EXAMPLE_DATA.keys())))
---> 93 return la.Session(AVAILABLE_EXAMPLE_DATA[name])
~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in __init__(self, *args, **kwargs)
94 if isinstance(a0, str):
95 # assume a0 is a filename
---> 96 self.load(a0)
97 else:
98 # iterable of tuple or dict-like
~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in load(self, fname, names, engine, display, **kwargs)
426 else:
427 handler = handler_cls(fname)
--> 428 metadata, objects = handler.read(names, display=display, **kwargs)
429 for k, v in objects.items():
430 self[k] = v
~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/common.py in read(self, keys, *args, **kwargs)
128 print("loading", type, "object", key, "...", end=' ')
129 try:
--> 130 res[key] = self._read_item(key, type, *args, **kwargs)
131 except Exception:
132 if not ignore_exceptions:
~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in _read_item(self, key, type, *args, **kwargs)
137 else:
138 raise TypeError()
--> 139 return read_hdf(self.handle, hdf_key, *args, **kwargs)
140
141 def _dump_item(self, key, value, *args, **kwargs):
~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in read_hdf(filepath_or_buffer, key, fill_value, na, sort_rows, sort_columns, name, **kwargs)
81 cartesian_prod = writer != 'LArray'
82 res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value,
---> 83 parse_header=False, cartesian_prod=cartesian_prod)
84 if _meta is not None:
85 res.meta = _meta
~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
338 unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
339 res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340 unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
341
342 # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas
~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
241 raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
242 'Please call the method sort_axes on the returned array to sort rows or columns')
--> 243 axes_labels = index_to_labels(df.index, sort=False)
244
245 # Pandas treats column labels as column names (strings) so we need to convert them to values
~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
41 Returns unique labels for each dimension.
42 """
---> 43 if isinstance(idx, pd.core.index.MultiIndex):
44 if sort:
45 return list(idx.levels)
AttributeError: module 'pandas.core' has no attribute 'index'
[3]:
# get a copy of the 'pop' array from the 'demography_eurostat' dataset
pop = demo_eurostat.pop.copy()
pop
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-3-05b1a30c03e6> in <module>
1 # get a copy of the 'pop' array from the 'demography_eurostat' dataset
----> 2 pop = demo_eurostat.pop.copy()
3 pop
NameError: name 'demo_eurostat' is not defined
Manipulating axes¶
The Array
class offers several methods to manipulate the axes and labels of an array:
set_labels: to replace all or some labels of one or several axes.
rename: to replace one or several axis names.
set_axes: to replace one or several axes.
transpose: to modify the order of axes.
drop: to remove one or several labels.
combine_axes: to combine axes.
split_axes: to split one or several axes by splitting their labels and names.
reindex: to reorder, add and remove labels of one or several axes.
insert: to insert a label at a given position.
Relabeling¶
Replace all or some labels of an axis:
[4]:
# replace all labels of the 'gender' axis by passing the list of all new labels
pop_new_labels = pop.set_labels('gender', ['Men', 'Women'])
pop_new_labels
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-4-623309d162e8> in <module>
1 # replace all labels of the 'gender' axis by passing the list of all new labels
----> 2 pop_new_labels = pop.set_labels('gender', ['Men', 'Women'])
3 pop_new_labels
NameError: name 'pop' is not defined
[5]:
# set all labels of the 'country' axis to uppercase by passing the function str.upper()
pop_new_labels = pop.set_labels('country', str.upper)
pop_new_labels
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-5-2c6df1fd25e2> in <module>
1 # set all labels of the 'country' axis to uppercase by passing the function str.upper()
----> 2 pop_new_labels = pop.set_labels('country', str.upper)
3 pop_new_labels
NameError: name 'pop' is not defined
[6]:
# replace only one label of the 'gender' axis by passing a dict
pop_new_labels = pop.set_labels('gender', {'Male': 'Men'})
pop_new_labels
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-6-44062fe81d08> in <module>
1 # replace only one label of the 'gender' axis by passing a dict
----> 2 pop_new_labels = pop.set_labels('gender', {'Male': 'Men'})
3 pop_new_labels
NameError: name 'pop' is not defined
See set_labels for more details and examples.
Renaming axes¶
Rename one axis:
[7]:
# 'rename' returns a copy of the array
pop_new_names = pop.rename('time', 'year')
pop_new_names
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-7-ebe3a3298518> in <module>
1 # 'rename' returns a copy of the array
----> 2 pop_new_names = pop.rename('time', 'year')
3 pop_new_names
NameError: name 'pop' is not defined
Rename several axes at once:
[8]:
pop_new_names = pop.rename({'gender': 'sex', 'time': 'year'})
pop_new_names
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-8-1c5ebbf525cf> in <module>
----> 1 pop_new_names = pop.rename({'gender': 'sex', 'time': 'year'})
2 pop_new_names
NameError: name 'pop' is not defined
See rename for more details and examples.
Replacing Axes¶
Replace one axis:
[9]:
new_gender = Axis('sex=Men,Women')
pop_new_axis = pop.set_axes('gender', new_gender)
pop_new_axis
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-9-9465ac881bab> in <module>
1 new_gender = Axis('sex=Men,Women')
----> 2 pop_new_axis = pop.set_axes('gender', new_gender)
3 pop_new_axis
NameError: name 'pop' is not defined
Replace several axes at once:
[10]:
new_country = Axis('country_codes=BE,FR,DE')
pop_new_axes = pop.set_axes({'country': new_country, 'gender': new_gender})
pop_new_axes
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-10-01acccdf346c> in <module>
1 new_country = Axis('country_codes=BE,FR,DE')
----> 2 pop_new_axes = pop.set_axes({'country': new_country, 'gender': new_gender})
3 pop_new_axes
NameError: name 'pop' is not defined
Reordering axes¶
Axes can be reordered using the transpose
method. By default, transpose reverses the order of the axes; otherwise it permutes the axes according to the list given as argument. Axes not mentioned come after those which are mentioned (and keep their relative order). Finally, transpose returns a copy of the array.
[11]:
# starting order : country, gender, time
pop
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-11-8ec305916275> in <module>
1 # starting order : country, gender, time
----> 2 pop
NameError: name 'pop' is not defined
[12]:
# no argument --> reverse all axes
pop_transposed = pop.transpose()
# .T is a shortcut for .transpose()
pop_transposed = pop.T
pop_transposed
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-12-7225533ef09b> in <module>
1 # no argument --> reverse all axes
----> 2 pop_transposed = pop.transpose()
3
4 # .T is a shortcut for .transpose()
5 pop_transposed = pop.T
NameError: name 'pop' is not defined
[13]:
# reorder according to list
pop_transposed = pop.transpose('gender', 'country', 'time')
pop_transposed
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-13-691cd72c0f5b> in <module>
1 # reorder according to list
----> 2 pop_transposed = pop.transpose('gender', 'country', 'time')
3 pop_transposed
NameError: name 'pop' is not defined
[14]:
# move 'time' axis at first place
# not mentioned axes come after those which are mentioned (and keep their relative order)
pop_transposed = pop.transpose('time')
pop_transposed
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-14-0a5906d260bc> in <module>
1 # move 'time' axis at first place
2 # not mentioned axes come after those which are mentioned (and keep their relative order)
----> 3 pop_transposed = pop.transpose('time')
4 pop_transposed
NameError: name 'pop' is not defined
[15]:
# move 'gender' axis at last place
# not mentioned axes come before those which are mentioned (and keep their relative order)
pop_transposed = pop.transpose(..., 'gender')
pop_transposed
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-15-a70bb6241c42> in <module>
1 # move 'gender' axis at last place
2 # not mentioned axes come before those which are mentioned (and keep their relative order)
----> 3 pop_transposed = pop.transpose(..., 'gender')
4 pop_transposed
NameError: name 'pop' is not defined
See transpose for more details and examples.
Dropping Labels¶
[16]:
pop_labels_dropped = pop.drop([2014, 2016])
pop_labels_dropped
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-16-1d801a07a183> in <module>
----> 1 pop_labels_dropped = pop.drop([2014, 2016])
2 pop_labels_dropped
NameError: name 'pop' is not defined
See drop for more details and examples.
Combine And Split Axes¶
Combine two axes:
[17]:
pop_combined_axes = pop.combine_axes(('country', 'gender'))
pop_combined_axes
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-17-a8b2b6532b26> in <module>
----> 1 pop_combined_axes = pop.combine_axes(('country', 'gender'))
2 pop_combined_axes
NameError: name 'pop' is not defined
Split an axis:
[18]:
pop_split_axes = pop_combined_axes.split_axes('country_gender')
pop_split_axes
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-18-448027490424> in <module>
----> 1 pop_split_axes = pop_combined_axes.split_axes('country_gender')
2 pop_split_axes
NameError: name 'pop_combined_axes' is not defined
See combine_axes and split_axes for more details and examples.
Reordering, adding and removing labels¶
The reindex
method allows you to reorder, add and remove labels along one axis:
[19]:
# reverse years + remove 2013 + add 2018 + copy data for 2017 to 2018
pop_new_time = pop.reindex('time', '2018..2014', fill_value=pop[2017])
pop_new_time
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-19-861f69ddf561> in <module>
1 # reverse years + remove 2013 + add 2018 + copy data for 2017 to 2018
----> 2 pop_new_time = pop.reindex('time', '2018..2014', fill_value=pop[2017])
3 pop_new_time
NameError: name 'pop' is not defined
or several axes:
[20]:
pop_new = pop.reindex({'country': 'country=Luxembourg,Belgium,France,Germany',
'time': 'time=2018..2014'}, fill_value=0)
pop_new
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-20-d5de989749ed> in <module>
----> 1 pop_new = pop.reindex({'country': 'country=Luxembourg,Belgium,France,Germany',
2 'time': 'time=2018..2014'}, fill_value=0)
3 pop_new
NameError: name 'pop' is not defined
See reindex for more details and examples.
Another way to insert new labels is to use the insert
method:
[21]:
# insert a new country before 'France' with all values set to 0
pop_new_country = pop.insert(0, before='France', label='Luxembourg')
# or equivalently
pop_new_country = pop.insert(0, after='Belgium', label='Luxembourg')
pop_new_country
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-21-c1e4f8f51f62> in <module>
1 # insert a new country before 'France' with all values set to 0
----> 2 pop_new_country = pop.insert(0, before='France', label='Luxembourg')
3 # or equivalently
4 pop_new_country = pop.insert(0, after='Belgium', label='Luxembourg')
5
NameError: name 'pop' is not defined
See insert for more details and examples.
Sorting¶
sort_axes: sort the labels of an axis.
labelsofsorted: give labels which would sort an axis.
sort_values: sort axes according to values
[22]:
# get a copy of the 'pop_benelux' array
pop_benelux = demo_eurostat.pop_benelux.copy()
pop_benelux
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-22-a960c49ccebf> in <module>
1 # get a copy of the 'pop_benelux' array
----> 2 pop_benelux = demo_eurostat.pop_benelux.copy()
3 pop_benelux
NameError: name 'demo_eurostat' is not defined
Sort an axis (alphabetically if labels are strings)
[23]:
pop_sorted = pop_benelux.sort_axes('gender')
pop_sorted
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-23-128d08165772> in <module>
----> 1 pop_sorted = pop_benelux.sort_axes('gender')
2 pop_sorted
NameError: name 'pop_benelux' is not defined
Give labels which would sort the axis
[24]:
pop_benelux.labelsofsorted('country')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-24-e45d81dcc459> in <module>
----> 1 pop_benelux.labelsofsorted('country')
NameError: name 'pop_benelux' is not defined
Sort according to values
[25]:
pop_sorted = pop_benelux.sort_values(('Male', 2017))
pop_sorted
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-25-c4da1fa82a19> in <module>
----> 1 pop_sorted = pop_benelux.sort_values(('Male', 2017))
2 pop_sorted
NameError: name 'pop_benelux' is not defined
Combining arrays¶
The LArray library offers several methods and functions to combine arrays:
insert: inserts an array in another array along an axis
append: adds an array at the end of an axis.
prepend: adds an array at the beginning of an axis.
extend: extends an array along an axis.
stack: combines several arrays along an axis.
Insert¶
[26]:
other_countries = zeros((Axis('country=Luxembourg,Netherlands'), pop.gender, pop.time), dtype=int)
# insert new countries before 'France'
pop_new_countries = pop.insert(other_countries, before='France')
pop_new_countries
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-26-dc5de004068f> in <module>
----> 1 other_countries = zeros((Axis('country=Luxembourg,Netherlands'), pop.gender, pop.time), dtype=int)
2
3 # insert new countries before 'France'
4 pop_new_countries = pop.insert(other_countries, before='France')
5 pop_new_countries
NameError: name 'pop' is not defined
See insert for more details and examples.
Append¶
Append one element to an axis of an array:
[27]:
# append data for 'Luxembourg'
pop_new = pop.append('country', pop_benelux['Luxembourg'], 'Luxembourg')
pop_new
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-27-45d84e78fd69> in <module>
1 # append data for 'Luxembourg'
----> 2 pop_new = pop.append('country', pop_benelux['Luxembourg'], 'Luxembourg')
3 pop_new
NameError: name 'pop' is not defined
The value being appended can have missing (or even extra) axes as long as common axes are compatible:
[28]:
pop_lux = Array([-1, 1], pop.gender)
pop_lux
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-28-ded64bec24ce> in <module>
----> 1 pop_lux = Array([-1, 1], pop.gender)
2 pop_lux
NameError: name 'pop' is not defined
[29]:
pop_new = pop.append('country', pop_lux, 'Luxembourg')
pop_new
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-29-d8b88e525bb0> in <module>
----> 1 pop_new = pop.append('country', pop_lux, 'Luxembourg')
2 pop_new
NameError: name 'pop' is not defined
See append for more details and examples.
Prepend¶
Prepend one element to an axis of an array:
[30]:
# prepend data for 'Luxembourg'
pop_new = pop.prepend('country', pop_benelux['Luxembourg'], 'Luxembourg')
pop_new
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-30-2d1c7645ce3f> in <module>
1 # append data for 'Luxembourg'
----> 2 pop_new = pop.prepend('country', pop_benelux['Luxembourg'], 'Luxembourg')
3 pop_new
NameError: name 'pop' is not defined
See prepend for more details and examples.
Extend¶
Extend an array along an axis with another array with that axis (but other labels)
[31]:
pop_extended = pop.extend('country', pop_benelux[['Luxembourg', 'Netherlands']])
pop_extended
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-31-901cdde89f99> in <module>
----> 1 pop_extended = pop.extend('country', pop_benelux[['Luxembourg', 'Netherlands']])
2 pop_extended
NameError: name 'pop' is not defined
See extend for more details and examples.
Stack¶
Stack several arrays together to create an entirely new dimension
[32]:
# imagine you have loaded data for each country in different arrays
# (e.g. loaded from different Excel sheets)
pop_be = pop['Belgium']
pop_fr = pop['France']
pop_de = pop['Germany']
pop_stacked = stack({'Belgium': pop_be, 'France': pop_fr, 'Germany': pop_de}, 'country')
pop_stacked
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-32-b62297997bac> in <module>
1 # imagine you have loaded data for each country in different arrays
2 # (e.g. loaded from different Excel sheets)
----> 3 pop_be = pop['Belgium']
4 pop_fr = pop['France']
5 pop_de = pop['Germany']
NameError: name 'pop' is not defined
See stack for more details and examples.
Aligning Arrays¶
The align
method aligns two arrays on their axes with a specified join method. In other words, it ensures that all common axes are compatible.
[33]:
# get a copy of the 'births' array
births = demo_eurostat.births.copy()
# align the two arrays with the 'inner' join method
pop_aligned, births_aligned = pop_benelux.align(births, join='inner')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-33-9ac56279d5ed> in <module>
1 # get a copy of the 'births' array
----> 2 births = demo_eurostat.births.copy()
3
4 # align the two arrays with the 'inner' join method
5 pop_aligned, births_aligned = pop_benelux.align(births, join='inner')
NameError: name 'demo_eurostat' is not defined
[34]:
print('pop_benelux before align:')
print(pop_benelux)
print()
print('pop_benelux after align:')
print(pop_aligned)
pop_benelux before align:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-34-032609b23e87> in <module>
1 print('pop_benelux before align:')
----> 2 print(pop_benelux)
3 print()
4 print('pop_benelux after align:')
5 print(pop_aligned)
NameError: name 'pop_benelux' is not defined
[35]:
print('births before align:')
print(births)
print()
print('births after align:')
print(births_aligned)
births before align:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-35-2a930ae5ee1a> in <module>
1 print('births before align:')
----> 2 print(births)
3 print()
4 print('births after align:')
5 print(births_aligned)
NameError: name 'births' is not defined
Aligned arrays can then be used in arithmetic operations:
[36]:
pop_aligned - births_aligned
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-36-15f7c12091d3> in <module>
----> 1 pop_aligned - births_aligned
NameError: name 'pop_aligned' is not defined
See align for more details and examples.