Combining arrays
Import the LArray library:
[1]:
from larray import *
[2]:
# load the 'demography_eurostat' dataset
demography_eurostat = load_example_data('demography_eurostat')
# load 'gender' and 'time' axes
gender = demography_eurostat.gender
time = demography_eurostat.time
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[2], line 2
1 # load the 'demography_eurostat' dataset
----> 2 demography_eurostat = load_example_data('demography_eurostat')
4 # load 'gender' and 'time' axes
5 gender = demography_eurostat.gender
File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/example.py:97, in load_example_data(name)
95 available_datasets = list(AVAILABLE_EXAMPLE_DATA.keys())
96 raise ValueError(f"example_data must be chosen from list {available_datasets}")
---> 97 return la.Session(AVAILABLE_EXAMPLE_DATA[name])
File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/core/session.py:98, in Session.__init__(self, meta, *args, **kwargs)
94 elements = {a.name: a for a in args}
96 if isinstance(elements, (str, Path)):
97 # assume elements is a filename
---> 98 self.load(elements)
99 self.update(**kwargs)
100 else:
101 # iterable of tuple or dict-like
File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/core/session.py:438, in Session.load(self, fname, names, engine, display, **kwargs)
436 else:
437 handler = handler_cls(fname)
--> 438 metadata, objects = handler.read(names, display=display, **kwargs)
439 self._update_from_iterable(objects.items())
440 self.meta = metadata
File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/inout/common.py:139, in FileHandler.read(self, keys, display, ignore_exceptions, *args, **kwargs)
114 def read(self, keys, *args, display=False, ignore_exceptions=False, **kwargs) -> Tuple[Metadata, dict]:
115 r"""
116 Read file content (HDF, Excel, CSV, ...) and returns a dictionary containing loaded objects.
117
(...)
137 Dictionary containing the loaded objects.
138 """
--> 139 self._open_for_read()
140 metadata = self._read_metadata()
141 item_types = self.item_types()
File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/inout/hdf.py:138, in PandasHDFHandler._open_for_read(self)
137 def _open_for_read(self):
--> 138 self.handle = HDFStore(self.fname, mode='r')
File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/pandas/io/pytables.py:566, in HDFStore.__init__(self, path, mode, complevel, complib, fletcher32, **kwargs)
563 if "format" in kwargs:
564 raise ValueError("format is not a defined argument for HDFStore")
--> 566 tables = import_optional_dependency("tables")
568 if complib is not None and complib not in tables.filters.all_complibs:
569 raise ValueError(
570 f"complib only supports {tables.filters.all_complibs} compression."
571 )
File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/pandas/compat/_optional.py:135, in import_optional_dependency(name, extra, errors, min_version)
130 msg = (
131 f"Missing optional dependency '{install_name}'. {extra} "
132 f"Use pip or conda to install {install_name}."
133 )
134 try:
--> 135 module = importlib.import_module(name)
136 except ImportError:
137 if errors == "raise":
File ~/.asdf/installs/python/3.11.9/lib/python3.11/importlib/__init__.py:126, in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1204, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1176, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1147, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:690, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:940, in exec_module(self, module)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/tables/__init__.py:44
40 raise RuntimeError("Blosc2 library not found. "
41 f"I looked for \"{', '.join(blosc2_search_paths)}\"")
43 # Necessary imports to get versions stored on the cython extension
---> 44 from .utilsextension import get_hdf5_version as _get_hdf5_version
46 from ._version import __version__
48 hdf5_version = _get_hdf5_version()
File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/tables/utilsextension.pyx:1, in init tables.utilsextension()
ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
[3]:
# load the 'population' array from the 'demography_eurostat' dataset
population = demography_eurostat.population
# show 'population' array
population
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[3], line 2
1 # load the 'population' array from the 'demography_eurostat' dataset
----> 2 population = demography_eurostat.population
4 # show 'population' array
5 population
NameError: name 'demography_eurostat' is not defined
[4]:
# load the 'population_benelux' array from the 'demography_eurostat' dataset
population_benelux = demography_eurostat.population_benelux
# show 'population_benelux' array
population_benelux
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[4], line 2
1 # load the 'population_benelux' array from the 'demography_eurostat' dataset
----> 2 population_benelux = demography_eurostat.population_benelux
4 # show 'population_benelux' array
5 population_benelux
NameError: name 'demography_eurostat' is not defined
The LArray library offers several methods and functions to combine arrays:
insert: inserts an array into another array along an axis.
append: adds an array at the end of an axis.
prepend: adds an array at the beginning of an axis.
extend: extends an array along an axis.
stack: combines several arrays along a new axis.
Insert
[5]:
other_countries = zeros((Axis('country=Luxembourg,Netherlands'), gender, time), dtype=int)
# insert new countries before 'France'
population_new_countries = population.insert(other_countries, before='France')
population_new_countries
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[5], line 1
----> 1 other_countries = zeros((Axis('country=Luxembourg,Netherlands'), gender, time), dtype=int)
3 # insert new countries before 'France'
4 population_new_countries = population.insert(other_countries, before='France')
NameError: name 'gender' is not defined
[6]:
# insert new countries after 'France'
population_new_countries = population.insert(other_countries, after='France')
population_new_countries
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[6], line 2
1 # insert new countries after 'France'
----> 2 population_new_countries = population.insert(other_countries, after='France')
3 population_new_countries
NameError: name 'population' is not defined
See insert for more details and examples.
Append
Append one element to an axis of an array:
[7]:
# append data for 'Luxembourg'
population_new = population.append('country', population_benelux['Luxembourg'], 'Luxembourg')
population_new
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[7], line 2
1 # append data for 'Luxembourg'
----> 2 population_new = population.append('country', population_benelux['Luxembourg'], 'Luxembourg')
3 population_new
NameError: name 'population' is not defined
The value being appended can have missing (or even extra) axes as long as common axes are compatible:
[8]:
population_lux = stack({'Male': -1, 'Female': 1}, gender)
population_lux
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[8], line 1
----> 1 population_lux = stack({'Male': -1, 'Female': 1}, gender)
2 population_lux
NameError: name 'gender' is not defined
[9]:
population_new = population.append('country', population_lux, 'Luxembourg')
population_new
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[9], line 1
----> 1 population_new = population.append('country', population_lux, 'Luxembourg')
2 population_new
NameError: name 'population' is not defined
The value being appended can also have the axis along which we are appending:
[10]:
population_nelux = population_benelux[['Netherlands', 'Luxembourg']]
population_nelux
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[10], line 1
----> 1 population_nelux = population_benelux[['Netherlands', 'Luxembourg']]
2 population_nelux
NameError: name 'population_benelux' is not defined
[11]:
population_extended = population.append('country', population_nelux)
population_extended
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[11], line 1
----> 1 population_extended = population.append('country', population_nelux)
2 population_extended
NameError: name 'population' is not defined
See append for more details and examples.
Prepend
Prepend one element to an axis of an array:
[12]:
# prepend data for 'Luxembourg'
population_new = population.prepend('country', population_benelux['Luxembourg'], 'Luxembourg')
population_new
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[12], line 2
1 # prepend data for 'Luxembourg'
----> 2 population_new = population.prepend('country', population_benelux['Luxembourg'], 'Luxembourg')
3 population_new
NameError: name 'population' is not defined
See prepend for more details and examples.
Stack
Stack several arrays together to create an entirely new dimension:
[13]:
# imagine you have loaded data for each country in different arrays
# (e.g. loaded from different Excel sheets)
population_be = population['Belgium']
population_fr = population['France']
population_de = population['Germany']
print(population_be)
print(population_fr)
print(population_de)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[13], line 3
1 # imagine you have loaded data for each country in different arrays
2 # (e.g. loaded from different Excel sheets)
----> 3 population_be = population['Belgium']
4 population_fr = population['France']
5 population_de = population['Germany']
NameError: name 'population' is not defined
[14]:
# create a new array with an extra axis 'country' by stacking the three arrays population_be/fr/de
population_stacked = stack({'Belgium': population_be, 'France': population_fr, 'Germany': population_de}, 'country')
population_stacked
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[14], line 2
1 # create a new array with an extra axis 'country' by stacking the three arrays population_be/fr/de
----> 2 population_stacked = stack({'Belgium': population_be, 'France': population_fr, 'Germany': population_de}, 'country')
3 population_stacked
NameError: name 'population_be' is not defined
See stack for more details and examples.