Interactive online version: Binder badge

Plotting

Import the LArray library:

[1]:
from larray import *

Import the test array population from the demography_eurostat dataset:

[2]:
demography_eurostat = load_example_data('demography_eurostat')
# express the population in millions (the plots below label the y axis 'population (millions)')
population = demography_eurostat.population / 1_000_000

# show the 'population' array
population
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[2], line 1
----> 1 demography_eurostat = load_example_data('demography_eurostat')
      2 population = demography_eurostat.population / 1_000_000
      4 # show the 'population' array

File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/example.py:97, in load_example_data(name)
     95     available_datasets = list(AVAILABLE_EXAMPLE_DATA.keys())
     96     raise ValueError(f"example_data must be chosen from list {available_datasets}")
---> 97 return la.Session(AVAILABLE_EXAMPLE_DATA[name])

File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/core/session.py:98, in Session.__init__(self, meta, *args, **kwargs)
     94     elements = {a.name: a for a in args}
     96 if isinstance(elements, (str, Path)):
     97     # assume elements is a filename
---> 98     self.load(elements)
     99     self.update(**kwargs)
    100 else:
    101     # iterable of tuple or dict-like

File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/core/session.py:438, in Session.load(self, fname, names, engine, display, **kwargs)
    436 else:
    437     handler = handler_cls(fname)
--> 438 metadata, objects = handler.read(names, display=display, **kwargs)
    439 self._update_from_iterable(objects.items())
    440 self.meta = metadata

File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/inout/common.py:139, in FileHandler.read(self, keys, display, ignore_exceptions, *args, **kwargs)
    114 def read(self, keys, *args, display=False, ignore_exceptions=False, **kwargs) -> Tuple[Metadata, dict]:
    115     r"""
    116     Read file content (HDF, Excel, CSV, ...) and returns a dictionary containing loaded objects.
    117
   (...)
    137         Dictionary containing the loaded objects.
    138     """
--> 139     self._open_for_read()
    140     metadata = self._read_metadata()
    141     item_types = self.item_types()

File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/larray/inout/hdf.py:138, in PandasHDFHandler._open_for_read(self)
    137 def _open_for_read(self):
--> 138     self.handle = HDFStore(self.fname, mode='r')

File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/pandas/io/pytables.py:566, in HDFStore.__init__(self, path, mode, complevel, complib, fletcher32, **kwargs)
    563 if "format" in kwargs:
    564     raise ValueError("format is not a defined argument for HDFStore")
--> 566 tables = import_optional_dependency("tables")
    568 if complib is not None and complib not in tables.filters.all_complibs:
    569     raise ValueError(
    570         f"complib only supports {tables.filters.all_complibs} compression."
    571     )

File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/pandas/compat/_optional.py:135, in import_optional_dependency(name, extra, errors, min_version)
    130 msg = (
    131     f"Missing optional dependency '{install_name}'. {extra} "
    132     f"Use pip or conda to install {install_name}."
    133 )
    134 try:
--> 135     module = importlib.import_module(name)
    136 except ImportError:
    137     if errors == "raise":

File ~/.asdf/installs/python/3.11.9/lib/python3.11/importlib/__init__.py:126, in import_module(name, package)
    124             break
    125         level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)

File <frozen importlib._bootstrap>:1204, in _gcd_import(name, package, level)

File <frozen importlib._bootstrap>:1176, in _find_and_load(name, import_)

File <frozen importlib._bootstrap>:1147, in _find_and_load_unlocked(name, import_)

File <frozen importlib._bootstrap>:690, in _load_unlocked(spec)

File <frozen importlib._bootstrap_external>:940, in exec_module(self, module)

File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)

File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/tables/__init__.py:44
     40     raise RuntimeError("Blosc2 library not found. "
     41                        f"I looked for \"{', '.join(blosc2_search_paths)}\"")
     43 # Necessary imports to get versions stored on the cython extension
---> 44 from .utilsextension import get_hdf5_version as _get_hdf5_version
     46 from ._version import __version__
     48 hdf5_version = _get_hdf5_version()

File ~/checkouts/readthedocs.org/user_builds/larray/envs/0.34.3/lib/python3.11/site-packages/tables/utilsextension.pyx:1, in init tables.utilsextension()

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

Inline matplotlib (required in notebooks):

[3]:
%matplotlib inline

In a Python script, add the following import at the top of the script:

[4]:
import matplotlib.pyplot as plt

Create and show a simple plot (the last axis defines the different curves to draw):

[5]:
population['Belgium'].plot()
# shows the figure
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 population['Belgium'].plot()
      2 # shows the figure
      3 plt.show()

NameError: name 'population' is not defined
  • Create a Line plot with grid, user-defined xticks, label and title.

  • Save the plot as a png file (using plt.savefig()).

  • Show the plot:

[6]:
population['Belgium'].plot(grid=True, xticks=population.time, ylabel='population (millions)', title='Belgium')
# saves figure in a file (see matplotlib.pyplot.savefig documentation for more details)
plt.savefig('Belgium_population.png')
# WARNING: show() resets the current figure after showing it! Do not call it before savefig
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[6], line 1
----> 1 population['Belgium'].plot(grid=True, xticks=population.time, ylabel='population (millions)', title='Belgium')
      2 # saves figure in a file (see matplotlib.pyplot.savefig documentation for more details)
      3 plt.savefig('Belgium_population.png')

NameError: name 'population' is not defined

Specify line styles and width:

[7]:
# line styles: '-' for solid line, '--' for dashed line, '-.' for dash-dotted line and ':' for dotted line
population['Male'].plot(style=['-', '--', '-.'], linewidth=2,
                        xticks=population.time, ylabel='population (millions)', title='Male')
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[7], line 2
      1 # line styles: '-' for solid line, '--' for dashed line, '-.' for dash-dotted line and ':' for dotted line
----> 2 population['Male'].plot(style=['-', '--', '-.'], linewidth=2,
      3                         xticks=population.time, ylabel='population (millions)', title='Male')
      4 plt.show()

NameError: name 'population' is not defined

Configuring the legend can be done by passing a dict to the legend argument. For example, to put the legend in a specific position inside the graph, one would use legend={'loc': <position>}.

Where <position> can be: 'best' (default), 'upper right', 'upper left', 'lower left', 'lower right', 'right', 'center left', 'center right', 'lower center', 'upper center' or 'center'.

[8]:
# the data plotted is the Belgian population, so the title is 'Belgium';
# legend={'loc': ...} chooses where the legend is placed inside the graph
population['Belgium'].plot(xticks=population.time, ylabel='population (millions)', title='Belgium', legend={'loc': 'lower right'})
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[8], line 1
----> 1 population['Belgium'].plot(xticks=population.time, ylabel='population (millions)', title='Male', legend={'loc': 'lower right'})
      2 plt.show()

NameError: name 'population' is not defined

There are many other ways to customize the legend, see the “Other parameters” section of matplotlib’s legend documentation. For example, to put the legend outside the plot:

[9]:
# the data plotted is the Belgian population, so the title is 'Belgium';
# the 'bbox_to_anchor' legend option is used here to place the legend outside the plot
population['Belgium'].plot(xticks=population.time, ylabel='population (millions)', title='Belgium',
                           legend={'bbox_to_anchor': (1.25, 0.6)})
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[9], line 1
----> 1 population['Belgium'].plot(xticks=population.time, ylabel='population (millions)', title='Male',
      2                            legend={'bbox_to_anchor': (1.25, 0.6)})
      3 plt.show()

NameError: name 'population' is not defined

Create a Bar plot:

[10]:
population['Belgium'].plot.bar(ylabel='population (millions)', title='Belgium')
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[10], line 1
----> 1 population['Belgium'].plot.bar(ylabel='population (millions)', title='Belgium')
      2 plt.show()

NameError: name 'population' is not defined

Create a stacked Bar plot:

[11]:
population['Belgium'].plot.bar(title='Belgium', ylabel='population (millions)', stacked=True)
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[11], line 1
----> 1 population['Belgium'].plot.bar(title='Belgium', ylabel='population (millions)', stacked=True)
      2 plt.show()

NameError: name 'population' is not defined

Create a multiplot figure (using subplots=axes):

[12]:
population.plot(subplots=('country', 'gender'), sharex=True,
                xticks=population.time, ylabel='population (millions)',
                figsize=(8, 10))
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[12], line 1
----> 1 population.plot(subplots=('country', 'gender'), sharex=True,
      2                 xticks=population.time, ylabel='population (millions)',
      3                 figsize=(8, 10))
      4 plt.show()

NameError: name 'population' is not defined

See plot for more details and examples.

See pyplot tutorial for a short introduction to matplotlib.pyplot.