Interactive online version:

Load And Dump Arrays, Sessions, Axes And Groups¶

The LArray library provides methods and functions to load and dump Array, Session, Axis and Group objects to several formats such as Excel, CSV and HDF5. The HDF5 file format is designed to store and organize large amounts of data. It allows reading and writing data much faster than when working with CSV and Excel files.

[1]:

# first of all, import the LArray library
from larray import *

Loading and Dumping Arrays¶

Loading Arrays - Basic Usage (CSV, Excel, HDF5)¶

To read an array from a CSV file, you must use the read_csv function:

[2]:

csv_dir = get_example_filepath('examples')

# read the array pop from the file 'pop.csv'.
# The data of the array below is derived from a subset of the demo_pjan table from Eurostat
pop = read_csv(csv_dir + '/pop.csv')
pop

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-2-0ebfcb6cb1c0> in <module>
      3 # read the array pop from the file 'pop.csv'.
      4 # The data of the array below is derived from a subset of the demo_pjan table from Eurostat
----> 5 pop = read_csv(csv_dir + '/pop.csv')
      6 pop

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in read_csv(filepath_or_buffer, nb_axes, index_col, sep, headersep, fill_value, na, sort_rows, sort_columns, wide, dialect, **kwargs)
    231         raw = False
    232
--> 233     return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide)
    234
    235

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

To read an array from a sheet of an Excel file, you can use the read_excel function:

[3]:

filepath_excel = get_example_filepath('examples.xlsx')

# read the array from the sheet 'births' of the Excel file 'examples.xlsx'
# The data of the array below is derived from a subset of the demo_fasec table from Eurostat
births = read_excel(filepath_excel, 'births')
births

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-3-03e491b54429> in <module>
      3 # read the array from the sheet 'births' of the Excel file 'examples.xlsx'
      4 # The data of the array below is derived from a subset of the demo_fasec table from Eurostat
----> 5 births = read_excel(filepath_excel, 'births')
      6 births

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/excel.py in read_excel(filepath, sheet, nb_axes, index_col, fill_value, na, sort_rows, sort_columns, wide, engine, range, **kwargs)
    222         df = pd.read_excel(filepath, sheet, index_col=index_col, engine=engine, **kwargs)
    223         return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, raw=index_col is None,
--> 224                           fill_value=fill_value, wide=wide)
    225
    226

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

The open_excel function in combination with the load method allows you to load several arrays from the same Workbook without opening and closing it several times:

# open the Excel file 'examples.xlsx' and keep it open for the duration of the indented block.
# The Python keyword ``with`` ensures that the Excel file is properly closed even if an error occurs
with open_excel(filepath_excel) as wb:
    # load the array 'pop' from the sheet 'pop'
    pop = wb['pop'].load()
    # load the array 'births' from the sheet 'births'
    births = wb['births'].load()
    # load the array 'deaths' from the sheet 'deaths'
    deaths = wb['deaths'].load()

# the Workbook is automatically closed when getting out the block defined by the with statement

Warning: open_excel only works on Windows and requires the xlwings library to be installed.

The HDF5 file format is specifically designed to store and organize large amounts of data. Reading and writing data in this file format is much faster than with CSV or Excel. An HDF5 file can contain multiple arrays, each array being associated with a key. To read an array from an HDF5 file, you must use the read_hdf function and provide the key associated with the array:

[4]:

filepath_hdf = get_example_filepath('examples.h5')

# read the array from the file 'examples.h5' associated with the key 'deaths'
# The data of the array below is derived from a subset of the demo_magec table from Eurostat
deaths = read_hdf(filepath_hdf, 'deaths')
deaths

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-4-a781a0c3b5ad> in <module>
      3 # read the array from the file 'examples.h5' associated with the key 'deaths'
      4 # The data of the array below is derived from a subset of the demo_magec table from Eurostat
----> 5 deaths = read_hdf(filepath_hdf, 'deaths')
      6 deaths

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in read_hdf(filepath_or_buffer, key, fill_value, na, sort_rows, sort_columns, name, **kwargs)
     81             cartesian_prod = writer != 'LArray'
     82             res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value,
---> 83                              parse_header=False, cartesian_prod=cartesian_prod)
     84             if _meta is not None:
     85                 res.meta = _meta

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    241             raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
    242                              'Please call the method sort_axes on the returned array to sort rows or columns')
--> 243         axes_labels = index_to_labels(df.index, sort=False)
    244
    245     # Pandas treats column labels as column names (strings) so we need to convert them to values

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

Dumping Arrays - Basic Usage (CSV, Excel, HDF5)¶

To write an array in a CSV file, you must use the to_csv method:

[5]:

# save the array pop in the file 'pop.csv'
pop.to_csv('pop.csv')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-73b7c4cf8e59> in <module>
      1 # save the array pop in the file 'pop.csv'
----> 2 pop.to_csv('pop.csv')

NameError: name 'pop' is not defined

To write an array to a sheet of an Excel file, you can use the to_excel method:

[6]:

# save the array pop in the sheet 'pop' of the Excel file 'population.xlsx'
pop.to_excel('population.xlsx', 'pop')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-be0820cd6131> in <module>
      1 # save the array pop in the sheet 'pop' of the Excel file 'population.xlsx'
----> 2 pop.to_excel('population.xlsx', 'pop')

NameError: name 'pop' is not defined

Note that to_excel creates a new Excel file if it does not exist yet. If the file already exists, a new sheet is added after the existing ones if that sheet does not already exist:

[7]:

# add a new sheet 'births' to the file 'population.xlsx' and save the array births in it
births.to_excel('population.xlsx', 'births')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-b1bcd740e9ec> in <module>
      1 # add a new sheet 'births' to the file 'population.xlsx' and save the array births in it
----> 2 births.to_excel('population.xlsx', 'births')

NameError: name 'births' is not defined

To reset an Excel file, you simply need to set the overwrite_file argument as True:

[8]:

# 1. reset the file 'population.xlsx' (all sheets are removed)
# 2. create a sheet 'pop' and save the array pop in it
pop.to_excel('population.xlsx', 'pop', overwrite_file=True)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-e2b202392c1c> in <module>
      1 # 1. reset the file 'population.xlsx' (all sheets are removed)
      2 # 2. create a sheet 'pop' and save the array pop in it
----> 3 pop.to_excel('population.xlsx', 'pop', overwrite_file=True)

NameError: name 'pop' is not defined

The open_excel function in combination with the dump() method allows you to open a Workbook and to export several arrays at once. If the Excel file doesn’t exist, the overwrite_file argument must be set to True.

Warning: The save method must be called at the end of the block defined by the with statement to actually write data in the Excel file, otherwise you will end up with an empty file.

# to create a new Excel file, argument overwrite_file must be set to True
with open_excel('population.xlsx', overwrite_file=True) as wb:
    # add a new sheet 'pop' and dump the array pop in it
    wb['pop'] = pop.dump()
    # add a new sheet 'births' and dump the array births in it
    wb['births'] = births.dump()
    # add a new sheet 'deaths' and dump the array deaths in it
    wb['deaths'] = deaths.dump()
    # actually write data in the Workbook
    wb.save()

# the Workbook is automatically closed when getting out the block defined by the with statement

To write an array in an HDF5 file, you must use the to_hdf method and provide the key that will be associated with the array:

[9]:

# save the array pop in the file 'population.h5' and associate it with the key 'pop'
pop.to_hdf('population.h5', 'pop')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-5e7420cb5ab4> in <module>
      1 # save the array pop in the file 'population.h5' and associate it with the key 'pop'
----> 2 pop.to_hdf('population.h5', 'pop')

NameError: name 'pop' is not defined

Specifying Wide VS Narrow format (CSV, Excel)¶

By default, all reading functions assume that arrays are stored in the wide format, meaning that their last axis is represented horizontally:

country \ time	2013	2014	2015
Belgium	11137974	11180840	11237274
France	65600350	65942267	66456279

By setting the wide argument to False, reading functions will assume instead that arrays are stored in the narrow format, i.e. one column per axis plus one value column:

country	time	value
Belgium	2013	11137974
Belgium	2014	11180840
Belgium	2015	11237274
France	2013	65600350
France	2014	65942267
France	2015	66456279

[10]:

# set 'wide' argument to False to indicate that the array is stored in the 'narrow' format
pop_BE_FR = read_csv(csv_dir + '/pop_narrow_format.csv', wide=False)
pop_BE_FR

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-10-f355dbc17a36> in <module>
      1 # set 'wide' argument to False to indicate that the array is stored in the 'narrow' format
----> 2 pop_BE_FR = read_csv(csv_dir + '/pop_narrow_format.csv', wide=False)
      3 pop_BE_FR

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in read_csv(filepath_or_buffer, nb_axes, index_col, sep, headersep, fill_value, na, sort_rows, sort_columns, wide, dialect, **kwargs)
    231         raw = False
    232
--> 233     return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide)
    234
    235

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    316             series = df[df.columns[-1]]
    317             series.name = df.index.name
--> 318             return from_series(series, sort_rows=sort_columns, **kwargs)
    319
    320     # handle 1D arrays

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_series(s, sort_rows, fill_value, meta, **kwargs)
    120     a1   b1  6.0  7.0
    121     """
--> 122     if isinstance(s.index, pd.core.index.MultiIndex):
    123         # TODO: use argument sort=False when it will be available
    124         # (see https://github.com/pandas-dev/pandas/issues/15105)

AttributeError: module 'pandas.core' has no attribute 'index'

[11]:

# same for the read_excel function
pop_BE_FR = read_excel(filepath_excel, sheet='pop_narrow_format', wide=False)
pop_BE_FR

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-11-c57b6e5ab300> in <module>
      1 # same for the read_excel function
----> 2 pop_BE_FR = read_excel(filepath_excel, sheet='pop_narrow_format', wide=False)
      3 pop_BE_FR

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/excel.py in read_excel(filepath, sheet, nb_axes, index_col, fill_value, na, sort_rows, sort_columns, wide, engine, range, **kwargs)
    222         df = pd.read_excel(filepath, sheet, index_col=index_col, engine=engine, **kwargs)
    223         return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, raw=index_col is None,
--> 224                           fill_value=fill_value, wide=wide)
    225
    226

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    316             series = df[df.columns[-1]]
    317             series.name = df.index.name
--> 318             return from_series(series, sort_rows=sort_columns, **kwargs)
    319
    320     # handle 1D arrays

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_series(s, sort_rows, fill_value, meta, **kwargs)
    120     a1   b1  6.0  7.0
    121     """
--> 122     if isinstance(s.index, pd.core.index.MultiIndex):
    123         # TODO: use argument sort=False when it will be available
    124         # (see https://github.com/pandas-dev/pandas/issues/15105)

AttributeError: module 'pandas.core' has no attribute 'index'

By default, writing functions will set the name of the column containing the data to ‘value’. You can choose the name of this column by using the value_name argument. For example, using value_name='population' you can export the previous array as:

country	time	population
Belgium	2013	11137974
Belgium	2014	11180840
Belgium	2015	11237274
France	2013	65600350
France	2014	65942267
France	2015	66456279

[12]:

# dump the array pop_BE_FR in a narrow format (one column per axis plus one value column).
# By default, the name of the column containing data is set to 'value'
pop_BE_FR.to_csv('pop_narrow_format.csv', wide=False)

# same but replace 'value' by 'population'
pop_BE_FR.to_csv('pop_narrow_format.csv', wide=False, value_name='population')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-53c5a64c2ad7> in <module>
      1 # dump the array pop_BE_FR in a narrow format (one column per axis plus one value column).
      2 # By default, the name of the column containing data is set to 'value'
----> 3 pop_BE_FR.to_csv('pop_narrow_format.csv', wide=False)
      4
      5 # same but replace 'value' by 'population'

NameError: name 'pop_BE_FR' is not defined

[13]:

# same for the to_excel method
pop_BE_FR.to_excel('population.xlsx', 'pop_narrow_format', wide=False, value_name='population')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-9f8405f0a952> in <module>
      1 # same for the to_excel method
----> 2 pop_BE_FR.to_excel('population.xlsx', 'pop_narrow_format', wide=False, value_name='population')

NameError: name 'pop_BE_FR' is not defined

Like with the to_excel method, it is possible to export arrays in a narrow format using open_excel. To do so, you must set the wide argument of the dump method to False:

with open_excel('population.xlsx') as wb:
    # dump the array pop_BE_FR in a narrow format:
    # one column per axis plus one value column.
    # Argument value_name can be used to change the name of the
    # column containing the data (default name is 'value')
    wb['pop_narrow_format'] = pop_BE_FR.dump(wide=False, value_name='population')
    # don't forget to call save()
    wb.save()

# in the sheet 'pop_narrow_format', data is written as:
# | country | time | population |
# | ------- | ---- | ---------- |
# | Belgium | 2013 | 11137974   |
# | Belgium | 2014 | 11180840   |
# | Belgium | 2015 | 11237274   |
# | France  | 2013 | 65600350   |
# | France  | 2014 | 65942267   |
# | France  | 2015 | 66456279   |

Specifying Position in Sheet (Excel)¶

If you want to read an array from an Excel sheet which does not start at cell A1 (when there is more than one array stored in the same sheet for example), you will need to use the range argument.

Warning: Note that the range argument is only available if you have the library xlwings installed (Windows).

# the 'range' argument must be used to load data not starting at cell A1.
# This is useful when there are several arrays stored in the same sheet
births = read_excel(filepath_excel, sheet='pop_births_deaths', range='A9:E15')

Using open_excel, ranges are passed in brackets:

with open_excel(filepath_excel) as wb:
    # store sheet 'pop_births_deaths' in a temporary variable sh
    sh = wb['pop_births_deaths']
    # load the array pop from range A1:E7
    pop = sh['A1:E7'].load()
    # load the array births from range A9:E15
    births = sh['A9:E15'].load()
    # load the array deaths from range A17:E23
    deaths = sh['A17:E23'].load()

# the Workbook is automatically closed when getting out the block defined by the with statement

When exporting arrays to Excel files, data is written starting at cell A1 by default. Using the position argument of the to_excel method, it is possible to specify the top left cell of the dumped data. This can be useful when you want to export several arrays in the same sheet, for example:

Warning: Note that the position argument is only available if you have the library xlwings installed (Windows).

filename = 'population.xlsx'
sheetname = 'pop_births_deaths'

# save the arrays pop, births and deaths in the same sheet 'pop_births_deaths'.
# The 'position' argument is used to shift the location of the second and third arrays to be dumped
pop.to_excel(filename, sheetname)
births.to_excel(filename, sheetname, position='A9')
deaths.to_excel(filename, sheetname, position='A17')

Using open_excel, the position is passed in brackets (this allows you to also add extra information):

with open_excel('population.xlsx') as wb:
    # add a new sheet 'pop_births_deaths' and write 'population' in the first cell
    # note: you can use wb['new_sheet_name'] = '' to create an empty sheet
    wb['pop_births_deaths'] = 'population'
    # store sheet 'pop_births_deaths' in a temporary variable sh
    sh = wb['pop_births_deaths']
    # dump the array pop in sheet 'pop_births_deaths' starting at cell A2
    sh['A2'] = pop.dump()
    # add 'births' in cell A10
    sh['A10'] = 'births'
    # dump the array births in sheet 'pop_births_deaths' starting at cell A11
    sh['A11'] = births.dump()
    # add 'deaths' in cell A19
    sh['A19'] = 'deaths'
    # dump the array deaths in sheet 'pop_births_deaths' starting at cell A20
    sh['A20'] = deaths.dump()
    # don't forget to call save()
    wb.save()

# the Workbook is automatically closed when getting out the block defined by the with statement

Exporting data without headers (Excel)¶

For some reason, you may want to export only the data of an array without its axes. For example, you may want to insert a new column containing extra information. As an exercise, let us assume we want to add the capital city for each country present in the array containing the total population by country:

country	capital city	2013	2014	2015
Belgium	Brussels	11137974	11180840	11237274
France	Paris	65600350	65942267	66456279
Germany	Berlin	80523746	80767463	81197537

Assuming you have prepared an excel sheet as below:

country	capital city	2013	2014	2015
Belgium	Brussels
France	Paris
Germany	Berlin

you can then dump the data at the right place by setting the header argument of to_excel to False and specifying the position of the data in the sheet:

pop_by_country = pop.sum('gender')

# export only the data of the array pop_by_country starting at cell C2
pop_by_country.to_excel('population.xlsx', 'pop_by_country', header=False, position='C2')

Using open_excel, you can easily prepare the sheet and then export only the data at the right place, either by setting the header argument of the dump method to False or by assigning the array directly without calling dump:

with open_excel('population.xlsx') as wb:
    # create new empty sheet 'pop_by_country'
    wb['pop_by_country'] = ''
    # store sheet 'pop_by_country' in a temporary variable sh
    sh = wb['pop_by_country']
    # write extra information (description)
    sh['A1'] = 'Population at 1st January by country'
    # export column names
    sh['A2'] = ['country', 'capital city']
    sh['C2'] = pop_by_country.time.labels
    # export countries as first column
    sh['A3'].options(transpose=True).value = pop_by_country.country.labels
    # export capital cities as second column
    sh['B3'].options(transpose=True).value = ['Brussels', 'Paris', 'Berlin']
    # export only data of pop_by_country
    sh['C3'] = pop_by_country.dump(header=False)
    # or equivalently
    sh['C3'] = pop_by_country
    # don't forget to call save()
    wb.save()

# the Workbook is automatically closed when getting out the block defined by the with statement

Specifying the Number of Axes at Reading (CSV, Excel)¶

By default, read_csv and read_excel will search for the position of the first cell containing the special character \ in the header line in order to determine the number of axes of the array to read. The special character \ is used to separate the names of the last two axes. If there is no special character \, read_csv and read_excel will consider that the array to read has only one dimension. For an array stored as:

country	gender \ time	2013	2014	2015
Belgium	Male	5472856	5493792	5524068
Belgium	Female	5665118	5687048	5713206
France	Male	31772665	31936596	32175328
France	Female	33827685	34005671	34280951
Germany	Male	39380976	39556923	39835457
Germany	Female	41142770	41210540	41362080

read_csv and read_excel will find the special character \ in the second cell, meaning they expect three axes (country, gender and time).

Sometimes, you need to read an array for which the name of the last axis is implicit:

country	gender	2013	2014	2015
Belgium	Male	5472856	5493792	5524068
Belgium	Female	5665118	5687048	5713206
France	Male	31772665	31936596	32175328
France	Female	33827685	34005671	34280951
Germany	Male	39380976	39556923	39835457
Germany	Female	41142770	41210540	41362080

In such a case, you will have to inform read_csv and read_excel of the number of axes of the output array by setting the nb_axes argument:

[14]:

# read the 3 x 2 x 3 array stored in the file 'pop_missing_axis_name.csv' without using 'nb_axes' argument.
pop = read_csv(csv_dir + '/pop_missing_axis_name.csv')
# shape and data type of the output array are not what we expected
pop.info

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-14-46a6fc76fcd2> in <module>
      1 # read the 3 x 2 x 3 array stored in the file 'pop_missing_axis_name.csv' wihout using 'nb_axes' argument.
----> 2 pop = read_csv(csv_dir + '/pop_missing_axis_name.csv')
      3 # shape and data type of the output array are not what we expected
      4 pop.info

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in read_csv(filepath_or_buffer, nb_axes, index_col, sep, headersep, fill_value, na, sort_rows, sort_columns, wide, dialect, **kwargs)
    231         raw = False
    232
--> 233     return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide)
    234
    235

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

[15]:

# by setting the 'nb_axes' argument, you can indicate to read_csv the number of axes of the output array
pop = read_csv(csv_dir + '/pop_missing_axis_name.csv', nb_axes=3)

# give a name to the last axis
pop = pop.rename(-1, 'time')

# shape and data type of the output array are what we expected
pop.info

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-15-5e4ef9813b05> in <module>
      1 # by setting the 'nb_axes' argument, you can indicate to read_csv the number of axes of the output array
----> 2 pop = read_csv(csv_dir + '/pop_missing_axis_name.csv', nb_axes=3)
      3
      4 # give a name to the last axis
      5 pop = pop.rename(-1, 'time')

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in read_csv(filepath_or_buffer, nb_axes, index_col, sep, headersep, fill_value, na, sort_rows, sort_columns, wide, dialect, **kwargs)
    231         raw = False
    232
--> 233     return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide)
    234
    235

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

[16]:

# same for the read_excel function
pop = read_excel(filepath_excel, sheet='pop_missing_axis_name', nb_axes=3)
pop = pop.rename(-1, 'time')
pop.info

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-16-9e97e7b5f94e> in <module>
      1 # same for the read_excel function
----> 2 pop = read_excel(filepath_excel, sheet='pop_missing_axis_name', nb_axes=3)
      3 pop = pop.rename(-1, 'time')
      4 pop.info

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/excel.py in read_excel(filepath, sheet, nb_axes, index_col, fill_value, na, sort_rows, sort_columns, wide, engine, range, **kwargs)
    222         df = pd.read_excel(filepath, sheet, index_col=index_col, engine=engine, **kwargs)
    223         return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, raw=index_col is None,
--> 224                           fill_value=fill_value, wide=wide)
    225
    226

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

NaNs and Missing Data Handling at Reading (CSV, Excel)¶

Sometimes, there is no data available for some label combinations. In the example below, the rows corresponding to France - Male and Germany - Female are missing:

country	gender \ time	2013	2014	2015
Belgium	Male	5472856	5493792	5524068
Belgium	Female	5665118	5687048	5713206
France	Female	33827685	34005671	34280951
Germany	Male	39380976	39556923	39835457

By default, read_csv and read_excel will fill cells associated with missing label combinations with nans. Be aware that, in that case, an int array will be converted to a float array.

[17]:

# by default, cells associated with missing label combinations are filled with nans.
# In that case, the output array is converted to a float array
read_csv(csv_dir + '/pop_missing_values.csv')

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-17-7598edd9f359> in <module>
      1 # by default, cells associated will missing label combinations are filled with nans.
      2 # In that case, the output array is converted to a float array
----> 3 read_csv(csv_dir + '/pop_missing_values.csv')

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in read_csv(filepath_or_buffer, nb_axes, index_col, sep, headersep, fill_value, na, sort_rows, sort_columns, wide, dialect, **kwargs)
    231         raw = False
    232
--> 233     return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide)
    234
    235

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

However, it is possible to choose which value to use to fill missing cells using the fill_value argument:

[18]:

read_csv(csv_dir + '/pop_missing_values.csv', fill_value=0)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-18-07a4dc5d864a> in <module>
----> 1 read_csv(csv_dir + '/pop_missing_values.csv', fill_value=0)

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in read_csv(filepath_or_buffer, nb_axes, index_col, sep, headersep, fill_value, na, sort_rows, sort_columns, wide, dialect, **kwargs)
    231         raw = False
    232
--> 233     return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide)
    234
    235

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

[19]:

# same for the read_excel function
read_excel(filepath_excel, sheet='pop_missing_values', fill_value=0)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-19-debca9e01abc> in <module>
      1 # same for the read_excel function
----> 2 read_excel(filepath_excel, sheet='pop_missing_values', fill_value=0)

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/excel.py in read_excel(filepath, sheet, nb_axes, index_col, fill_value, na, sort_rows, sort_columns, wide, engine, range, **kwargs)
    222         df = pd.read_excel(filepath, sheet, index_col=index_col, engine=engine, **kwargs)
    223         return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, raw=index_col is None,
--> 224                           fill_value=fill_value, wide=wide)
    225
    226

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

Sorting Axes at Reading (CSV, Excel, HDF5)¶

The sort_rows and sort_columns arguments of the reading functions allow you to sort rows and columns alphabetically:

[20]:

# sort labels at reading --> Male and Female labels are inverted
read_csv(csv_dir + '/pop.csv', sort_rows=True)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-20-2a097cec7595> in <module>
      1 # sort labels at reading --> Male and Female labels are inverted
----> 2 read_csv(csv_dir + '/pop.csv', sort_rows=True)

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in read_csv(filepath_or_buffer, nb_axes, index_col, sep, headersep, fill_value, na, sort_rows, sort_columns, wide, dialect, **kwargs)
    231         raw = False
    232
--> 233     return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide)
    234
    235

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

[21]:

read_excel(filepath_excel, sheet='births', sort_rows=True)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-21-10dae479fc29> in <module>
----> 1 read_excel(filepath_excel, sheet='births', sort_rows=True)

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/excel.py in read_excel(filepath, sheet, nb_axes, index_col, fill_value, na, sort_rows, sort_columns, wide, engine, range, **kwargs)
    222         df = pd.read_excel(filepath, sheet, index_col=index_col, engine=engine, **kwargs)
    223         return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, raw=index_col is None,
--> 224                           fill_value=fill_value, wide=wide)
    225
    226

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

[22]:

read_hdf(filepath_hdf, key='deaths').sort_axes()

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-22-378ea28c9b5a> in <module>
----> 1 read_hdf(filepath_hdf, key='deaths').sort_axes()

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in read_hdf(filepath_or_buffer, key, fill_value, na, sort_rows, sort_columns, name, **kwargs)
     81             cartesian_prod = writer != 'LArray'
     82             res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value,
---> 83                              parse_header=False, cartesian_prod=cartesian_prod)
     84             if _meta is not None:
     85                 res.meta = _meta

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    241             raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
    242                              'Please call the method sort_axes on the returned array to sort rows or columns')
--> 243         axes_labels = index_to_labels(df.index, sort=False)
    244
    245     # Pandas treats column labels as column names (strings) so we need to convert them to values

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

Metadata (HDF5)¶

Since version 0.29 of LArray, it is possible to add metadata to arrays:

[23]:

pop.meta.title = 'Population at 1st January'
pop.meta.origin = 'Table demo_jpan from Eurostat'

pop.info

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-23-ab1445aa42ff> in <module>
----> 1 pop.meta.title = 'Population at 1st January'
      2 pop.meta.origin = 'Table demo_jpan from Eurostat'
      3
      4 pop.info

NameError: name 'pop' is not defined

These metadata are automatically saved and loaded when working with the HDF5 file format:

[24]:

pop.to_hdf('population.h5', 'pop')

new_pop = read_hdf('population.h5', 'pop')
new_pop.info

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-24-bafaf422e3d9> in <module>
----> 1 pop.to_hdf('population.h5', 'pop')
      2
      3 new_pop = read_hdf('population.h5', 'pop')
      4 new_pop.info

NameError: name 'pop' is not defined

Warning: Currently, metadata associated with arrays cannot be saved and loaded when working with CSV and Excel files. This restriction does not apply, however, to metadata associated with sessions.

Loading and Dumping Sessions¶

One of the main advantages of grouping arrays, axes and groups in session objects is that you can load and save all of them in one shot. As with arrays, it is possible to associate metadata with a session. These can be saved and loaded in all file formats.

Loading Sessions (CSV, Excel, HDF5)¶

To load the items of a session, you have two options:

Instantiate a new session and pass the path to the Excel/HDF5 file or to the directory containing CSV files to the Session constructor:

[25]:

# create a new Session object and load all arrays, axes, groups and metadata
# from all CSV files located in the passed directory
csv_dir = get_example_filepath('demography_eurostat')
session = Session(csv_dir)

# create a new Session object and load all arrays, axes, groups and metadata
# stored in the passed Excel file
filepath_excel = get_example_filepath('demography_eurostat.xlsx')
session = Session(filepath_excel)

# create a new Session object and load all arrays, axes, groups and metadata
# stored in the passed HDF5 file
filepath_hdf = get_example_filepath('demography_eurostat.h5')
session = Session(filepath_hdf)

print(session.summary())

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-25-a92fe605d750> in <module>
      2 # from all CSV files located in the passed directory
      3 csv_dir = get_example_filepath('demography_eurostat')
----> 4 session = Session(csv_dir)
      5
      6 # create a new Session object and load all arrays, axes, groups and metadata

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in __init__(self, *args, **kwargs)
     94             if isinstance(a0, str):
     95                 # assume a0 is a filename
---> 96                 self.load(a0)
     97             else:
     98                 # iterable of tuple or dict-like

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in load(self, fname, names, engine, display, **kwargs)
    426         else:
    427             handler = handler_cls(fname)
--> 428         metadata, objects = handler.read(names, display=display, **kwargs)
    429         for k, v in objects.items():
    430             self[k] = v

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/common.py in read(self, keys, *args, **kwargs)
    119         ignore_exceptions = kwargs.pop('ignore_exceptions', False)
    120         self._open_for_read()
--> 121         metadata = self._read_metadata()
    122         key_types = self.list_items()
    123         if keys is not None:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in _read_metadata(self)
    327         filepath = self._to_filepath('__metadata__')
    328         if os.path.isfile(filepath):
--> 329             meta = read_csv(filepath, wide=False)
    330             return Metadata.from_array(meta)
    331         else:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in read_csv(filepath_or_buffer, nb_axes, index_col, sep, headersep, fill_value, na, sort_rows, sort_columns, wide, dialect, **kwargs)
    231         raw = False
    232
--> 233     return df_asarray(df, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, raw=raw, wide=wide)
    234
    235

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    316             series = df[df.columns[-1]]
    317             series.name = df.index.name
--> 318             return from_series(series, sort_rows=sort_columns, **kwargs)
    319
    320     # handle 1D arrays

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_series(s, sort_rows, fill_value, meta, **kwargs)
    120     a1   b1  6.0  7.0
    121     """
--> 122     if isinstance(s.index, pd.core.index.MultiIndex):
    123         # TODO: use argument sort=False when it will be available
    124         # (see https://github.com/pandas-dev/pandas/issues/15105)

AttributeError: module 'pandas.core' has no attribute 'index'

Call the load method on an existing session and pass the path to the Excel/HDF5 file or to the directory containing CSV files as first argument:

[26]:

# create a session containing 3 axes, 2 groups and one array 'pop'
filepath = get_example_filepath('pop_only.xlsx')
session = Session(filepath)

print(session.summary())

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-26-56e1ec09f1ab> in <module>
      1 # create a session containing 3 axes, 2 groups and one array 'pop'
      2 filepath = get_example_filepath('pop_only.xlsx')
----> 3 session = Session(filepath)
      4
      5 print(session.summary())

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in __init__(self, *args, **kwargs)
     94             if isinstance(a0, str):
     95                 # assume a0 is a filename
---> 96                 self.load(a0)
     97             else:
     98                 # iterable of tuple or dict-like

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in load(self, fname, names, engine, display, **kwargs)
    426         else:
    427             handler = handler_cls(fname)
--> 428         metadata, objects = handler.read(names, display=display, **kwargs)
    429         for k, v in objects.items():
    430             self[k] = v

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/common.py in read(self, keys, *args, **kwargs)
    128                 print("loading", type, "object", key, "...", end=' ')
    129             try:
--> 130                 res[key] = self._read_item(key, type, *args, **kwargs)
    131             except Exception:
    132                 if not ignore_exceptions:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/excel.py in _read_item(self, key, type, *args, **kwargs)
    252         if type == 'Array':
    253             df = self.handle.parse(key, *args, **kwargs)
--> 254             return df_asarray(df, raw=True)
    255         else:
    256             raise TypeError()

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    236     if cartesian_prod:
    237         df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
--> 238                                                fill_value=fill_value, **kwargs)
    239     else:
    240         if sort_rows or sort_columns:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in cartesian_product_df(df, sort_rows, sort_columns, fill_value, **kwargs)
     54 def cartesian_product_df(df, sort_rows=False, sort_columns=False, fill_value=nan, **kwargs):
     55     idx = df.index
---> 56     labels = index_to_labels(idx, sort=sort_rows)
     57     if isinstance(idx, pd.core.index.MultiIndex):
     58         if sort_rows:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

[27]:

# call the load method on the previous session and add the 'births' and 'deaths' arrays to it
filepath = get_example_filepath('births_and_deaths.xlsx')
session.load(filepath)

print(session.summary())

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-27-fe4df71a7d14> in <module>
      1 # call the load method on the previous session and add the 'births' and 'deaths' arrays to it
      2 filepath = get_example_filepath('births_and_deaths.xlsx')
----> 3 session.load(filepath)
      4
      5 print(session.summary())

NameError: name 'session' is not defined

The load method offers some options:

Using the names argument, you can specify which items to load:

[28]:

session = Session()

# use the names argument to only load births and deaths arrays
session.load(filepath_hdf, names=['births', 'deaths'])

print(session.summary())

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-28-8f78684c5696> in <module>
      2
      3 # use the names argument to only load births and deaths arrays
----> 4 session.load(filepath_hdf, names=['births', 'deaths'])
      5
      6 print(session.summary())

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in load(self, fname, names, engine, display, **kwargs)
    426         else:
    427             handler = handler_cls(fname)
--> 428         metadata, objects = handler.read(names, display=display, **kwargs)
    429         for k, v in objects.items():
    430             self[k] = v

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/common.py in read(self, keys, *args, **kwargs)
    128                 print("loading", type, "object", key, "...", end=' ')
    129             try:
--> 130                 res[key] = self._read_item(key, type, *args, **kwargs)
    131             except Exception:
    132                 if not ignore_exceptions:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in _read_item(self, key, type, *args, **kwargs)
    137         else:
    138             raise TypeError()
--> 139         return read_hdf(self.handle, hdf_key, *args, **kwargs)
    140
    141     def _dump_item(self, key, value, *args, **kwargs):

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in read_hdf(filepath_or_buffer, key, fill_value, na, sort_rows, sort_columns, name, **kwargs)
     81             cartesian_prod = writer != 'LArray'
     82             res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value,
---> 83                              parse_header=False, cartesian_prod=cartesian_prod)
     84             if _meta is not None:
     85                 res.meta = _meta

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    241             raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
    242                              'Please call the method sort_axes on the returned array to sort rows or columns')
--> 243         axes_labels = index_to_labels(df.index, sort=False)
    244
    245     # Pandas treats column labels as column names (strings) so we need to convert them to values

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

When the display argument is set to True, the load method prints a message each time a new item is loaded:

[29]:

session = Session()

# with display=True, the load method will print a message
# each time a new item is loaded
session.load(filepath_hdf, display=True)

opening /home/docs/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/tests/data/examples.h5
loading Array object births ...

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-29-be27048b30bd> in <module>
      3 # with display=True, the load method will print a message
      4 # each time a new item is loaded
----> 5 session.load(filepath_hdf, display=True)

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in load(self, fname, names, engine, display, **kwargs)
    426         else:
    427             handler = handler_cls(fname)
--> 428         metadata, objects = handler.read(names, display=display, **kwargs)
    429         for k, v in objects.items():
    430             self[k] = v

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/common.py in read(self, keys, *args, **kwargs)
    128                 print("loading", type, "object", key, "...", end=' ')
    129             try:
--> 130                 res[key] = self._read_item(key, type, *args, **kwargs)
    131             except Exception:
    132                 if not ignore_exceptions:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in _read_item(self, key, type, *args, **kwargs)
    137         else:
    138             raise TypeError()
--> 139         return read_hdf(self.handle, hdf_key, *args, **kwargs)
    140
    141     def _dump_item(self, key, value, *args, **kwargs):

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/hdf.py in read_hdf(filepath_or_buffer, key, fill_value, na, sort_rows, sort_columns, name, **kwargs)
     81             cartesian_prod = writer != 'LArray'
     82             res = df_asarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value,
---> 83                              parse_header=False, cartesian_prod=cartesian_prod)
     84             if _meta is not None:
     85                 res.meta = _meta

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in df_asarray(df, sort_rows, sort_columns, raw, parse_header, wide, cartesian_prod, **kwargs)
    338         unfold_last_axis_name = isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]
    339         res = from_frame(df, sort_rows=sort_rows, sort_columns=sort_columns, parse_header=parse_header,
--> 340                          unfold_last_axis_name=unfold_last_axis_name, cartesian_prod=cartesian_prod, **kwargs)
    341
    342     # ugly hack to avoid anonymous axes converted as axes with name 'Unnamed: x' by pandas

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in from_frame(df, sort_rows, sort_columns, parse_header, unfold_last_axis_name, fill_value, meta, cartesian_prod, **kwargs)
    241             raise ValueError('sort_rows and sort_columns cannot not be used when cartesian_prod is set to False. '
    242                              'Please call the method sort_axes on the returned array to sort rows or columns')
--> 243         axes_labels = index_to_labels(df.index, sort=False)
    244
    245     # Pandas treats column labels as column names (strings) so we need to convert them to values

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/pandas.py in index_to_labels(idx, sort)
     41     Returns unique labels for each dimension.
     42     """
---> 43     if isinstance(idx, pd.core.index.MultiIndex):
     44         if sort:
     45             return list(idx.levels)

AttributeError: module 'pandas.core' has no attribute 'index'

Dumping Sessions (CSV, Excel, HDF5)¶

To save a session, you need to call the save method. The first argument is the path to an Excel/HDF5 file or to a directory if items are saved to CSV files:

[30]:

# save items of a session in CSV files.
# Here, the save method will create a 'population' directory in which CSV files will be written
session.save('population')

# save session to an HDF5 file
session.save('population.h5')

# save session to an Excel file
session.save('population.xlsx')

# load session saved in 'population.h5' to see its content
Session('population.h5')

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-30-20aa34ff2da2> in <module>
      7
      8 # save session to an Excel file
----> 9 session.save('population.xlsx')
     10
     11 # load session saved in 'population.h5' to see its content

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/core/session.py in save(self, fname, names, engine, overwrite, display, **kwargs)
    495             names_set = set(names)
    496             items = [(k, v) for k, v in items if k in names_set]
--> 497         handler.dump(meta, items, display=display, **kwargs)
    498
    499     def to_globals(self, names=None, depth=0, warn=True, inplace=False):

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/common.py in dump(self, metadata, key_values, *args, **kwargs)
    170                     print("Cannot dump {}. {} is not a supported type".format(key, type(value).__name__))
    171         self.save()
--> 172         self.close()
    173         self._update_original_file()

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/excel.py in close(self)
    280
    281     def close(self):
--> 282         self.handle.close()
    283
    284

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/pandas/io/excel/_base.py in close(self)
    779     def close(self):
    780         """synonym for save, to make it more file-like"""
--> 781         return self.save()
    782
    783

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/pandas/io/excel/_openpyxl.py in save(self)
     41         Save workbook to disk.
     42         """
---> 43         return self.book.save(self.path)
     44
     45     @classmethod

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/openpyxl/workbook/workbook.py in save(self, filename)
    390         if self.write_only and not self.worksheets:
    391             self.create_sheet()
--> 392         save_workbook(self, filename)
    393
    394

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/openpyxl/writer/excel.py in save_workbook(workbook, filename)
    291     archive = ZipFile(filename, 'w', ZIP_DEFLATED, allowZip64=True)
    292     writer = ExcelWriter(workbook, archive)
--> 293     writer.save()
    294     return True
    295

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/openpyxl/writer/excel.py in save(self)
    273     def save(self):
    274         """Write data into the archive."""
--> 275         self.write_data()
    276         self._archive.close()
    277

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/openpyxl/writer/excel.py in write_data(self)
     87         writer = WorkbookWriter(self.workbook)
     88         archive.writestr(ARC_ROOT_RELS, writer.write_root_rels())
---> 89         archive.writestr(ARC_WORKBOOK, writer.write())
     90         archive.writestr(ARC_WORKBOOK_RELS, writer.write_rels())
     91

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/openpyxl/workbook/_writer.py in write(self)
    146         self.write_names()
    147         self.write_pivots()
--> 148         self.write_views()
    149         self.write_refs()
    150

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/openpyxl/workbook/_writer.py in write_views(self)
    133
    134     def write_views(self):
--> 135         active = get_active_sheet(self.wb)
    136         if self.wb.views:
    137             self.wb.views[0].activeTab = active

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/openpyxl/workbook/_writer.py in get_active_sheet(wb)
     31     visible_sheets = [idx for idx, sheet in enumerate(wb._sheets) if sheet.sheet_state == "visible"]
     32     if not visible_sheets:
---> 33         raise IndexError("At least one sheet must be visible")
     34
     35     idx = wb._active_sheet_index

IndexError: At least one sheet must be visible

Note: For the CSV and Excel formats, the metadata is saved in a dedicated Excel sheet (or CSV file) named __metadata__(.csv). The name of this sheet (or CSV file) cannot be changed.

The save method has several arguments:

Using the names argument, you can specify which items to save:

[31]:

# use the names argument to only save births and deaths arrays
session.save('population.h5', names=['births', 'deaths'])

# load session saved in 'population.h5' to see its content
Session('population.h5')

[31]:

Session()

By default, dumping a session to an Excel or HDF5 file will overwrite it. By setting the overwrite argument to False, you can choose to update the existing Excel or HDF5 file:

[32]:

pop = read_csv('./population/pop.csv')
ses_pop = Session([('pop', pop)])

# by setting overwrite to False, the destination file is updated instead of overwritten.
# The items already stored in the file but not present in the session are left intact.
# On the contrary, the items that exist in both the file and the session are completely overwritten.
ses_pop.save('population.h5', overwrite=False)

# load session saved in 'population.h5' to see its content
Session('population.h5')

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-32-ae724498303f> in <module>
----> 1 pop = read_csv('./population/pop.csv')
      2 ses_pop = Session([('pop', pop)])
      3
      4 # by setting overwrite to False, the destination file is updated instead of overwritten.
      5 # The items already stored in the file but not present in the session are left intact.

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/util/misc.py in wrapper(*args, **kwargs)
    700                 else:
    701                     kwargs[new_arg_name] = new_arg_value
--> 702             return func(*args, **kwargs)
    703         return wrapper
    704     return _deprecate_kwarg

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/larray-0.32-py3.6.egg/larray/inout/csv.py in read_csv(filepath_or_buffer, nb_axes, index_col, sep, headersep, fill_value, na, sort_rows, sort_columns, wide, dialect, **kwargs)
    214             index_col = [0]
    215
--> 216     df = pd.read_csv(filepath_or_buffer, index_col=index_col, sep=sep, **kwargs)
    217     if dialect == 'liam2':
    218         if len(df) == 1:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    674         )
    675
--> 676         return _read(filepath_or_buffer, kwds)
    677
    678     parser_f.__name__ = name

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    446
    447     # Create the parser.
--> 448     parser = TextFileReader(fp_or_buf, **kwds)
    449
    450     if chunksize or iterator:

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    878             self.options["has_index_names"] = kwds["has_index_names"]
    879
--> 880         self._make_engine(self.engine)
    881
    882     def close(self):

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1112     def _make_engine(self, engine="c"):
   1113         if engine == "c":
-> 1114             self._engine = CParserWrapper(self.f, **self.options)
   1115         else:
   1116             if engine == "python":

~/checkouts/readthedocs.org/user_builds/larray/conda/0.32/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1889         kwds["usecols"] = self.usecols
   1890
-> 1891         self._reader = parsers.TextReader(src, **kwds)
   1892         self.unnamed_cols = self._reader.unnamed_cols
   1893

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: [Errno 2] File ./population/pop.csv does not exist: './population/pop.csv'

When the display argument is set to True, the save method prints a message each time an item is dumped:

[33]:

# with display=True, the save method will print a message
# each time an item is dumped
session.save('population.h5', display=True)