Source code for larray.inout.stata

import pandas as pd

from larray.core.array import Array
from larray.inout.pandas import from_frame

__all__ = ['read_stata']



[docs]
def read_stata(filepath_or_buffer, index_col=None, sort_rows=False, sort_columns=False, **kwargs) -> Array:
    r"""
    Read Stata .dta file and returns an Array with the contents.

    Parameters
    ----------
    filepath_or_buffer : str or file-like object
        Path to .dta file or a file handle.
    index_col : str or None, optional
        Name of column to set as index. Defaults to None.
    sort_rows : bool, optional
        Whether to sort the rows alphabetically (sorting is more efficient than not sorting).
        This only makes sense in combination with index_col. Defaults to False.
    sort_columns : bool, optional
        Whether to sort the columns alphabetically (sorting is more efficient than not sorting).
        Defaults to False.

    Returns
    -------
    Array

    See Also
    --------
    Array.to_stata

    Notes
    -----
    The round trip to Stata (Array.to_stata followed by read_stata) loose the name of the "column" axis.

    Examples
    --------
    >>> read_stata('test.dta')                   # doctest: +SKIP
    {0}\{1}  row  country  sex
          0    0       BE    F
          1    1       FR    M
          2    2       FR    F
    >>> read_stata('test.dta', index_col='row')  # doctest: +SKIP
    row\{1}  country  sex
          0       BE    F
          1       FR    M
          2       FR    F
    """
    df = pd.read_stata(filepath_or_buffer, index_col=index_col, **kwargs)
    return from_frame(df,
                      sort_rows=sort_rows,
                      sort_columns=sort_columns,
                      copy=False)