Source code for autogluon.core.dataset


import pandas as pd

from .utils.loaders import load_pd

__all__ = ['TabularDataset']


[docs]class TabularDataset(pd.DataFrame): """ A dataset in tabular format (with rows = samples, columns = features/variables). This object is essentially a pandas DataFrame (with some extra attributes) and all existing pandas methods can be applied to it. For full list of methods/attributes, see pandas Dataframe documentation: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html Parameters ---------- data : :class:`pd.DataFrame` or str If str, path to data file (CSV or Parquet format). If you already have your data in a :class:`pd.DataFrame`, you can specify it here. Attributes ---------- file_path: (str) Path to data file from which this `TabularDataset` was created. None if `data` was a :class:`pd.DataFrame`. Note: In addition to these attributes, `TabularDataset` also shares all the same attributes and methods of a pandas Dataframe. For a detailed list, see: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html Examples -------- >>> from autogluon.core.dataset import TabularDataset >>> train_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv') >>> train_data.head(30) >>> train_data.columns """ _metadata = ['file_path'] # preserved properties that will be copied to a new instance of TabularDataset @property def _constructor(self): return TabularDataset @property def _constructor_sliced(self): return pd.Series def __init__(self, data, **kwargs): if isinstance(data, str): file_path = data data = load_pd.load(file_path) else: file_path = None super().__init__(data, **kwargs) self.file_path = file_path