Skip to content

dfloaders

dfloaders

Module for dataframe loaders

Classes

RemoteDiskCachedDfLoader

RemoteDiskCachedDfLoader(
    storage, cache_dir, working_dir=None
)

Bases: DfLoader

SimpleLoader for parquet files from cache or remote storage

Initialize a SimpleLoader for parquet files from cache or remote storage

Source code in niceml/data/dataloaders/dfloaders.py
def __init__(
    self,
    storage: StorageInterface,
    cache_dir: str,
    working_dir: Optional[str] = None,
):
    """Set up a parquet loader that keeps a copy of loaded files on disk.

    Args:
        storage: Storage the dataframes are loaded from when they are not
            found in the cache directory.
        cache_dir: Local directory under which cached parquet files live.
        working_dir: Optional prefix that is joined in front of every
            requested dataframe path.
    """
    self.storage, self.cache_path, self.working_dir = (
        storage,
        cache_dir,
        working_dir,
    )
Functions
load_df
load_df(df_path)

Loads and returns the dataframe from the local cache; on a cache miss it is loaded from the storage and written to the cache

Source code in niceml/data/dataloaders/dfloaders.py
def load_df(self, df_path: str) -> pd.DataFrame:
    """Return the dataframe at `df_path`, preferring the local disk cache.

    The requested path is prefixed with `working_dir` (if one is set) and
    then looked up below the cache directory. A cached parquet file is read
    directly; otherwise the dataframe is loaded via the storage and written
    to the cache location before it is returned.

    Args:
        df_path: Path of the parquet file, relative to `working_dir` when
            one is configured.

    Returns:
        The loaded dataframe.
    """
    remote_path = df_path
    if self.working_dir:
        remote_path = self.storage.join_paths(self.working_dir, df_path)
    local_copy = join(self.cache_path, remote_path)

    # Cache hit: no access to the storage is needed.
    if isfile(local_copy):
        return read_parquet(local_copy)

    # Cache miss: load via the storage, then populate the cache.
    loaded = LoadParquetFile().load_data(remote_path, self.storage)
    write_parquet(loaded, local_copy)
    return loaded

RemoteDiskCachedDfLoaderFactory

RemoteDiskCachedDfLoaderFactory(cache_dir)

Bases: DfLoaderFactory

Factory of RemoteDiskCachedDfLoader

Initialize a Factory for RemoteDiskCachedDfLoader

Source code in niceml/data/dataloaders/dfloaders.py
def __init__(self, cache_dir: str):
    """Create a factory whose loaders all use the same cache directory.

    Args:
        cache_dir: Cache directory that is stored on the factory as
            `cache_path`.
    """
    self.cache_path = cache_dir
Functions
create_df_loader
create_df_loader(storage, working_dir)

Returns RemoteDiskCachedDfLoader

Source code in niceml/data/dataloaders/dfloaders.py
def create_df_loader(self, storage: StorageInterface, working_dir: str) -> DfLoader:
    """Build a RemoteDiskCachedDfLoader using this factory's cache directory.

    Args:
        storage: Storage the created loader loads dataframes from.
        working_dir: Path prefix the created loader applies to requests.

    Returns:
        A new RemoteDiskCachedDfLoader.
    """
    loader = RemoteDiskCachedDfLoader(
        storage=storage,
        cache_dir=self.cache_path,
        working_dir=working_dir,
    )
    return loader

SimpleDfLoader

SimpleDfLoader(storage=None, working_dir=None)

Bases: DfLoader

SimpleLoader for parquet files

SimpleLoader for parquet files

Source code in niceml/data/dataloaders/dfloaders.py
def __init__(
    self,
    storage: Optional[StorageInterface] = None,
    working_dir: Optional[str] = None,
):
    """Set up a loader that reads parquet files straight from a storage.

    Args:
        storage: Storage to read from; a LocalStorage is created when this
            is omitted (or otherwise falsy).
        working_dir: Optional prefix that is joined in front of every
            requested dataframe path.
    """
    if not storage:
        storage = LocalStorage()
    self.storage = storage
    self.working_dir = working_dir
Functions
load_df
load_df(df_path)

Loads and returns a dataframe from a given parquet file path

Source code in niceml/data/dataloaders/dfloaders.py
def load_df(self, df_path: str) -> pd.DataFrame:
    """Load a parquet file from the configured storage as a dataframe.

    Args:
        df_path: Path of the parquet file; it is resolved relative to
            `working_dir` when one is configured.

    Returns:
        The dataframe returned by `LoadParquetFile().load_data`.
    """
    if not self.working_dir:
        target_path = df_path
    else:
        # Join with the storage's own path logic instead of os.path.join:
        # the path is resolved inside `self.storage`, whose separator need
        # not match the local OS. RemoteDiskCachedDfLoader.load_df builds
        # its target path the same way.
        target_path = self.storage.join_paths(self.working_dir, df_path)
    return LoadParquetFile().load_data(target_path, self.storage)

SimpleDfLoaderFactory

Bases: DfLoaderFactory

Factory of SimpleDfLoader

Functions
create_df_loader
create_df_loader(storage, working_dir)

Returns SimpleDfLoader

Source code in niceml/data/dataloaders/dfloaders.py
def create_df_loader(self, storage: StorageInterface, working_dir: str) -> DfLoader:
    """Build a SimpleDfLoader for the given storage and working directory.

    Args:
        storage: Storage the created loader reads parquet files from.
        working_dir: Path prefix the created loader applies to requests.

    Returns:
        A new SimpleDfLoader.
    """
    loader = SimpleDfLoader(storage=storage, working_dir=working_dir)
    return loader

Functions