# Source code for bi_etl.lookups.autodisk_range_lookup

# -*- coding: utf-8 -*-
"""
Created on Jan 5, 2016

@author: Derek Wood
"""
# https://www.python.org/dev/peps/pep-0563/
from __future__ import annotations

import typing

from bi_etl.components.row.row import Row
from bi_etl.config.bi_etl_config_base import BI_ETL_Config_Base
from bi_etl.lookups.autodisk_lookup import AutoDiskLookup
from bi_etl.lookups.disk_range_lookup import DiskRangeLookup
from bi_etl.lookups.range_lookup import RangeLookup

if typing.TYPE_CHECKING:
    from bi_etl.components.etlcomponent import ETLComponent


class AutoDiskRangeLookup(AutoDiskLookup, RangeLookup):
    """
    Automatic memory / disk lookup cache for range (begin/end date) lookups.

    Combines :class:`AutoDiskLookup` with :class:`RangeLookup`: the instance
    is configured with ``RangeLookup`` as its ``MemoryLookupClass`` and
    ``DiskRangeLookup`` as its ``DiskLookupClass``, caching is delegated to
    ``AutoDiskLookup.cache_row`` and searching to
    ``RangeLookup.find_in_cache``.

    This version divides the cache into N chunks (default is 10). If RAM usage
    gets beyond limits, it starts moving chunks to disk. Once a chunk is on
    disk, it stays there.

    TODO: For use cases where the lookup will be used in a mostly sequential
    fashion, it would be useful to have a version that uses ranges instead of
    a hash function. Then when find_in_cache is called on a disk segment, we
    could swap a different segment out and bring that segment in. That's a lot
    more complicated. We'd also want to maintain a last used date for each
    segment so that if we add rows to the cache, we can choose the best
    segment to swap to disk.

    Also worth considering is that if we bring a segment in from disk, it
    would be best to keep the disk version. At that point any additions to
    that segment would need to go to both places.
    """

    def __init__(
        self,
        lookup_name: str,
        lookup_keys: list,
        parent_component: ETLComponent,
        begin_date,
        end_date,
        config: typing.Optional[BI_ETL_Config_Base] = None,
        use_value_cache: bool = True,
        path=None,
    ):
        """
        Initialize both parent lookups and select the range-aware cache classes.

        Parameters
        ----------
        lookup_name: str
            Name of this lookup (passed through to both parent initializers).
        lookup_keys: list
            Key columns of this lookup (passed through to both parent
            initializers).
        parent_component: ETLComponent
            The component this lookup belongs to.
        begin_date
            Passed through to both parent initializers.
            NOTE(review): presumably identifies the start of the effective
            date range -- confirm against RangeLookup.
        end_date
            Passed through to both parent initializers.
            NOTE(review): presumably identifies the end of the effective
            date range -- confirm against RangeLookup.
        config: BI_ETL_Config_Base
            Optional configuration object.
        use_value_cache: bool
            Passed through to both parent initializers.
        path
            Optional parameter path controls where the data is persisted
        """
        RangeLookup.__init__(
            self,
            lookup_name=lookup_name,
            lookup_keys=lookup_keys,
            use_value_cache=use_value_cache,
            parent_component=parent_component,
            begin_date=begin_date,
            end_date=end_date,
            config=config,
        )
        AutoDiskLookup.__init__(
            self,
            lookup_name=lookup_name,
            lookup_keys=lookup_keys,
            parent_component=parent_component,
            config=config,
            path=path,
            begin_date=begin_date,
            end_date=end_date,
            use_value_cache=use_value_cache,
            # Don't have it call the parent init because RangeLookup will have done that
            init_parent=False,
        )
        # Assigned after both initializers have run so these range-aware
        # classes are the values left on the instance.
        self.MemoryLookupClass = RangeLookup
        self.DiskLookupClass = DiskRangeLookup

    def cache_row(
        self,
        row: Row,
        allow_update: bool = True,
        allow_insert: bool = True,
    ):
        """
        Adds the given row to the cache for this lookup.

        Parameters
        ----------
        row: Row
            The row to cache

        allow_update: boolean
            Allow this method to update an existing row in the cache.

        allow_insert: boolean
            Allow this method to insert a new row into the cache

        Raises
        ------
        ValueError
            If allow_update is False and an already existing row (lookup key) is passed in.
        """
        # allow_insert used to be accepted here but silently dropped, so
        # callers passing allow_insert=False still got inserts.
        # NOTE(review): assumes AutoDiskLookup.cache_row accepts allow_insert,
        # matching the signature of this override -- confirm.
        AutoDiskLookup.cache_row(
            self,
            row,
            allow_update=allow_update,
            allow_insert=allow_insert,
        )

    def find_in_cache(self, row, **kwargs):
        """
        Find the cached entry for ``row`` using the range-lookup search.

        Explicitly delegates to ``RangeLookup.find_in_cache`` (rather than
        whichever implementation the MRO would pick first), forwarding any
        extra keyword arguments unchanged, and returns its result.
        """
        return RangeLookup.find_in_cache(self, row=row, **kwargs)