Source code for leads.data_persistence.analyzer.preprocess

from typing import Sequence as _Sequence, Any as _Any, SupportsFloat as _SupportsFloat

from leads.data_persistence.analyzer import utils as _utils
from .._computational import array as _array, ndarray as _ndarray


[docs] class Preprocessor(object): def __init__(self, data_seq: _Sequence[dict[str, _Any]]) -> None: self._data_seq: _Sequence[dict[str, _Any]] = data_seq
[docs] def to_tensor(self, channels: tuple[str, ...] = ("time", "speed", "latitude", "longitude")) -> _ndarray: r = [] for row in self._data_seq: r_row = [] for channel in channels: d = row[channel] if not isinstance(d, _SupportsFloat): raise TypeError(f"{d} ({channel}) is not a float and cannot be converted to a float") if getattr(_utils, f"{channel}_invalid", lambda _: False)(d): raise ValueError(f"Invalid value for {channel} ({d}) at row {len(r)}") r_row.append(d) r.append(r_row) return _array(r)