import re
from collections.abc import Iterable
from itertools import chain
from typing import TypeVar

import pandas as pd

T = TypeVar("T")

# Matches "<digits>-<digits>", e.g. the "3-7" in "д. 3-7".
_HOUSE_RANGE_RE = re.compile(r"\d+-\d+")

# Strings rejected by ``is_valid_token``.
_INVALID_TOKENS = ("", "уг.", "д.")


def any_of_in(substrings: Iterable[str], string: str) -> bool:
    """Return True if at least one of *substrings* occurs inside *string*.

    An empty *substrings* iterable yields False.
    """
    return any(substring in string for substring in substrings)


def flatten(arr: Iterable[list[T]]) -> list[T]:
    """Concatenate the lists in *arr* into one new list, preserving order.

    The inputs are not mutated. ``chain.from_iterable`` is used instead of
    ``sum(arr, [])`` because the latter re-copies the accumulated list on
    every addition, which is quadratic in the total number of items.
    """
    return list(chain.from_iterable(arr))


def unfold_house_ranges(token: str) -> list[str]:
    """Expand numeric ranges such as ``"3-5"`` inside *token*.

    For every ``<a>-<b>`` match found in the original *token*:

    * if ``b > a``, one string per number in ``a..b`` (inclusive) is
      produced, with every range match in the current token replaced by
      that number;
    * otherwise every ``"-"`` in the token is replaced by ``"/"``.

    If no expansion was produced, the (possibly rewritten) token is returned
    as the only element.

    NOTE(review): with several ranges in one token, each substitution
    replaces *all* range matches with the same number, and a non-ascending
    range rewrites the token used for later ranges. That behaviour is kept
    as-is here; confirm against callers whether it is intended.
    """
    addresses: list[str] = []

    # The matches are collected up front, before the token can be rewritten.
    for pair in _HOUSE_RANGE_RE.findall(token):
        start, end = map(int, pair.split("-"))

        if end > start:
            addresses.extend(
                _HOUSE_RANGE_RE.sub(str(number), token)
                for number in range(start, end + 1)
            )
        else:
            # Not an ascending range: turn the hyphens into slashes.
            token = token.replace("-", "/")

    return addresses or [token]


def is_valid_token(string: str) -> bool:
    """Return False for the empty string, ``"уг."`` and ``"д."``; else True."""
    return string not in _INVALID_TOKENS


def create_token(obj: str = "", token_class: str = "") -> pd.Series:
    """Build a two-field ``pd.Series`` with index labels ``obj`` and ``class``."""
    return pd.Series(
        {
            "obj": obj,
            "class": token_class,
        }
    )