48 lines
1.1 KiB
Python
48 lines
1.1 KiB
Python
import re
|
||
from collections.abc import Iterable
|
||
from typing import TypeVar
|
||
|
||
import pandas as pd
|
||
|
||
# Generic element type used in the signature of flatten() below.
T = TypeVar("T")
|
||
|
||
|
||
def any_of_in(substrings: Iterable[str], string: str) -> bool:
    """Return True if at least one of *substrings* occurs inside *string*.

    Evaluation stops at the first hit. An empty *substrings* iterable
    yields False.
    """
    return any(fragment in string for fragment in substrings)
|
||
|
||
|
||
def flatten(arr: Iterable[list[T]]) -> list[T]:
    """Concatenate the inner lists of *arr* into one new flat list.

    Items keep their original order. An empty *arr* yields an empty list.
    The inputs are never mutated, and the result is always a fresh list.

    A single-pass comprehension is used instead of ``sum(arr, [])``:
    summing lists copies the accumulated result on every addition, which
    is quadratic in the total number of items.
    """
    return [item for chunk in arr for item in chunk]
|
||
|
||
|
||
def unfold_house_ranges(token: str) -> list[str]:
    """Expand ``<digits>-<digits>`` ranges found in *token*.

    Every ``A-B`` substring is examined in order of appearance:

    * If ``B > A``, one copy of the token is produced for each integer in
      ``A..B`` inclusive, with the number substituted for the range.
    * Otherwise (``B <= A``) nothing is expanded; instead every ``-`` in
      the token is replaced by ``/`` and later substitutions operate on
      that modified token.

    If no range produced any expansion, the result is a one-element list
    holding the (possibly modified) token.

    NOTE(review): the substitution replaces *all* range-looking substrings
    in the token with the same number, not only the one being expanded, so
    a token with several ranges gives e.g. ``"1-2 4-5" -> "1 1", "2 2",
    "4 4", "5 5"``. Confirm whether multi-range tokens can reach this
    function.
    """
    range_pattern = r"([\d]+-[\d]+)"
    # Ranges are collected once, from the token as it was passed in.
    found_ranges = re.findall(range_pattern, token)

    expanded: list[str] = []
    for found in found_ranges:
        low_text, high_text = found.split("-")
        low = int(low_text)
        high = int(high_text)

        if high <= low:
            # Not an ascending range: rewrite the hyphens and move on.
            token = token.replace("-", "/")
            continue

        for house_number in range(low, high + 1):
            expanded.append(re.sub(range_pattern, str(house_number), token))

    return expanded or [token]
|
||
|
||
|
||
def is_valid_token(string: str) -> bool:
    """Return False for tokens that should be discarded, True otherwise.

    A token is rejected when it is exactly the empty string or one of the
    two literal markers listed below (presumably address abbreviations;
    verify against the caller).
    """
    rejected_tokens = ("", "уг.", "д.")
    is_rejected = string in rejected_tokens
    return not is_rejected
|
||
|
||
|
||
def create_token(obj: str = "", token_class: str = "") -> pd.Series:
    """Build a two-field token record as a pandas Series.

    The Series is indexed by ``"obj"`` and ``"class"`` (in that order) and
    holds *obj* and *token_class* respectively. Both default to the empty
    string.
    """
    fields = {"obj": obj, "class": token_class}
    return pd.Series(fields)
|