2023-10-29 15:59:55 +03:00

48 lines
1.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
from collections.abc import Iterable
from typing import TypeVar
import pandas as pd
# Generic element type used in the signature of flatten().
T = TypeVar("T")
def any_of_in(substrings: Iterable[str], string: str) -> bool:
    """Return True if at least one of *substrings* occurs inside *string*.

    Evaluation stops at the first hit, so a lazy iterable is consumed only
    as far as necessary. An empty *substrings* yields False.
    """
    return any(candidate in string for candidate in substrings)
def flatten(arr: Iterable[list[T]]) -> list[T]:
    """Concatenate the inner lists of *arr* into one new flat list.

    Only one level of nesting is removed and element order is preserved.
    An empty *arr* yields an empty list.

    The items are collected in a single pass. Repeated list addition
    (``sum(arr, [])``) copies the growing result on every step, which is
    quadratic in the total number of items.
    """
    return [item for inner in arr for item in inner]
# A hyphen-separated pair of digit runs such as "12-18"; both numbers are captured.
_HOUSE_RANGE_RE = re.compile(r"(\d+)-(\d+)")


def unfold_house_ranges(token: str) -> list[str]:
    """Expand the ascending numeric ranges found in *token*.

    Every ``a-b`` pair of digit runs is handled where it stands:

    * if ``b > a`` the pair is replaced by each integer from ``a`` to ``b``
      inclusive, producing one output string per number;
    * otherwise the pair is kept as a single value and only its own hyphen
      is turned into a slash (``"5-3"`` becomes ``"5/3"``).

    Text outside the pairs, including unrelated hyphens, is copied through
    unchanged. When a token holds several ascending ranges, every
    combination of their numbers is produced.

    Args:
        token: The text to expand.

    Returns:
        The expanded strings in ascending order of the substituted numbers.
        A token with no ascending range yields a one-element list.
    """
    # Partial results built left to right; each pair multiplies the list by
    # the number of values it can take.
    variants = [""]
    cursor = 0
    for match in _HOUSE_RANGE_RE.finditer(token):
        first_text, last_text = match.groups()
        first, last = int(first_text), int(last_text)
        if last > first:
            replacements = [str(number) for number in range(first, last + 1)]
        else:
            # Not unfoldable: keep the original digits, swap only this hyphen.
            replacements = [f"{first_text}/{last_text}"]
        literal = token[cursor:match.start()]
        variants = [
            prefix + literal + replacement
            for prefix in variants
            for replacement in replacements
        ]
        cursor = match.end()
    tail = token[cursor:]
    return [prefix + tail for prefix in variants]
def is_valid_token(string: str) -> bool:
    """Return False for the empty string, "уг." and "д."; True otherwise.

    NOTE(review): the rejected values look like abbreviation-only tokens
    that carry no content of their own; confirm against the callers.
    """
    rejected = ("", "уг.", "д.")
    if string in rejected:
        return False
    return True
def create_token(obj: str = "", token_class: str = ""):
    """Build a pandas Series with the labels "obj" and "class".

    Args:
        obj: Value stored under the "obj" label.
        token_class: Value stored under the "class" label.

    Returns:
        A two-element ``pd.Series`` whose index is ``["obj", "class"]``.
    """
    fields = {"obj": obj, "class": token_class}
    return pd.Series(fields)