Code formatting

This commit is contained in:
Dmitriy Shishkov 2023-10-29 15:59:55 +03:00
parent 931ff1270b
commit 3bd1deb8db
Signed by: dm1sh
GPG Key ID: 027994B0AA357688
5 changed files with 22 additions and 10 deletions

View File

@ -92,9 +92,12 @@ def find_room(token: pd.Series, pre_token: pd.Series) -> str:
def find_litera(token: pd.Series, pre_token: pd.Series) -> str:
if find_room(token, pre_token):
return ""
if any_of_in(LETTER, token["obj"].lower()) or re.search(
r"\d{1,3}([А-Я]|[а-я])( |$)", token["obj"]
# fmt: off
if (
any_of_in(LETTER, token["obj"].lower()) or
re.search(r"\d{1,3}([А-Я]|[а-я])( |$)", token["obj"])
):
# fmt: on
return "l"
if (
(
@ -199,6 +202,7 @@ def address_classification(token: pd.Series, pre_token: pd.Series) -> pd.Series:
brackets = re.search(r"\(.+\)", token["obj"])
if brackets:
token["obj"] = re.sub(r"\(.+\)", "()", token["obj"])
token["class"] += find_district(token, pre_token)
token["class"] += find_countryside(token, pre_token)
token["class"] += find_territory(token, pre_token)
@ -208,8 +212,11 @@ def address_classification(token: pd.Series, pre_token: pd.Series) -> pd.Series:
token["class"] += find_edifice(token, pre_token)
token["class"] += find_litera(token, pre_token)
token["class"] += find_room(token, pre_token)
if token["class"] == "":
token["class"] = "w"
if brackets:
token["obj"] = re.sub(r"\(\)", brackets.group(), token["obj"])
return token

View File

@ -167,9 +167,12 @@ class AddressSplitter(Sequence):
return list(tokens)
def cut_address(self) -> pd.Series:
while len(self.accumulator["class"]) > 0 and CLASSES.index(
self.prev_class()
) > CLASSES.index(self.next_class()):
# fmt: off
while (
len(self.accumulator["class"]) > 0
and CLASSES.index(self.prev_class()) > CLASSES.index(self.next_class())
):
# fmt: on
match self.accumulator["class"][-1]:
case "h":
self.accumulator["addresses"] = re.sub(

View File

@ -6,6 +6,7 @@ import pandas as pd
T = TypeVar("T")
def any_of_in(substrings: Iterable[str], string: str) -> bool:
    """Return True if at least one of *substrings* occurs inside *string*.

    Candidates are checked lazily in iteration order and the scan stops at
    the first hit. An empty *substrings* yields False.
    """
    # A generator expression keeps the short-circuiting of any() without the
    # extra lambda call per candidate that map() required.
    return any(candidate in string for candidate in substrings)
@ -13,6 +14,7 @@ def any_of_in(substrings: Iterable[str], string: str) -> bool:
def flatten(arr: Iterable[list[T]]) -> list[T]:
    """Concatenate the lists in *arr* into one new flat list.

    Only one level of nesting is removed; elements of the inner lists are
    kept as they are. An empty *arr* gives an empty list.
    """
    # Single pass over every element. The previous sum(arr, []) rebuilt the
    # accumulator list on each addition, which is quadratic in total length.
    return [item for sublist in arr for item in sublist]
def unfold_house_ranges(token: str) -> list[str]:
addresses = []
pairs_strings = re.findall(r"([\d]+-[\d]+)", token)
@ -42,4 +44,4 @@ def create_token(obj: str = "", token_class: str = ""):
"obj": obj,
"class": token_class,
}
)
)

View File

@ -1,9 +1,9 @@
from typing import Optional
from .lenenergo import LenenergoParser
from .building_id import concurrent_fetch_builing_ids
from .preprocess import preprocess_df
from .address import split_addresses
from .building_id import concurrent_fetch_builing_ids
from .lenenergo import LenenergoParser
from .preprocess import preprocess_df
def pipeline(parser: Optional[LenenergoParser] = None) -> LenenergoParser:

View File

@ -1,10 +1,10 @@
from .config import (
DB_URL,
POSTGRES_DB,
POSTGRES_HOST,
POSTGRES_PASSWORD,
POSTGRES_PORT,
POSTGRES_USER,
DB_URL,
)
db_credentials = {"conninfo": DB_URL}