Code formatting

This commit is contained in:
Dmitriy Shishkov 2023-10-29 15:59:55 +03:00
parent 931ff1270b
commit 3bd1deb8db
Signed by: dm1sh
GPG Key ID: 027994B0AA357688
5 changed files with 22 additions and 10 deletions

View File

@ -92,9 +92,12 @@ def find_room(token: pd.Series, pre_token: pd.Series) -> str:
def find_litera(token: pd.Series, pre_token: pd.Series) -> str: def find_litera(token: pd.Series, pre_token: pd.Series) -> str:
if find_room(token, pre_token): if find_room(token, pre_token):
return "" return ""
if any_of_in(LETTER, token["obj"].lower()) or re.search( # fmt: off
r"\d{1,3}([А-Я]|[а-я])( |$)", token["obj"] if (
any_of_in(LETTER, token["obj"].lower()) or
re.search(r"\d{1,3}([А-Я]|[а-я])( |$)", token["obj"])
): ):
#fmt: on
return "l" return "l"
if ( if (
( (
@ -199,6 +202,7 @@ def address_classification(token: pd.Series, pre_token: pd.Series) -> pd.Series:
brackets = re.search(r"\(.+\)", token["obj"]) brackets = re.search(r"\(.+\)", token["obj"])
if brackets: if brackets:
token["obj"] = re.sub(r"\(.+\)", "()", token["obj"]) token["obj"] = re.sub(r"\(.+\)", "()", token["obj"])
token["class"] += find_district(token, pre_token) token["class"] += find_district(token, pre_token)
token["class"] += find_countryside(token, pre_token) token["class"] += find_countryside(token, pre_token)
token["class"] += find_territory(token, pre_token) token["class"] += find_territory(token, pre_token)
@ -208,8 +212,11 @@ def address_classification(token: pd.Series, pre_token: pd.Series) -> pd.Series:
token["class"] += find_edifice(token, pre_token) token["class"] += find_edifice(token, pre_token)
token["class"] += find_litera(token, pre_token) token["class"] += find_litera(token, pre_token)
token["class"] += find_room(token, pre_token) token["class"] += find_room(token, pre_token)
if token["class"] == "": if token["class"] == "":
token["class"] = "w" token["class"] = "w"
if brackets: if brackets:
token["obj"] = re.sub(r"\(\)", brackets.group(), token["obj"]) token["obj"] = re.sub(r"\(\)", brackets.group(), token["obj"])
return token return token

View File

@ -167,9 +167,12 @@ class AddressSplitter(Sequence):
return list(tokens) return list(tokens)
def cut_address(self) -> pd.Series: def cut_address(self) -> pd.Series:
while len(self.accumulator["class"]) > 0 and CLASSES.index( # fmt: off
self.prev_class() while (
) > CLASSES.index(self.next_class()): len(self.accumulator["class"]) > 0
and CLASSES.index(self.prev_class()) > CLASSES.index(self.next_class())
):
# fmt: on
match self.accumulator["class"][-1]: match self.accumulator["class"][-1]:
case "h": case "h":
self.accumulator["addresses"] = re.sub( self.accumulator["addresses"] = re.sub(

View File

@ -6,6 +6,7 @@ import pandas as pd
T = TypeVar("T") T = TypeVar("T")
def any_of_in(substrings: Iterable[str], string: str) -> bool:
    """Return True if at least one of ``substrings`` occurs in ``string``.

    Matching uses plain ``in`` containment, so it is case-sensitive.
    Evaluation stops at the first match, and an empty ``substrings``
    yields False.
    """
    return any(substring in string for substring in substrings)
@ -13,6 +14,7 @@ def any_of_in(substrings: Iterable[str], string: str) -> bool:
def flatten(arr: Iterable[list[T]]) -> list[T]:
    """Concatenate the inner lists of ``arr`` into one new flat list.

    Only one level of nesting is removed and element order is preserved.
    The inner lists themselves are not modified.
    """
    # One pass over the items; ``sum(arr, [])`` would copy the accumulated
    # list again for every inner list, which is quadratic in total length.
    return [item for inner in arr for item in inner]
def unfold_house_ranges(token: str) -> list[str]: def unfold_house_ranges(token: str) -> list[str]:
addresses = [] addresses = []
pairs_strings = re.findall(r"([\d]+-[\d]+)", token) pairs_strings = re.findall(r"([\d]+-[\d]+)", token)
@ -42,4 +44,4 @@ def create_token(obj: str = "", token_class: str = ""):
"obj": obj, "obj": obj,
"class": token_class, "class": token_class,
} }
) )

View File

@ -1,9 +1,9 @@
from typing import Optional from typing import Optional
from .lenenergo import LenenergoParser
from .building_id import concurrent_fetch_builing_ids
from .preprocess import preprocess_df
from .address import split_addresses from .address import split_addresses
from .building_id import concurrent_fetch_builing_ids
from .lenenergo import LenenergoParser
from .preprocess import preprocess_df
def pipeline(parser: Optional[LenenergoParser] = None) -> LenenergoParser: def pipeline(parser: Optional[LenenergoParser] = None) -> LenenergoParser:

View File

@ -1,10 +1,10 @@
from .config import ( from .config import (
DB_URL,
POSTGRES_DB, POSTGRES_DB,
POSTGRES_HOST, POSTGRES_HOST,
POSTGRES_PASSWORD, POSTGRES_PASSWORD,
POSTGRES_PORT, POSTGRES_PORT,
POSTGRES_USER, POSTGRES_USER,
DB_URL,
) )
db_credentials = {"conninfo": DB_URL} db_credentials = {"conninfo": DB_URL}