Code formatting

2023-10-29 15:59:55 +03:00
parent 931ff1270b
commit 3bd1deb8db
5 changed files with 22 additions and 10 deletions
--- a/parser/address/classifier.py
+++ b/parser/address/classifier.py
@ -92,9 +92,12 @@ def find_room(token: pd.Series, pre_token: pd.Series) -> str:
 def find_litera(token: pd.Series, pre_token: pd.Series) -> str:
    if find_room(token, pre_token):
        return ""
-    if any_of_in(LETTER, token["obj"].lower()) or re.search(
-        r"\d{1,3}([А-Я]|[а-я])( |$)", token["obj"]
+    # fmt: off
+    if (
+        any_of_in(LETTER, token["obj"].lower()) or
+        re.search(r"\d{1,3}([А-Я]|[а-я])( |$)", token["obj"])
    ):
+    #fmt: on
        return "l"
    if (
        (
@ -199,6 +202,7 @@ def address_classification(token: pd.Series, pre_token: pd.Series) -> pd.Series:
    brackets = re.search(r"\(.+\)", token["obj"])
    if brackets:
        token["obj"] = re.sub(r"\(.+\)", "()", token["obj"])
+
    token["class"] += find_district(token, pre_token)
    token["class"] += find_countryside(token, pre_token)
    token["class"] += find_territory(token, pre_token)
@ -208,8 +212,11 @@ def address_classification(token: pd.Series, pre_token: pd.Series) -> pd.Series:
    token["class"] += find_edifice(token, pre_token)
    token["class"] += find_litera(token, pre_token)
    token["class"] += find_room(token, pre_token)
+
    if token["class"] == "":
        token["class"] = "w"
+
    if brackets:
        token["obj"] = re.sub(r"\(\)", brackets.group(), token["obj"])
+
    return token
--- a/parser/address/splitter.py
+++ b/parser/address/splitter.py
@ -167,9 +167,12 @@ class AddressSplitter(Sequence):
        return list(tokens)

    def cut_address(self) -> pd.Series:
-        while len(self.accumulator["class"]) > 0 and CLASSES.index(
-            self.prev_class()
-        ) > CLASSES.index(self.next_class()):
+        # fmt: off
+        while (
+            len(self.accumulator["class"]) > 0
+            and CLASSES.index(self.prev_class()) > CLASSES.index(self.next_class())
+        ):
+        # fmt: on
            match self.accumulator["class"][-1]:
                case "h":
                    self.accumulator["addresses"] = re.sub(
--- a/parser/address/utils.py
+++ b/parser/address/utils.py
@ -6,6 +6,7 @@ import pandas as pd

 T = TypeVar("T")

+
 def any_of_in(substrings: Iterable[str], string: str) -> bool:
    """Tell whether at least one of ``substrings`` occurs inside ``string``.

    Matching uses plain ``in`` containment, so it is case-sensitive.
    The scan stops at the first hit; an empty ``substrings`` gives False.
    """
    for candidate in substrings:
        if candidate in string:
            return True
    return False

@ -13,6 +14,7 @@ def any_of_in(substrings: Iterable[str], string: str) -> bool:
 def flatten(arr: Iterable[list[T]]) -> list[T]:
    """Concatenate the inner lists of ``arr`` into one new flat list.

    Only one level of nesting is removed and element order is preserved.
    An empty ``arr`` gives ``[]``; the inputs are never mutated.
    """
    # A nested comprehension appends every item exactly once. The previous
    # ``sum(arr, [])`` re-copied the growing accumulator for each inner
    # list, which is quadratic in the total number of items.
    return [item for inner in arr for item in inner]

+
 def unfold_house_ranges(token: str) -> list[str]:
    addresses = []
    pairs_strings = re.findall(r"([\d]+-[\d]+)", token)
@ -42,4 +44,4 @@ def create_token(obj: str = "", token_class: str = ""):
            "obj": obj,
            "class": token_class,
        }
-    )
+    )
--- a/parser/pipeline.py
+++ b/parser/pipeline.py
@ -1,9 +1,9 @@
 from typing import Optional

-from .lenenergo import LenenergoParser
-from .building_id import concurrent_fetch_builing_ids
-from .preprocess import preprocess_df
 from .address import split_addresses
+from .building_id import concurrent_fetch_builing_ids
+from .lenenergo import LenenergoParser
+from .preprocess import preprocess_df


 def pipeline(parser: Optional[LenenergoParser] = None) -> LenenergoParser:
--- a/runner/database.py
+++ b/runner/database.py
@ -1,10 +1,10 @@
 from .config import (
+    DB_URL,
    POSTGRES_DB,
    POSTGRES_HOST,
    POSTGRES_PASSWORD,
    POSTGRES_PORT,
    POSTGRES_USER,
-    DB_URL,
 )

 # Connection settings: a single "conninfo" entry holding the DB_URL config value.
 # NOTE(review): presumably unpacked as keyword arguments into a psycopg-style
 # connect()/pool call — confirm against the code that consumes this dict.
 db_credentials = {"conninfo": DB_URL}