Code formatting
This commit is contained in:
parent
931ff1270b
commit
3bd1deb8db
@ -92,9 +92,12 @@ def find_room(token: pd.Series, pre_token: pd.Series) -> str:
|
|||||||
def find_litera(token: pd.Series, pre_token: pd.Series) -> str:
|
def find_litera(token: pd.Series, pre_token: pd.Series) -> str:
|
||||||
if find_room(token, pre_token):
|
if find_room(token, pre_token):
|
||||||
return ""
|
return ""
|
||||||
if any_of_in(LETTER, token["obj"].lower()) or re.search(
|
# fmt: off
|
||||||
r"\d{1,3}([А-Я]|[а-я])( |$)", token["obj"]
|
if (
|
||||||
|
any_of_in(LETTER, token["obj"].lower()) or
|
||||||
|
re.search(r"\d{1,3}([А-Я]|[а-я])( |$)", token["obj"])
|
||||||
):
|
):
|
||||||
|
#fmt: on
|
||||||
return "l"
|
return "l"
|
||||||
if (
|
if (
|
||||||
(
|
(
|
||||||
@ -199,6 +202,7 @@ def address_classification(token: pd.Series, pre_token: pd.Series) -> pd.Series:
|
|||||||
brackets = re.search(r"\(.+\)", token["obj"])
|
brackets = re.search(r"\(.+\)", token["obj"])
|
||||||
if brackets:
|
if brackets:
|
||||||
token["obj"] = re.sub(r"\(.+\)", "()", token["obj"])
|
token["obj"] = re.sub(r"\(.+\)", "()", token["obj"])
|
||||||
|
|
||||||
token["class"] += find_district(token, pre_token)
|
token["class"] += find_district(token, pre_token)
|
||||||
token["class"] += find_countryside(token, pre_token)
|
token["class"] += find_countryside(token, pre_token)
|
||||||
token["class"] += find_territory(token, pre_token)
|
token["class"] += find_territory(token, pre_token)
|
||||||
@ -208,8 +212,11 @@ def address_classification(token: pd.Series, pre_token: pd.Series) -> pd.Series:
|
|||||||
token["class"] += find_edifice(token, pre_token)
|
token["class"] += find_edifice(token, pre_token)
|
||||||
token["class"] += find_litera(token, pre_token)
|
token["class"] += find_litera(token, pre_token)
|
||||||
token["class"] += find_room(token, pre_token)
|
token["class"] += find_room(token, pre_token)
|
||||||
|
|
||||||
if token["class"] == "":
|
if token["class"] == "":
|
||||||
token["class"] = "w"
|
token["class"] = "w"
|
||||||
|
|
||||||
if brackets:
|
if brackets:
|
||||||
token["obj"] = re.sub(r"\(\)", brackets.group(), token["obj"])
|
token["obj"] = re.sub(r"\(\)", brackets.group(), token["obj"])
|
||||||
|
|
||||||
return token
|
return token
|
||||||
|
@ -167,9 +167,12 @@ class AddressSplitter(Sequence):
|
|||||||
return list(tokens)
|
return list(tokens)
|
||||||
|
|
||||||
def cut_address(self) -> pd.Series:
|
def cut_address(self) -> pd.Series:
|
||||||
while len(self.accumulator["class"]) > 0 and CLASSES.index(
|
# fmt: off
|
||||||
self.prev_class()
|
while (
|
||||||
) > CLASSES.index(self.next_class()):
|
len(self.accumulator["class"]) > 0
|
||||||
|
and CLASSES.index(self.prev_class()) > CLASSES.index(self.next_class())
|
||||||
|
):
|
||||||
|
# fmt: on
|
||||||
match self.accumulator["class"][-1]:
|
match self.accumulator["class"][-1]:
|
||||||
case "h":
|
case "h":
|
||||||
self.accumulator["addresses"] = re.sub(
|
self.accumulator["addresses"] = re.sub(
|
||||||
|
@ -6,6 +6,7 @@ import pandas as pd
|
|||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
def any_of_in(substrings: Iterable[str], string: str) -> bool:
|
def any_of_in(substrings: Iterable[str], string: str) -> bool:
|
||||||
return any(map(lambda substring: substring in string, substrings))
|
return any(map(lambda substring: substring in string, substrings))
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ def any_of_in(substrings: Iterable[str], string: str) -> bool:
|
|||||||
def flatten(arr: Iterable[list[T]]) -> list[T]:
|
def flatten(arr: Iterable[list[T]]) -> list[T]:
|
||||||
return sum(arr, [])
|
return sum(arr, [])
|
||||||
|
|
||||||
|
|
||||||
def unfold_house_ranges(token: str) -> list[str]:
|
def unfold_house_ranges(token: str) -> list[str]:
|
||||||
addresses = []
|
addresses = []
|
||||||
pairs_strings = re.findall(r"([\d]+-[\d]+)", token)
|
pairs_strings = re.findall(r"([\d]+-[\d]+)", token)
|
||||||
@ -42,4 +44,4 @@ def create_token(obj: str = "", token_class: str = ""):
|
|||||||
"obj": obj,
|
"obj": obj,
|
||||||
"class": token_class,
|
"class": token_class,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from .lenenergo import LenenergoParser
|
|
||||||
from .building_id import concurrent_fetch_builing_ids
|
|
||||||
from .preprocess import preprocess_df
|
|
||||||
from .address import split_addresses
|
from .address import split_addresses
|
||||||
|
from .building_id import concurrent_fetch_builing_ids
|
||||||
|
from .lenenergo import LenenergoParser
|
||||||
|
from .preprocess import preprocess_df
|
||||||
|
|
||||||
|
|
||||||
def pipeline(parser: Optional[LenenergoParser] = None) -> LenenergoParser:
|
def pipeline(parser: Optional[LenenergoParser] = None) -> LenenergoParser:
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
from .config import (
|
from .config import (
|
||||||
|
DB_URL,
|
||||||
POSTGRES_DB,
|
POSTGRES_DB,
|
||||||
POSTGRES_HOST,
|
POSTGRES_HOST,
|
||||||
POSTGRES_PASSWORD,
|
POSTGRES_PASSWORD,
|
||||||
POSTGRES_PORT,
|
POSTGRES_PORT,
|
||||||
POSTGRES_USER,
|
POSTGRES_USER,
|
||||||
DB_URL,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
db_credentials = {"conninfo": DB_URL}
|
db_credentials = {"conninfo": DB_URL}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user