Format code, renamed modules
parent 4e619ae414
commit d53ffda0f2

parser/__init__.py (new file, 31 lines)
@@ -0,0 +1,31 @@
+from .address import split_addresses
+from .building_id import (
+    async_fetch_building_ids,
+    concurrent_fetch_builing_ids,
+    fetch_builing_ids,
+    get_building_id,
+)
+from .preprocess import (
+    COL_NS,
+    ICOL_NS,
+    group_by_index,
+    preprocess_df,
+    preprocess_read_df,
+)
+from .rosseti import RossetiParser
+from .util import pipeline
+
+__all__ = (
+    "RossetiParser",
+    "split_addresses",
+    "get_building_id",
+    "fetch_builing_ids",
+    "async_fetch_building_ids",
+    "concurrent_fetch_builing_ids",
+    "preprocess_df",
+    "COL_NS",
+    "ICOL_NS",
+    "preprocess_read_df",
+    "group_by_index",
+    "pipeline",
+)
parser/__main__.py
@@ -1,7 +1,8 @@
 import sys
-import schedule
 import time
 
+import schedule
+
 from . import pipeline
 
 
@@ -11,7 +12,7 @@ def job():
 
 
 if len(sys.argv) == 2:
-    if sys.argv[1] == '-h' or sys.argv[1] == '--help':
+    if sys.argv[1] == "-h" or sys.argv[1] == "--help":
         print("python -m parser [<running period in hours>]")
         exit(0)
 
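Note: the scheduling loop itself sits outside these hunks (the hunk header shows a job() function around line 11), so the following is only a hedged sketch of how the imports and the period argument presumably fit together; the default period and the loop body are assumptions, not part of the commit.

# Hypothetical remainder of parser/__main__.py; only the imports and the
# argv help text above come from the diff itself.
period_h = int(sys.argv[1]) if len(sys.argv) == 2 else 4  # default is assumed

schedule.every(period_h).hours.do(job)  # job() presumably runs pipeline()

while True:
    schedule.run_pending()
    time.sleep(1)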
parser/address.py (new file, 87 lines)
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+import re
+from typing import Iterable, List, TypeVar
+
+import pandas as pd
+
+T = TypeVar("T")
+
+STREET_PREFIXES = ("ул.", "бул.", "пр.", "ул", "бул", "пр", "ш.", "ш", "пер.", "пер")
+HOUSES_PREFIXES = ("д.", "д")
+
+
+def unfold_house_ranges(token: str) -> str:
+    pairs_strings = re.findall(r"([\d]+-[\d]+)", token)
+    for pair_string in pairs_strings:
+        a, b = pair_string.split("-")
+        a, b = int(a), int(b)
+
+        if b > a:
+            token = token.replace(pair_string, ", ".join(map(str, range(a, b + 1))))
+
+    return token
+
+
+def unfold_houses_list(token: str) -> List[str]:
+    token = unfold_house_ranges(token)
+
+    reg = re.compile(r"(д|д\.)? ?\d+[а-яА-Я\/]*\d*(,|$| )")
+
+    if len(re.findall(reg, token)) > 1:
+        tokens = token.split(",")
+        return [*[tokens[0] + " " + house_token for house_token in tokens[1:]]]
+    return [token]
+
+
+def any_of_in(substrings: Iterable[str], string: str) -> bool:
+    return any(map(lambda substring: substring in string, substrings))
+
+
+def flatten(arr: Iterable[List[T]]) -> List[T]:
+    return sum(arr, [])
+
+
+def split_address(address: str) -> List[str]:
+    if ";" in address:
+        return flatten(map(unfold_houses_list, address.split(";")))
+    elif "," in address:
+        tokens = re.split(r"(,)", address)
+
+        tokens = list(map(str.strip, filter(lambda token: token != "", tokens)))
+
+        res = []
+        accumulator = ""
+
+        for i in range(len(tokens)):
+            if any_of_in(STREET_PREFIXES, tokens[i].lower()) and any_of_in(
+                STREET_PREFIXES, accumulator.lower()
+            ):
+                res += unfold_houses_list(accumulator)
+                accumulator = ""
+
+            accumulator += tokens[i]
+
+        res += unfold_houses_list(accumulator)
+
+        return res
+
+    return [address]
+
+
+def process_row(row: pd.Series[str]) -> pd.Series[str]:
+    row = row.copy()
+
+    if pd.isnull(row["Улица"]):
+        row["Улица"] = [None]
+    else:
+        addresses = split_address(row["Улица"])
+        row["Улица"] = addresses
+
+    return row
+
+
+def split_addresses(df: pd.DataFrame) -> pd.DataFrame:
+    merged_df = df.apply(process_row, axis=1).reset_index()
+
+    return merged_df.explode("Улица", ignore_index=True)
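Note: to make the new module's behaviour concrete, a short worked example (addresses invented for illustration; results traced through the code above, not doctest output):

unfold_house_ranges("д. 1-3")
# -> "д. 1, 2, 3"

split_address("ул. Ленина, д. 1, д. 3")
# -> ["ул. Ленина д. 1", "ул. Ленина д. 3"]

split_address("ул. Ленина, д. 5; пр. Мира, д. 7")
# -> ["ул. Ленина, д. 5", " пр. Мира, д. 7"]  (the ";" branch keeps leading spaces)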
parser/building_id.py (new file, 73 lines)
@@ -0,0 +1,73 @@
+from __future__ import annotations
+
+import asyncio
+from typing import Optional, Tuple
+
+import aiohttp
+import pandas as pd
+import requests
+
+GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
+
+
+def get_building_id(street: str) -> GeoTupleType:
+    if pd.isnull(street):
+        return None, None, None
+
+    r = requests.get(
+        "https://geocode.gate.petersburg.ru/parse/eas",
+        params={"street": street},
+        timeout=10,
+    )
+
+    res = r.json()
+
+    if "error" in res:
+        return None, None, None
+
+    return res["Building_ID"], res["Latitude"], res["Longitude"]
+
+
+def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
+    df[["ID здания", "Широта", "Долгота"]] = df.apply(
+        lambda row: get_building_id(row["Улица"]), axis=1, result_type="expand"
+    )
+
+    return df
+
+
+async def async_fetch_building_id(
+    session: aiohttp.ClientSession, street: str
+) -> GeoTupleType:
+    if pd.isnull(street):
+        return None, None, None
+
+    async with session.get(
+        "https://geocode.gate.petersburg.ru/parse/eas", params={"street": street}
+    ) as r:
+        res = await r.json()
+
+    if "error" in res:
+        return None, None, None
+
+    return res["Building_ID"], res["Latitude"], res["Longitude"]
+
+
+async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame:
+    async with aiohttp.ClientSession() as session:
+        tasks = []
+
+        for _, row in df.iterrows():
+            tasks.append(
+                asyncio.ensure_future(async_fetch_building_id(session, row["Улица"]))
+            )
+
+        res = await asyncio.gather(*tasks)
+
+    df[["ID здания", "Широта", "Долгота"]] = res
+
+    return df
+
+
+def concurrent_fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
+    return asyncio.run(async_fetch_building_ids(df))
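Note: a hypothetical call pattern for the new fetchers; the columns come straight from the code above, but the actual values depend on the live petersburg.ru geocoder, so they are illustrative only.

import pandas as pd

df = pd.DataFrame({"Улица": ["ул. Ленина д. 1", None]})

df = concurrent_fetch_builing_ids(df)  # one aiohttp session, one task per row
# df gains "ID здания", "Широта", "Долгота"; the None row yields (None, None, None).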
parser/preprocess.py (new file, 63 lines)
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+from typing import Any, List
+
+import pandas as pd
+
+COL_NS = {
+    "region": "Регион РФ (область, край, город фед. значения, округ)",
+    "area": "Административный район",
+    "town": "Населённый пункт",
+    "street": "Улица",
+    "start_date": "Плановая дата начала отключения электроснабжения",
+    "start_time": "Плановое время начала отключения электроснабжения",
+    "finish_date": "Плановая дата восстановления отключения электроснабжения",
+    "finish_time": "Плановое время восстановления отключения электроснабжения",
+    "branch": "Филиал",
+    "res": "РЭС",
+    "comment": "Комментарий",
+    "building_id": "ID здания",
+    "lat": "Широта",
+    "lng": "Долгота",
+}
+
+ICOL_NS = dict(map(reversed, COL_NS.items()))
+
+
+def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
+    df.rename(columns=ICOL_NS, inplace=True)
+
+    for a in ("start", "finish"):
+        df[f"{a}"] = pd.to_datetime(
+            df[f"{a}_date"].astype(str) + " " + df[f"{a}_time"].astype(str),
+            dayfirst=True,
+        )
+        df.drop(columns=[f"{a}_date", f"{a}_time"], inplace=True)
+
+    return df
+
+
+def preprocess_read_df(df: pd.DataFrame) -> pd.DataFrame:
+    for name in ("start", "finish"):
+        df[name] = pd.to_datetime(df[name])
+
+    return df
+
+
+def join_columns(col: pd.Series[Any]) -> List[Any] | Any:
+    first = col.iloc[0]
+
+    if col.name in ("street", "building_id", "lat", "lng") and pd.notnull(first):
+        return list(col)
+
+    return first
+
+
+def group_by_index(df: pd.DataFrame) -> pd.DataFrame:
+    groupped = df.groupby("index")
+
+    res_df = groupped.apply(lambda index_df: index_df.apply(join_columns)).drop(
+        columns="index"
+    )
+
+    return res_df
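Note: a small illustration of what preprocess_df does to the raw table (toy values; the source dates are day-first strings):

import pandas as pd

raw = pd.DataFrame(
    {
        "Улица": ["ул. Ленина д. 1"],
        "Плановая дата начала отключения электроснабжения": ["01.02.2023"],
        "Плановое время начала отключения электроснабжения": ["10:00"],
        "Плановая дата восстановления отключения электроснабжения": ["01.02.2023"],
        "Плановое время восстановления отключения электроснабжения": ["16:00"],
    }
)

out = preprocess_df(raw)
# Columns are renamed to short keys ("street", ...), and the date/time pairs
# collapse into out["start"] == pd.Timestamp("2023-02-01 10:00:00") and
# out["finish"] == pd.Timestamp("2023-02-01 16:00:00"); the four source columns are dropped.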
parser/rosseti.py
@@ -1,14 +1,16 @@
-from datetime import datetime, timedelta
 import io
-from typing import Mapping, Optional, Tuple, no_type_check
+from datetime import datetime, timedelta
+from typing import Mapping, Optional, Tuple
 
-from bs4 import BeautifulSoup
-import requests
 import pandas as pd
+import requests
+from bs4 import BeautifulSoup
 
 
 class RossetiParser:
-    def __init__(self, ndays=7, today: Optional[datetime] = None, file_path: Optional[str] = None) -> None:
+    def __init__(
+        self, ndays=7, today: Optional[datetime] = None, file_path: Optional[str] = None
+    ) -> None:
         self.base_url = "https://rosseti-lenenergo.ru/planned_work"
 
         if today is None:
@@ -34,52 +36,51 @@ class RossetiParser:
         date_start = self.__format_date(today)
         date_finish = self.__format_date(today + timedelta(days=ndays))
 
-        return {
-            'date_start': date_start,
-            'date_finish': date_finish
-        }
+        return {"date_start": date_start, "date_finish": date_finish}
 
     def __get_page(self, url: str, params: Mapping[str, str]) -> None:
        r = requests.get(url, params)
 
-        self.soup = BeautifulSoup(r.text, features='html.parser')
+        self.soup = BeautifulSoup(r.text, features="html.parser")
 
     def __parse_nav(self) -> Tuple[str, str]:
-        navigator = self.soup.find('span', attrs={'class': 'page-nav-i'})
+        navigator = self.soup.find("span", attrs={"class": "page-nav-i"})
 
-        next_uri = navigator.find('a', attrs={'class': 'next'})['href']
-        last_uri = navigator.find_all('a')[-1]['href']
+        next_uri = navigator.find("a", attrs={"class": "next"})["href"]
+        last_uri = navigator.find_all("a")[-1]["href"]
 
         return next_uri, last_uri
 
     def __parse_table(self) -> pd.DataFrame:
-        table = self.soup.find(
-            'table', attrs={'class': 'tableous_facts funds'})
+        table = self.soup.find("table", attrs={"class": "tableous_facts funds"})
 
         return pd.read_html(io.StringIO(str(table)))[0]
 
     def __save_page(self, uri: str) -> None:
         print(f'Processing page "{uri}"')
         self.__get_page(self.base_url + uri, self.__params)
-        self.df = pd.concat(
-            (self.df, self.__parse_table()), ignore_index=True)
+        self.df = pd.concat((self.df, self.__parse_table()), ignore_index=True)
 
     def __set_columns(self) -> None:
-        self.df.columns = pd.Index((
-            "Регион РФ (область, край, город фед. значения, округ)",
-            "Административный район",
-            "Населённый пункт",
-            "Улица",
-            "Плановая дата начала отключения электроснабжения",
-            "Плановое время начала отключения электроснабжения",
-            "Плановая дата восстановления отключения электроснабжения",
-            "Плановое время восстановления отключения электроснабжения",
-            "Филиал",
-            "РЭС",
-            "Комментарий",
-        ))
+        self.df.columns = pd.Index(
+            (
+                "Регион РФ (область, край, город фед. значения, округ)",
+                "Административный район",
+                "Населённый пункт",
+                "Улица",
+                "Плановая дата начала отключения электроснабжения",
+                "Плановое время начала отключения электроснабжения",
+                "Плановая дата восстановления отключения электроснабжения",
+                "Плановое время восстановления отключения электроснабжения",
+                "Филиал",
+                "РЭС",
+                "Комментарий",
+            )
+        )
 
-    def fetch(self, ndays: Optional[int] = None, today: Optional[datetime] = None) -> None:
+    def fetch(
+        self, ndays: Optional[int] = None, today: Optional[datetime] = None
+    ) -> None:
        if ndays is None:
            ndays = self.ndays
        if today is None:
@@ -87,7 +88,7 @@
 
         self.__params = self.__compose_date_params(ndays, today)
 
-        self.__save_page('')
+        self.__save_page("")
 
         next_uri, last_uri = self.__parse_nav()
 
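Note: a minimal usage sketch for the class as it stands after this commit; the df attribute is implied by __save_page above, and the constructor signature is taken from the hunk.

parser = RossetiParser(ndays=7)
parser.fetch()            # walks the paginated planned_work table
print(parser.df.head())   # accumulated pd.DataFrame of outage rows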
parser/util.py
@@ -1,6 +1,11 @@
 from typing import Optional
 
-from . import RossetiParser, split_addresses, concurrent_fetch_builing_ids, preprocess_df
+from . import (
+    RossetiParser,
+    concurrent_fetch_builing_ids,
+    preprocess_df,
+    split_addresses,
+)
 
 
 def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
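Note: the body of pipeline is outside this hunk; given the names it imports, it plausibly chains the steps like this. This is an assumption for orientation, not the committed code.

def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
    if parser is None:
        parser = RossetiParser()
    parser.fetch()                                        # scrape the outage table
    parser.df = split_addresses(parser.df)                # one row per house
    parser.df = concurrent_fetch_builing_ids(parser.df)   # geocode each row
    parser.df = preprocess_df(parser.df)                  # short columns, datetimes
    return parser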
config.py (deleted file, 9 lines)
@@ -1,9 +0,0 @@
-import os
-
-REFETCH_PERIOD_H = int(os.environ.get('REFETCH_PERIOD_H', '4'))
-
-POSTGRES_USER = os.environ.get('POSTGRES_USER', 'rosseti')
-POSTGRES_PASSWORD = os.environ.get('POSTGRES_PASSWORD', 'rosseti')
-POSTGRES_DB = os.environ.get('POSTGRES_DB', 'rosseti')
-POSTGRES_HOST = os.environ.get('POSTGRES_HOST', 'localhost')
-POSTGRES_PORT = int(os.environ.get('POSTGRES_PORT', '5432'))
rosseti_parser/__init__.py (deleted file, 5 lines)
@@ -1,5 +0,0 @@
-from .rosseti import RossetiParser
-from .address import split_addresses
-from .building_id import get_building_id, fetch_builing_ids, async_fetch_building_ids, concurrent_fetch_builing_ids
-from .preprocess import preprocess_df, COL_NS, ICOL_NS, preprocess_read_df, group_by_index
-from .util import pipeline
rosseti_parser/address.py (deleted file, 88 lines)
@@ -1,88 +0,0 @@
-from __future__ import annotations
-
-from typing import List, Iterable, TypeVar, Any
-
-import pandas as pd
-
-import re
-
-
-T = TypeVar('T')
-
-
-STREET_PREFIXES = ('ул.', 'бул.', 'пр.', 'ул', 'бул',
-                   'пр', 'ш.', 'ш', 'пер.', 'пер')
-HOUSES_PREFIXES = ('д.', 'д')
-
-
-def unfold_house_ranges(token: str) -> str:
-    pairs_strings = re.findall(r'([\d]+-[\d]+)', token)
-    for pair_string in pairs_strings:
-        a, b = pair_string.split('-')
-        a, b = int(a), int(b)
-
-        if b > a:
-            token = token.replace(
-                pair_string, ', '.join(map(str, range(a, b+1))))
-
-    return token
-
-
-def unfold_houses_list(token: str) -> List[str]:
-    token = unfold_house_ranges(token)
-
-    reg = re.compile(r'(д|д\.)? ?\d+[а-яА-Я\/]*\d*(,|$| )')
-
-    if len(re.findall(reg, token)) > 1:
-        tokens = token.split(',')
-        return [*[tokens[0] + ' ' + house_token for house_token in tokens[1:]]]
-    return [token]
-
-
-def any_of_in(substrings: Iterable[str], string: str) -> bool:
-    return any(map(lambda substring: substring in string, substrings))
-
-
-def flatten(arr: Iterable[List[T]]) -> List[T]:
-    return sum(arr, [])
-
-
-def split_address(address: str) -> List[str]:
-    if ';' in address:
-        return flatten(map(unfold_houses_list, address.split(';')))
-    elif ',' in address:
-        tokens = re.split(r'(,)', address)
-
-        tokens = list(map(str.strip, filter(
-            lambda token: token != '', tokens)))
-
-        res = []
-        accumulator = ''
-
-        for i in range(len(tokens)):
-            if (any_of_in(STREET_PREFIXES, tokens[i].lower()) and
-                    any_of_in(STREET_PREFIXES, accumulator.lower())):
-                res += unfold_houses_list(accumulator)
-                accumulator = ''
-
-            accumulator += tokens[i]
-
-        res += unfold_houses_list(accumulator)
-
-        return res
-
-    return [address]
-
-
-def process_row(row: pd.Series[str]) -> pd.Series[str]:
-    row = row.copy()
-
-    if pd.isnull(row['Улица']):
-        row['Улица'] = [None]
-    else:
-        addresses = split_address(row['Улица'])
-        row['Улица'] = addresses
-
-    return row
-
-
-def split_addresses(df: pd.DataFrame) -> pd.DataFrame:
-    merged_df = df.apply(process_row, axis=1).reset_index()
-
-    return merged_df.explode('Улица', ignore_index=True)
rosseti_parser/building_id.py (deleted file, 72 lines)
@@ -1,72 +0,0 @@
-from __future__ import annotations
-
-from typing import Optional, Tuple, Any, List
-
-import requests
-import pandas as pd
-import numpy as np
-import asyncio
-import aiohttp
-
-GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
-
-
-def get_building_id(street: str) -> GeoTupleType:
-    if pd.isnull(street):
-        return None, None, None
-
-    r = requests.get('https://geocode.gate.petersburg.ru/parse/eas', params={
-        'street': street
-    }, timeout=10)
-
-    res = r.json()
-
-    if 'error' in res:
-        return None, None, None
-
-    return res['Building_ID'], res['Latitude'], res['Longitude']
-
-
-def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
-    df[['ID здания', 'Широта', 'Долгота']] = df.apply(
-        lambda row: get_building_id(row['Улица']), axis=1, result_type='expand')
-
-    return df
-
-
-async def async_fetch_building_id(session: aiohttp.ClientSession, street: str) -> GeoTupleType:
-    if pd.isnull(street):
-        return None, None, None
-
-    async with session.get('https://geocode.gate.petersburg.ru/parse/eas', params={
-        'street': street
-    }) as r:
-        res = await r.json()
-
-    if 'error' in res:
-        return None, None, None
-
-    return res['Building_ID'], res['Latitude'], res['Longitude']
-
-
-async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame:
-    async with aiohttp.ClientSession() as session:
-        tasks = []
-
-        for _, row in df.iterrows():
-            tasks.append(
-                asyncio.ensure_future(
-                    async_fetch_building_id(session, row['Улица'])
-                )
-            )
-
-        res = await asyncio.gather(*tasks)
-
-    df[['ID здания', 'Широта', 'Долгота']] = res
-
-    return df
-
-
-def concurrent_fetch_builing_ids(df: pd.Dataframe) -> pd.DataFrame:
-    return asyncio.run(
-        async_fetch_building_ids(df)
-    )
rosseti_parser/preprocess.py (deleted file, 62 lines)
@@ -1,62 +0,0 @@
-from __future__ import annotations
-
-from typing import Any, List
-
-import pandas as pd
-
-COL_NS = {
-    'region': 'Регион РФ (область, край, город фед. значения, округ)',
-    'area': 'Административный район',
-    'town': 'Населённый пункт',
-    'street': 'Улица',
-    'start_date': 'Плановая дата начала отключения электроснабжения',
-    'start_time': 'Плановое время начала отключения электроснабжения',
-    'finish_date': 'Плановая дата восстановления отключения электроснабжения',
-    'finish_time': 'Плановое время восстановления отключения электроснабжения',
-    'branch': 'Филиал',
-    'res': 'РЭС',
-    'comment': 'Комментарий',
-    'building_id': 'ID здания',
-    'lat': 'Широта',
-    'lng': 'Долгота'
-}
-
-ICOL_NS = dict(map(reversed, COL_NS.items()))
-
-
-def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
-    df.rename(columns=ICOL_NS, inplace=True)
-
-    for a in ('start', 'finish'):
-        df[f'{a}'] = pd.to_datetime(
-            df[f'{a}_date'].astype(str) + ' ' + df[f'{a}_time'].astype(str),
-            dayfirst=True
-        )
-        df.drop(columns=[f'{a}_date', f'{a}_time'], inplace=True)
-
-    return df
-
-
-def preprocess_read_df(df: pd.DataFrame) -> pd.DataFrame:
-    for name in ('start', 'finish'):
-        df[name] = pd.to_datetime(df[name])
-
-    return df
-
-
-def join_columns(col: pd.Series[Any]) -> List[Any] | Any:
-    first = col.iloc[0]
-
-    if col.name in ('street', 'building_id', 'lat', 'lng') and pd.notnull(first):
-        return list(col)
-
-    return first
-
-
-def group_by_index(df: pd.DataFrame) -> pd.DataFrame:
-    groupped = df.groupby('index')
-
-    res_df = groupped.apply(
-        lambda index_df: index_df.apply(join_columns)
-    ).drop(columns='index')
-
-    return res_df
runner/config.py (new file, 9 lines)
@@ -0,0 +1,9 @@
+import os
+
+REFETCH_PERIOD_H = int(os.environ.get("REFETCH_PERIOD_H", "4"))
+
+POSTGRES_USER = os.environ.get("POSTGRES_USER", "rosseti")
+POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "rosseti")
+POSTGRES_DB = os.environ.get("POSTGRES_DB", "rosseti")
+POSTGRES_HOST = os.environ.get("POSTGRES_HOST", "localhost")
+POSTGRES_PORT = int(os.environ.get("POSTGRES_PORT", "5432"))
runner/controller.py
@@ -1,15 +1,12 @@
-from typing import List, Optional
-from functools import reduce
 import datetime
+from functools import reduce
+from typing import List, Optional
 
 from fastapi import HTTPException
-from sqlalchemy import func, True_
+from parser import get_building_id
 from sqlalchemy.orm import Session
-from sqlalchemy.sql import operators
 from sqlalchemy.sql.expression import BinaryExpression
 
-from rosseti_parser import get_building_id
-
-
 from . import models, schemas
@@ -36,7 +33,7 @@ def create_record(db: Session, record: schemas.Record):
 
 
 def contains_lower(name, val):
-    if type(val) == str:
+    if isinstance(val, str):
         return getattr(models.Record, name).icontains(val)
     else:
         return getattr(models.Record, name) == val
@@ -53,15 +50,17 @@ def search_each(db: Session, filters: schemas.RecordRequest) -> List[schemas.Record]:
     query = None
 
     if filters.start:
-        query = (models.Record.start <= filters.start)
+        query = models.Record.start <= filters.start
     if filters.finish:
         query = and_if_can(models.Record.finish >= filters.finish, query)
 
     filters = list(
-        filter(lambda x: x[1] is not None and x[0] not in ('start, finish'), filters))
+        filter(lambda x: x[1] is not None and x[0] not in ("start", "finish"), filters)
+    )
 
-    query = reduce(lambda acc, ftr: and_if_can(
-        contains_lower(*ftr), acc), filters, query)
+    query = reduce(
+        lambda acc, ftr: and_if_can(contains_lower(*ftr), acc), filters, query
+    )
 
     if query is None:
         res = db.query(models.Record).all()
@@ -74,14 +73,11 @@ def search_each(db: Session, filters: schemas.RecordRequest) -> List[schemas.Record]:
 def search_all(db: Session, prompt: str) -> List[schemas.Record]:
     prompt = prompt.strip()
 
-    query = reduce(lambda acc, name: acc | contains_lower(name, prompt), (
-        'region',
-        'area',
-        'town',
-        'street',
-        'branch',
-        'res'
-    ), contains_lower('comment', prompt))
+    query = reduce(
+        lambda acc, name: acc | contains_lower(name, prompt),
+        ("region", "area", "town", "street", "branch", "res"),
+        contains_lower("comment", prompt),
+    )
 
     building_id, *_ = get_building_id(prompt)
 
@@ -95,24 +91,19 @@
 
 def check_outage(db: Session, building_id: int) -> schemas.CheckResponse:
     building_query = db.query(models.Record).filter(
-        (models.Record.building_id == building_id))
+        (models.Record.building_id == building_id)
+    )
 
     if building_query.count() == 0:
-        raise HTTPException(404, 'No such building')
+        raise HTTPException(404, "No such building")
 
     now = datetime.datetime.now()
 
     res = building_query.filter(
-        (models.Record.start <= now) &
-        (now <= models.Record.finish)
+        (models.Record.start <= now) & (now <= models.Record.finish)
    ).first()
 
     if res is None:
-        return {
-            'is_outage': False
-        }
+        return {"is_outage": False}
 
-    return {
-        'is_outage': True,
-        'when_finish': res.finish
-    }
+    return {"is_outage": True, "when_finish": res.finish}
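Note: and_if_can is defined outside these hunks. Judging from its call sites above, it presumably ANDs a new clause into an accumulator that may still be None; a sketch under that assumption:

def and_if_can(clause, acc):
    # Assumed helper: start the SQLAlchemy conjunction when acc is None,
    # otherwise AND the new clause onto the accumulated expression.
    return clause if acc is None else (acc & clause)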
runner/database.py
@@ -1,11 +1,16 @@
 from typing import Generator
-import os
 
-from sqlalchemy import create_engine, URL
+from sqlalchemy import URL, create_engine
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import sessionmaker, Session
+from sqlalchemy.orm import Session, sessionmaker
 
-from .config import POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_HOST, POSTGRES_PORT, POSTGRES_DB
+from .config import (
+    POSTGRES_DB,
+    POSTGRES_HOST,
+    POSTGRES_PASSWORD,
+    POSTGRES_PORT,
+    POSTGRES_USER,
+)
 
 engine = create_engine(
     URL.create(
@@ -16,12 +21,13 @@ engine = create_engine(
         port=POSTGRES_PORT,
         database=POSTGRES_DB,
     ),
-    client_encoding='utf8',
+    client_encoding="utf8",
 )
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 
 Base = declarative_base()
 
 
 # Dependency
 def get_db() -> Generator[Session, None, None]:
     db = SessionLocal()
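Note: the hunk cuts off inside get_db; the Generator return type implies the usual FastAPI session-per-request shape, sketched here as an assumption rather than the committed code:

def get_db() -> Generator[Session, None, None]:
    db = SessionLocal()
    try:
        yield db      # hand the session to the request handler
    finally:
        db.close()    # always release the connection afterwards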
runner/job.py
@@ -1,13 +1,10 @@
-from rosseti_parser import pipeline, preprocess_read_df
-import pandas as pd
-import numpy as np
 from datetime import datetime
-import logging
 
-from .database import get_db
+import pandas as pd
+from parser import pipeline
+
 from . import models
-from io import StringIO
+from .database import get_db
 
 
 def job():
runner/main.py
@@ -1,14 +1,13 @@
-from contextlib import asynccontextmanager
 import datetime
 
-from fastapi import FastAPI
 import schedule
+from fastapi import FastAPI
 
 from . import models, router, schemas
-from .database import engine
-from .scheduler import run_continuously, run_threaded
-from .job import job
 from .config import REFETCH_PERIOD_H
+from .database import engine
+from .job import job
+from .scheduler import run_continuously, run_threaded
 
 models.Base.metadata.create_all(bind=engine)
 
@@ -25,13 +24,12 @@ async def lifespan(app: FastAPI):
 
     stop_run_continuously()
 
 
 app = FastAPI(lifespan=lifespan)
 
 app.include_router(router.router)
 
 
-@app.get('/', response_model=schemas.Healthcheck)
+@app.get("/", response_model=schemas.Healthcheck)
 def Healthcheck():
-    return {
-        "up_since": start_stamp
-    }
+    return {"up_since": start_stamp}
runner/models.py
@@ -1,11 +1,10 @@
-from sqlalchemy import Boolean, Column, Integer, String, DateTime, Float
-from sqlalchemy.orm import relationship
+from sqlalchemy import Column, DateTime, Float, Integer, String
 
 from .database import Base
 
 
 class Record(Base):
-    __tablename__ = 'records'
+    __tablename__ = "records"
 
     id = Column(Integer, primary_key=True, index=True)
     index = Column(Integer)
runner/router.py
@@ -1,19 +1,17 @@
-from fastapi import HTTPException, Depends
+from typing import Annotated, List
+
+from fastapi import APIRouter, Depends
 from sqlalchemy.orm import Session
-from typing import List, Annotated
 
-from fastapi import APIRouter
+from . import controller, schemas
+from .database import get_db
 
-from . import models, schemas, controller
-from .database import SessionLocal, get_db
+router = APIRouter(prefix="/api")
 
-
-router = APIRouter(prefix='/api')
-
 
-@router.get('/list', response_model=List[schemas.Record], summary="Search by filters")
+@router.get("/list", response_model=List[schemas.Record], summary="Search by filters")
 def list_rows(
-    filters: Annotated[schemas.RecordRequest, Depends()],
-    db: Session = Depends(get_db)
+    filters: Annotated[schemas.RecordRequest, Depends()], db: Session = Depends(get_db)
 ):
     """
     Searches rows with specified filters.
@@ -40,7 +38,7 @@ def list_rows(
     return controller.search_each(db, filters)
 
 
-@router.get('/search', response_model=List[schemas.Record], summary="Search by query")
+@router.get("/search", response_model=List[schemas.Record], summary="Search by query")
 def search_rows(query: str, db: Session = Depends(get_db)):
     """
     Selects rows with cells containing case insensitive prompt as its part.
@@ -58,7 +56,9 @@ def search_rows(query: str, db: Session = Depends(get_db)):
     return controller.search_all(db, query)
 
 
-@router.get('/check', response_model=schemas.CheckResponse, summary="Check when outage ends")
+@router.get(
+    "/check", response_model=schemas.CheckResponse, summary="Check when outage ends"
+)
 def check(building_id: int, db: Session = Depends(get_db)):
     """
     Checks if there is an active outage for building_id and if there is, also returns when will it end
@@ -66,7 +66,7 @@ def check(building_id: int, db: Session = Depends(get_db)):
     return controller.check_outage(db, building_id)
 
 
-@router.put('/create', response_model=schemas.Record)
+@router.put("/create", response_model=schemas.Record)
 def create_record(record: schemas.RecordCreate, db: Session = Depends(get_db)):
     """
     Not for public usage
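Note: for orientation, hypothetical client calls against these routes; the base URL is assumed and is not part of the commit.

import requests

base = "http://localhost:8000/api"  # assumed host and port

requests.get(f"{base}/search", params={"query": "Ленина"}).json()
requests.get(f"{base}/check", params={"building_id": 123}).json()
# -> {"is_outage": false} or {"is_outage": true, "when_finish": "..."}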
runner/schemas.py
@@ -38,5 +38,6 @@ class CheckResponse(BaseModel):
     is_outage: bool
     when_finish: Optional[datetime.datetime] = None
 
+
 class Healthcheck(BaseModel):
     up_since: datetime.datetime
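Note: how the two models touched here instantiate, with values invented for illustration; the field names and defaults come from the hunk above.

import datetime

CheckResponse(is_outage=False)  # when_finish defaults to None
CheckResponse(is_outage=True, when_finish=datetime.datetime(2023, 2, 1, 16, 0))
Healthcheck(up_since=datetime.datetime.now())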