diff --git a/rosseti_parser/README.md b/parser/README.md
similarity index 100%
rename from rosseti_parser/README.md
rename to parser/README.md
diff --git a/parser/__init__.py b/parser/__init__.py
new file mode 100644
index 0000000..8e61371
--- /dev/null
+++ b/parser/__init__.py
@@ -0,0 +1,31 @@
+from .address import split_addresses
+from .building_id import (
+    async_fetch_building_ids,
+    concurrent_fetch_builing_ids,
+    fetch_builing_ids,
+    get_building_id,
+)
+from .preprocess import (
+    COL_NS,
+    ICOL_NS,
+    group_by_index,
+    preprocess_df,
+    preprocess_read_df,
+)
+from .rosseti import RossetiParser
+from .util import pipeline
+
+__all__ = (
+    "RossetiParser",
+    "split_addresses",
+    "get_building_id",
+    "fetch_builing_ids",
+    "async_fetch_building_ids",
+    "concurrent_fetch_builing_ids",
+    "preprocess_df",
+    "COL_NS",
+    "ICOL_NS",
+    "preprocess_read_df",
+    "group_by_index",
+    "pipeline",
+)
diff --git a/rosseti_parser/__main__.py b/parser/__main__.py
similarity index 90%
rename from rosseti_parser/__main__.py
rename to parser/__main__.py
index 02291f6..e55c05a 100644
--- a/rosseti_parser/__main__.py
+++ b/parser/__main__.py
@@ -1,7 +1,8 @@
 import sys
-import schedule
 import time
+
+import schedule
+
 from . import pipeline
@@ -11,7 +12,7 @@ def job():
 
 
 if len(sys.argv) == 2:
-    if sys.argv[1] == '-h' or sys.argv[1] == '--help':
+    if sys.argv[1] == "-h" or sys.argv[1] == "--help":
         print("python -m parser []")
         exit(0)
diff --git a/parser/address.py b/parser/address.py
new file mode 100644
index 0000000..4b22430
--- /dev/null
+++ b/parser/address.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+import re
+from typing import Iterable, List, TypeVar
+
+import pandas as pd
+
+T = TypeVar("T")
+
+STREET_PREFIXES = ("ул.", "бул.", "пр.", "ул", "бул", "пр", "ш.", "ш", "пер.", "пер")
+HOUSES_PREFIXES = ("д.", "д")
+
+
+def unfold_house_ranges(token: str) -> str:
+    pairs_strings = re.findall(r"([\d]+-[\d]+)", token)
+    for pair_string in pairs_strings:
+        a, b = pair_string.split("-")
+        a, b = int(a), int(b)
+
+        if b > a:
+            token = token.replace(pair_string, ", ".join(map(str, range(a, b + 1))))
+
+    return token
+
+
+def unfold_houses_list(token: str) -> List[str]:
+    token = unfold_house_ranges(token)
+
+    reg = re.compile(r"(д|д\.)? ?\d+[а-яА-Я\/]*\d*(,|$| )")
+
+    if len(re.findall(reg, token)) > 1:
+        tokens = token.split(",")
+        return [*[tokens[0] + " " + house_token for house_token in tokens[1:]]]
+    return [token]
+
+
+def any_of_in(substrings: Iterable[str], string: str) -> bool:
+    return any(map(lambda substring: substring in string, substrings))
+
+
+def flatten(arr: Iterable[List[T]]) -> List[T]:
+    return sum(arr, [])
+
+
+def split_address(address: str) -> List[str]:
+    if ";" in address:
+        return flatten(map(unfold_houses_list, address.split(";")))
+    elif "," in address:
+        tokens = re.split(r"(,)", address)
+
+        tokens = list(map(str.strip, filter(lambda token: token != "", tokens)))
+
+        res = []
+        accumulator = ""
+
+        for i in range(len(tokens)):
+            if any_of_in(STREET_PREFIXES, tokens[i].lower()) and any_of_in(
+                STREET_PREFIXES, accumulator.lower()
+            ):
+                res += unfold_houses_list(accumulator)
+                accumulator = ""
+
+            accumulator += tokens[i]
+
+        res += unfold_houses_list(accumulator)
+
+        return res
+
+    return [address]
+
+
+def process_row(row: pd.Series[str]) -> pd.Series[str]:
+    row = row.copy()
+
+    if pd.isnull(row["Улица"]):
+        row["Улица"] = [None]
+    else:
+        addresses = split_address(row["Улица"])
+        row["Улица"] = addresses
+
+    return row
+
+
+def split_addresses(df: pd.DataFrame) -> pd.DataFrame:
+    merged_df = df.apply(process_row, axis=1).reset_index()
+
+    return merged_df.explode("Улица", ignore_index=True)
diff --git a/parser/building_id.py b/parser/building_id.py
new file mode 100644
index 0000000..a18368e
--- /dev/null
+++ b/parser/building_id.py
@@ -0,0 +1,73 @@
+from __future__ import annotations
+
+import asyncio
+from typing import Optional, Tuple
+
+import aiohttp
+import pandas as pd
+import requests
+
+GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
+
+
+def get_building_id(street: str) -> GeoTupleType:
+    if pd.isnull(street):
+        return None, None, None
+
+    r = requests.get(
+        "https://geocode.gate.petersburg.ru/parse/eas",
+        params={"street": street},
+        timeout=10,
+    )
+
+    res = r.json()
+
+    if "error" in res:
+        return None, None, None
+
+    return res["Building_ID"], res["Latitude"], res["Longitude"]
+
+
+def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
+    df[["ID здания", "Широта", "Долгота"]] = df.apply(
+        lambda row: get_building_id(row["Улица"]), axis=1, result_type="expand"
+    )
+
+    return df
+
+
+async def async_fetch_building_id(
+    session: aiohttp.ClientSession, street: str
+) -> GeoTupleType:
+    if pd.isnull(street):
+        return None, None, None
+
+    async with session.get(
+        "https://geocode.gate.petersburg.ru/parse/eas", params={"street": street}
+    ) as r:
+        res = await r.json()
+
+        if "error" in res:
+            return None, None, None
+
+        return res["Building_ID"], res["Latitude"], res["Longitude"]
+
+
+async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame:
+    async with aiohttp.ClientSession() as session:
+        tasks = []
+
+        for _, row in df.iterrows():
+            tasks.append(
+                asyncio.ensure_future(async_fetch_building_id(session, row["Улица"]))
+            )
+
+        res = await asyncio.gather(*tasks)
+
+        df[["ID здания", "Широта", "Долгота"]] = res
+
+    return df
+
+
+def concurrent_fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
+    return asyncio.run(async_fetch_building_ids(df))
diff --git a/parser/preprocess.py b/parser/preprocess.py
new file mode 100644
index 0000000..61fa8d6
--- /dev/null
+++ b/parser/preprocess.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+from typing import Any, List
+
+import pandas as pd
+
+COL_NS = {
+    "region": "Регион РФ (область, край, город фед. значения, округ)",
+    "area": "Административный район",
+    "town": "Населённый пункт",
+    "street": "Улица",
+    "start_date": "Плановая дата начала отключения электроснабжения",
+    "start_time": "Плановое время начала отключения электроснабжения",
+    "finish_date": "Плановая дата восстановления отключения электроснабжения",
+    "finish_time": "Плановое время восстановления отключения электроснабжения",
+    "branch": "Филиал",
+    "res": "РЭС",
+    "comment": "Комментарий",
+    "building_id": "ID здания",
+    "lat": "Широта",
+    "lng": "Долгота",
+}
+
+ICOL_NS = dict(map(reversed, COL_NS.items()))
+
+
+def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
+    df.rename(columns=ICOL_NS, inplace=True)
+
+    for a in ("start", "finish"):
+        df[f"{a}"] = pd.to_datetime(
+            df[f"{a}_date"].astype(str) + " " + df[f"{a}_time"].astype(str),
+            dayfirst=True,
+        )
+        df.drop(columns=[f"{a}_date", f"{a}_time"], inplace=True)
+
+    return df
+
+
+def preprocess_read_df(df: pd.DataFrame) -> pd.DataFrame:
+    for name in ("start", "finish"):
+        df[name] = pd.to_datetime(df[name])
+
+    return df
+
+
+def join_columns(col: pd.Series[Any]) -> List[Any] | Any:
+    first = col.iloc[0]
+
+    if col.name in ("street", "building_id", "lat", "lng") and pd.notnull(first):
+        return list(col)
+
+    return first
+
+
+def group_by_index(df: pd.DataFrame) -> pd.DataFrame:
+    groupped = df.groupby("index")
+
+    res_df = groupped.apply(lambda index_df: index_df.apply(join_columns)).drop(
+        columns="index"
+    )
+
+    return res_df
diff --git a/rosseti_parser/rosseti.py b/parser/rosseti.py
similarity index 54%
rename from rosseti_parser/rosseti.py
rename to parser/rosseti.py
index ab76ed5..15b0c36 100644
--- a/rosseti_parser/rosseti.py
+++ b/parser/rosseti.py
@@ -1,14 +1,16 @@
-from datetime import datetime, timedelta
 import io
-from typing import Mapping, Optional, Tuple, no_type_check
+from datetime import datetime, timedelta
+from typing import Mapping, Optional, Tuple
 
-from bs4 import BeautifulSoup
-import requests
 import pandas as pd
+import requests
+from bs4 import BeautifulSoup
 
 
 class RossetiParser:
-    def __init__(self, ndays=7, today: Optional[datetime] = None, file_path: Optional[str] = None) -> None:
+    def __init__(
+        self, ndays=7, today: Optional[datetime] = None, file_path: Optional[str] = None
+    ) -> None:
         self.base_url = "https://rosseti-lenenergo.ru/planned_work"
 
         if today is None:
@@ -34,52 +36,51 @@ class RossetiParser:
         date_start = self.__format_date(today)
         date_finish = self.__format_date(today + timedelta(days=ndays))
 
-        return {
-            'date_start': date_start,
-            'date_finish': date_finish
-        }
+        return {"date_start": date_start, "date_finish": date_finish}
 
     def __get_page(self, url: str, params: Mapping[str, str]) -> None:
         r = requests.get(url, params)
 
-        self.soup = BeautifulSoup(r.text, features='html.parser')
+        self.soup = BeautifulSoup(r.text, features="html.parser")
 
     def __parse_nav(self) -> Tuple[str, str]:
-        navigator = self.soup.find('span', attrs={'class': 'page-nav-i'})
+        navigator = self.soup.find("span", attrs={"class": "page-nav-i"})
 
-        next_uri = navigator.find('a', attrs={'class': 'next'})['href']
-        last_uri = navigator.find_all('a')[-1]['href']
+        next_uri = navigator.find("a", attrs={"class": "next"})["href"]
+        last_uri = navigator.find_all("a")[-1]["href"]
 
         return next_uri, last_uri
 
     def __parse_table(self) -> pd.DataFrame:
-        table = self.soup.find(
-            'table', attrs={'class': 'tableous_facts funds'})
+        table = self.soup.find("table", attrs={"class": "tableous_facts funds"})
 
         return pd.read_html(io.StringIO(str(table)))[0]
 
     def __save_page(self, uri: str) -> None:
        print(f'Processing page "{uri}"')
         self.__get_page(self.base_url + uri, self.__params)
-        self.df = pd.concat(
-            (self.df, self.__parse_table()), ignore_index=True)
+        self.df = pd.concat((self.df, self.__parse_table()), ignore_index=True)
 
     def __set_columns(self) -> None:
-        self.df.columns = pd.Index((
-            "Регион РФ (область, край, город фед. значения, округ)",
-            "Административный район",
-            "Населённый пункт",
-            "Улица",
-            "Плановая дата начала отключения электроснабжения",
-            "Плановое время начала отключения электроснабжения",
-            "Плановая дата восстановления отключения электроснабжения",
-            "Плановое время восстановления отключения электроснабжения",
-            "Филиал",
-            "РЭС",
-            "Комментарий",
-        ))
+        self.df.columns = pd.Index(
+            (
+                "Регион РФ (область, край, город фед. значения, округ)",
+                "Административный район",
+                "Населённый пункт",
+                "Улица",
+                "Плановая дата начала отключения электроснабжения",
+                "Плановое время начала отключения электроснабжения",
+                "Плановая дата восстановления отключения электроснабжения",
+                "Плановое время восстановления отключения электроснабжения",
+                "Филиал",
+                "РЭС",
+                "Комментарий",
+            )
+        )
 
-    def fetch(self, ndays: Optional[int] = None, today: Optional[datetime] = None) -> None:
+    def fetch(
+        self, ndays: Optional[int] = None, today: Optional[datetime] = None
+    ) -> None:
         if ndays is None:
             ndays = self.ndays
         if today is None:
@@ -87,7 +88,7 @@ class RossetiParser:
 
         self.__params = self.__compose_date_params(ndays, today)
 
-        self.__save_page('')
+        self.__save_page("")
 
         next_uri, last_uri = self.__parse_nav()
diff --git a/rosseti_parser/util.py b/parser/util.py
similarity index 75%
rename from rosseti_parser/util.py
rename to parser/util.py
index e6eb10c..9bba3ae 100644
--- a/rosseti_parser/util.py
+++ b/parser/util.py
@@ -1,6 +1,11 @@
 from typing import Optional
 
-from . import RossetiParser, split_addresses, concurrent_fetch_builing_ids, preprocess_df
+from . import (
+    RossetiParser,
+    concurrent_fetch_builing_ids,
+    preprocess_df,
+    split_addresses,
+)
 
 
 def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
diff --git a/parser_api/config.py b/parser_api/config.py
deleted file mode 100644
index ddd8d08..0000000
--- a/parser_api/config.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import os
-
-REFETCH_PERIOD_H = int(os.environ.get('REFETCH_PERIOD_H', '4'))
-
-POSTGRES_USER = os.environ.get('POSTGRES_USER', 'rosseti')
-POSTGRES_PASSWORD = os.environ.get('POSTGRES_PASSWORD', 'rosseti')
-POSTGRES_DB = os.environ.get('POSTGRES_DB', 'rosseti')
-POSTGRES_HOST = os.environ.get('POSTGRES_HOST', 'localhost')
-POSTGRES_PORT = int(os.environ.get('POSTGRES_PORT', '5432'))
diff --git a/rosseti_parser/__init__.py b/rosseti_parser/__init__.py
deleted file mode 100644
index 402d19c..0000000
--- a/rosseti_parser/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .rosseti import RossetiParser
-from .address import split_addresses
-from .building_id import get_building_id, fetch_builing_ids, async_fetch_building_ids, concurrent_fetch_builing_ids
-from .preprocess import preprocess_df, COL_NS, ICOL_NS, preprocess_read_df, group_by_index
-from .util import pipeline
diff --git a/rosseti_parser/address.py b/rosseti_parser/address.py
deleted file mode 100644
index 9f0cbe7..0000000
--- a/rosseti_parser/address.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from __future__ import annotations
-from typing import List, Iterable, TypeVar, Any
-
-import pandas as pd
-import re
-
-T = TypeVar('T')
-
-STREET_PREFIXES = ('ул.', 'бул.', 'пр.', 'ул', 'бул',
-                   'пр', 'ш.', 'ш', 'пер.', 'пер')
-HOUSES_PREFIXES = ('д.', 'д')
-
-
-def unfold_house_ranges(token: str) -> str:
-    pairs_strings = re.findall(r'([\d]+-[\d]+)', token)
-    for pair_string in pairs_strings:
-        a, b = pair_string.split('-')
-        a, b = int(a), int(b)
-
-        if b > a:
-            token = token.replace(
-                pair_string, ', '.join(map(str, range(a, b+1))))
-
-    return token
-
-
-def unfold_houses_list(token: str) -> List[str]:
-    token = unfold_house_ranges(token)
-
-    reg = re.compile(r'(д|д\.)? ?\d+[а-яА-Я\/]*\d*(,|$| )')
-
-    if len(re.findall(reg, token)) > 1:
-        tokens = token.split(',')
-        return [*[tokens[0] + ' ' + house_token for house_token in tokens[1:]]]
-    return [token]
-
-
-def any_of_in(substrings: Iterable[str], string: str) -> bool:
-    return any(map(lambda substring: substring in string, substrings))
-
-
-def flatten(arr: Iterable[List[T]]) -> List[T]:
-    return sum(arr, [])
-
-
-def split_address(address: str) -> List[str]:
-    if ';' in address:
-        return flatten(map(unfold_houses_list, address.split(';')))
-    elif ',' in address:
-        tokens = re.split(r'(,)', address)
-
-        tokens = list(map(str.strip, filter(
-            lambda token: token != '', tokens)))
-
-        res = []
-        accumulator = ''
-
-        for i in range(len(tokens)):
-            if (any_of_in(STREET_PREFIXES, tokens[i].lower()) and
-                    any_of_in(STREET_PREFIXES, accumulator.lower())):
-                res += unfold_houses_list(accumulator)
-                accumulator = ''
-
-            accumulator += tokens[i]
-
-        res += unfold_houses_list(accumulator)
-
-        return res
-
-    return [address]
-
-
-def process_row(row: pd.Series[str]) -> pd.Series[str]:
-    row = row.copy()
-
-    if pd.isnull(row['Улица']):
-        row['Улица'] = [None]
-    else:
-        addresses = split_address(row['Улица'])
-        row['Улица'] = addresses
-
-    return row
-
-
-def split_addresses(df: pd.DataFrame) -> pd.DataFrame:
-    merged_df = df.apply(process_row, axis=1).reset_index()
-
-    return merged_df.explode('Улица', ignore_index=True)
diff --git a/rosseti_parser/building_id.py b/rosseti_parser/building_id.py
deleted file mode 100644
index 44410c7..0000000
--- a/rosseti_parser/building_id.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from __future__ import annotations
-from typing import Optional, Tuple, Any, List
-
-import requests
-import pandas as pd
-import numpy as np
-import asyncio
-import aiohttp
-
-GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
-
-
-def get_building_id(street: str) -> GeoTupleType:
-    if pd.isnull(street):
-        return None, None, None
-
-    r = requests.get('https://geocode.gate.petersburg.ru/parse/eas', params={
-        'street': street
-    }, timeout=10)
-
-    res = r.json()
-
-    if 'error' in res:
-        return None, None, None
-
-    return res['Building_ID'], res['Latitude'], res['Longitude']
-
-
-def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
-    df[['ID здания', 'Широта', 'Долгота']] = df.apply(
-        lambda row: get_building_id(row['Улица']), axis=1, result_type='expand')
-
-    return df
-
-
-async def async_fetch_building_id(session: aiohttp.ClientSession, street: str) -> GeoTupleType:
-    if pd.isnull(street):
-        return None, None, None
-
-    async with session.get('https://geocode.gate.petersburg.ru/parse/eas', params={
-        'street': street
-    }) as r:
-        res = await r.json()
-
-        if 'error' in res:
-            return None, None, None
-
-        return res['Building_ID'], res['Latitude'], res['Longitude']
-
-
-async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame:
-    async with aiohttp.ClientSession() as session:
-        tasks = []
-
-        for _, row in df.iterrows():
-            tasks.append(
-                asyncio.ensure_future(
-                    async_fetch_building_id(session, row['Улица'])
-                )
-            )
-
-        res = await asyncio.gather(*tasks)
-
-        df[['ID здания', 'Широта', 'Долгота']] = res
-
-    return df
-
-
-def concurrent_fetch_builing_ids(df: pd.Dataframe) -> pd.DataFrame:
-    return asyncio.run(
-        async_fetch_building_ids(df)
-    )
diff --git a/rosseti_parser/preprocess.py b/rosseti_parser/preprocess.py
deleted file mode 100644
index 533d24b..0000000
--- a/rosseti_parser/preprocess.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from __future__ import annotations
-from typing import Any, List
-
-import pandas as pd
-
-COL_NS = {
-    'region': 'Регион РФ (область, край, город фед. значения, округ)',
-    'area': 'Административный район',
-    'town': 'Населённый пункт',
-    'street': 'Улица',
-    'start_date': 'Плановая дата начала отключения электроснабжения',
-    'start_time': 'Плановое время начала отключения электроснабжения',
-    'finish_date': 'Плановая дата восстановления отключения электроснабжения',
-    'finish_time': 'Плановое время восстановления отключения электроснабжения',
-    'branch': 'Филиал',
-    'res': 'РЭС',
-    'comment': 'Комментарий',
-    'building_id': 'ID здания',
-    'lat': 'Широта',
-    'lng': 'Долгота'
-}
-
-ICOL_NS = dict(map(reversed, COL_NS.items()))
-
-
-def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
-    df.rename(columns=ICOL_NS, inplace=True)
-
-    for a in ('start', 'finish'):
-        df[f'{a}'] = pd.to_datetime(
-            df[f'{a}_date'].astype(str) + ' ' + df[f'{a}_time'].astype(str),
-            dayfirst=True
-        )
-        df.drop(columns=[f'{a}_date', f'{a}_time'], inplace=True)
-
-    return df
-
-
-def preprocess_read_df(df: pd.DataFrame) -> pd.DataFrame:
-    for name in ('start', 'finish'):
-        df[name] = pd.to_datetime(df[name])
-
-    return df
-
-
-def join_columns(col: pd.Series[Any]) -> List[Any] | Any:
-    first = col.iloc[0]
-
-    if col.name in ('street', 'building_id', 'lat', 'lng') and pd.notnull(first):
-        return list(col)
-
-    return first
-
-
-def group_by_index(df: pd.DataFrame) -> pd.DataFrame:
-    groupped = df.groupby('index')
-
-    res_df = groupped.apply(
-        lambda index_df: index_df.apply(join_columns)
-    ).drop(columns='index')
-
-    return res_df
diff --git a/parser_api/README.md b/runner/README.md
similarity index 100%
rename from parser_api/README.md
rename to runner/README.md
diff --git a/parser_api/__init__.py b/runner/__init__.py
similarity index 100%
rename from parser_api/__init__.py
rename to runner/__init__.py
diff --git a/runner/config.py b/runner/config.py
new file mode 100644
index 0000000..f163acd
--- /dev/null
+++ b/runner/config.py
@@ -0,0 +1,9 @@
+import os
+
+REFETCH_PERIOD_H = int(os.environ.get("REFETCH_PERIOD_H", "4"))
+
+POSTGRES_USER = os.environ.get("POSTGRES_USER", "rosseti")
+POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "rosseti")
+POSTGRES_DB = os.environ.get("POSTGRES_DB", "rosseti")
+POSTGRES_HOST = os.environ.get("POSTGRES_HOST", "localhost")
+POSTGRES_PORT = int(os.environ.get("POSTGRES_PORT", "5432"))
diff --git a/parser_api/controller.py b/runner/controller.py
similarity index 69%
rename from parser_api/controller.py
rename to runner/controller.py
index cab2336..30211e9 100644
--- a/parser_api/controller.py
+++ b/runner/controller.py
@@ -1,15 +1,12 @@
-from typing import List, Optional
-from functools import reduce
 import datetime
+from functools import reduce
+from typing import List, Optional
 
 from fastapi import HTTPException
-from sqlalchemy import func, True_
+from parser import get_building_id
 from sqlalchemy.orm import Session
-from sqlalchemy.sql import operators
 from sqlalchemy.sql.expression import BinaryExpression
 
-from rosseti_parser import get_building_id
-
 from . import models, schemas
@@ -36,7 +33,7 @@ def create_record(db: Session, record: schemas.Record):
 
 
 def contains_lower(name, val):
-    if type(val) == str:
+    if isinstance(val, str):
         return getattr(models.Record, name).icontains(val)
     else:
         return getattr(models.Record, name) == val
@@ -53,15 +50,17 @@ def search_each(db: Session, filters: schemas.RecordRequest) -> List[schemas.Rec
     query = None
 
     if filters.start:
-        query = (models.Record.start <= filters.start)
+        query = models.Record.start <= filters.start
     if filters.finish:
         query = and_if_can(models.Record.finish >= filters.finish, query)
 
     filters = list(
-        filter(lambda x: x[1] is not None and x[0] not in ('start, finish'), filters))
+        filter(lambda x: x[1] is not None and x[0] not in ("start", "finish"), filters)
+    )
 
-    query = reduce(lambda acc, ftr: and_if_can(
-        contains_lower(*ftr), acc), filters, query)
+    query = reduce(
+        lambda acc, ftr: and_if_can(contains_lower(*ftr), acc), filters, query
+    )
 
     if query is None:
         res = db.query(models.Record).all()
@@ -74,14 +73,11 @@ def search_each(db: Session, filters: schemas.RecordRequest) -> List[schemas.Rec
 
 def search_all(db: Session, prompt: str) -> List[schemas.Record]:
     prompt = prompt.strip()
 
-    query = reduce(lambda acc, name: acc | contains_lower(name, prompt), (
-        'region',
-        'area',
-        'town',
-        'street',
-        'branch',
-        'res'
-    ), contains_lower('comment', prompt))
+    query = reduce(
+        lambda acc, name: acc | contains_lower(name, prompt),
+        ("region", "area", "town", "street", "branch", "res"),
+        contains_lower("comment", prompt),
+    )
 
     building_id, *_ = get_building_id(prompt)
 
@@ -95,24 +91,19 @@ def search_all(db: Session, prompt: str) -> List[schemas.Record]:
 
 def check_outage(db: Session, building_id: int) -> schemas.CheckResponse:
     building_query = db.query(models.Record).filter(
-        (models.Record.building_id == building_id))
+        (models.Record.building_id == building_id)
+    )
 
     if building_query.count() == 0:
-        raise HTTPException(404, 'No such building')
+        raise HTTPException(404, "No such building")
 
     now = datetime.datetime.now()
 
     res = building_query.filter(
-        (models.Record.start <= now) &
-        (now <= models.Record.finish)
+        (models.Record.start <= now) & (now <= models.Record.finish)
     ).first()
 
     if res is None:
-        return {
-            'is_outage': False
-        }
+        return {"is_outage": False}
 
-    return {
-        'is_outage': True,
-        'when_finish': res.finish
-    }
+    return {"is_outage": True, "when_finish": res.finish}
diff --git a/parser_api/database.py b/runner/database.py
similarity index 70%
rename from parser_api/database.py
rename to runner/database.py
index 85b163f..f438836 100644
--- a/parser_api/database.py
+++ b/runner/database.py
@@ -1,11 +1,16 @@
 from typing import Generator
-import os
 
-from sqlalchemy import create_engine, URL
+from sqlalchemy import URL, create_engine
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import sessionmaker, Session
+from sqlalchemy.orm import Session, sessionmaker
 
-from .config import POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_HOST, POSTGRES_PORT, POSTGRES_DB
+from .config import (
+    POSTGRES_DB,
+    POSTGRES_HOST,
+    POSTGRES_PASSWORD,
+    POSTGRES_PORT,
+    POSTGRES_USER,
+)
 
 engine = create_engine(
     URL.create(
@@ -16,12 +21,13 @@ engine = create_engine(
         port=POSTGRES_PORT,
         database=POSTGRES_DB,
     ),
-    client_encoding='utf8',
+    client_encoding="utf8",
 )
 
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 Base = declarative_base()
 
+
 # Dependency
 def get_db() -> Generator[Session, None, None]:
     db = SessionLocal()
diff --git a/parser_api/job.py b/runner/job.py
similarity index 83%
rename from parser_api/job.py
rename to runner/job.py
index bb7a66a..77a7ebc 100644
--- a/parser_api/job.py
+++ b/runner/job.py
@@ -1,13 +1,10 @@
-from rosseti_parser import pipeline, preprocess_read_df
-import pandas as pd
-import numpy as np
 from datetime import datetime
-import logging
 
-from .database import get_db
+import pandas as pd
+from parser import pipeline
+
 from . import models
-
-from io import StringIO
+from .database import get_db
 
 
 def job():
diff --git a/parser_api/main.py b/runner/main.py
similarity index 80%
rename from parser_api/main.py
rename to runner/main.py
index d8957ce..cad95e6 100644
--- a/parser_api/main.py
+++ b/runner/main.py
@@ -1,14 +1,13 @@
-from contextlib import asynccontextmanager
 import datetime
 
-from fastapi import FastAPI
 import schedule
+from fastapi import FastAPI
 
 from . import models, router, schemas
-from .database import engine
-from .scheduler import run_continuously, run_threaded
-from .job import job
 from .config import REFETCH_PERIOD_H
+from .database import engine
+from .job import job
+from .scheduler import run_continuously, run_threaded
 
 models.Base.metadata.create_all(bind=engine)
@@ -25,13 +24,12 @@ async def lifespan(app: FastAPI):
 
     stop_run_continuously()
 
+
 app = FastAPI(lifespan=lifespan)
 
 app.include_router(router.router)
 
 
-@app.get('/', response_model=schemas.Healthcheck)
+@app.get("/", response_model=schemas.Healthcheck)
 def Healthcheck():
-    return {
-        "up_since": start_stamp
-    }
+    return {"up_since": start_stamp}
diff --git a/parser_api/models.py b/runner/models.py
similarity index 81%
rename from parser_api/models.py
rename to runner/models.py
index 8518a81..e27d5ed 100644
--- a/parser_api/models.py
+++ b/runner/models.py
@@ -1,11 +1,10 @@
-from sqlalchemy import Boolean, Column, Integer, String, DateTime, Float
-from sqlalchemy.orm import relationship
+from sqlalchemy import Column, DateTime, Float, Integer, String
 
 from .database import Base
 
 
 class Record(Base):
-    __tablename__ = 'records'
+    __tablename__ = "records"
 
     id = Column(Integer, primary_key=True, index=True)
     index = Column(Integer)
diff --git a/parser_api/router.py b/runner/router.py
similarity index 70%
rename from parser_api/router.py
rename to runner/router.py
index db059b3..8326372 100644
--- a/parser_api/router.py
+++ b/runner/router.py
@@ -1,19 +1,17 @@
-from fastapi import HTTPException, Depends
+from typing import Annotated, List
+
+from fastapi import APIRouter, Depends
 from sqlalchemy.orm import Session
-from typing import List, Annotated
 
-from fastapi import APIRouter
+from . import controller, schemas
+from .database import get_db
 
-from . import models, schemas, controller
-from .database import SessionLocal, get_db
-
-router = APIRouter(prefix='/api')
+router = APIRouter(prefix="/api")
 
 
-@router.get('/list', response_model=List[schemas.Record], summary="Search by filters")
+@router.get("/list", response_model=List[schemas.Record], summary="Search by filters")
 def list_rows(
-    filters: Annotated[schemas.RecordRequest, Depends()],
-    db: Session = Depends(get_db)
+    filters: Annotated[schemas.RecordRequest, Depends()], db: Session = Depends(get_db)
 ):
     """
     Searches rows with specified filters.
@@ -40,7 +38,7 @@ def list_rows(
 
     return controller.search_each(db, filters)
 
 
-@router.get('/search', response_model=List[schemas.Record], summary="Search by query")
+@router.get("/search", response_model=List[schemas.Record], summary="Search by query")
 def search_rows(query: str, db: Session = Depends(get_db)):
     """
     Selects rows whose cells contain the case-insensitive prompt as a substring.
@@ -58,7 +56,9 @@ def search_rows(query: str, db: Session = Depends(get_db)):
 
     return controller.search_all(db, query)
 
 
-@router.get('/check', response_model=schemas.CheckResponse, summary="Check when outage ends")
+@router.get(
+    "/check", response_model=schemas.CheckResponse, summary="Check when outage ends"
+)
 def check(building_id: int, db: Session = Depends(get_db)):
     """
     Checks if there is an active outage for building_id and, if there is, also returns when it will end.
     """
     return controller.check_outage(db, building_id)
 
 
-@router.put('/create', response_model=schemas.Record)
+@router.put("/create", response_model=schemas.Record)
 def create_record(record: schemas.RecordCreate, db: Session = Depends(get_db)):
     """
     Not for public usage
diff --git a/parser_api/scheduler.py b/runner/scheduler.py
similarity index 100%
rename from parser_api/scheduler.py
rename to runner/scheduler.py
diff --git a/parser_api/schemas.py b/runner/schemas.py
similarity index 96%
rename from parser_api/schemas.py
rename to runner/schemas.py
index 31590f9..480978a 100644
--- a/parser_api/schemas.py
+++ b/runner/schemas.py
@@ -38,5 +38,6 @@ class CheckResponse(BaseModel):
     is_outage: bool
     when_finish: Optional[datetime.datetime] = None
 
+
 class Healthcheck(BaseModel):
-    up_since: datetime.datetime
\ No newline at end of file
+    up_since: datetime.datetime
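
Usage sketch (illustrative, not part of the patch above): after this reorganization the scraping library is imported as `parser` and the FastAPI service as `runner`. Assuming the `parser` package and its dependencies (pandas, requests, aiohttp, beautifulsoup4) are importable, the library half can be driven like this:

    # Hypothetical driver script; `pipeline` and `RossetiParser` are the names
    # exported by parser/__init__.py in this patch. pipeline() presumably runs
    # the full chain: fetch pages, split addresses, geocode, preprocess.
    from parser import RossetiParser, pipeline

    rp = pipeline()      # returns a RossetiParser; the outage table lands on rp.df
    print(rp.df.head())

    # Or run only the scraping step, without geocoding:
    rp = RossetiParser(ndays=3)
    rp.fetch()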