import io
from datetime import datetime, timedelta
from typing import Mapping, Optional, Tuple

import pandas as pd
import requests
from bs4 import BeautifulSoup


class LenenergoParser:
    """Collect the planned-work table from rosseti-lenenergo.ru into a DataFrame.

    On construction the parser either downloads every result page for the
    window ``today .. today + ndays`` or, when ``file_path`` is given, loads a
    previously saved CSV instead. The resulting table is available as
    ``self.df``.

    Attributes:
        base_url: Address of the planned-work listing.
        ndays: Default length of the queried window, in days.
        today: Default first day of the queried window.
        df: The collected table (empty until a fetch or load succeeds).
        soup: Parsed HTML of the most recently downloaded page, or ``None``.
    """

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    # Final column headers, in the order the site's table provides them:
    # region, district, locality, street, planned outage start date/time,
    # planned restoration date/time, branch, RES, comment.
    # NOTE(review): in the reviewed source the first header was broken across
    # two physical lines after "фед. "; it is written on one line here.
    # Confirm against any consumers of previously saved CSV files.
    COLUMNS = (
        "Регион РФ (область, край, город фед. значения, округ)",
        "Административный район",
        "Населённый пункт",
        "Улица",
        "Плановая дата начала отключения электроснабжения",
        "Плановое время начала отключения электроснабжения",
        "Плановая дата восстановления отключения электроснабжения",
        "Плановое время восстановления отключения электроснабжения",
        "Филиал",
        "РЭС",
        "Комментарий",
    )

    def __init__(
        self,
        ndays: int = 7,
        today: Optional[datetime] = None,
        file_path: Optional[str] = None,
    ) -> None:
        """Create the parser and populate ``self.df``.

        Args:
            ndays: Length of the window to query, in days.
            today: First day of the window; defaults to ``datetime.today()``.
            file_path: If given, read the table from this CSV file instead of
                downloading it.
        """
        self.base_url = "https://rosseti-lenenergo.ru/planned_work"
        self.ndays = ndays
        self.today = datetime.today() if today is None else today
        self.df = pd.DataFrame()
        # Initialised here so the attributes always exist, even before the
        # first download.
        self.soup = None
        self.__params = {}

        if file_path is None:
            self.fetch(ndays)
        else:
            self.load_df(file_path)

    def __str__(self) -> str:
        return f"From {self.today.date()} for {self.ndays} days with {len(self.df)} records"

    @staticmethod
    def __format_date(date: datetime) -> str:
        """Format ``date`` as ``DD.MM.YY`` for the query string."""
        return date.strftime("%d.%m.%y")

    def __compose_date_params(self, ndays: int, today: datetime) -> Mapping[str, str]:
        """Build the ``date_start`` / ``date_finish`` query parameters."""
        return {
            "date_start": self.__format_date(today),
            "date_finish": self.__format_date(today + timedelta(days=ndays)),
        }

    def __get_page(self, url: str, params: Mapping[str, str]) -> None:
        """Download ``url`` and store its parsed HTML in ``self.soup``.

        Raises:
            requests.RequestException: On timeout, connection failure, or an
                HTTP error status.
        """
        response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error page as data.
        response.raise_for_status()
        self.soup = BeautifulSoup(response.text, features="html.parser")

    def __parse_nav(self) -> Tuple[Optional[str], Optional[str]]:
        """Read the pager of the current page.

        Returns:
            ``(next_uri, last_uri)``: the href of the "next" link and the href
            of the final link inside the pager. Either element is ``None``
            when the corresponding link (or the whole pager) is absent.
        """
        navigator = self.soup.find("span", attrs={"class": "page-nav-i"})
        if navigator is None:
            return None, None

        next_link = navigator.find("a", attrs={"class": "next"})
        links = navigator.find_all("a")
        next_uri = next_link.get("href") if next_link is not None else None
        last_uri = links[-1].get("href") if links else None
        return next_uri, last_uri

    def __parse_table(self) -> pd.DataFrame:
        """Extract the results table from the current page.

        Returns an empty, column-less frame when the page has no such table.
        """
        table = self.soup.find("table", attrs={"class": "tableous_facts funds"})
        if table is None:
            return pd.DataFrame()
        return pd.read_html(io.StringIO(str(table)))[0]

    def __fetch_page(self, uri: str) -> pd.DataFrame:
        """Download the page at ``base_url + uri`` and return its table."""
        print(f'Processing page "{uri}"')
        self.__get_page(self.base_url + uri, self.__params)
        return self.__parse_table()

    def __set_columns(self) -> None:
        """Apply the canonical headers from ``COLUMNS`` to ``self.df``."""
        if self.df.shape[1] == 0:
            # Nothing was scraped: expose an empty table that still has the
            # expected schema rather than failing on a length mismatch.
            self.df = pd.DataFrame(columns=pd.Index(self.COLUMNS))
        else:
            self.df.columns = pd.Index(self.COLUMNS)

    def fetch(
        self, ndays: Optional[int] = None, today: Optional[datetime] = None
    ) -> None:
        """Download all result pages and replace ``self.df`` with them.

        Each call starts from scratch, so it can be repeated safely. The
        stored ``self.ndays`` / ``self.today`` defaults are not modified.

        Args:
            ndays: Window length in days; defaults to ``self.ndays``.
            today: First day of the window; defaults to ``self.today``.

        Raises:
            requests.RequestException: If any page cannot be downloaded.
        """
        if ndays is None:
            ndays = self.ndays
        if today is None:
            today = self.today
        self.__params = self.__compose_date_params(ndays, today)

        frames = [self.__fetch_page("")]
        # The "last page" link is taken from the first page only.
        next_uri, last_uri = self.__parse_nav()

        # `visited` guards against a pager that loops back on itself.
        visited = {""}
        while next_uri is not None and next_uri not in visited:
            visited.add(next_uri)
            frames.append(self.__fetch_page(next_uri))
            if next_uri == last_uri:
                break
            next_uri, _ = self.__parse_nav()

        # Concatenate once at the end; skip pages that had no table.
        tables = [frame for frame in frames if frame.shape[1] > 0]
        self.df = pd.concat(tables, ignore_index=True) if tables else pd.DataFrame()
        self.__set_columns()

    def save_df(self, file_path: str) -> None:
        """Write ``self.df`` to ``file_path`` as CSV without the index."""
        self.df.to_csv(file_path, index=False)
        # Report only after the write has actually succeeded.
        print(f'Saved as "{file_path}"')

    def load_df(self, file_path: str) -> None:
        """Replace ``self.df`` with the contents of the CSV at ``file_path``."""
        self.df = pd.read_csv(file_path)
        # Report only after the read has actually succeeded.
        print(f'Read from "{file_path}"')