111 lines
3.7 KiB
Python
111 lines
3.7 KiB
Python
import io
|
|
from datetime import datetime, timedelta
|
|
from typing import Mapping, Optional, Tuple
|
|
|
|
import pandas as pd
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
class RossetiParser:
    """Collect the planned-work table from rosseti-lenenergo.ru into a DataFrame.

    The records are either scraped page by page from ``base_url`` for a date
    window that starts at ``today`` and lasts ``ndays`` days, or loaded from a
    CSV file such as one written by :meth:`save_df`.

    Attributes:
        base_url: Address of the planned-work listing.
        ndays: Default length of the requested date window, in days.
        today: Default first day of the requested date window.
        df: Collected records; empty until data is fetched or loaded.
        soup: Parsed HTML of the most recently downloaded page, or ``None``
            when nothing has been downloaded yet.
    """

    # Seconds to wait for the server. ``requests`` has no default timeout, so
    # without this a stalled connection would block the scraper forever.
    REQUEST_TIMEOUT = 30

    # Labels assigned to the scraped table, in the table's own column order
    # (region, district, locality, street, planned start date/time,
    # planned restoration date/time, branch, RES, comment).
    _COLUMNS = (
        "Регион РФ (область, край, город фед. значения, округ)",
        "Административный район",
        "Населённый пункт",
        "Улица",
        "Плановая дата начала отключения электроснабжения",
        "Плановое время начала отключения электроснабжения",
        "Плановая дата восстановления отключения электроснабжения",
        "Плановое время восстановления отключения электроснабжения",
        "Филиал",
        "РЭС",
        "Комментарий",
    )

    def __init__(
        self,
        ndays: int = 7,
        today: Optional[datetime] = None,
        file_path: Optional[str] = None,
    ) -> None:
        """Create the parser and immediately populate ``df``.

        Args:
            ndays: Length of the date window in days.
            today: First day of the window; defaults to the current date/time.
            file_path: When given, records are read from this CSV file and no
                network request is made; otherwise the site is scraped.
        """
        self.base_url = "https://rosseti-lenenergo.ru/planned_work"

        if today is None:
            today = datetime.today()

        self.ndays = ndays
        self.today = today
        self.df = pd.DataFrame()
        # Defined up front so the attributes exist even before the first fetch.
        self.soup = None
        self.__params = {}

        if file_path is None:
            self.fetch(ndays)
        else:
            self.load_df(file_path)

    def __str__(self) -> str:
        """Return a one-line summary of the stored window and record count."""
        return f"From {self.today.date()} for {self.ndays} days with {len(self.df)} records"

    @staticmethod
    def __format_date(date: datetime) -> str:
        """Format *date* as ``DD.MM.YY``, the form sent in the query string."""
        return date.strftime("%d.%m.%y")

    def __compose_date_params(self, ndays: int, today: datetime) -> Mapping[str, str]:
        """Build the ``date_start``/``date_finish`` query parameters.

        The window starts at *today* and ends *ndays* days later.
        """
        date_start = self.__format_date(today)
        date_finish = self.__format_date(today + timedelta(days=ndays))

        return {"date_start": date_start, "date_finish": date_finish}

    def __get_page(self, url: str, params: Mapping[str, str]) -> None:
        """Download *url* with *params* and store the parsed HTML in ``soup``.

        Raises:
            requests.RequestException: On a timeout, a connection problem or
                a non-2xx HTTP status.
        """
        response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        # Fail here rather than trying to parse an HTTP error page as data.
        response.raise_for_status()

        self.soup = BeautifulSoup(response.text, features="html.parser")

    def __parse_nav(self) -> Optional[Tuple[str, str]]:
        """Read the paginator of the current page.

        Returns:
            ``(next_uri, last_uri)`` taken from the "next" link and from the
            final link of the paginator, or ``None`` when the page has no
            paginator or no "next" link (i.e. there is nowhere further to go).
        """
        navigator = self.soup.find("span", attrs={"class": "page-nav-i"})
        if navigator is None:
            return None

        next_link = navigator.find("a", attrs={"class": "next"})
        links = navigator.find_all("a")
        if next_link is None or not links:
            return None

        return next_link["href"], links[-1]["href"]

    def __parse_table(self) -> pd.DataFrame:
        """Extract the planned-work table of the current page.

        Raises:
            ValueError: If the page does not contain the expected table.
        """
        table = self.soup.find("table", attrs={"class": "tableous_facts funds"})
        if table is None:
            raise ValueError("Planned-work table not found on the page")

        return pd.read_html(io.StringIO(str(table)))[0]

    def __save_page(self, uri: str) -> None:
        """Download the page at ``base_url + uri`` and append its rows to ``df``."""
        print(f'Processing page "{uri}"')
        self.__get_page(self.base_url + uri, self.__params)
        self.df = pd.concat((self.df, self.__parse_table()), ignore_index=True)

    def __set_columns(self) -> None:
        """Replace the scraped header with the canonical column labels."""
        self.df.columns = pd.Index(self._COLUMNS)

    def fetch(
        self, ndays: Optional[int] = None, today: Optional[datetime] = None
    ) -> None:
        """Scrape every page of the listing for the date window into ``df``.

        Any records already held in ``df`` are discarded first, so the method
        can be called repeatedly or after :meth:`load_df`.

        Args:
            ndays: Window length in days; defaults to ``self.ndays``.
            today: First day of the window; defaults to ``self.today``.
                Both arguments apply to this call only; the stored
                ``ndays``/``today`` attributes are not modified.

        Raises:
            requests.RequestException: If a page cannot be downloaded.
            ValueError: If a page lacks the planned-work table or the table
                does not have the expected number of columns.
        """
        if ndays is None:
            ndays = self.ndays
        if today is None:
            today = self.today

        # Start from scratch: appending raw pages to an already-renamed frame
        # would misalign the columns and break ``__set_columns``.
        self.df = pd.DataFrame()
        self.__params = self.__compose_date_params(ndays, today)

        uri = ""
        last_uri = None
        while True:
            self.__save_page(uri)
            if uri == last_uri:
                # The page just saved was the one the paginator called last.
                break

            nav = self.__parse_nav()
            if nav is None:
                # Single-page result, or a page without a "next" link.
                break

            next_uri, page_last_uri = nav
            if last_uri is None:
                # The last-page address is taken from the first page only.
                last_uri = page_last_uri
            uri = next_uri

        self.__set_columns()

    def save_df(self, file_path: str) -> None:
        """Write ``df`` to *file_path* as CSV without the index column."""
        self.df.to_csv(file_path, index=False)
        # Report only after the write succeeded.
        print(f'Saved as "{file_path}"')

    def load_df(self, file_path: str) -> None:
        """Replace ``df`` with the contents of the CSV file at *file_path*."""
        self.df = pd.read_csv(file_path)
        # Report only after the read succeeded.
        print(f'Read from "{file_path}"')
|