diff --git a/parser/__init__.py b/parser/__init__.py index 53186ad..49cc956 100644 --- a/parser/__init__.py +++ b/parser/__init__.py @@ -1,5 +1,5 @@ -import io from datetime import datetime, timedelta +import io from typing import Mapping, Optional, Tuple, no_type_check from bs4 import BeautifulSoup @@ -8,11 +8,15 @@ import pandas as pd class RossetiParser: - def __init__(self, ndays=7, today=datetime.today(), file_path: Optional[str] = None): + def __init__(self, ndays=7, today: Optional[datetime] = None, file_path: Optional[str] = None): self.base_url = "https://rosseti-lenenergo.ru/planned_work" + if today is None: + today = datetime.today() + self.ndays = ndays self.today = today + self.df = pd.DataFrame() if file_path is None: self.fetch(ndays) @@ -20,7 +24,7 @@ class RossetiParser: self.load_df(file_path) def __str__(self): - return f"From {self.today.date()} for {self.ndays} days with {len(self.df)}_records" + return f"From {self.today.date()} for {self.ndays} days with {len(self.df)} records" @staticmethod def __format_date(date: datetime) -> str: @@ -54,6 +58,12 @@ class RossetiParser: return pd.read_html(io.StringIO(str(table)))[0] + def __save_page(self, uri: str): + print(f'Processing page "{uri}"') + self.__get_page(self.base_url + uri, self.__params) + self.df = pd.concat( + (self.df, self.__parse_table()), ignore_index=True) + def __set_columns(self): self.df.columns = ( "Регион РФ (область, край, город фед. 
значения, округ)", @@ -75,42 +85,25 @@ class RossetiParser: if today is None: today = self.today - params = self.__compose_date_params(ndays, today) + self.__params = self.__compose_date_params(ndays, today) - self.__get_page(self.base_url, params) + self.__save_page('') next_uri, last_uri = self.__parse_nav() - self.df = self.__parse_table() - - def get(next_uri: str): - self.__get_page(self.base_url + next_uri, params) # Instead of adding params to every request session could be used to store them with cookies - self.df = pd.concat( - (self.df, self.__parse_table()), ignore_index=True) - while next_uri != last_uri: - get(next_uri) + self.__save_page(next_uri) next_uri, _ = self.__parse_nav() - get(next_uri) + self.__save_page(next_uri) self.__set_columns() def save_df(self, file_path: str): + print(f'Saved as "{file_path}"') self.df.to_csv(file_path) def load_df(self, file_path: str): + print(f'Read from "{file_path}"') self.df = pd.read_csv(file_path) - - -def main(): - parser = RossetiParser() - - print(parser) - - parser.save_df('./data.csv') - - -if __name__ == "__main__": - main()