A small refactoring of the parser
This commit is contained in:
parent
bdca9a6ebf
commit
01b93f121b
@ -1,5 +1,5 @@
|
|||||||
import io
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
import io
|
||||||
from typing import Mapping, Optional, Tuple, no_type_check
|
from typing import Mapping, Optional, Tuple, no_type_check
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@ -8,11 +8,15 @@ import pandas as pd
|
|||||||
|
|
||||||
|
|
||||||
class RossetiParser:
|
class RossetiParser:
|
||||||
def __init__(self, ndays=7, today=datetime.today(), file_path: Optional[str] = None):
|
def __init__(self, ndays=7, today: Optional[datetime] = None, file_path: Optional[str] = None):
|
||||||
self.base_url = "https://rosseti-lenenergo.ru/planned_work"
|
self.base_url = "https://rosseti-lenenergo.ru/planned_work"
|
||||||
|
|
||||||
|
if today is None:
|
||||||
|
today = datetime.today()
|
||||||
|
|
||||||
self.ndays = ndays
|
self.ndays = ndays
|
||||||
self.today = today
|
self.today = today
|
||||||
|
self.df = pd.DataFrame()
|
||||||
|
|
||||||
if file_path is None:
|
if file_path is None:
|
||||||
self.fetch(ndays)
|
self.fetch(ndays)
|
||||||
@ -20,7 +24,7 @@ class RossetiParser:
|
|||||||
self.load_df(file_path)
|
self.load_df(file_path)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return f"From {self.today.date()} for {self.ndays} days with {len(self.df)}_records"
|
return f"From {self.today.date()} for {self.ndays} days with {len(self.df)} records"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __format_date(date: datetime) -> str:
|
def __format_date(date: datetime) -> str:
|
||||||
@ -54,6 +58,12 @@ class RossetiParser:
|
|||||||
|
|
||||||
return pd.read_html(io.StringIO(str(table)))[0]
|
return pd.read_html(io.StringIO(str(table)))[0]
|
||||||
|
|
||||||
|
def __save_page(self, uri: str):
|
||||||
|
print(f'Processing page "{uri}"')
|
||||||
|
self.__get_page(self.base_url + uri, self.__params)
|
||||||
|
self.df = pd.concat(
|
||||||
|
(self.df, self.__parse_table()), ignore_index=True)
|
||||||
|
|
||||||
def __set_columns(self):
|
def __set_columns(self):
|
||||||
self.df.columns = (
|
self.df.columns = (
|
||||||
"Регион РФ (область, край, город фед. значения, округ)",
|
"Регион РФ (область, край, город фед. значения, округ)",
|
||||||
@ -75,42 +85,25 @@ class RossetiParser:
|
|||||||
if today is None:
|
if today is None:
|
||||||
today = self.today
|
today = self.today
|
||||||
|
|
||||||
params = self.__compose_date_params(ndays, today)
|
self.__params = self.__compose_date_params(ndays, today)
|
||||||
|
|
||||||
self.__get_page(self.base_url, params)
|
self.__save_page('')
|
||||||
|
|
||||||
next_uri, last_uri = self.__parse_nav()
|
next_uri, last_uri = self.__parse_nav()
|
||||||
|
|
||||||
self.df = self.__parse_table()
|
|
||||||
|
|
||||||
def get(next_uri: str):
|
|
||||||
self.__get_page(self.base_url + next_uri, params) # Instead of adding params to every request session could be used to store them with cookies
|
|
||||||
self.df = pd.concat(
|
|
||||||
(self.df, self.__parse_table()), ignore_index=True)
|
|
||||||
|
|
||||||
while next_uri != last_uri:
|
while next_uri != last_uri:
|
||||||
get(next_uri)
|
self.__save_page(next_uri)
|
||||||
|
|
||||||
next_uri, _ = self.__parse_nav()
|
next_uri, _ = self.__parse_nav()
|
||||||
|
|
||||||
get(next_uri)
|
self.__save_page(next_uri)
|
||||||
|
|
||||||
self.__set_columns()
|
self.__set_columns()
|
||||||
|
|
||||||
def save_df(self, file_path: str):
|
def save_df(self, file_path: str):
|
||||||
|
print(f'Saved as "{file_path}"')
|
||||||
self.df.to_csv(file_path)
|
self.df.to_csv(file_path)
|
||||||
|
|
||||||
def load_df(self, file_path: str):
|
def load_df(self, file_path: str):
|
||||||
|
print(f'Read from "{file_path}"')
|
||||||
self.df = pd.read_csv(file_path)
|
self.df = pd.read_csv(file_path)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = RossetiParser()
|
|
||||||
|
|
||||||
print(parser)
|
|
||||||
|
|
||||||
parser.save_df('./data.csv')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user