From c735ca2ee5b7ee5cc4bdb475e60487ad03d8b8a5 Mon Sep 17 00:00:00 2001 From: dm1sh Date: Sun, 29 Oct 2023 17:09:26 +0300 Subject: [PATCH] Added readme --- .env.example | 6 +++++- README.md | 37 +++++++++++++++++++++++++++++++++++++ parser/README.md | 18 ++++++++++++++---- parser/__init__.py | 8 +++++--- runner/README.md | 2 +- runner/config.py | 1 - runner/job.py | 1 - 7 files changed, 62 insertions(+), 11 deletions(-) create mode 100644 README.md diff --git a/.env.example b/.env.example index 777ed60..4ebad9e 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,9 @@ REFETCH_PERIOD_H=6 +STORE_NULL_BID=True + POSTGRES_USER=lenenergo POSTGRES_PASSWORD=lenenergo POSTGRES_DB=lenenergo -POSTGRES_HOST=db \ No newline at end of file +POSTGRES_HOST=db +# or +DB_URL=postgresql://lenenergo:lenenergo@localhost:5432 diff --git a/README.md b/README.md new file mode 100644 index 0000000..61fc5fc --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# Lenenergo Parser + +## DB columns settings + +Append to `runner/config.py` + +```python +DB_COLUMNS_MAP[""] = "" +``` + +## Running instructions + +```bash +docker build . 
-t lenenergo_parser +docker run -d \ + [-e REFETCH_PERIOD_H=4] \ # Refetch period + [-e STORE_NULL_BID=True] \ # Store rows with null building_id + # DB auth variants + [-e POSTGRES_USER=lenenergo] \ + [-e POSTGRES_PASSWORD=lenenergo] \ + [-e POSTGRES_DB=lenenergo] \ + [-e POSTGRES_HOST=localhost] \ + [-e POSTGRES_PORT=5432] \ + # or + [-e DB_URL=postgresql://lenenergo:lenenergo@localhost:5432/lenenergo] \ + lenenergo_parser +``` + +## Dev instructions + +```bash +python -m venv .venv + +pip install -r requirements.txt + +python -m runner +``` diff --git a/parser/README.md b/parser/README.md index 9a26d18..268faa0 100644 --- a/parser/README.md +++ b/parser/README.md @@ -31,12 +31,18 @@ def split_addresses(df: pd.DataFrame) -> pd.DataFrame ``` - `get_building_id`: ```python -def get_building_id(street: str) -> Tuple[Optional[int], Optional[float], Optional[float]] +def get_building_id(street: str) -> GeoTupleType ``` - `fetch_builing_ids`: ```python def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame ``` +- `async_fetch_building_id`: +```python +async def async_fetch_building_id( + session: aiohttp.ClientSession, street: str +) -> GeoTupleType +``` - `async_fetch_building_ids`: ```python async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame @@ -51,11 +57,15 @@ def preprocess_df(df: pd.DataFrame) -> pd.DataFrame ``` - `COL_NS`: ```python -COL_NS: Dict[str, str] +COL_NS: dict[str, str] ``` - `ICOL_NS`: ```python -ICOL_NS: Dict[str, str] +ICOL_NS: dict[str, str] +``` +- `PR_COL_NS`: +```python +PR_COL_NS: tuple[str] ``` - `preprocess_read_df`: ```python @@ -82,4 +92,4 @@ pip install -r requirements.txt python -m parser [<Период в часах>] ``` -Формат сохраняемых файлов: `data_%d-%m-%y_%H:%M.csv` +Формат сохраняемых файлов: `data_%d-%m-%y_%H.%M.csv` diff --git a/parser/__init__.py b/parser/__init__.py index 8a0c1ae..b154344 100644 --- a/parser/__init__.py +++ b/parser/__init__.py @@ -1,5 +1,6 @@ from .address import split_addresses from .building_id 
import ( + GeoTupleType, async_fetch_building_id, async_fetch_building_ids, concurrent_fetch_builing_ids, @@ -18,15 +19,16 @@ from .preprocess import ( ) __all__ = ( + "COL_NS", + "GeoTupleType", + "ICOL_NS", + "PR_COL_NS", "async_fetch_building_id", "async_fetch_building_ids", - "COL_NS", - "PR_COL_NS", "concurrent_fetch_builing_ids", "fetch_builing_ids", "get_building_id", "group_by_index", - "ICOL_NS", "LenenergoParser", "pipeline", "preprocess_df", diff --git a/runner/README.md b/runner/README.md index eda6017..00686ba 100644 --- a/runner/README.md +++ b/runner/README.md @@ -10,4 +10,4 @@ python -m venv .venv pip install -r requirements.txt python -m runner -``` \ No newline at end of file +``` diff --git a/runner/config.py b/runner/config.py index add0d99..77ebfbd 100644 --- a/runner/config.py +++ b/runner/config.py @@ -1,5 +1,4 @@ import os - from parser import PR_COL_NS REFETCH_PERIOD_H = int(os.environ.get("REFETCH_PERIOD_H", "4")) diff --git a/runner/job.py b/runner/job.py index a2af302..62fc8c6 100644 --- a/runner/job.py +++ b/runner/job.py @@ -7,7 +7,6 @@ import psycopg from .config import DB_COLUMNS_MAP, STORE_NULL_BID from .database import db_credentials - sql_statement = "".join( ("COPY records (", ", ".join(DB_COLUMNS_MAP.values()), ") FROM STDIN") )