Added readme

This commit is contained in:
Dmitriy Shishkov 2023-10-29 17:09:26 +03:00
parent 2efb4d4846
commit c735ca2ee5
Signed by: dm1sh
GPG Key ID: 027994B0AA357688
7 changed files with 62 additions and 11 deletions

View File

@ -1,5 +1,9 @@
REFETCH_PERIOD_H=6
STORE_NULL_BID=True
POSTGRES_USER=lenenergo
POSTGRES_PASSWORD=lenenergo
POSTGRES_DB=lenenergo
POSTGRES_HOST=db
POSTGRES_HOST=db
# or
DB_URL=postgresql://lenenergo:lenenergo@localhost:5432

37
README.md Normal file
View File

@ -0,0 +1,37 @@
# Lenenergo Parser
## DB columns settings
Append to `runner/config.py`
```python
DB_COLUMNS_MAP["<COL_NS key>"] = "<corresponding db column name>"
```
## Running instructions
```bash
docker build . -t lenenergo_parser
docker run -d \
[-e REFETCH_PERIOD_H=4] \ # Refetch period
[-e STORE_NULL_BID=True] \ # Store rows with null building_id
# DB auth variants
[-e POSTGRES_USER=lenenergo] \
[-e POSTGRES_PASSWORD=lenenergo] \
[-e POSTGRES_DB=lenenergo] \
[-e POSTGRES_HOST=localhost] \
[-e POSTGRES_PORT=5432] \
# or
[-e DB_URL=postgresql://lenenergo:lenenergo@localhost:5432/lenenergo] \
lenenergo_parser
```
## Dev instructions
```bash
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
python -m runner
```

View File

@ -31,12 +31,18 @@ def split_addresses(df: pd.DataFrame) -> pd.DataFrame
```
- `get_building_id`:
```python
def get_building_id(street: str) -> Tuple[Optional[int], Optional[float], Optional[float]]
def get_building_id(street: str) -> GeoTupleType
```
- `fetch_builing_ids`:
```python
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
```
- `async_fetch_building_id`:
```python
async def async_fetch_building_id(
session: aiohttp.ClientSession, street: str
) -> GeoTupleType
```
- `async_fetch_building_ids`:
```python
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame
@ -51,11 +57,15 @@ def preprocess_df(df: pd.DataFrame) -> pd.DataFrame
```
- `COL_NS`:
```python
COL_NS: Dict[str, str]
COL_NS: dict[str, str]
```
- `ICOL_NS`:
```python
ICOL_NS: Dict[str, str]
ICOL_NS: dict[str, str]
```
- `PR_COL_NS`:
```python
PR_COL_NS: tuple[str]
```
- `preprocess_read_df`:
```python
@ -82,4 +92,4 @@ pip install -r requirements.txt
python -m parser [<Период в часах>]
```
Формат сохраняемых файлов: `data_%d-%m-%y_%H:%M.csv`
Формат сохраняемых файлов: `data_%d-%m-%y_%H.%M.csv`

View File

@ -1,5 +1,6 @@
from .address import split_addresses
from .building_id import (
GeoTupleType,
async_fetch_building_id,
async_fetch_building_ids,
concurrent_fetch_builing_ids,
@ -18,15 +19,16 @@ from .preprocess import (
)
__all__ = (
"COL_NS",
"GeoTupleType",
"ICOL_NS",
"PR_COL_NS",
"async_fetch_building_id",
"async_fetch_building_ids",
"COL_NS",
"PR_COL_NS",
"concurrent_fetch_builing_ids",
"fetch_builing_ids",
"get_building_id",
"group_by_index",
"ICOL_NS",
"LenenergoParser",
"pipeline",
"preprocess_df",

View File

@ -10,4 +10,4 @@ python -m venv .venv
pip install -r requirements.txt
python -m runner
```
```

View File

@ -1,5 +1,4 @@
import os
from parser import PR_COL_NS
REFETCH_PERIOD_H = int(os.environ.get("REFETCH_PERIOD_H", "4"))

View File

@ -7,7 +7,6 @@ import psycopg
from .config import DB_COLUMNS_MAP, STORE_NULL_BID
from .database import db_credentials
sql_statement = "".join(
("COPY records (", ", ".join(DB_COLUMNS_MAP.values()), ") FROM STDIN")
)