Added readme
This commit is contained in:
parent
2efb4d4846
commit
c735ca2ee5
@ -1,5 +1,9 @@
|
|||||||
REFETCH_PERIOD_H=6
|
REFETCH_PERIOD_H=6
|
||||||
|
STORE_NULL_BID=True
|
||||||
|
|
||||||
POSTGRES_USER=lenenergo
|
POSTGRES_USER=lenenergo
|
||||||
POSTGRES_PASSWORD=lenenergo
|
POSTGRES_PASSWORD=lenenergo
|
||||||
POSTGRES_DB=lenenergo
|
POSTGRES_DB=lenenergo
|
||||||
POSTGRES_HOST=db
|
POSTGRES_HOST=db
|
||||||
|
# or
|
||||||
|
DB_URL=postgresql://lenenergo:lenenergo@localhost:5432
|
||||||
|
37
README.md
Normal file
37
README.md
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
# Lenenergo Parser
|
||||||
|
|
||||||
|
## DB columns settings
|
||||||
|
|
||||||
|
Append to `runner/config.py`
|
||||||
|
|
||||||
|
```python
|
||||||
|
DB_COLUMNS_MAP["<COL_NS key>"] = "<corresponding db column name>"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running instructions
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build . -t lenenergo_parser
|
||||||
|
docker run -d \
|
||||||
|
[-e REFETCH_PERIOD_H=4] \ # Refetch period
|
||||||
|
[-e STORE_NULL_BID=True] \ # Store rows with null building_id
|
||||||
|
# DB auth variants
|
||||||
|
[-e POSTGRES_USER=lenenergo] \
|
||||||
|
[-e POSTGRES_PASSWORD=lenenergo] \
|
||||||
|
[-e POSTGRES_DB=lenenergo] \
|
||||||
|
[-e POSTGRES_HOST=localhost] \
|
||||||
|
[-e POSTGRES_PORT=5432] \
|
||||||
|
# or
|
||||||
|
[-e DB_URL=postgresql://lenenergo:lenenergo@localhost:5432/lenenergo] \
|
||||||
|
lenenergo_parser
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dev instructions
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m venv .venv
|
||||||
|
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
python -m runner
|
||||||
|
```
|
@ -31,12 +31,18 @@ def split_addresses(df: pd.DataFrame) -> pd.DataFrame
|
|||||||
```
|
```
|
||||||
- `get_building_id`:
|
- `get_building_id`:
|
||||||
```python
|
```python
|
||||||
def get_building_id(street: str) -> Tuple[Optional[int], Optional[float], Optional[float]]
|
def get_building_id(street: str) -> GeoTupleType
|
||||||
```
|
```
|
||||||
- `fetch_builing_ids`:
|
- `fetch_builing_ids`:
|
||||||
```python
|
```python
|
||||||
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
|
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
|
||||||
```
|
```
|
||||||
|
- `async_fetch_building_id`:
|
||||||
|
```python
|
||||||
|
async def async_fetch_building_id(
|
||||||
|
session: aiohttp.ClientSession, street: str
|
||||||
|
) -> GeoTupleType
|
||||||
|
```
|
||||||
- `async_fetch_building_ids`:
|
- `async_fetch_building_ids`:
|
||||||
```python
|
```python
|
||||||
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame
|
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame
|
||||||
@ -51,11 +57,15 @@ def preprocess_df(df: pd.DataFrame) -> pd.DataFrame
|
|||||||
```
|
```
|
||||||
- `COL_NS`:
|
- `COL_NS`:
|
||||||
```python
|
```python
|
||||||
COL_NS: Dict[str, str]
|
COL_NS: dict[str, str]
|
||||||
```
|
```
|
||||||
- `ICOL_NS`:
|
- `ICOL_NS`:
|
||||||
```python
|
```python
|
||||||
ICOL_NS: Dict[str, str]
|
ICOL_NS: dict[str, str]
|
||||||
|
```
|
||||||
|
- `PR_COL_NS`:
|
||||||
|
```python
|
||||||
|
PR_COL_NS: tuple[str]
|
||||||
```
|
```
|
||||||
- `preprocess_read_df`:
|
- `preprocess_read_df`:
|
||||||
```python
|
```python
|
||||||
@ -82,4 +92,4 @@ pip install -r requirements.txt
|
|||||||
python -m parser [<Период в часах>]
|
python -m parser [<Период в часах>]
|
||||||
```
|
```
|
||||||
|
|
||||||
Формат сохраняемых файлов: `data_%d-%m-%y_%H:%M.csv`
|
Формат сохраняемых файлов: `data_%d-%m-%y_%H.%M.csv`
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from .address import split_addresses
|
from .address import split_addresses
|
||||||
from .building_id import (
|
from .building_id import (
|
||||||
|
GeoTupleType,
|
||||||
async_fetch_building_id,
|
async_fetch_building_id,
|
||||||
async_fetch_building_ids,
|
async_fetch_building_ids,
|
||||||
concurrent_fetch_builing_ids,
|
concurrent_fetch_builing_ids,
|
||||||
@ -18,15 +19,16 @@ from .preprocess import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
|
"COL_NS",
|
||||||
|
"GeoTupleType",
|
||||||
|
"ICOL_NS",
|
||||||
|
"PR_COL_NS",
|
||||||
"async_fetch_building_id",
|
"async_fetch_building_id",
|
||||||
"async_fetch_building_ids",
|
"async_fetch_building_ids",
|
||||||
"COL_NS",
|
|
||||||
"PR_COL_NS",
|
|
||||||
"concurrent_fetch_builing_ids",
|
"concurrent_fetch_builing_ids",
|
||||||
"fetch_builing_ids",
|
"fetch_builing_ids",
|
||||||
"get_building_id",
|
"get_building_id",
|
||||||
"group_by_index",
|
"group_by_index",
|
||||||
"ICOL_NS",
|
|
||||||
"LenenergoParser",
|
"LenenergoParser",
|
||||||
"pipeline",
|
"pipeline",
|
||||||
"preprocess_df",
|
"preprocess_df",
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from parser import PR_COL_NS
|
from parser import PR_COL_NS
|
||||||
|
|
||||||
REFETCH_PERIOD_H = int(os.environ.get("REFETCH_PERIOD_H", "4"))
|
REFETCH_PERIOD_H = int(os.environ.get("REFETCH_PERIOD_H", "4"))
|
||||||
|
@ -7,7 +7,6 @@ import psycopg
|
|||||||
from .config import DB_COLUMNS_MAP, STORE_NULL_BID
|
from .config import DB_COLUMNS_MAP, STORE_NULL_BID
|
||||||
from .database import db_credentials
|
from .database import db_credentials
|
||||||
|
|
||||||
|
|
||||||
sql_statement = "".join(
|
sql_statement = "".join(
|
||||||
("COPY records (", ", ".join(DB_COLUMNS_MAP.values()), ") FROM STDIN")
|
("COPY records (", ", ".join(DB_COLUMNS_MAP.values()), ") FROM STDIN")
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user