Added async building_id fetching and defaulted it

This commit is contained in:
Dmitriy Shishkov 2023-09-21 22:37:10 +03:00
parent c705a0ed02
commit 464d0d3640
Signed by: dm1sh
GPG Key ID: 027994B0AA357688
7 changed files with 210 additions and 310 deletions

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,10 @@
aiohttp==3.8.5
aiosignal==1.3.1
annotated-types==0.5.0 annotated-types==0.5.0
anyio==3.7.1 anyio==3.7.1
asttokens==2.4.0 asttokens==2.4.0
async-timeout==4.0.3
attrs==23.1.0
autopep8==2.0.4 autopep8==2.0.4
backcall==0.2.0 backcall==0.2.0
beautifulsoup4==4.12.2 beautifulsoup4==4.12.2
@ -14,6 +18,7 @@ decorator==5.1.1
executing==1.2.0 executing==1.2.0
fastapi==0.103.1 fastapi==0.103.1
fonttools==4.42.1 fonttools==4.42.1
frozenlist==1.4.0
greenlet==2.0.2 greenlet==2.0.2
idna==3.4 idna==3.4
ipykernel==6.25.2 ipykernel==6.25.2
@ -25,6 +30,7 @@ kiwisolver==1.4.5
lxml==4.9.3 lxml==4.9.3
matplotlib==3.8.0 matplotlib==3.8.0
matplotlib-inline==0.1.6 matplotlib-inline==0.1.6
multidict==6.0.4
mypy==1.5.1 mypy==1.5.1
mypy-extensions==1.0.0 mypy-extensions==1.0.0
nest-asyncio==1.5.8 nest-asyncio==1.5.8
@ -70,3 +76,4 @@ typing_extensions==4.8.0
tzdata==2023.3 tzdata==2023.3
urllib3==2.0.4 urllib3==2.0.4
wcwidth==0.2.6 wcwidth==0.2.6
yarl==1.9.2

View File

@ -1,15 +1,21 @@
aiohttp==3.8.5
aiosignal==1.3.1
annotated-types==0.5.0 annotated-types==0.5.0
anyio==3.7.1 anyio==3.7.1
async-timeout==4.0.3
attrs==23.1.0
beautifulsoup4==4.12.2 beautifulsoup4==4.12.2
bs4==0.0.1 bs4==0.0.1
certifi==2023.7.22 certifi==2023.7.22
charset-normalizer==3.2.0 charset-normalizer==3.2.0
click==8.1.7 click==8.1.7
fastapi==0.103.1 fastapi==0.103.1
frozenlist==1.4.0
greenlet==2.0.2 greenlet==2.0.2
h11==0.14.0 h11==0.14.0
idna==3.4 idna==3.4
lxml==4.9.3 lxml==4.9.3
multidict==6.0.4
numpy==1.26.0 numpy==1.26.0
pandas==2.1.1 pandas==2.1.1
psycopg==3.1.10 psycopg==3.1.10
@ -30,3 +36,4 @@ typing_extensions==4.8.0
tzdata==2023.3 tzdata==2023.3
urllib3==2.0.5 urllib3==2.0.5
uvicorn==0.23.2 uvicorn==0.23.2
yarl==1.9.2

View File

@ -33,6 +33,14 @@ def split_addresses(df: pd.DataFrame) -> pd.DataFrame
```python ```python
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
``` ```
- `async_fetch_building_ids`:
```python
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame
```
- `concurrent_fetch_builing_ids`:
```python
def concurrent_fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
```
- `preprocess_df`: - `preprocess_df`:
```python ```python
def preprocess_df(df: pd.DataFrame) -> pd.DataFrame def preprocess_df(df: pd.DataFrame) -> pd.DataFrame

View File

@ -1,5 +1,5 @@
from .rosseti import RossetiParser from .rosseti import RossetiParser
from .address import split_addresses from .address import split_addresses
from .building_id import fetch_builing_ids from .building_id import fetch_builing_ids, async_fetch_building_ids, concurrent_fetch_builing_ids
from .preprocess import preprocess_df, COL_NS, ICOL_NS, preprocess_read_df, group_by_index from .preprocess import preprocess_df, COL_NS, ICOL_NS, preprocess_read_df, group_by_index
from .util import pipeline from .util import pipeline

View File

@ -4,6 +4,8 @@ from typing import Optional, Tuple, Any, List
import requests import requests
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import asyncio
import aiohttp
GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]] GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
@ -29,3 +31,42 @@ def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
lambda row: get_building_id(row['Улица']), axis=1, result_type='expand') lambda row: get_building_id(row['Улица']), axis=1, result_type='expand')
return df return df
async def async_fetch_building_id(session: aiohttp.ClientSession, street: str) -> GeoTupleType:
    """Resolve a single street string through the geocode.gate.petersburg.ru service.

    Args:
        session: Open aiohttp client session used to issue the GET request.
        street: Street text sent as the ``street`` query parameter.

    Returns:
        A ``(Building_ID, Latitude, Longitude)`` tuple taken from the JSON
        reply, or ``(None, None, None)`` when ``street`` is null or the reply
        contains an ``error`` key.
    """
    not_found = (None, None, None)

    # Null streets (None / NaN) are never sent to the service.
    if pd.isnull(street):
        return not_found

    query = {'street': street}
    async with session.get('https://geocode.gate.petersburg.ru/parse/eas', params=query) as response:
        # The body has to be read while the response is still open.
        payload = await response.json()

    if 'error' in payload:
        return not_found

    return payload['Building_ID'], payload['Latitude'], payload['Longitude']
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame:
    """Geocode every value of the 'Улица' column concurrently.

    One request per row is issued through a single shared aiohttp session and
    the results are written to the 'ID здания', 'Широта' and 'Долгота'
    columns in row order.

    Args:
        df: Frame with an 'Улица' column. It is modified in place.

    Returns:
        The same ``df`` object with the three result columns set.
    """
    columns = ['ID здания', 'Широта', 'Долгота']

    # With zero rows there is nothing to fetch, and assigning an empty result
    # list to three columns raises "Columns must be same length as key".
    # Create the (empty) columns directly instead.
    if len(df.index) == 0:
        for column in columns:
            df[column] = None
        return df

    async with aiohttp.ClientSession() as session:
        # gather() schedules every coroutine and returns results in input
        # order, so they line up with the rows of df.
        results = await asyncio.gather(
            *(async_fetch_building_id(session, street) for street in df['Улица'])
        )

    df[columns] = results

    return df
def concurrent_fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
    """Synchronous entry point for ``async_fetch_building_ids``.

    Runs the coroutine to completion on a fresh event loop via
    ``asyncio.run`` and returns its result.

    Args:
        df: Frame passed unchanged to ``async_fetch_building_ids``.

    Returns:
        Whatever ``async_fetch_building_ids(df)`` returns.

    Raises:
        RuntimeError: If called while another asyncio event loop is already
            running in the current thread (a limitation of ``asyncio.run``).
    """
    return asyncio.run(async_fetch_building_ids(df))

View File

@ -1,6 +1,6 @@
from typing import Optional from typing import Optional
from . import RossetiParser, split_addresses, fetch_builing_ids, preprocess_df from . import RossetiParser, split_addresses, concurrent_fetch_builing_ids, preprocess_df
def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser: def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
@ -11,7 +11,7 @@ def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
parser.df = split_addresses(parser.df) parser.df = split_addresses(parser.df)
parser.df = fetch_builing_ids(parser.df) parser.df = concurrent_fetch_builing_ids(parser.df)
parser.df = preprocess_df(parser.df) parser.df = preprocess_df(parser.df)