Added async building_id fetching and defaulted it
This commit is contained in:
parent
c705a0ed02
commit
464d0d3640
451
analysis.ipynb
451
analysis.ipynb
File diff suppressed because one or more lines are too long
@ -1,6 +1,10 @@
|
|||||||
|
aiohttp==3.8.5
|
||||||
|
aiosignal==1.3.1
|
||||||
annotated-types==0.5.0
|
annotated-types==0.5.0
|
||||||
anyio==3.7.1
|
anyio==3.7.1
|
||||||
asttokens==2.4.0
|
asttokens==2.4.0
|
||||||
|
async-timeout==4.0.3
|
||||||
|
attrs==23.1.0
|
||||||
autopep8==2.0.4
|
autopep8==2.0.4
|
||||||
backcall==0.2.0
|
backcall==0.2.0
|
||||||
beautifulsoup4==4.12.2
|
beautifulsoup4==4.12.2
|
||||||
@ -14,6 +18,7 @@ decorator==5.1.1
|
|||||||
executing==1.2.0
|
executing==1.2.0
|
||||||
fastapi==0.103.1
|
fastapi==0.103.1
|
||||||
fonttools==4.42.1
|
fonttools==4.42.1
|
||||||
|
frozenlist==1.4.0
|
||||||
greenlet==2.0.2
|
greenlet==2.0.2
|
||||||
idna==3.4
|
idna==3.4
|
||||||
ipykernel==6.25.2
|
ipykernel==6.25.2
|
||||||
@ -25,6 +30,7 @@ kiwisolver==1.4.5
|
|||||||
lxml==4.9.3
|
lxml==4.9.3
|
||||||
matplotlib==3.8.0
|
matplotlib==3.8.0
|
||||||
matplotlib-inline==0.1.6
|
matplotlib-inline==0.1.6
|
||||||
|
multidict==6.0.4
|
||||||
mypy==1.5.1
|
mypy==1.5.1
|
||||||
mypy-extensions==1.0.0
|
mypy-extensions==1.0.0
|
||||||
nest-asyncio==1.5.8
|
nest-asyncio==1.5.8
|
||||||
@ -70,3 +76,4 @@ typing_extensions==4.8.0
|
|||||||
tzdata==2023.3
|
tzdata==2023.3
|
||||||
urllib3==2.0.4
|
urllib3==2.0.4
|
||||||
wcwidth==0.2.6
|
wcwidth==0.2.6
|
||||||
|
yarl==1.9.2
|
||||||
|
@ -1,15 +1,21 @@
|
|||||||
|
aiohttp==3.8.5
|
||||||
|
aiosignal==1.3.1
|
||||||
annotated-types==0.5.0
|
annotated-types==0.5.0
|
||||||
anyio==3.7.1
|
anyio==3.7.1
|
||||||
|
async-timeout==4.0.3
|
||||||
|
attrs==23.1.0
|
||||||
beautifulsoup4==4.12.2
|
beautifulsoup4==4.12.2
|
||||||
bs4==0.0.1
|
bs4==0.0.1
|
||||||
certifi==2023.7.22
|
certifi==2023.7.22
|
||||||
charset-normalizer==3.2.0
|
charset-normalizer==3.2.0
|
||||||
click==8.1.7
|
click==8.1.7
|
||||||
fastapi==0.103.1
|
fastapi==0.103.1
|
||||||
|
frozenlist==1.4.0
|
||||||
greenlet==2.0.2
|
greenlet==2.0.2
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
idna==3.4
|
idna==3.4
|
||||||
lxml==4.9.3
|
lxml==4.9.3
|
||||||
|
multidict==6.0.4
|
||||||
numpy==1.26.0
|
numpy==1.26.0
|
||||||
pandas==2.1.1
|
pandas==2.1.1
|
||||||
psycopg==3.1.10
|
psycopg==3.1.10
|
||||||
@ -30,3 +36,4 @@ typing_extensions==4.8.0
|
|||||||
tzdata==2023.3
|
tzdata==2023.3
|
||||||
urllib3==2.0.5
|
urllib3==2.0.5
|
||||||
uvicorn==0.23.2
|
uvicorn==0.23.2
|
||||||
|
yarl==1.9.2
|
||||||
|
@ -33,6 +33,14 @@ def split_addresses(df: pd.DataFrame) -> pd.DataFrame
|
|||||||
```python
|
```python
|
||||||
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
|
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
|
||||||
```
|
```
|
||||||
|
- `async_fetch_building_ids`:
|
||||||
|
```python
|
||||||
|
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame
|
||||||
|
```
|
||||||
|
- `concurrent_fetch_builing_ids`:
|
||||||
|
```python
|
||||||
|
def concurrent_fetch_builing_ids(df: pd.Dataframe) -> pd.DataFrame
|
||||||
|
```
|
||||||
- `preprocess_df`:
|
- `preprocess_df`:
|
||||||
```python
|
```python
|
||||||
def preprocess_df(df: pd.DataFrame) -> pd.DataFrame
|
def preprocess_df(df: pd.DataFrame) -> pd.DataFrame
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from .rosseti import RossetiParser
|
from .rosseti import RossetiParser
|
||||||
from .address import split_addresses
|
from .address import split_addresses
|
||||||
from .building_id import fetch_builing_ids
|
from .building_id import fetch_builing_ids, async_fetch_building_ids, concurrent_fetch_builing_ids
|
||||||
from .preprocess import preprocess_df, COL_NS, ICOL_NS, preprocess_read_df, group_by_index
|
from .preprocess import preprocess_df, COL_NS, ICOL_NS, preprocess_read_df, group_by_index
|
||||||
from .util import pipeline
|
from .util import pipeline
|
||||||
|
@ -4,6 +4,8 @@ from typing import Optional, Tuple, Any, List
|
|||||||
import requests
|
import requests
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import asyncio
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
|
GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
|
||||||
|
|
||||||
@ -29,3 +31,42 @@ def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
|
|||||||
lambda row: get_building_id(row['Улица']), axis=1, result_type='expand')
|
lambda row: get_building_id(row['Улица']), axis=1, result_type='expand')
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
async def async_fetch_building_id(session: aiohttp.ClientSession, street: str) -> GeoTupleType:
|
||||||
|
if pd.isnull(street):
|
||||||
|
return None, None, None
|
||||||
|
|
||||||
|
async with session.get('https://geocode.gate.petersburg.ru/parse/eas', params={
|
||||||
|
'street': street
|
||||||
|
}) as r:
|
||||||
|
res = await r.json()
|
||||||
|
|
||||||
|
if 'error' in res:
|
||||||
|
return None, None, None
|
||||||
|
|
||||||
|
return res['Building_ID'], res['Latitude'], res['Longitude']
|
||||||
|
|
||||||
|
|
||||||
|
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
tasks = []
|
||||||
|
|
||||||
|
for _, row in df.iterrows():
|
||||||
|
tasks.append(
|
||||||
|
asyncio.ensure_future(
|
||||||
|
async_fetch_building_id(session, row['Улица'])
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
res = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
df[['ID здания', 'Широта', 'Долгота']] = res
|
||||||
|
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def concurrent_fetch_builing_ids(df: pd.Dataframe) -> pd.DataFrame:
|
||||||
|
return asyncio.run(
|
||||||
|
async_fetch_building_ids(df)
|
||||||
|
)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from . import RossetiParser, split_addresses, fetch_builing_ids, preprocess_df
|
from . import RossetiParser, split_addresses, concurrent_fetch_builing_ids, preprocess_df
|
||||||
|
|
||||||
|
|
||||||
def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
|
def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
|
||||||
@ -11,7 +11,7 @@ def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
|
|||||||
|
|
||||||
parser.df = split_addresses(parser.df)
|
parser.df = split_addresses(parser.df)
|
||||||
|
|
||||||
parser.df = fetch_builing_ids(parser.df)
|
parser.df = concurrent_fetch_builing_ids(parser.df)
|
||||||
|
|
||||||
parser.df = preprocess_df(parser.df)
|
parser.df = preprocess_df(parser.df)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user