Compare commits
No commits in common. "95ce7d77a528ac5b88b57fd292e1e45306f4883d" and "c705a0ed0216769b242154ac5aa95e8856a72882" have entirely different histories.
95ce7d77a5
...
c705a0ed02
451
analysis.ipynb
451
analysis.ipynb
File diff suppressed because one or more lines are too long
@ -40,7 +40,7 @@ def contains_lower(name, val):
|
|||||||
return getattr(models.Record, name) == val
|
return getattr(models.Record, name) == val
|
||||||
|
|
||||||
|
|
||||||
def and_if_can(a: BinaryExpression, b: Optional[BinaryExpression]) -> BinaryExpression:
|
def and_if_can(a: BinaryExpression, b: Optional[BinaryExpression]):
|
||||||
if b is not None:
|
if b is not None:
|
||||||
return a & b
|
return a & b
|
||||||
else:
|
else:
|
||||||
@ -63,8 +63,8 @@ def search_each(db: Session, filters: schemas.RecordRequest) -> List[schemas.Rec
|
|||||||
|
|
||||||
if query is None:
|
if query is None:
|
||||||
res = db.query(models.Record).all()
|
res = db.query(models.Record).all()
|
||||||
else:
|
|
||||||
res = db.query(models.Record).filter(query).all()
|
res = db.query(models.Record).filter(query).all()
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
@ -1,10 +1,6 @@
|
|||||||
aiohttp==3.8.5
|
|
||||||
aiosignal==1.3.1
|
|
||||||
annotated-types==0.5.0
|
annotated-types==0.5.0
|
||||||
anyio==3.7.1
|
anyio==3.7.1
|
||||||
asttokens==2.4.0
|
asttokens==2.4.0
|
||||||
async-timeout==4.0.3
|
|
||||||
attrs==23.1.0
|
|
||||||
autopep8==2.0.4
|
autopep8==2.0.4
|
||||||
backcall==0.2.0
|
backcall==0.2.0
|
||||||
beautifulsoup4==4.12.2
|
beautifulsoup4==4.12.2
|
||||||
@ -18,7 +14,6 @@ decorator==5.1.1
|
|||||||
executing==1.2.0
|
executing==1.2.0
|
||||||
fastapi==0.103.1
|
fastapi==0.103.1
|
||||||
fonttools==4.42.1
|
fonttools==4.42.1
|
||||||
frozenlist==1.4.0
|
|
||||||
greenlet==2.0.2
|
greenlet==2.0.2
|
||||||
idna==3.4
|
idna==3.4
|
||||||
ipykernel==6.25.2
|
ipykernel==6.25.2
|
||||||
@ -30,7 +25,6 @@ kiwisolver==1.4.5
|
|||||||
lxml==4.9.3
|
lxml==4.9.3
|
||||||
matplotlib==3.8.0
|
matplotlib==3.8.0
|
||||||
matplotlib-inline==0.1.6
|
matplotlib-inline==0.1.6
|
||||||
multidict==6.0.4
|
|
||||||
mypy==1.5.1
|
mypy==1.5.1
|
||||||
mypy-extensions==1.0.0
|
mypy-extensions==1.0.0
|
||||||
nest-asyncio==1.5.8
|
nest-asyncio==1.5.8
|
||||||
@ -76,4 +70,3 @@ typing_extensions==4.8.0
|
|||||||
tzdata==2023.3
|
tzdata==2023.3
|
||||||
urllib3==2.0.4
|
urllib3==2.0.4
|
||||||
wcwidth==0.2.6
|
wcwidth==0.2.6
|
||||||
yarl==1.9.2
|
|
||||||
|
@ -1,21 +1,15 @@
|
|||||||
aiohttp==3.8.5
|
|
||||||
aiosignal==1.3.1
|
|
||||||
annotated-types==0.5.0
|
annotated-types==0.5.0
|
||||||
anyio==3.7.1
|
anyio==3.7.1
|
||||||
async-timeout==4.0.3
|
|
||||||
attrs==23.1.0
|
|
||||||
beautifulsoup4==4.12.2
|
beautifulsoup4==4.12.2
|
||||||
bs4==0.0.1
|
bs4==0.0.1
|
||||||
certifi==2023.7.22
|
certifi==2023.7.22
|
||||||
charset-normalizer==3.2.0
|
charset-normalizer==3.2.0
|
||||||
click==8.1.7
|
click==8.1.7
|
||||||
fastapi==0.103.1
|
fastapi==0.103.1
|
||||||
frozenlist==1.4.0
|
|
||||||
greenlet==2.0.2
|
greenlet==2.0.2
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
idna==3.4
|
idna==3.4
|
||||||
lxml==4.9.3
|
lxml==4.9.3
|
||||||
multidict==6.0.4
|
|
||||||
numpy==1.26.0
|
numpy==1.26.0
|
||||||
pandas==2.1.1
|
pandas==2.1.1
|
||||||
psycopg==3.1.10
|
psycopg==3.1.10
|
||||||
@ -36,4 +30,3 @@ typing_extensions==4.8.0
|
|||||||
tzdata==2023.3
|
tzdata==2023.3
|
||||||
urllib3==2.0.5
|
urllib3==2.0.5
|
||||||
uvicorn==0.23.2
|
uvicorn==0.23.2
|
||||||
yarl==1.9.2
|
|
||||||
|
@ -33,14 +33,6 @@ def split_addresses(df: pd.DataFrame) -> pd.DataFrame
|
|||||||
```python
|
```python
|
||||||
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
|
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
|
||||||
```
|
```
|
||||||
- `async_fetch_building_ids`:
|
|
||||||
```python
|
|
||||||
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame
|
|
||||||
```
|
|
||||||
- `concurrent_fetch_builing_ids`:
|
|
||||||
```python
|
|
||||||
def concurrent_fetch_builing_ids(df: pd.Dataframe) -> pd.DataFrame
|
|
||||||
```
|
|
||||||
- `preprocess_df`:
|
- `preprocess_df`:
|
||||||
```python
|
```python
|
||||||
def preprocess_df(df: pd.DataFrame) -> pd.DataFrame
|
def preprocess_df(df: pd.DataFrame) -> pd.DataFrame
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from .rosseti import RossetiParser
|
from .rosseti import RossetiParser
|
||||||
from .address import split_addresses
|
from .address import split_addresses
|
||||||
from .building_id import fetch_builing_ids, async_fetch_building_ids, concurrent_fetch_builing_ids
|
from .building_id import fetch_builing_ids
|
||||||
from .preprocess import preprocess_df, COL_NS, ICOL_NS, preprocess_read_df, group_by_index
|
from .preprocess import preprocess_df, COL_NS, ICOL_NS, preprocess_read_df, group_by_index
|
||||||
from .util import pipeline
|
from .util import pipeline
|
||||||
|
@ -4,8 +4,6 @@ from typing import Optional, Tuple, Any, List
|
|||||||
import requests
|
import requests
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import asyncio
|
|
||||||
import aiohttp
|
|
||||||
|
|
||||||
GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
|
GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
|
||||||
|
|
||||||
@ -31,42 +29,3 @@ def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
|
|||||||
lambda row: get_building_id(row['Улица']), axis=1, result_type='expand')
|
lambda row: get_building_id(row['Улица']), axis=1, result_type='expand')
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
async def async_fetch_building_id(session: aiohttp.ClientSession, street: str) -> GeoTupleType:
|
|
||||||
if pd.isnull(street):
|
|
||||||
return None, None, None
|
|
||||||
|
|
||||||
async with session.get('https://geocode.gate.petersburg.ru/parse/eas', params={
|
|
||||||
'street': street
|
|
||||||
}) as r:
|
|
||||||
res = await r.json()
|
|
||||||
|
|
||||||
if 'error' in res:
|
|
||||||
return None, None, None
|
|
||||||
|
|
||||||
return res['Building_ID'], res['Latitude'], res['Longitude']
|
|
||||||
|
|
||||||
|
|
||||||
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
tasks = []
|
|
||||||
|
|
||||||
for _, row in df.iterrows():
|
|
||||||
tasks.append(
|
|
||||||
asyncio.ensure_future(
|
|
||||||
async_fetch_building_id(session, row['Улица'])
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
res = await asyncio.gather(*tasks)
|
|
||||||
|
|
||||||
df[['ID здания', 'Широта', 'Долгота']] = res
|
|
||||||
|
|
||||||
return df
|
|
||||||
|
|
||||||
|
|
||||||
def concurrent_fetch_builing_ids(df: pd.Dataframe) -> pd.DataFrame:
|
|
||||||
return asyncio.run(
|
|
||||||
async_fetch_building_ids(df)
|
|
||||||
)
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from . import RossetiParser, split_addresses, concurrent_fetch_builing_ids, preprocess_df
|
from . import RossetiParser, split_addresses, fetch_builing_ids, preprocess_df
|
||||||
|
|
||||||
|
|
||||||
def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
|
def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
|
||||||
@ -11,7 +11,7 @@ def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
|
|||||||
|
|
||||||
parser.df = split_addresses(parser.df)
|
parser.df = split_addresses(parser.df)
|
||||||
|
|
||||||
parser.df = concurrent_fetch_builing_ids(parser.df)
|
parser.df = fetch_builing_ids(parser.df)
|
||||||
|
|
||||||
parser.df = preprocess_df(parser.df)
|
parser.df = preprocess_df(parser.df)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user