Compare commits
No commits in common. "95ce7d77a528ac5b88b57fd292e1e45306f4883d" and "c705a0ed0216769b242154ac5aa95e8856a72882" have entirely different histories.
95ce7d77a5
...
c705a0ed02
451
analysis.ipynb
451
analysis.ipynb
File diff suppressed because one or more lines are too long
@ -40,7 +40,7 @@ def contains_lower(name, val):
|
||||
return getattr(models.Record, name) == val
|
||||
|
||||
|
||||
def and_if_can(a: BinaryExpression, b: Optional[BinaryExpression]) -> BinaryExpression:
|
||||
def and_if_can(a: BinaryExpression, b: Optional[BinaryExpression]):
|
||||
if b is not None:
|
||||
return a & b
|
||||
else:
|
||||
@ -63,8 +63,8 @@ def search_each(db: Session, filters: schemas.RecordRequest) -> List[schemas.Rec
|
||||
|
||||
if query is None:
|
||||
res = db.query(models.Record).all()
|
||||
else:
|
||||
res = db.query(models.Record).filter(query).all()
|
||||
|
||||
res = db.query(models.Record).filter(query).all()
|
||||
|
||||
return res
|
||||
|
||||
|
@ -1,10 +1,6 @@
|
||||
aiohttp==3.8.5
|
||||
aiosignal==1.3.1
|
||||
annotated-types==0.5.0
|
||||
anyio==3.7.1
|
||||
asttokens==2.4.0
|
||||
async-timeout==4.0.3
|
||||
attrs==23.1.0
|
||||
autopep8==2.0.4
|
||||
backcall==0.2.0
|
||||
beautifulsoup4==4.12.2
|
||||
@ -18,7 +14,6 @@ decorator==5.1.1
|
||||
executing==1.2.0
|
||||
fastapi==0.103.1
|
||||
fonttools==4.42.1
|
||||
frozenlist==1.4.0
|
||||
greenlet==2.0.2
|
||||
idna==3.4
|
||||
ipykernel==6.25.2
|
||||
@ -30,7 +25,6 @@ kiwisolver==1.4.5
|
||||
lxml==4.9.3
|
||||
matplotlib==3.8.0
|
||||
matplotlib-inline==0.1.6
|
||||
multidict==6.0.4
|
||||
mypy==1.5.1
|
||||
mypy-extensions==1.0.0
|
||||
nest-asyncio==1.5.8
|
||||
@ -76,4 +70,3 @@ typing_extensions==4.8.0
|
||||
tzdata==2023.3
|
||||
urllib3==2.0.4
|
||||
wcwidth==0.2.6
|
||||
yarl==1.9.2
|
||||
|
@ -1,21 +1,15 @@
|
||||
aiohttp==3.8.5
|
||||
aiosignal==1.3.1
|
||||
annotated-types==0.5.0
|
||||
anyio==3.7.1
|
||||
async-timeout==4.0.3
|
||||
attrs==23.1.0
|
||||
beautifulsoup4==4.12.2
|
||||
bs4==0.0.1
|
||||
certifi==2023.7.22
|
||||
charset-normalizer==3.2.0
|
||||
click==8.1.7
|
||||
fastapi==0.103.1
|
||||
frozenlist==1.4.0
|
||||
greenlet==2.0.2
|
||||
h11==0.14.0
|
||||
idna==3.4
|
||||
lxml==4.9.3
|
||||
multidict==6.0.4
|
||||
numpy==1.26.0
|
||||
pandas==2.1.1
|
||||
psycopg==3.1.10
|
||||
@ -36,4 +30,3 @@ typing_extensions==4.8.0
|
||||
tzdata==2023.3
|
||||
urllib3==2.0.5
|
||||
uvicorn==0.23.2
|
||||
yarl==1.9.2
|
||||
|
@ -33,14 +33,6 @@ def split_addresses(df: pd.DataFrame) -> pd.DataFrame
|
||||
```python
|
||||
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
|
||||
```
|
||||
- `async_fetch_building_ids`:
|
||||
```python
|
||||
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame
|
||||
```
|
||||
- `concurrent_fetch_builing_ids`:
|
||||
```python
|
||||
def concurrent_fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame
|
||||
```
|
||||
- `preprocess_df`:
|
||||
```python
|
||||
def preprocess_df(df: pd.DataFrame) -> pd.DataFrame
|
||||
|
@ -1,5 +1,5 @@
|
||||
from .rosseti import RossetiParser
|
||||
from .address import split_addresses
|
||||
from .building_id import fetch_builing_ids, async_fetch_building_ids, concurrent_fetch_builing_ids
|
||||
from .building_id import fetch_builing_ids
|
||||
from .preprocess import preprocess_df, COL_NS, ICOL_NS, preprocess_read_df, group_by_index
|
||||
from .util import pipeline
|
||||
|
@ -4,8 +4,6 @@ from typing import Optional, Tuple, Any, List
|
||||
import requests
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import asyncio
|
||||
import aiohttp
|
||||
|
||||
GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
|
||||
|
||||
@ -31,42 +29,3 @@ def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
|
||||
lambda row: get_building_id(row['Улица']), axis=1, result_type='expand')
|
||||
|
||||
return df
|
||||
|
||||
|
||||
async def async_fetch_building_id(session: aiohttp.ClientSession, street: str) -> GeoTupleType:
    """Look up one street through the geocoding endpoint.

    Sends a GET request with ``street`` as the ``street`` query parameter
    using the supplied ``session`` and decodes the response body as JSON.

    Returns a ``(Building_ID, Latitude, Longitude)`` tuple taken from the
    response, or ``(None, None, None)`` when ``street`` is null or the
    response contains an ``'error'`` key.
    """
    not_found: GeoTupleType = (None, None, None)

    # Null streets cannot be geocoded; skip the request entirely.
    if pd.isnull(street):
        return not_found

    query = {'street': street}
    async with session.get('https://geocode.gate.petersburg.ru/parse/eas', params=query) as response:
        payload = await response.json()

    # The service reports failed lookups through an 'error' key.
    if 'error' in payload:
        return not_found

    return payload['Building_ID'], payload['Latitude'], payload['Longitude']
|
||||
|
||||
|
||||
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame:
    """Geocode every row of ``df`` concurrently.

    For each value of the ``'Улица'`` column one lookup coroutine is
    scheduled on a shared ``aiohttp.ClientSession``; all lookups are awaited
    together and their results are written, in row order, to the columns
    ``'ID здания'``, ``'Широта'`` and ``'Долгота'``.

    ``df`` is modified in place and also returned. A frame with no rows gets
    the three result columns added (empty) without opening a session.
    """
    result_columns = ['ID здания', 'Широта', 'Долгота']

    # With zero rows gather() yields [], and assigning [] to three columns
    # raises "Columns must be same length as key" - handle that case here.
    if df.index.empty:
        for column in result_columns:
            df[column] = None
        return df

    async with aiohttp.ClientSession() as session:
        # gather() schedules the coroutines itself and preserves input order,
        # so results line up with the rows of df.
        res = await asyncio.gather(
            *(async_fetch_building_id(session, street) for street in df['Улица'])
        )

    df[result_columns] = res

    return df
|
||||
|
||||
|
||||
def concurrent_fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
    """Synchronous wrapper around ``async_fetch_building_ids``.

    Runs the asynchronous bulk lookup to completion with ``asyncio.run`` and
    returns the frame it produces.

    NOTE: ``asyncio.run`` cannot be called while another event loop is
    already running in the same thread.
    """
    return asyncio.run(async_fetch_building_ids(df))
|
||||
|
@ -1,6 +1,6 @@
|
||||
from typing import Optional
|
||||
|
||||
from . import RossetiParser, split_addresses, concurrent_fetch_builing_ids, preprocess_df
|
||||
from . import RossetiParser, split_addresses, fetch_builing_ids, preprocess_df
|
||||
|
||||
|
||||
def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
|
||||
@ -11,7 +11,7 @@ def pipeline(parser: Optional[RossetiParser] = None) -> RossetiParser:
|
||||
|
||||
parser.df = split_addresses(parser.df)
|
||||
|
||||
parser.df = concurrent_fetch_builing_ids(parser.df)
|
||||
parser.df = fetch_builing_ids(parser.df)
|
||||
|
||||
parser.df = preprocess_df(parser.df)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user