lenengro_parser/parser/building_id.py

74 lines
1.8 KiB
Python

from __future__ import annotations
import asyncio
from typing import Optional, Tuple
import aiohttp
import pandas as pd
import requests
# Result triple returned by the lookup functions in this module:
# (building ID, latitude, longitude). All three are None when the street
# value is null or the service reply contains an "error" key.
GeoTupleType = Tuple[Optional[int], Optional[float], Optional[float]]
def get_building_id(street: str) -> GeoTupleType:
    """Resolve a street address to a building ID and coordinates (blocking).

    Sends a GET request to ``https://geocode.gate.petersburg.ru/parse/eas``
    with ``street`` as the query parameter (10-second timeout) and decodes
    the reply as JSON.

    Args:
        street: Address string to resolve; may be a null value (None/NaN).

    Returns:
        ``(Building_ID, Latitude, Longitude)`` read from the JSON reply, or
        ``(None, None, None)`` when ``street`` is null or the reply contains
        an ``"error"`` key.
    """
    missing = (None, None, None)
    if pd.isnull(street):
        # Nothing to look up, so skip the network round trip entirely.
        return missing

    response = requests.get(
        "https://geocode.gate.petersburg.ru/parse/eas",
        params={"street": street},
        timeout=10,
    )
    payload = response.json()
    if "error" not in payload:
        return payload["Building_ID"], payload["Latitude"], payload["Longitude"]
    return missing
def fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
    """Add building ID and coordinate columns to ``df`` using blocking lookups.

    Calls ``get_building_id`` once per row with the value of the ``"Улица"``
    column and stores the resulting triples in the columns ``"ID здания"``,
    ``"Широта"`` and ``"Долгота"``.

    Args:
        df: Frame containing an ``"Улица"`` column. It is modified in place.

    Returns:
        The same ``df`` object, with the three result columns set.
    """
    columns = ["ID здания", "Широта", "Долгота"]
    if len(df.index) == 0:
        # DataFrame.apply(..., result_type="expand") on a frame without rows
        # returns a copy of the frame rather than three result columns, so the
        # multi-column assignment below would raise
        # "Columns must be same length as key". Create empty columns instead.
        for column in columns:
            df[column] = None
        return df

    df[columns] = df.apply(
        lambda row: get_building_id(row["Улица"]), axis=1, result_type="expand"
    )
    return df
async def async_fetch_building_id(
    session: aiohttp.ClientSession, street: str
) -> GeoTupleType:
    """Resolve a street address to a building ID and coordinates (async).

    Issues a GET request through ``session`` to
    ``https://geocode.gate.petersburg.ru/parse/eas`` with ``street`` as the
    query parameter and decodes the reply as JSON.

    Args:
        session: Client session used to perform the request; it is not
            touched when ``street`` is null.
        street: Address string to resolve; may be a null value (None/NaN).

    Returns:
        ``(Building_ID, Latitude, Longitude)`` read from the JSON reply, or
        ``(None, None, None)`` when ``street`` is null or the reply contains
        an ``"error"`` key.
    """
    if not pd.isnull(street):
        async with session.get(
            "https://geocode.gate.petersburg.ru/parse/eas", params={"street": street}
        ) as response:
            payload = await response.json()
            if "error" not in payload:
                return (
                    payload["Building_ID"],
                    payload["Latitude"],
                    payload["Longitude"],
                )
    # Reached for a null street or a reply that carries an "error" key.
    return None, None, None
async def async_fetch_building_ids(df: pd.DataFrame) -> pd.DataFrame:
    """Add building ID and coordinate columns to ``df`` using concurrent lookups.

    Starts one ``async_fetch_building_id`` call per value of the ``"Улица"``
    column, all sharing a single ``aiohttp.ClientSession``, waits for all of
    them, and stores the resulting triples in the columns ``"ID здания"``,
    ``"Широта"`` and ``"Долгота"``.

    Args:
        df: Frame containing an ``"Улица"`` column. It is modified in place.

    Returns:
        The same ``df`` object, with the three result columns set.
    """
    columns = ["ID здания", "Широта", "Долгота"]
    if len(df.index) == 0:
        # With no rows gather() yields an empty list, and assigning [] to three
        # columns raises "Columns must be same length as key". Create the empty
        # columns directly and avoid opening a session for nothing.
        for column in columns:
            df[column] = None
        return df

    async with aiohttp.ClientSession() as session:
        # gather() schedules the coroutines itself and returns the results in
        # the order they were passed, i.e. in row order.
        results = await asyncio.gather(
            *(async_fetch_building_id(session, street) for street in df["Улица"])
        )
    df[columns] = results
    return df
def concurrent_fetch_builing_ids(df: pd.DataFrame) -> pd.DataFrame:
    """Synchronous entry point for the concurrent building lookup.

    Runs ``async_fetch_building_ids(df)`` to completion with ``asyncio.run``
    and returns its result.

    Args:
        df: Frame passed through unchanged to ``async_fetch_building_ids``.

    Returns:
        Whatever ``async_fetch_building_ids`` returns for ``df``.

    Note:
        ``asyncio.run`` cannot be called while another event loop is running
        in the same thread; from async code, await
        ``async_fetch_building_ids`` directly instead.
    """
    return asyncio.run(async_fetch_building_ids(df))