New functions
This commit is contained in:
parent
a71acc2ddf
commit
1fd7a123f9
@ -130,6 +130,21 @@ def address_classification(token: pd.Series, pre_token: pd.Series) -> pd.Series:
|
||||
token["obj"] = re.sub(r"\(\)", brackets.group(), token["obj"])
|
||||
return token
|
||||
|
||||
def cut_address(ad: pd.Series, cl: str) -> pd.Series:
    """Trim trailing address components that rank after ``cl`` in ``CLASSES``.

    While the last letter of ``ad["class"]`` has a greater index in
    ``CLASSES`` than the first letter of ``cl``, the text matching that
    letter's pattern is removed from ``ad["address"]`` and the letter is
    dropped from ``ad["class"]``.

    Args:
        ad: Record exposing a ``"class"`` string (one letter per component)
            and an ``"address"`` string. It is modified in place.
        cl: Class string of the incoming token; only ``cl[0]`` is used.

    Returns:
        The same ``ad`` object after trimming.
    """
    if not ad["class"]:
        # Nothing to trim; mirrors the original short-circuit, which never
        # looked at ``cl`` when the class string was empty.
        return ad

    # Loop-invariant: the rank every trailing component is compared against.
    limit = CLASSES.index(cl[0])

    while ad["class"] and CLASSES.index(ad["class"][-1]) > limit:
        last = ad["class"][-1]
        address = ad["address"]

        if last == "h":
            # Number of up to 4 digits (optional "/" part) with an optional
            # prefix built from the letters "мкдтпучасток" (presumably a
            # house/plot marker). NOTE: this lower-cases the whole address.
            ad["address"] = re.sub(
                r"[мкдтпучасток]*\.? ?\d{1,4} ?\/*\d* ?", "", address.lower()
            )
        elif last == "b":
            # Cut out only the LAST "к. N"-like match by position. Re-using
            # the matched text as a regex would treat "." as a wildcard and
            # strip every occurrence; an address without a match is left
            # untouched instead of raising IndexError.
            matches = list(re.finditer(r"к{0,1}\.? ?\d", address))
            if matches:
                start, end = matches[-1].span()
                ad["address"] = address[:start] + address[end:]
        elif last == "l":
            # Single trailing Cyrillic letter, optionally preceded by
            # characters from "литера".
            ad["address"] = re.sub(
                r"[литера]*\.? ?[А-Яа-я]{1}$", "", address
            )
        elif last == "r":
            # "пом" followed by digits.
            ad["address"] = re.sub(r"пом\.? ?\d+", "", address)

        # Drop the class letter that was just handled (or is unknown), so
        # the loop always makes progress.
        ad["class"] = ad["class"][:-1]

    return ad
|
||||
|
||||
|
||||
# TODO: rework the chain of if statements into a cleaner structure
|
||||
def split_address(address: str) -> List[str]:
|
||||
@ -163,31 +178,27 @@ def split_address(address: str) -> List[str]:
|
||||
accumulator["class"] = cur_tk['class']
|
||||
accumulator["address"] = cur_tk["obj"]
|
||||
continue
|
||||
|
||||
if CLASSES.index(accumulator["class"][-1]) < CLASSES.index(cur_tk["class"][0]) and accumulator["class"]!="w":
|
||||
accumulator["class"] += cur_tk['class']
|
||||
accumulator["address"] += " " + cur_tk["obj"]
|
||||
else:
|
||||
ad_no_ranges = unfold_house_ranges(accumulator["address"])
|
||||
accumulator["address"] = ad_no_ranges[-1]
|
||||
|
||||
res.extend(ad_no_ranges)
|
||||
while accumulator["class"] and CLASSES.index(accumulator["class"][-1]) > CLASSES.index(cur_tk["class"][0]):
|
||||
if accumulator["class"][-1] == "h":
|
||||
accumulator["address"] = re.sub(r"[мкдтпучасток]*\.? ?\d{1,4} ?\/*\d* ?", "", accumulator["address"].lower())
|
||||
elif accumulator["class"][-1] == "b":
|
||||
num = re.findall("к{0,1}\.? ?\d", accumulator["address"])[-1]
|
||||
accumulator["address"] = re.sub(num, "", accumulator["address"])
|
||||
elif accumulator["class"][-1] == "l":
|
||||
accumulator ["address"] = re.sub(r"[литера]*\.? ?[А-Яа-я]{1}$","", accumulator["address"])
|
||||
elif accumulator["class"][-1] == "r":
|
||||
accumulator["address"] = re.sub(r"пом\.? ?\d+","", accumulator["address"])
|
||||
accumulator["class"] = accumulator["class"][:-1]
|
||||
|
||||
accumulator = cut_address(accumulator, cur_tk["class"])
|
||||
|
||||
if not accumulator["class"] or CLASSES.index(cur_tk["class"][0]) <= CLASSES.index("s") or accumulator["class"]=="w":
|
||||
accumulator["class"] = cur_tk["class"]
|
||||
accumulator["address"] = cur_tk["obj"]
|
||||
|
||||
if cur_tk["class"][0] == "h":
|
||||
num = re.findall("\d{1,4} ?\/?\d* ?", cur_tk['obj'])[0]
|
||||
accumulator["address"] = re.sub(r"\d{1,4} ?\/*\d* ?", num, accumulator["address"])
|
||||
cur_tk["class"] =cur_tk["class"][1:]
|
||||
|
||||
if cur_tk["class"] and cur_tk["class"][0] == "b":
|
||||
num = re.findall("\d", cur_tk["obj"])[-1]
|
||||
if num and not "b" in accumulator["class"]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user