Implement fuzzy search and ranking

The regex-based method for finding key matches does not work very well for deeper
hierarchies or for partial matches of the hierarchy levels. This approach uses
SequenceMatcher from the difflib module to compute a score for each entry and
ranks the entries accordingly.

This change also filters out all hidden file system entries to avoid traversing
and evaluating the Git repositories that are commonly used to store pass password stores.
This commit is contained in:
Matthias Vogelgesang 2017-09-04 11:23:52 +02:00
parent d400a05435
commit 119a5c42f0

View File

@ -27,8 +27,9 @@
from os import getenv
from os import walk
from os.path import expanduser
from os.path import expanduser, join as path_join
import re
import difflib
import subprocess
import dbus
@ -81,22 +82,25 @@ class SearchPassService(dbus.service.Object):
pass
def get_result_set(self, terms):
names = []
for term in terms:
names += self.get_password_names(term)
return set(names)
name = ' '.join(terms)
matcher = difflib.SequenceMatcher(b=name, autojunk=False)
matches = []
def get_password_names(self, name):
names = []
for root, dirs, files in walk(self.password_store):
dir_path = root[len(self.password_store) + 1:]
for file in files:
file_path = '{0}/{1}'.format(dir_path, file)
if re.match(r'.*{0}.*\.gpg$'.format(name),
file_path,
re.IGNORECASE):
names.append(file_path[:-4])
return names
if dir_path.startswith('.'):
continue
for filename in files:
path = path_join(dir_path, filename)
matcher.set_seq1(path[:-4])
score = matcher.ratio()
if score >= 0.5:
matches.append((score, path[:-4]))
return [x[1] for x in sorted(matches, key=lambda x: x[0], reverse=True)]
def send_password_to_gpaste(self, name):
pass_cmd = subprocess.run(