pfsm

Python Fast Strings Matching


Keywords
strings, matching, distance, cosine, similarity, ngrams, tf-idf, fast, python
License
GPL-3.0
Install
pip install pfsm==0.1.3

Documentation

Python Fast Strings Matching

This package provides a fast way to perform string-matching tasks.

Installing pfsm

pip install pfsm

Examples

from pfsm import *
import numpy as np

def stringsVsStrings(array_1, array_2):
    """Compare every string in ``array_1`` against every string in ``array_2``.

    Both inputs are stacked into a single ``Strings`` object so that they are
    processed together; the resulting matrix ``M`` is then split back into the
    rows belonging to each input before being handed to ``cosimtop``.

    Args:
        array_1: Sequence of strings forming the first ``len(array_1)`` rows.
        array_2: Sequence of strings forming the remaining rows.

    Returns:
        Whatever ``cosimtop`` returns for the two row blocks.
        NOTE(review): presumably a sparse similarity matrix with one row per
        ``array_1`` entry and one column per ``array_2`` entry -- confirm
        against the pfsm implementation.
    """
    n_first = len(array_1)

    # Stack both inputs so they go through the same processing pipeline.
    combined = np.concatenate((np.array(array_1), np.array(array_2)))
    matcher = Strings(combined)
    matcher.processStrings()
    matcher.tfidfM()

    # Rows [0, n_first) come from array_1, the rest from array_2.
    first_rows = matcher.M[0:n_first, ]
    second_rows_t = matcher.M[n_first:, ].transpose()

    # NOTE(review): 3 and 0.5 look like a top-n limit and a lower similarity
    # bound -- confirm against cosimtop's signature.
    return cosimtop(first_rows, second_rows_t, 3, 0.5)

# Match two names against three candidate names and convert the result
# to a dense matrix for inspection.
M = stringsVsStrings(
    ["Company X", "Bank XXYYZZ"],
    ["Bank XYZ", "Sample Corporation", "X Company"],
).todense()
from pfsm import *

def getIdsFromStrings(strings):
    """Run the full ``Strings`` pipeline on ``strings`` and return its groups.

    The steps are executed in a fixed order on a single ``Strings`` object:
    ``processStrings`` -> ``tfidfM`` -> ``groupByKMeans`` -> ``getEntityID``.

    Args:
        strings: Collection of strings passed to the ``Strings`` constructor.

    Returns:
        The ``groups`` attribute of the ``Strings`` object after all steps
        have run.
        NOTE(review): presumably a table mapping each input string to an
        entity/group id -- confirm against the pfsm implementation.
    """
    matcher = Strings(strings)

    # Each call updates ``matcher`` in place; the order below mirrors the
    # documented usage and should not be changed.
    matcher.processStrings()
    matcher.tfidfM()
    matcher.groupByKMeans()
    matcher.getEntityID()

    return matcher.groups

# Group five sample names and fetch the resulting ``groups`` object.
dtf_strings_ids = getIdsFromStrings(
    ["Company X", "Bank XXYYZZ", "Bank XYZ", "Sample Corporation", "X Company"]
)