gobbler/categorize/match/match.go

89 lines
1.9 KiB
Go

package match
import (
"errors"
"strings"
"unicode"
"github.com/agnivade/levenshtein"
)
// ErrNoMatches is returned by FindBestMatch when no entry of the index shares
// a meaningful common prefix with the candidate.
var ErrNoMatches = errors.New("no match found")
// match pairs a string with a score describing how similar it is to some
// reference string. A lower score means a closer match.
type match struct {
// Str is the string that was scored.
Str string
// Score is the similarity score of Str; lower is better.
Score int
}
func FindBestMatch(candidate string, index []string) (string, error) {
haveCommonPrefix := []string{}
for _, indexEntry := range index {
prefix := getLongestCommonPrefix(
normalizeForMatch(candidate),
normalizeForMatch(indexEntry),
)
if isPrefixMeaningful(prefix) {
haveCommonPrefix = append(haveCommonPrefix, indexEntry)
}
}
if len(haveCommonPrefix) == 0 {
return "", ErrNoMatches
}
return FindClosestString(candidate, haveCommonPrefix), nil
}
// Finds the string in "index" that best matches to candidate
func FindClosestString(candidate string, index []string) string {
matches := calculateScores(candidate, index)
return findMinScoreMatch(matches).Str
}
// calculateScores pairs every entry of showFolders with the distance reported
// by levenshtein.ComputeDistance between candidate and that entry. The result
// has the same length and order as showFolders.
func calculateScores(candidate string, showFolders []string) []match {
	scored := make([]match, 0, len(showFolders))
	for _, folder := range showFolders {
		distance := levenshtein.ComputeDistance(candidate, folder)
		scored = append(scored, match{Str: folder, Score: distance})
	}
	return scored
}
// findMinScoreMatch returns the element of matches with the lowest Score.
// When several elements share the lowest score, the first of them is returned.
//
// For an empty (or nil) slice it returns the zero match instead of panicking.
// Callers that must tell "no matches" apart from a real result should check
// len(matches) themselves.
func findMinScoreMatch(matches []match) match {
	if len(matches) == 0 {
		return match{}
	}
	best := matches[0]
	for _, m := range matches[1:] {
		// Strict comparison keeps the earliest element on ties.
		if m.Score < best.Score {
			best = m
		}
	}
	return best
}
func normalizeForMatch(target string) string {
return filterChars(target, func(r rune) bool {
return !isCharLikelySeparator(r)
})
}
// filterChars returns a copy of target containing only the runes for which
// shouldAllowChar returns true, in their original order. The predicate is
// called once per rune, from left to right.
func filterChars(target string, shouldAllowChar func(s rune) bool) string {
	keepOrDrop := func(r rune) rune {
		if shouldAllowChar(r) {
			return r
		}
		// A negative result tells strings.Map to drop the rune.
		return -1
	}
	return strings.Map(keepOrDrop, target)
}
// isCharLikelySeparator reports whether r is neither a letter nor a number
// according to the unicode package (for example whitespace, punctuation or
// symbols).
func isCharLikelySeparator(r rune) bool {
	isLetterOrNumber := unicode.IsLetter(r) || unicode.IsNumber(r)
	return !isLetterOrNumber
}