gobbler/categorize/match/prefix.go

61 lines
1.7 KiB
Go

package match
import (
	"strings"
	"unicode/utf8"
)
// getLongestCommonPrefix returns the longest prefix shared by a and b, or "" when they share none.
//
// The comparison advances one whole UTF-8 encoded character at a time. The result therefore never
// ends in the middle of a multi-byte character, and two different characters that merely share a
// leading byte (for example "é" and "è") are not mistaken for a match.
func getLongestCommonPrefix(a, b string) string {
	matched := 0
	for matched < len(a) {
		// Invalid bytes decode with size 1, so the loop always makes progress.
		_, size := utf8.DecodeRuneInString(a[matched:])
		// b must hold the very same bytes at the same offset. This check also fails once b is
		// exhausted, which keeps matched <= len(b) and the slice expression below in range.
		if !strings.HasPrefix(b[matched:], a[matched:matched+size]) {
			break
		}
		matched += size
	}
	return a[:matched]
}
// getShortestAndLongestString orders a and b by byte length and returns them as
// (shorter, longer). When both have the same length, a is returned first.
func getShortestAndLongestString(a, b string) (string, string) {
	shorter, longer := a, b
	if len(shorter) > len(longer) {
		shorter, longer = longer, shorter
	}
	return shorter, longer
}
// isPrefixMeaningful reports whether the given prefix of a match is meaningful enough for the
// match to be considered good enough to continue processing. For instance, a longest common
// prefix of a single letter is not really all that meaningful.
func isPrefixMeaningful(prefix string) bool {
	// Single character prefixes aren't that helpful. Count characters rather than bytes so that
	// a lone multi-byte character (e.g. "é" or "日") is treated the same as a lone ASCII letter.
	// I don't want to get into threshold territory by playing with a number higher than this, though
	if utf8.RuneCountInString(prefix) <= 1 {
		return false
	}
	// A match whose prefix is "the" (possibly followed by a separator) is probably not that useful of a match
	return isPrefixMeaningfulWithoutThe(prefix)
}
// isPrefixMeaningfulWithoutThe reports whether prefix is still meaningful once a leading "the"
// (matched case-insensitively) and the separator characters around it are removed. A prefix that
// does not begin with "the" has nothing to strip and is reported as meaningful as-is.
//
// NOTE(review): the check does not require a word boundary after "the", so a prefix such as
// "them" is treated as "the" followed by "m" — confirm this is intended.
func isPrefixMeaningfulWithoutThe(prefix string) bool {
	const article = "the"
	// A prefix shorter than the article cannot start with it. This guard also keeps the slice
	// expression below in range.
	if len(prefix) < len(article) {
		return true
	}
	if !strings.EqualFold(prefix[:len(article)], article) {
		return true
	}
	trimmedPrefix := strings.TrimFunc(prefix, isCharLikelySeparator)
	// Nothing but the article (and separators) was matched.
	if len(trimmedPrefix) <= len(article) {
		return false
	}
	// Judge whatever follows the article by the regular rules, which also handles a repeated "the".
	prefixWithoutThe := strings.TrimFunc(trimmedPrefix[len(article):], isCharLikelySeparator)
	return isPrefixMeaningful(prefixWithoutThe)
}