Don't allow matches from shows only matching 'the'
parent
2f4695510a
commit
07440d72d0
|
@ -2,6 +2,8 @@ package match
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/agnivade/levenshtein"
|
||||
)
|
||||
|
@ -17,10 +19,12 @@ type match struct {
|
|||
func FindBestMatch(candidate string, index []string) (string, error) {
|
||||
haveCommonPrefix := []string{}
|
||||
for _, indexEntry := range index {
|
||||
prefix := getLongestCommonPrefix(candidate, indexEntry)
|
||||
// Single character prefixes aren't that helpful.
|
||||
// I don't want to get into threshold territory by playing with a number higher than this, though
|
||||
if len(prefix) > 1 {
|
||||
prefix := getLongestCommonPrefix(
|
||||
normalizeForMatch(candidate),
|
||||
normalizeForMatch(indexEntry),
|
||||
)
|
||||
|
||||
if isPrefixMeaningful(prefix) {
|
||||
haveCommonPrefix = append(haveCommonPrefix, indexEntry)
|
||||
}
|
||||
}
|
||||
|
@ -61,3 +65,24 @@ func findMinScoreMatch(matches []match) match {
|
|||
|
||||
return minScorematch
|
||||
}
|
||||
|
||||
func normalizeForMatch(target string) string {
|
||||
return filterChars(target, func(r rune) bool {
|
||||
return !isCharLikelySeparator(r)
|
||||
})
|
||||
}
|
||||
|
||||
func filterChars(target string, shouldAllowChar func(s rune) bool) string {
|
||||
builder := strings.Builder{}
|
||||
for _, char := range target {
|
||||
if shouldAllowChar(char) {
|
||||
builder.WriteRune(char)
|
||||
}
|
||||
}
|
||||
|
||||
return builder.String()
|
||||
}
|
||||
|
||||
func isCharLikelySeparator(r rune) bool {
|
||||
return !unicode.IsNumber(r) && !unicode.IsLetter(r)
|
||||
}
|
||||
|
|
|
@ -78,6 +78,13 @@ func TestFindBestMatch(t *testing.T) {
|
|||
expectedMatch: "",
|
||||
expectedError: ErrNoMatches,
|
||||
},
|
||||
{
|
||||
// two have a common prefix of "the-a" but really there should be no match
|
||||
candidate: "the-amazing-gymnist",
|
||||
index: []string{"the-athlete", "sitting-at-home", "the-show-about-nothing"},
|
||||
expectedMatch: "",
|
||||
expectedError: ErrNoMatches,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tt {
|
||||
|
@ -89,7 +96,7 @@ func TestFindBestMatch(t *testing.T) {
|
|||
t.Run(testName, func(t *testing.T) {
|
||||
match, err := FindBestMatch(test.candidate, test.index)
|
||||
if test.expectedError != nil {
|
||||
assert.ErrorIs(t, err, ErrNoMatches, fmt.Sprintf("got %s", match))
|
||||
assert.ErrorIs(t, err, ErrNoMatches, fmt.Sprintf("got match %q and error %q", match, err))
|
||||
} else {
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, test.expectedMatch, match)
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func getLongestCommonPrefix(a, b string) string {
|
||||
shortestString, longestString := getShortestAndLongestString(a, b)
|
||||
for i := range shortestString {
|
||||
|
@ -19,3 +23,38 @@ func getShortestAndLongestString(a, b string) (string, string) {
|
|||
return a, b
|
||||
}
|
||||
}
|
||||
|
||||
// isPrefixMeaningful checks if the given of a match is truly meaningful enough to allow it to be considered a
|
||||
// good enough match to continue processing. For instance, a longest common prefix of a single letter is not really
|
||||
// all that meaningful.
|
||||
func isPrefixMeaningful(prefix string) bool {
|
||||
// Single character prefixes aren't that helpful.
|
||||
// I don't want to get into threshold territory by playing with a number higher than this, though
|
||||
if len(prefix) <= 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
// A match whose prefix is "the" (possibly followed by a separator) is probably not that useful of a match
|
||||
return isPrefixMeaningfulWithoutThe(prefix)
|
||||
}
|
||||
|
||||
// Check if the start of a given prefix is 'the', and if it is, whether removing it would still procure a
|
||||
// meaningful match
|
||||
func isPrefixMeaningfulWithoutThe(prefix string) bool {
|
||||
if len(prefix) < 3 {
|
||||
return false
|
||||
}
|
||||
|
||||
startsWithThe := strings.ToLower(prefix[:3]) == "the"
|
||||
if !startsWithThe {
|
||||
return true
|
||||
}
|
||||
|
||||
trimmedPrefix := strings.TrimFunc(prefix, isCharLikelySeparator)
|
||||
if len(trimmedPrefix) <= 3 {
|
||||
return false
|
||||
}
|
||||
|
||||
prefixWithoutThe := strings.TrimFunc(trimmedPrefix[3:], isCharLikelySeparator)
|
||||
return isPrefixMeaningful(prefixWithoutThe)
|
||||
}
|
||||
|
|
|
@ -39,3 +39,40 @@ func TestLongestCommonPrefix(t *testing.T) {
|
|||
assert.Equal(t, test.expectedPrefix, prefix)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPrefixMeaningful(t *testing.T) {
|
||||
tt := []struct {
|
||||
prefix string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
prefix: "Brooklyn",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
prefix: "b",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
prefix: "The",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
prefix: "The-",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
prefix: "The Runner",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
prefix: "The R",
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tt {
|
||||
isMeaningful := isPrefixMeaningful(test.prefix)
|
||||
assert.Equal(t, test.expected, isMeaningful)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,15 +46,27 @@ func TestFindBestFolder(t *testing.T) {
|
|||
"",
|
||||
match.ErrNoMatches,
|
||||
},
|
||||
{
|
||||
"A match with a non-meaningful prefix ('the') should not match",
|
||||
"The.Amazing.Spider-Man.1977.S01.VHSRip",
|
||||
[]string{"The-Good-Place", "The-Simpsons", "Stranger-Things", "Brooklyn-Nine-Nine", "The-IT-Crowd"},
|
||||
"",
|
||||
match.ErrNoMatches,
|
||||
},
|
||||
{
|
||||
"A good match starting with 'the' should still match",
|
||||
"The.Simpsons.S06.720p.DSNP.WEB-DL.DDP5.1.H.264-MZABI",
|
||||
[]string{"The-Good-Place", "The-Simpsons", "Stranger-Things", "Brooklyn-Nine-Nine", "The-IT-Crowd"},
|
||||
"The-Simpsons",
|
||||
nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range table {
|
||||
t.Run(test.testName, func(t *testing.T) {
|
||||
|
||||
foundFolder, err := findShowFolder(test.fileName, test.folders)
|
||||
|
||||
if test.expectedError != nil {
|
||||
assert.ErrorIs(t, err, test.expectedError, fmt.Sprintf("got %s", foundFolder))
|
||||
assert.ErrorIs(t, err, test.expectedError, fmt.Sprintf("got show %q and error %q", foundFolder, err))
|
||||
} else {
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, test.expectedFolder, foundFolder)
|
||||
|
|
Loading…
Reference in New Issue