gobbler/categorize/tv/season.go

176 lines
5.5 KiB
Go

package tv
import (
"errors"
"regexp"
"strconv"
"unicode/utf8"
)
// seasonPattern matches a single season marker, case-insensitively: an "S", optionally followed by "eason" and a
// lazily-matched separator (captured as "sep"), then the season digits (captured as "seasonNumber").
const seasonPattern = `(?i)S(?:eason(?P<sep>.*?))?(?P<seasonNumber>\d+)`
// seasonRangePattern matches a season range: a start marker shaped like seasonPattern, a "-" that may be surrounded
// by whitespace, and an end number that may carry its own "S"/"Season" prefix. The digits are captured as
// "rangeStart" and "rangeEnd"; the separators following each "Season" are captured as "sep" and "sep2".
// These two could probably be combined into one expression, but it's not worth the complexity of attempting to do so
const seasonRangePattern = `(?i)S(?:eason(?P<sep>.*?))?(?P<rangeStart>\d+)\s*-\s*(?:S(?:eason(?P<sep2>.*?))?)?(?P<rangeEnd>\d+)`
// errNoSeasonInfo is the sentinel error returned when no usable season marker can be found in a filename.
var errNoSeasonInfo = errors.New("could not find season info")
// seasonRangeMatch holds the pieces of a season range match. Every field is the raw text taken from the filename;
// nothing is converted to a number here.
type seasonRangeMatch struct {
// fullRangeMatch is the entire text matched by the range pattern.
fullRangeMatch string
// rangeStart is the text of the "rangeStart" capture group.
rangeStart string
// rangeEnd is the text of the "rangeEnd" capture group.
rangeEnd string
}
// GetSeasonsForFile returns every season number that filename refers to. A season range is looked for first and
// yields each season in the range; otherwise a single season marker yields a one-element slice. errNoSeasonInfo is
// returned if no season information could be found.
func GetSeasonsForFile(filename string) ([]int, error) {
	// The range form is attempted first, because the single-season pattern would otherwise settle for the first
	// season of a range.
	seasons, rangeErr := getSeasonRangeForFile(filename)
	switch {
	case rangeErr == nil:
		return seasons, nil
	case !errors.Is(rangeErr, errNoSeasonInfo):
		// Anything other than "nothing found" is a real failure and is passed up unchanged.
		return nil, rangeErr
	}

	season, singleErr := getSingleSeasonForFile(filename)
	switch {
	case singleErr == nil:
		return []int{season}, nil
	case !errors.Is(singleErr, errNoSeasonInfo):
		return nil, singleErr
	}

	return nil, errNoSeasonInfo
}
// getSeasonRangeForFile expands a season range found in filename (e.g. "S01-S03") into every season number it
// covers, both ends included. errNoSeasonInfo is returned when no range is present, when either bound does not fit
// in an int, or when the range is empty or runs backwards (end before start).
func getSeasonRangeForFile(filename string) ([]int, error) {
	seasonRange, err := extractSeasonRangeString(filename)
	if err != nil {
		return nil, errNoSeasonInfo
	}

	// The pattern guarantees digits but not magnitude: an over-long run of digits in a filename overflows int, and
	// that must be reported as "no season" rather than crash the caller.
	rangeStart, err := strconv.Atoi(seasonRange.rangeStart)
	if err != nil {
		return nil, errNoSeasonInfo
	}
	rangeEnd, err := strconv.Atoi(seasonRange.rangeEnd)
	if err != nil {
		return nil, errNoSeasonInfo
	}

	// A backwards range (or an overflowing subtraction) gives a non-positive size; make() panics on a negative
	// length, and an empty result with a nil error would be meaningless to callers.
	// NOTE(review): there is no upper bound on the range size, so "S1-S999999999" still allocates a huge slice.
	rangeSize := rangeEnd - rangeStart + 1
	if rangeSize <= 0 {
		return nil, errNoSeasonInfo
	}

	seasonNumbers := make([]int, rangeSize)
	for i := range seasonNumbers {
		seasonNumbers[i] = rangeStart + i
	}
	return seasonNumbers, nil
}
// getSingleSeasonForFile returns the number of the single season marker found in filename. errNoSeasonInfo is
// returned when no marker is present or when its digits do not fit in an int.
func getSingleSeasonForFile(filename string) (int, error) {
	rawSeasonNumber, err := extractSingleSeasonNumberString(filename)
	if err != nil {
		return 0, errNoSeasonInfo
	}

	// The pattern only guarantees digits, not that they fit in an int; filenames are external input, so an
	// overflow is treated as "no season" instead of a panic.
	seasonNumber, err := strconv.Atoi(rawSeasonNumber)
	if err != nil {
		return 0, errNoSeasonInfo
	}
	return seasonNumber, nil
}
// extractSeasonNumber will get the full string of the season representation in the filename, without any kind of
// separation or conversion. Returns errNoSeasonInfo if no season could be found
func extractSeasonNumberString(filename string) (string, error) {
extractFuncs := []func(string) (string, error){
func(filename string) (string, error) {
res, err := extractSeasonRangeString(filename)
return res.fullRangeMatch, err
},
extractSingleSeasonNumberString,
}
// Try all extraction functions, yielding the first result
for _, extractFunc := range extractFuncs {
seasonString, err := extractFunc(filename)
if err == nil {
return seasonString, nil
} else if !errors.Is(err, errNoSeasonInfo) {
return "", err
}
}
return "", errNoSeasonInfo
}
// singleSeasonRegex is seasonPattern compiled once at package initialisation, so that it is not recompiled on
// every call.
var singleSeasonRegex = regexp.MustCompile(seasonPattern)

// extractSingleSeasonNumberString will get the digits of a single season marker in a filename. This may have false
// matches if a ranged string is given (as the pattern will likely see the first season in the range and decide
// that's the right one to extract). Returns errNoSeasonInfo if no season could be found.
func extractSingleSeasonNumberString(filename string) (string, error) {
	match := singleSeasonRegex.FindStringSubmatch(filename)
	if len(match) == 0 {
		return "", errNoSeasonInfo
	}

	namedGroups := matchIndexToGroupNames(singleSeasonRegex, match)

	// We don't want to match S(some garbage)number, unless it's truly only the same separator repeated.
	// Regex is not powerful enough to check for this on its own so we must do it manually.
	if !onlyConsistsOfOneChar(namedGroups["sep"]) {
		return "", errNoSeasonInfo
	}

	return namedGroups["seasonNumber"], nil
}
// extractSeasonRangeString will get the string representing a season range in a filename. Returns errNoSeasonInfo
// if no season could be found
func extractSeasonRangeString(filename string) (seasonRangeMatch, error) {
seasonRegex := regexp.MustCompile(seasonRangePattern)
match := seasonRegex.FindStringSubmatch(filename)
if len(match) == 0 {
return seasonRangeMatch{}, errNoSeasonInfo
}
namedGroups := matchIndexToGroupNames(seasonRegex, match)
// We don't want to match S(some garbage)number, unless it's truly only the same separator repeated.
// Regex is not powerful enough to check for this on its own so we must do it manually
if !onlyConsistsOfOneChar(namedGroups["sep"]) || !onlyConsistsOfOneChar(namedGroups["sep2"]) {
return seasonRangeMatch{}, errNoSeasonInfo
}
return seasonRangeMatch{
fullRangeMatch: match[0],
rangeStart: namedGroups["rangeStart"],
rangeEnd: namedGroups["rangeEnd"],
}, nil
}
// matchIndexToGroupNames converts the result from FindStringSubmatch into a map keyed by capture-group name.
// groups must have at least one entry per subexpression of pattern, otherwise indexing panics. Unnamed groups,
// including the whole match at index 0, all land on the empty-string key, so only the last of them is kept.
func matchIndexToGroupNames(pattern *regexp.Regexp, groups []string) map[string]string {
	names := pattern.SubexpNames()
	byName := make(map[string]string, len(names))
	for idx := 0; idx < len(names); idx++ {
		byName[names[idx]] = groups[idx]
	}
	return byName
}
// onlyConsistsOfOneChar checks if a given string consists of only one char repeated, e.g. "aaaa" returns true, but
// "abba" returns false. The comparison is per rune, so a multi-byte character such as "é" counts as one char. The
// empty string, despite having no chars, is defined to meet this condition.
func onlyConsistsOfOneChar(s string) bool {
	if len(s) == 0 {
		return true
	}

	firstRune, _ := utf8.DecodeRuneInString(s)
	// Range over the whole string rather than s[1:]: slicing by one byte would cut a multi-byte first rune in half
	// and the leftover bytes would never compare equal to it.
	for _, c := range s {
		if c != firstRune {
			return false
		}
	}
	return true
}
// mustAtoi converts s to an int with strconv.Atoi and panics with the conversion error if that fails. It is meant
// only for call sites where a non-numeric string can only be a programmer error; use it with care.
func mustAtoi(s string) int {
	value, convErr := strconv.Atoi(s)
	if convErr == nil {
		return value
	}
	panic(convErr)
}