You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

85 lines
1.5 KiB
Go

package question
import (
"math"
"regexp"
"strings"
)
// minSimilarity is the lowest similarity score (inclusive) at which
// IsAnswerMatch treats two answers as matching.
const minSimilarity = 0.85

// normalizeRegex matches runs of characters that are ignored when comparing
// answers: anything that is not a letter, combining mark, digit, or ASCII
// whitespace. Unicode classes are used rather than a-z so that answers written
// in non-Latin scripts or with accented letters are not stripped down to
// nothing, which would make any two such answers compare as equal.
var normalizeRegex = regexp.MustCompile(`[^\p{L}\p{M}\p{N}\s]+`)
// IsAnswerMatch reports whether provided is close enough to expected to count
// as a match, together with the similarity score that was computed.
//
// Both answers are passed through normalizeForMatch first. The score is
// 1 - distance/maxLen, where distance is the Levenshtein edit distance between
// the normalized answers and maxLen is the rune length of the longer one, so
// it ranges from 0 to 1 (identical). matched is true when score is at least
// minSimilarity.
//
// Special cases: if both answers normalize to the empty string they are
// treated as a match with score 1; if only one does, the result is no match
// with score 0.
func IsAnswerMatch(expected, provided string) (matched bool, score float64) {
	want := normalizeForMatch(expected)
	got := normalizeForMatch(provided)

	switch {
	case want == "" && got == "":
		return true, 1
	case want == "" || got == "":
		return false, 0
	}

	// Both strings are non-empty here, so longest is at least 1 and the
	// division below cannot be by zero.
	longest := math.Max(float64(len([]rune(want))), float64(len([]rune(got))))
	score = 1 - float64(levenshtein(want, got))/longest
	return score >= minSimilarity, score
}
// normalizeForMatch canonicalizes an answer before comparison. It lowercases
// the input, trims surrounding whitespace, collapses every run of whitespace
// into a single space, deletes every match of normalizeRegex, and then
// collapses whitespace once more, because deleting characters can leave
// leading, trailing, or doubled spaces behind.
func normalizeForMatch(s string) string {
	// collapse rewrites any run of whitespace as one space and drops
	// leading/trailing whitespace.
	collapse := func(in string) string {
		return strings.Join(strings.Fields(in), " ")
	}

	lowered := strings.ToLower(strings.TrimSpace(s))
	stripped := normalizeRegex.ReplaceAllString(collapse(lowered), "")
	return collapse(stripped)
}
// levenshtein returns the edit distance between a and b: the minimum number of
// single-rune insertions, deletions, and substitutions needed to turn one
// string into the other. The strings are compared rune by rune, so a
// multi-byte character counts as one position.
//
// Only two rows of the dynamic-programming table are kept at a time, so memory
// use is proportional to the shorter input rather than to the product of both
// lengths.
func levenshtein(a, b string) int {
	ra, rb := []rune(a), []rune(b)
	// The distance is symmetric; keep the shorter input on the column axis so
	// the row buffers are as small as possible.
	if len(ra) < len(rb) {
		ra, rb = rb, ra
	}

	// prev holds the previous table row: prev[j] is the distance between the
	// first i-1 runes of ra and the first j runes of rb. Row 0 is the cost of
	// building a j-rune prefix from nothing.
	prev := make([]int, len(rb)+1)
	cur := make([]int, len(rb)+1)
	for j := range prev {
		prev[j] = j
	}

	for i := 1; i <= len(ra); i++ {
		cur[0] = i // cost of deleting all i runes of the ra prefix
		for j := 1; j <= len(rb); j++ {
			cost := 0
			if ra[i-1] != rb[j-1] {
				cost = 1
			}
			cur[j] = min3(
				prev[j]+1,      // deletion
				cur[j-1]+1,     // insertion
				prev[j-1]+cost, // match or substitution
			)
		}
		// The row just filled becomes the previous row for the next rune.
		prev, cur = cur, prev
	}
	return prev[len(rb)]
}
// min3 returns the smallest of the three integers a, b, and c.
func min3(a, b, c int) int {
	smallest := a
	if b < smallest {
		smallest = b
	}
	if c < smallest {
		smallest = c
	}
	return smallest
}