161 lines
4.7 KiB
Go
161 lines
4.7 KiB
Go
|
// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
|
||
|
// https://github.com/sergi/go-diff
|
||
|
// See the included LICENSE file for license details.
|
||
|
//
|
||
|
// go-diff is a Go implementation of Google's Diff, Match, and Patch library
|
||
|
// Original library is Copyright (c) 2006 Google Inc.
|
||
|
// http://code.google.com/p/google-diff-match-patch/
|
||
|
|
||
|
package diffmatchpatch
|
||
|
|
||
|
import (
|
||
|
"math"
|
||
|
)
|
||
|
|
||
|
// MatchMain locates the best instance of 'pattern' in 'text' near 'loc'.
|
||
|
// Returns -1 if no match found.
|
||
|
func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int {
|
||
|
// Check for null inputs not needed since null can't be passed in C#.
|
||
|
|
||
|
loc = int(math.Max(0, math.Min(float64(loc), float64(len(text)))))
|
||
|
if text == pattern {
|
||
|
// Shortcut (potentially not guaranteed by the algorithm)
|
||
|
return 0
|
||
|
} else if len(text) == 0 {
|
||
|
// Nothing to match.
|
||
|
return -1
|
||
|
} else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern {
|
||
|
// Perfect match at the perfect spot! (Includes case of null pattern)
|
||
|
return loc
|
||
|
}
|
||
|
// Do a fuzzy compare.
|
||
|
return dmp.MatchBitap(text, pattern, loc)
|
||
|
}
|
||
|
|
||
|
// MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm.
|
||
|
// Returns -1 if no match was found.
|
||
|
func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int {
|
||
|
// Initialise the alphabet.
|
||
|
s := dmp.MatchAlphabet(pattern)
|
||
|
|
||
|
// Highest score beyond which we give up.
|
||
|
scoreThreshold := dmp.MatchThreshold
|
||
|
// Is there a nearby exact match? (speedup)
|
||
|
bestLoc := indexOf(text, pattern, loc)
|
||
|
if bestLoc != -1 {
|
||
|
scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
|
||
|
pattern), scoreThreshold)
|
||
|
// What about in the other direction? (speedup)
|
||
|
bestLoc = lastIndexOf(text, pattern, loc+len(pattern))
|
||
|
if bestLoc != -1 {
|
||
|
scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
|
||
|
pattern), scoreThreshold)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Initialise the bit arrays.
|
||
|
matchmask := 1 << uint((len(pattern) - 1))
|
||
|
bestLoc = -1
|
||
|
|
||
|
var binMin, binMid int
|
||
|
binMax := len(pattern) + len(text)
|
||
|
lastRd := []int{}
|
||
|
for d := 0; d < len(pattern); d++ {
|
||
|
// Scan for the best match; each iteration allows for one more error. Run a binary search to determine how far from 'loc' we can stray at this error level.
|
||
|
binMin = 0
|
||
|
binMid = binMax
|
||
|
for binMin < binMid {
|
||
|
if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold {
|
||
|
binMin = binMid
|
||
|
} else {
|
||
|
binMax = binMid
|
||
|
}
|
||
|
binMid = (binMax-binMin)/2 + binMin
|
||
|
}
|
||
|
// Use the result from this iteration as the maximum for the next.
|
||
|
binMax = binMid
|
||
|
start := int(math.Max(1, float64(loc-binMid+1)))
|
||
|
finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern)))
|
||
|
|
||
|
rd := make([]int, finish+2)
|
||
|
rd[finish+1] = (1 << uint(d)) - 1
|
||
|
|
||
|
for j := finish; j >= start; j-- {
|
||
|
var charMatch int
|
||
|
if len(text) <= j-1 {
|
||
|
// Out of range.
|
||
|
charMatch = 0
|
||
|
} else if _, ok := s[text[j-1]]; !ok {
|
||
|
charMatch = 0
|
||
|
} else {
|
||
|
charMatch = s[text[j-1]]
|
||
|
}
|
||
|
|
||
|
if d == 0 {
|
||
|
// First pass: exact match.
|
||
|
rd[j] = ((rd[j+1] << 1) | 1) & charMatch
|
||
|
} else {
|
||
|
// Subsequent passes: fuzzy match.
|
||
|
rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1]
|
||
|
}
|
||
|
if (rd[j] & matchmask) != 0 {
|
||
|
score := dmp.matchBitapScore(d, j-1, loc, pattern)
|
||
|
// This match will almost certainly be better than any existing match. But check anyway.
|
||
|
if score <= scoreThreshold {
|
||
|
// Told you so.
|
||
|
scoreThreshold = score
|
||
|
bestLoc = j - 1
|
||
|
if bestLoc > loc {
|
||
|
// When passing loc, don't exceed our current distance from loc.
|
||
|
start = int(math.Max(1, float64(2*loc-bestLoc)))
|
||
|
} else {
|
||
|
// Already passed loc, downhill from here on in.
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold {
|
||
|
// No hope for a (better) match at greater error levels.
|
||
|
break
|
||
|
}
|
||
|
lastRd = rd
|
||
|
}
|
||
|
return bestLoc
|
||
|
}
|
||
|
|
||
|
// matchBitapScore computes and returns the score for a match with e errors and x location.
|
||
|
func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 {
|
||
|
accuracy := float64(e) / float64(len(pattern))
|
||
|
proximity := math.Abs(float64(loc - x))
|
||
|
if dmp.MatchDistance == 0 {
|
||
|
// Dodge divide by zero error.
|
||
|
if proximity == 0 {
|
||
|
return accuracy
|
||
|
}
|
||
|
|
||
|
return 1.0
|
||
|
}
|
||
|
return accuracy + (proximity / float64(dmp.MatchDistance))
|
||
|
}
|
||
|
|
||
|
// MatchAlphabet initialises the alphabet for the Bitap algorithm.
|
||
|
func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int {
|
||
|
s := map[byte]int{}
|
||
|
charPattern := []byte(pattern)
|
||
|
for _, c := range charPattern {
|
||
|
_, ok := s[c]
|
||
|
if !ok {
|
||
|
s[c] = 0
|
||
|
}
|
||
|
}
|
||
|
i := 0
|
||
|
|
||
|
for _, c := range charPattern {
|
||
|
value := s[c] | int(uint(1)<<uint((len(pattern)-i-1)))
|
||
|
s[c] = value
|
||
|
i++
|
||
|
}
|
||
|
return s
|
||
|
}
|