254 lines
6.2 KiB
Go
254 lines
6.2 KiB
Go
// Copyright (c) 2017 Couchbase, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package vellum
|
|
|
|
import (
|
|
"io"
|
|
|
|
"github.com/willf/bitset"
|
|
)
|
|
|
|
// FST is an in-memory representation of a finite state transducer,
|
|
// capable of returning the uint64 value associated with
|
|
// each []byte key stored, as well as enumerating all of the keys
|
|
// in order.
|
|
type FST struct {
|
|
f io.Closer
|
|
ver int
|
|
len int
|
|
typ int
|
|
data []byte
|
|
decoder decoder
|
|
}
|
|
|
|
func new(data []byte, f io.Closer) (rv *FST, err error) {
|
|
rv = &FST{
|
|
data: data,
|
|
f: f,
|
|
}
|
|
|
|
rv.ver, rv.typ, err = decodeHeader(data)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
rv.decoder, err = loadDecoder(rv.ver, rv.data)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
rv.len = rv.decoder.getLen()
|
|
|
|
return rv, nil
|
|
}
|
|
|
|
// Contains returns true if this FST contains the specified key.
|
|
func (f *FST) Contains(val []byte) (bool, error) {
|
|
_, exists, err := f.Get(val)
|
|
return exists, err
|
|
}
|
|
|
|
// Get returns the value associated with the key. NOTE: a value of zero
|
|
// does not imply the key does not exist, you must consult the second
|
|
// return value as well.
|
|
func (f *FST) Get(input []byte) (uint64, bool, error) {
|
|
return f.get(input, nil)
|
|
}
|
|
|
|
func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
|
|
var total uint64
|
|
curr := f.decoder.getRoot()
|
|
state, err := f.decoder.stateAt(curr, prealloc)
|
|
if err != nil {
|
|
return 0, false, err
|
|
}
|
|
for i := range input {
|
|
_, curr, output := state.TransitionFor(input[i])
|
|
if curr == noneAddr {
|
|
return 0, false, nil
|
|
}
|
|
|
|
state, err = f.decoder.stateAt(curr, state)
|
|
if err != nil {
|
|
return 0, false, err
|
|
}
|
|
|
|
total += output
|
|
}
|
|
|
|
if state.Final() {
|
|
total += state.FinalOutput()
|
|
return total, true, nil
|
|
}
|
|
return 0, false, nil
|
|
}
|
|
|
|
// Version returns the encoding version used by this FST instance.
|
|
func (f *FST) Version() int {
|
|
return f.ver
|
|
}
|
|
|
|
// Len returns the number of entries in this FST instance.
|
|
func (f *FST) Len() int {
|
|
return f.len
|
|
}
|
|
|
|
// Type returns the type of this FST instance.
|
|
func (f *FST) Type() int {
|
|
return f.typ
|
|
}
|
|
|
|
// Close will unmap any mmap'd data (if managed by vellum) and it will close
|
|
// the backing file (if managed by vellum). You MUST call Close() for any
|
|
// FST instance that is created.
|
|
func (f *FST) Close() error {
|
|
if f.f != nil {
|
|
err := f.f.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
f.data = nil
|
|
f.decoder = nil
|
|
return nil
|
|
}
|
|
|
|
// Start returns the start state of this Automaton
|
|
func (f *FST) Start() int {
|
|
return f.decoder.getRoot()
|
|
}
|
|
|
|
// IsMatch returns if this state is a matching state in this Automaton
|
|
func (f *FST) IsMatch(addr int) bool {
|
|
match, _ := f.IsMatchWithVal(addr)
|
|
return match
|
|
}
|
|
|
|
// CanMatch returns if this state can ever transition to a matching state
|
|
// in this Automaton
|
|
func (f *FST) CanMatch(addr int) bool {
|
|
if addr == noneAddr {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// WillAlwaysMatch returns if from this state the Automaton will always
|
|
// be in a matching state
|
|
func (f *FST) WillAlwaysMatch(int) bool {
|
|
return false
|
|
}
|
|
|
|
// Accept returns the next state for this Automaton on input of byte b
|
|
func (f *FST) Accept(addr int, b byte) int {
|
|
next, _ := f.AcceptWithVal(addr, b)
|
|
return next
|
|
}
|
|
|
|
// IsMatchWithVal returns if this state is a matching state in this Automaton
|
|
// and also returns the final output value for this state
|
|
func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
|
|
s, err := f.decoder.stateAt(addr, nil)
|
|
if err != nil {
|
|
return false, 0
|
|
}
|
|
return s.Final(), s.FinalOutput()
|
|
}
|
|
|
|
// AcceptWithVal returns the next state for this Automaton on input of byte b
|
|
// and also returns the output value for the transition
|
|
func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
|
|
s, err := f.decoder.stateAt(addr, nil)
|
|
if err != nil {
|
|
return noneAddr, 0
|
|
}
|
|
_, next, output := s.TransitionFor(b)
|
|
return next, output
|
|
}
|
|
|
|
// Iterator returns a new Iterator capable of enumerating the key/value pairs
|
|
// between the provided startKeyInclusive and endKeyExclusive.
|
|
func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
|
|
return newIterator(f, startKeyInclusive, endKeyExclusive, nil)
|
|
}
|
|
|
|
// Search returns a new Iterator capable of enumerating the key/value pairs
|
|
// between the provided startKeyInclusive and endKeyExclusive that also
|
|
// satisfy the provided automaton.
|
|
func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
|
|
return newIterator(f, startKeyInclusive, endKeyExclusive, aut)
|
|
}
|
|
|
|
// Debug is only intended for debug purposes, it simply asks the underlying
|
|
// decoder visit each state, and pass it to the provided callback.
|
|
func (f *FST) Debug(callback func(int, interface{}) error) error {
|
|
|
|
addr := f.decoder.getRoot()
|
|
set := bitset.New(uint(addr))
|
|
stack := addrStack{addr}
|
|
|
|
stateNumber := 0
|
|
stack, addr = stack[:len(stack)-1], stack[len(stack)-1]
|
|
for addr != noneAddr {
|
|
if set.Test(uint(addr)) {
|
|
stack, addr = stack.Pop()
|
|
continue
|
|
}
|
|
set.Set(uint(addr))
|
|
state, err := f.decoder.stateAt(addr, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = callback(stateNumber, state)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for i := 0; i < state.NumTransitions(); i++ {
|
|
tchar := state.TransitionAt(i)
|
|
_, dest, _ := state.TransitionFor(tchar)
|
|
stack = append(stack, dest)
|
|
}
|
|
stateNumber++
|
|
stack, addr = stack.Pop()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
type addrStack []int
|
|
|
|
func (a addrStack) Pop() (addrStack, int) {
|
|
l := len(a)
|
|
if l < 1 {
|
|
return a, noneAddr
|
|
}
|
|
return a[:l-1], a[l-1]
|
|
}
|
|
|
|
// Reader() returns a Reader instance that a single thread may use to
|
|
// retrieve data from the FST
|
|
func (f *FST) Reader() (*Reader, error) {
|
|
return &Reader{f: f}, nil
|
|
}
|
|
|
|
// A Reader is meant for a single threaded use
|
|
type Reader struct {
|
|
f *FST
|
|
prealloc fstStateV1
|
|
}
|
|
|
|
func (r *Reader) Get(input []byte) (uint64, bool, error) {
|
|
return r.f.get(input, &r.prealloc)
|
|
}
|