You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

50 lines
825 B

package gse
import (
"fmt"
"unicode"
"unicode/utf8"
"github.com/go-ego/gse"
"github.com/go-ego/gse/hmm/pos"
)
// Package-level segmentation state shared by the functions in this file.
var (
// seg is the segmenter that loadDict populates and SegWords cuts text with.
seg gse.Segmenter
// posSeg is a part-of-speech segmenter; nothing in the code visible here
// reads or initializes it.
posSeg pos.Segmenter
// loadedDict becomes true once loadDict has finished without error.
// SegWords returns no words while it is false.
// NOTE(review): it is written from the goroutine started in init and read
// by SegWords without any synchronization — confirm whether this needs an
// atomic or a sync.Once.
loadedDict = false
)
// init starts dictionary loading in a background goroutine so that package
// initialization does not wait for it. Nothing waits for that goroutine, so
// SegWords may be called before loading has finished.
func init() {
go loadDict()
}
// loadDict prepares the package segmenter: it calls seg.LoadDictEmbed and then
// seg.LoadStopEmbed. If either step fails, the error is printed to standard
// output and the function returns early, leaving loadedDict false. Only when
// both steps succeed is loadedDict set to true.
//
// NOTE(review): loadedDict is assigned here without synchronization while
// SegWords may be reading it from another goroutine.
func loadDict() {
	if err := seg.LoadDictEmbed(); err != nil {
		fmt.Println("seg.LoadDictEmbed", err)
		return
	}

	if err := seg.LoadStopEmbed(); err != nil {
		fmt.Println("seg.LoadStopEmbed", err)
		return
	}

	// Both loads succeeded; segmentation may now be used.
	loadedDict = true
}
// SegWords cuts text into tokens with the package segmenter and returns the
// tokens that look like real words, in the order the segmenter produced them.
//
// A token is dropped when it is empty or a single character, or when its
// first character is whitespace, punctuation, or a digit. The result is nil
// when the dictionary has not finished loading (loadedDict is false) or when
// no token survives the filter.
func SegWords(text string) (words []string) {
	// Without a loaded dictionary there is nothing meaningful to cut with.
	if !loadedDict {
		return nil
	}

	for _, wd := range seg.Cut(text) {
		// Skip empty tokens and single characters. Checking <= 1 also
		// guarantees the token is non-empty before its first rune is read.
		if utf8.RuneCountInString(wd) <= 1 {
			continue
		}

		// Classify by the first rune, not the first byte: the lead byte of a
		// multi-byte UTF-8 sequence (e.g. full-width punctuation or digits)
		// says nothing about the character's Unicode category.
		first, _ := utf8.DecodeRuneInString(wd)
		if unicode.IsSpace(first) || unicode.IsPunct(first) || unicode.IsDigit(first) {
			continue
		}

		words = append(words, wd)
	}
	return words
}