Usage:
go run <program>.go --file="<path to a UTF-8 text file>"
The key to this approach is choosing the feature characters (the components of the vector) and determining the threshold value.
The sample data comes from the "List of 40 Obscene and Pornographic Online Novels", published in a notice issued by the State Administration of Press and Publication.
// Command main scores text files by how often a small set of feature
// characters occurs in them and prints a verdict for each file.
//
// With --file it classifies that single file; without it, it walks data_dir
// and classifies every regular file whose extension is ".txt".
package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"log"
	"math"
	"os"
	"path"
	"path/filepath"
)

var (
	// debug enables per-label diagnostic output in classify.
	debug bool = false

	// data_dir is the directory scanned when no --file flag is given.
	data_dir string = "./moyan"

	// limen is the threshold: a distance at or above it is flagged.
	limen float64 = 0.1159203888322267

	// file holds the value of the --file flag.
	file string
)

// MIN_HANZI and MAX_HANZI bound (inclusively) the code points that load
// counts; every other rune is ignored.
const (
	MIN_HANZI rune = 0x3400
	MAX_HANZI rune = 0x9fbb
)

// labels are the feature characters whose frequencies form the vector
// measured by classify.
var labels []rune = []rune{
	0x817f, // 腿
	0x80f8, // 胸
	0x4e73, // 乳
	0x81c0, // 臀
	0x5c41, // 屁
	0x80a1, // 股
	0x88f8, // 裸
	0x6deb, // 淫
}

// errHandle terminates the program through log.Fatal when err is non-nil.
func errHandle(err error) {
	if err != nil {
		log.Fatal(err)
	}
}

// load reads the file called name rune by rune and returns how many times
// each rune in [MIN_HANZI, MAX_HANZI] occurs. Runes outside that range
// (including the replacement rune produced for invalid UTF-8) are skipped.
// On any error other than end of file the returned map is nil.
func load(name string) (map[rune]int, error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	buf := bufio.NewReader(f)
	m := make(map[rune]int)
	for {
		r, _, err := buf.ReadRune()
		if err == io.EOF {
			return m, nil
		}
		if err != nil {
			return nil, err
		}
		if r >= MIN_HANZI && r <= MAX_HANZI {
			m[r]++
		}
	}
}

// classify turns the rune counts in m into a feature vector and its
// Euclidean length.
//
// idv has one entry per element of labels: the number of occurrences of that
// label divided by len(m), i.e. by the number of DISTINCT counted runes, not
// by the total number of runes. dis is the square root of the sum of the
// squared entries.
//
// An empty m yields an all-zero vector and dis == 0; without that guard the
// division would be 0/0 and dis would be NaN, which matches no case in check
// except the default.
func classify(m map[rune]int) (idv []float64, dis float64) {
	idv = make([]float64, len(labels))
	if len(m) == 0 {
		return idv, 0
	}

	distinct := float64(len(m))
	for i, v := range labels {
		idv[i] = float64(m[v]) / distinct
		if debug {
			fmt.Println(i, m[v], string(v), idv[i])
		}
		dis += math.Pow(idv[i], 2)
	}
	return idv, math.Sqrt(dis)
}

// verdict maps a distance produced by classify to the message check prints.
//
// The exact-value cases are tested before the threshold comparison: 1.0 is
// itself >= limen, so testing the threshold first would make the 1.0 case
// unreachable.
func verdict(dis float64) string {
	switch {
	case dis == 1.0:
		return "Are you cheating?"
	case dis == 0:
		return "Check whether the file character encoding is utf8 format"
	case dis >= limen:
		return "Photo-related"
	default:
		return "normal"
	}
}

// check prints the path fp, the distance dis and the verdict for dis.
func check(fp string, dis float64) {
	fmt.Println(fp, dis, verdict(dis))
}

// walkFunc is the filepath.Walk callback: it classifies every regular entry
// whose extension is ".txt" and reports the result through check.
//
// An error handed in by the walker is returned unchanged (info may be nil in
// that case), and a failure to load a file is returned with the path added,
// so either one stops the walk and reaches the caller of filepath.Walk.
func walkFunc(fp string, info os.FileInfo, err error) error {
	if err != nil {
		return err
	}
	// A directory can also be named "*.txt"; it cannot be read as text.
	if info.IsDir() || path.Ext(fp) != ".txt" {
		return nil
	}

	m, err := load(fp)
	if err != nil {
		return fmt.Errorf("loading %q: %w", fp, err)
	}
	_, dis := classify(m)
	check(fp, dis)
	return nil
}

// ensureDataDir creates data_dir when os.Stat reports any error for it and
// terminates the program if the directory cannot be created.
func ensureDataDir() {
	if _, err := os.Stat(data_dir); err != nil {
		errHandle(os.Mkdir(data_dir, os.ModePerm))
	}
}

// init registers the --file command-line flag.
func init() {
	flag.StringVar(&file, "file", "", "file read in,if you don't give the file read in,"+
		"it will create a data dictionary,just pust your files in it")
}

// main classifies the file named by --file, or every ".txt" file under
// data_dir when the flag is empty.
func main() {
	// Done before flag parsing so the directory exists whatever the flags are.
	ensureDataDir()
	flag.Parse()

	if file == "" {
		errHandle(filepath.Walk(data_dir, walkFunc))
		return
	}

	m, err := load(file)
	errHandle(err)
	_, dis := classify(m)
	check(file, dis)
}
That is the entire content of this article; I hope you like it.