Improve recognition accuracy of searches
This commit is contained in:
@@ -2,8 +2,14 @@ import { cutForSearch } from "@node-rs/jieba";
|
|||||||
import minisearch from "minisearch";
|
import minisearch from "minisearch";
|
||||||
import { getPostFileContent, sortedPosts } from "./post-process";
|
import { getPostFileContent, sortedPosts } from "./post-process";
|
||||||
|
|
||||||
|
// The word tokenizer is imperfect, so during word segmentation
|
||||||
|
// only tokens containing CJKL characters (Chinese, Japanese, Korean, or Latin letters) are kept,
|
||||||
|
// which prevents the same text from being recognized more than once.
|
||||||
|
const CJKLRecognizeRegex = /[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7A3a-zA-Z]+/g;
|
||||||
|
|
||||||
function tokenizer(str: string) {
|
function tokenizer(str: string) {
|
||||||
return cutForSearch(str, true);
|
const result = cutForSearch(str, true).filter((item) => item.match(CJKLRecognizeRegex));
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
function makeSearchIndex() {
|
function makeSearchIndex() {
|
||||||
@@ -11,9 +17,6 @@ function makeSearchIndex() {
|
|||||||
fields: ["id", "title", "tags", "subtitle", "summary", "content"],
|
fields: ["id", "title", "tags", "subtitle", "summary", "content"],
|
||||||
storeFields: ["id", "title", "tags"],
|
storeFields: ["id", "title", "tags"],
|
||||||
tokenize: tokenizer,
|
tokenize: tokenizer,
|
||||||
searchOptions: {
|
|
||||||
fuzzy: 0.1,
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
for (let index = 0; index < sortedPosts.allPostList.length; index++) {
|
for (let index = 0; index < sortedPosts.allPostList.length; index++) {
|
||||||
const post = sortedPosts.allPostList[index];
|
const post = sortedPosts.allPostList[index];
|
||||||
|
|||||||
Reference in New Issue
Block a user