diff --git a/lib/search.ts b/lib/search.ts index 6eb6d7b..da6df46 100644 --- a/lib/search.ts +++ b/lib/search.ts @@ -2,8 +2,14 @@ import { cutForSearch } from "@node-rs/jieba"; import minisearch from "minisearch"; import { getPostFileContent, sortedPosts } from "./post-process"; +// The word tokenizer is imperfect, so after segmentation we keep only the +// tokens that contain CJKL characters (CJK ideographs, Japanese kana, +// Korean Hangul syllables, or Latin letters) to prevent repeated recognition. +const CJKLRecognizeRegex = /[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7A3a-zA-Z]+/g; + function tokenizer(str: string) { - return cutForSearch(str, true); + const result = cutForSearch(str, true).filter((item) => item.match(CJKLRecognizeRegex)); + return result; } function makeSearchIndex() { @@ -11,9 +17,6 @@ function makeSearchIndex() { fields: ["id", "title", "tags", "subtitle", "summary", "content"], storeFields: ["id", "title", "tags"], tokenize: tokenizer, - searchOptions: { - fuzzy: 0.1, - }, }); for (let index = 0; index < sortedPosts.allPostList.length; index++) { const post = sortedPosts.allPostList[index];