2024-01-06 16:01:59 +08:00
|
|
|
import { cutForSearch } from "@node-rs/jieba";
|
2024-01-15 11:44:48 +08:00
|
|
|
import Colors from "colors";
|
2024-01-06 11:47:18 +08:00
|
|
|
import minisearch from "minisearch";
|
2024-01-15 11:44:48 +08:00
|
|
|
import sizeof from "object-sizeof";
|
2024-01-06 11:47:18 +08:00
|
|
|
import { getPostFileContent, sortedPosts } from "./post-process";
|
|
|
|
|
|
2024-01-08 21:25:34 +08:00
|
|
|
/**
 * Matches every character that is NOT one of:
 *  - a CJK ideograph (U+4E00–U+9FFF, Extension A U+3400–U+4DBF,
 *    compatibility ideographs U+F900–U+FAFF, strokes U+31C0–U+31EF),
 *  - Japanese kana (U+3040–U+30FF),
 *  - Korean Hangul (U+AC00–U+D7AF, U+1100–U+11FF, U+3130–U+318F),
 *  - a Latin letter (A–Z, a–z, U+00C0–U+024F),
 *  - whitespace.
 *
 * The word tokenizer is imperfect, so only CJK and Latin ("CJKL") characters
 * are kept during word segmentation; everything this pattern matches
 * (digits, punctuation, other symbols, ...) is blanked out beforehand to
 * prevent repeated recognition.
 */
const NonCJKLRecognizeRegex =
  /[^\u4e00-\u9fa5\u3040-\u30ff\uac00-\ud7af\u1100-\u11ff\u3130-\u318f\u31c0-\u31ef\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\u0041-\u005a\u0061-\u007a\u00c0-\u00ff\u0100-\u017f\u0180-\u024f\s ]/g;
|
2024-01-08 21:25:34 +08:00
|
|
|
|
2024-01-06 16:01:59 +08:00
|
|
|
/**
 * Splits text into search tokens for the MiniSearch index.
 *
 * Every character matched by `NonCJKLRecognizeRegex` is first replaced with a
 * space, then the cleaned text is segmented with jieba's `cutForSearch`
 * (second argument `true`). Tokens that are empty or consist only of
 * whitespace are discarded.
 *
 * @param str - Raw text to tokenize.
 * @returns The non-blank tokens, in the order jieba produced them.
 */
function tokenizer(str: string): string[] {
  const cleaned = str.replace(NonCJKLRecognizeRegex, " ");
  // A single linear filter pass replaces the previous splice-inside-a-loop,
  // which shifted the array tail on every removal (quadratic for text with
  // many separators) and relied on manual index rewinding.
  return cutForSearch(cleaned, true).filter((token) => token.trim() !== "");
}
|
2024-01-06 11:47:18 +08:00
|
|
|
|
|
|
|
|
/**
 * Builds the full-text search index over every post in
 * `sortedPosts.allPostList`.
 *
 * Each post is added with its id, front-matter title/tags/subtitle/summary
 * and the file content returned by `getPostFileContent`. All six fields are
 * indexed; only id, title, tags and summary are stored on the index for
 * retrieval. Tokenization is delegated to `tokenizer`.
 *
 * After building, logs the index size (the `object-sizeof` result divided by
 * 1024², formatted to three decimals) and the elapsed build time in seconds.
 *
 * @returns The populated MiniSearch instance.
 */
function makeSearchIndex() {
  const startedAt = Date.now();

  const searchIndex = new minisearch({
    fields: ["id", "title", "tags", "subtitle", "summary", "content"],
    storeFields: ["id", "title", "tags", "summary"],
    tokenize: tokenizer,
  });

  for (const post of sortedPosts.allPostList) {
    const { title, tags, subtitle, summary } = post.frontMatter;
    const content = getPostFileContent(post.id);
    searchIndex.add({ id: post.id, title, tags, subtitle, summary, content });
  }

  // Stop the clock before measuring the size so the size estimation itself
  // is not counted in the reported build time.
  const finishedAt = Date.now();
  const elapsedSeconds = (finishedAt - startedAt) / 1000;
  const sizeofIndex = (sizeof(searchIndex) / 1024 ** 2).toFixed(3);

  const report = `Search index is ready. And the size of index is ${sizeofIndex} mb. And it costs ${elapsedSeconds} s.`;
  console.log(Colors.cyan(report));

  return searchIndex;
}
|
|
|
|
|
|
2024-08-12 10:57:33 +08:00
|
|
|
/**
 * The search index, built by calling `makeSearchIndex()` when this module is
 * evaluated. `Object.freeze` is shallow: it prevents adding, removing or
 * reassigning the instance's own top-level properties, but objects reachable
 * through those properties are not frozen.
 */
export const SearchIndex = Object.freeze(makeSearchIndex());
|