Added the jieba word-segmentation library (@node-rs/jieba) as the search tokenizer to improve support for Chinese searches

This commit is contained in:
PrinOrange
2024-01-06 16:01:59 +08:00
parent 88f1261ca6
commit 1a7673a99c
3 changed files with 264 additions and 8 deletions

View File

@@ -1,19 +1,16 @@
import { cutForSearch } from "@node-rs/jieba";
import minisearch from "minisearch"; import minisearch from "minisearch";
// import { cutForSearch } from "nodejs-jieba";
import { getPostFileContent, sortedPosts } from "./post-process"; import { getPostFileContent, sortedPosts } from "./post-process";
// TODO: The jieba tokenizer seems not be supported by vercel platform function tokenizer(str: string) {
// that causes the module loading error. return cutForSearch(str, true);
// So here is the remain task for seeking better Chinese tokenizer. }
// function tokenizer(str: string) {
// return cutForSearch(str, true);
// }
function makeSearchIndex() { function makeSearchIndex() {
let miniSearch = new minisearch({ let miniSearch = new minisearch({
fields: ["id", "title", "tags", "subtitle", "summary", "content"], fields: ["id", "title", "tags", "subtitle", "summary", "content"],
storeFields: ["id", "title", "tags"], storeFields: ["id", "title", "tags"],
// tokenize: tokenizer, tokenize: tokenizer,
searchOptions: { searchOptions: {
fuzzy: 0.1, fuzzy: 0.1,
}, },

258
package-lock.json generated
View File

@@ -12,6 +12,7 @@
"@mdx-js/loader": "^3.0.0", "@mdx-js/loader": "^3.0.0",
"@mdx-js/react": "^3.0.0", "@mdx-js/react": "^3.0.0",
"@next/mdx": "^14.0.1", "@next/mdx": "^14.0.1",
"@node-rs/jieba": "^1.8.1",
"@radix-ui/react-dialog": "^1.0.5", "@radix-ui/react-dialog": "^1.0.5",
"@radix-ui/react-separator": "^1.0.3", "@radix-ui/react-separator": "^1.0.3",
"@radix-ui/react-slot": "^1.0.2", "@radix-ui/react-slot": "^1.0.2",
@@ -114,6 +115,24 @@
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@emnapi/core": {
"version": "0.45.0",
"resolved": "https://registry.npmjs.org/@emnapi/core/-/core-0.45.0.tgz",
"integrity": "sha512-DPWjcUDQkCeEM4VnljEOEcXdAD7pp8zSZsgOujk/LGIwCXWbXJngin+MO4zbH429lzeC3WbYLGjE2MaUOwzpyw==",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@emnapi/runtime": {
"version": "0.45.0",
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-0.45.0.tgz",
"integrity": "sha512-Txumi3td7J4A/xTTwlssKieHKTGl3j4A1tglBx72auZ49YK7ePY6XZricgIg9mnZT4xPfA+UPCUdnhRuEFDL+w==",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@eslint-community/eslint-utils": { "node_modules/@eslint-community/eslint-utils": {
"version": "4.4.0", "version": "4.4.0",
"resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz", "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz",
@@ -699,6 +718,245 @@
"node": ">= 10" "node": ">= 10"
} }
}, },
"node_modules/@node-rs/jieba": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba/-/jieba-1.8.1.tgz",
"integrity": "sha512-eLk1ArIT8H3uGJCsOS2nDHkThILRu08IQO9SgKZnJ9F6gviszX+CneImIaeYaARdsNk6WsRVNFW9Ynk8/LNTyA==",
"engines": {
"node": ">= 10"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Brooooooklyn"
},
"optionalDependencies": {
"@node-rs/jieba-android-arm-eabi": "1.8.1",
"@node-rs/jieba-android-arm64": "1.8.1",
"@node-rs/jieba-darwin-arm64": "1.8.1",
"@node-rs/jieba-darwin-x64": "1.8.1",
"@node-rs/jieba-freebsd-x64": "1.8.1",
"@node-rs/jieba-linux-arm-gnueabihf": "1.8.1",
"@node-rs/jieba-linux-arm64-gnu": "1.8.1",
"@node-rs/jieba-linux-arm64-musl": "1.8.1",
"@node-rs/jieba-linux-x64-gnu": "1.8.1",
"@node-rs/jieba-linux-x64-musl": "1.8.1",
"@node-rs/jieba-wasm32-wasi": "1.8.1",
"@node-rs/jieba-win32-arm64-msvc": "1.8.1",
"@node-rs/jieba-win32-ia32-msvc": "1.8.1",
"@node-rs/jieba-win32-x64-msvc": "1.8.1"
}
},
"node_modules/@node-rs/jieba-android-arm-eabi": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-android-arm-eabi/-/jieba-android-arm-eabi-1.8.1.tgz",
"integrity": "sha512-LQdmjOJ1yGaDgJJTbmcWetPDJBa0NkgUzTWegxZL/0/fgBm7+8wh/5r/bCGx5H00J/v0gLQ79A6CRLH8D4lC1g==",
"cpu": [
"arm"
],
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-android-arm64": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-android-arm64/-/jieba-android-arm64-1.8.1.tgz",
"integrity": "sha512-F/E95teA/3X+dolC/4ai18An38aneFs6xqaqgfzJwEuvypcOnFGyouhSHh/oEhT69m/RFgPuqG85WgfmKHCBzA==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-darwin-arm64": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-darwin-arm64/-/jieba-darwin-arm64-1.8.1.tgz",
"integrity": "sha512-J5Zt25XlVyXQ8qkxCDLmACoBZYQqmXr4kTLcT16Q1xyN4FXoZe3Aq0QnBkDJLPHk3AngLOicG7FZci6q0LHgow==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-darwin-x64": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-darwin-x64/-/jieba-darwin-x64-1.8.1.tgz",
"integrity": "sha512-cUuEpx6L4rdLSvpIuuoN2UBh0gKjjeQt/xc7TbDWhUEkFh4bn1zvRFksUUMZpNOJ3OKDLk76oVwwA87P9PzlUA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-freebsd-x64": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-freebsd-x64/-/jieba-freebsd-x64-1.8.1.tgz",
"integrity": "sha512-KCVqKTeiP03eGh/Xt/kMG/PAirq3tStw2YNeICKPG4L6rhCKPZc8wBW5+y3zSDsQolWB8+r89ZAJWEN/H1AGRw==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-linux-arm-gnueabihf": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-arm-gnueabihf/-/jieba-linux-arm-gnueabihf-1.8.1.tgz",
"integrity": "sha512-xEbzUrHnoEi6kpeBBNz7ZrJkPPBYasdXuH/RHyN9aoEre9Lg4UUbuRNns1nq7D4o8bV2EsULHsJyvKuS2TifIQ==",
"cpu": [
"arm"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-linux-arm64-gnu": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-arm64-gnu/-/jieba-linux-arm64-gnu-1.8.1.tgz",
"integrity": "sha512-NnUEBTuMrii6KvbsFHNX3IsEdYLHSS4sJHsh/HfgrqJUjWjbjD+QhzmgISCluxOTEGMrZmLsMMBHfN4t/hWMTA==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-linux-arm64-musl": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-arm64-musl/-/jieba-linux-arm64-musl-1.8.1.tgz",
"integrity": "sha512-Tgo8FO18THofQx4ZGUVFbsEHbaQyC3MfiQQFA6vDIGHBQFB4vXgpxJCA9dFCOrmCpsg6Uurrz1G8S+ZVmmvjkQ==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-linux-x64-gnu": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-x64-gnu/-/jieba-linux-x64-gnu-1.8.1.tgz",
"integrity": "sha512-py5StersiDn5DdzWtybzvmicPLGHUykFWebpUu3MkY8vkypA3t//0yFFHMaw8V5ebuPw68iUnYTDcUycVszc7Q==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-linux-x64-musl": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-x64-musl/-/jieba-linux-x64-musl-1.8.1.tgz",
"integrity": "sha512-eW2lwqKrw4DWm51BX+/huWMKgfltJgPWAAnbX05st2Ayw14PFT3rVe9NK4Wc5Pkioux82VUa1w68yxymrYeWWQ==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-wasm32-wasi": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-wasm32-wasi/-/jieba-wasm32-wasi-1.8.1.tgz",
"integrity": "sha512-3qZiNOuuoJia7S/mDslcp2ijK5WpP0zrRsqemJhQWLALXFt08Epfq1o+BQ7nMj176fki+PxmVG+VmS0WzetHAg==",
"cpu": [
"wasm32"
],
"optional": true,
"dependencies": {
"@emnapi/core": "^0.45.0",
"@emnapi/runtime": "^0.45.0"
},
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/@node-rs/jieba-win32-arm64-msvc": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-win32-arm64-msvc/-/jieba-win32-arm64-msvc-1.8.1.tgz",
"integrity": "sha512-gr6i+7Ys2GJTWGXZVhvX/qp4cajQPMw3dhIpx3mwC6Pvcdgm4z6HOyoWoKD77uFmzKROaIoOeFKFIfcF63H3Bw==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-win32-ia32-msvc": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-win32-ia32-msvc/-/jieba-win32-ia32-msvc-1.8.1.tgz",
"integrity": "sha512-i9uIK1E1M2vy7jtLvZRpXbh8enCZJPGn2JDy9NiYkbTR/u8WifSMDjOVEl88iziHp2R7uNiqNsuKexWe+pqXIA==",
"cpu": [
"ia32"
],
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@node-rs/jieba-win32-x64-msvc": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@node-rs/jieba-win32-x64-msvc/-/jieba-win32-x64-msvc-1.8.1.tgz",
"integrity": "sha512-IP2wkDJBLJ3T+P78NkBgy864uAccrG1WtbJclRw3lu0xjLf8TLfn96ScM7GcFQMWh++EsX/L1t000mbWy25jrQ==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@nodelib/fs.scandir": { "node_modules/@nodelib/fs.scandir": {
"version": "2.1.5", "version": "2.1.5",
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",

View File

@@ -28,6 +28,7 @@
"@mdx-js/loader": "^3.0.0", "@mdx-js/loader": "^3.0.0",
"@mdx-js/react": "^3.0.0", "@mdx-js/react": "^3.0.0",
"@next/mdx": "^14.0.1", "@next/mdx": "^14.0.1",
"@node-rs/jieba": "^1.8.1",
"@radix-ui/react-dialog": "^1.0.5", "@radix-ui/react-dialog": "^1.0.5",
"@radix-ui/react-separator": "^1.0.3", "@radix-ui/react-separator": "^1.0.3",
"@radix-ui/react-slot": "^1.0.2", "@radix-ui/react-slot": "^1.0.2",