[fix] fix tokenizer problem

This commit is contained in:
PrinOrange
2024-08-12 10:57:33 +08:00
parent fb87786f2f
commit 8916bd99d2
3 changed files with 93 additions and 54 deletions

View File

@@ -7,10 +7,17 @@ import { getPostFileContent, sortedPosts } from "./post-process";
// The word segmenter has flaws when it is fed arbitrary symbols, so everything
// that is not a CJK/Latin letter or whitespace is blanked out before
// segmentation to prevent repeated recognition.
//
// Characters kept by this pattern (everything else is matched and replaced):
//   - CJK ideographs, Extension A, compatibility ideographs and strokes
//   - Hiragana / Katakana
//   - Hangul syllables, Jamo and compatibility Jamo
//   - ASCII letters plus the Latin-1 / Latin Extended-A / Latin Extended-B ranges
//   - whitespace
// Note that ASCII digits are not in the kept set, so they are blanked out too.
const NonCJKLRecognizeRegex =
  /[^\u4e00-\u9fa5\u3040-\u30ff\uac00-\ud7af\u1100-\u11ff\u3130-\u318f\u31c0-\u31ef\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\u0041-\u005a\u0061-\u007a\u00c0-\u00ff\u0100-\u017f\u0180-\u024f\s ]/g;

/**
 * Splits text into search tokens for the search index.
 *
 * Every character outside the kept set is first replaced by a space, the
 * cleaned text is segmented with `cutForSearch`, and tokens that are empty or
 * whitespace-only are dropped.
 *
 * @param str Raw text to tokenize.
 * @returns The non-blank tokens, in the order the segmenter produced them.
 */
function tokenizer(str: string): string[] {
  const cleaned = str.replace(NonCJKLRecognizeRegex, " ");
  // A single filter pass replaces the former splice-in-a-loop, which re-shifted
  // the array on every removal and mutated the segmenter's result in place.
  // The second argument is passed through unchanged (presumably the
  // segmenter's HMM switch — confirm against the segmenter's API).
  return cutForSearch(cleaned, true).filter((token) => token.trim() !== "");
}
@@ -18,7 +25,7 @@ function makeSearchIndex() {
const startTime = Date.now(); const startTime = Date.now();
let miniSearch = new minisearch({ let miniSearch = new minisearch({
fields: ["id", "title", "tags", "subtitle", "summary", "content"], fields: ["id", "title", "tags", "subtitle", "summary", "content"],
storeFields: ["id", "title", "tags"], storeFields: ["id", "title", "tags", "summary"],
tokenize: tokenizer, tokenize: tokenizer,
}); });
for (let index = 0; index < sortedPosts.allPostList.length; index++) { for (let index = 0; index < sortedPosts.allPostList.length; index++) {
@@ -43,4 +50,4 @@ function makeSearchIndex() {
return miniSearch; return miniSearch;
} }
export const SearchIndex = makeSearchIndex(); export const SearchIndex = Object.freeze(makeSearchIndex());

View File

@@ -10,52 +10,76 @@ import { Config } from "@/data/config";
import { isEmptyString } from "@/lib/utils"; import { isEmptyString } from "@/lib/utils";
import { TSearchResultItem } from "@/types/search-result"; import { TSearchResultItem } from "@/types/search-result";
import axios from "axios"; import axios from "axios";
import { isArray } from "lodash";
import { nanoid } from "nanoid"; import { nanoid } from "nanoid";
import { GetServerSideProps } from "next";
import Link from "next/link"; import Link from "next/link";
import { ChangeEvent, KeyboardEvent, useState } from "react"; import { useRouter } from "next/router";
import { useQuery } from "react-query"; import { ChangeEvent, KeyboardEvent, useEffect, useState } from "react";
export default function SearchPage() { type SearchPageProps = { query: string | null };
const [searchText, setSearchText] = useState<string>("");
export default function SearchPage(props: SearchPageProps) {
const [searchText, setSearchText] = useState<string>(props.query ?? "");
const [searchResult, setSearchResult] = useState<TSearchResultItem[]>([]); const [searchResult, setSearchResult] = useState<TSearchResultItem[]>([]);
const [isLoading, setIsLoading] = useState<boolean>(false);
const { toast } = useToast(); const { toast } = useToast();
const router = useRouter();
const fetchSearchAPI = async (param: string) => { useEffect(() => {
const response = (await axios.get<TSearchResultItem[]>(`/api/search/${param}`)).data; if (!isEmptyString(searchText)) {
return response; handleMakeSearch();
}
}, []);
/**
 * Requests the search results for a keyword from `/api/search/<keyword>`.
 *
 * The keyword is percent-encoded before it is placed in the path, so characters
 * such as `/`, `?`, `#` or `%` stay inside the single keyword segment instead of
 * changing the route or cutting the keyword short.
 * NOTE(review): relies on the API route decoding its path parameter (Next.js
 * dynamic routes do) — confirm against the handler.
 *
 * @param param The keyword typed by the user.
 * @returns A promise resolving to the response body (the list of hits).
 */
const fetchSearchAPI = (param: string): Promise<TSearchResultItem[]> => {
  return axios
    .get<TSearchResultItem[]>(`/api/search/${encodeURIComponent(param)}`)
    .then((response) => response.data);
};
const querySearch = useQuery("searchData", () => fetchSearchAPI(searchText), {
enabled: false,
onSuccess: (data) => {
setSearchResult(data);
if (data.length === 0) {
toast({ title: "Empty Result", description: "No results were found for this keyword. Try another keyword." });
}
},
onError: () => {
toast({ title: "Network Error", description: "Please try it later." });
},
});
// Keeps the `searchText` state in sync with what is typed into the search box.
const handleInputSearchText = (event: ChangeEvent<HTMLInputElement>) => {
  const { value } = event.target;
  setSearchText(value);
};
// Starts a search when the pressed key is "Go" or "Enter"; any other key is ignored.
const handleEnterKeySearch = (event: KeyboardEvent<HTMLInputElement>) => {
  const isSubmitKey = event.key === "Go" || event.key === "Enter";
  if (!isSubmitKey) {
    return;
  }
  handleMakeSearch();
};
/**
 * Validates the current search text and, when it is acceptable, runs a search.
 *
 * Steps:
 *  1. Ignore the call while a previous request is still in flight.
 *  2. Reject empty keywords and keywords shorter than 4 characters with a toast.
 *  3. Write the keyword into the `q` query parameter of the current route.
 *  4. Fetch the results, store them, and toast on an empty result or a failed request.
 */
const handleMakeSearch = () => {
  // The search button is disabled while loading, but the Enter key is not, so
  // guard here to avoid overlapping requests whose responses could arrive out of order.
  if (isLoading) {
    return;
  }

  const searchQuery = searchText;
  if (isEmptyString(searchQuery)) {
    toast({ title: "Enter a Keyword", description: "Please enter one keyword at least." });
    return;
  }
  // The threshold accepts keywords of 4 or more characters; the message states the same number.
  if (searchQuery.length < 4) {
    toast({ title: "Keywords too short", description: "Keyword length must be at least 4." });
    return;
  }

  // Navigation is intentionally not awaited; the search proceeds regardless.
  void router.push({
    pathname: router.pathname,
    query: { ...router.query, q: searchQuery },
  });

  setIsLoading(true);
  fetchSearchAPI(searchQuery)
    .then((data) => {
      setSearchResult(data);
      if (data.length === 0) {
        toast({
          title: "Empty Result",
          description: "No results were found for this keyword. Try another keyword.",
        });
      }
    })
    .catch(() => {
      toast({ title: "Network Error", description: "Please try it later." });
    })
    .finally(() => {
      setIsLoading(false);
    });
};
return ( return (
@@ -64,9 +88,7 @@ export default function SearchPage() {
<Toaster /> <Toaster />
<NavBar /> <NavBar />
<ContentContainer> <ContentContainer>
<h2 className={`my-10 flex justify-center text-2xl font-bold font-fang-zheng-xiao-biao-song`}> <h2 className={`my-10 flex justify-center text-2xl font-bold caption-font`}>{"SEARCH POSTS"}</h2>
{"SEARCH POSTS"}
</h2>
<div className="flex my-10"> <div className="flex my-10">
<Input <Input
className="my-auto py-0" className="my-auto py-0"
@@ -75,36 +97,45 @@ export default function SearchPage() {
placeholder="Input the keyword" placeholder="Input the keyword"
value={searchText} value={searchText}
/> />
<Button className="mx-3 w-32 my-auto" disabled={querySearch.isLoading} onClick={handleMakeSearch}> <Button className="mx-3 w-32 my-auto" disabled={isLoading} onClick={handleMakeSearch}>
{querySearch.isFetching ? "Loading" : "Search"} {isLoading ? "Loading" : "Search"}
</Button> </Button>
</div> </div>
<div className="flex flex-col justify-center"> <div className="flex flex-col justify-center">
<div className={`min-h-full flex flex-col font-source-serif-screen`}> <div className={`min-h-full flex flex-col content-font`}>
{querySearch.isSuccess && {searchResult.map((item, index) => (
searchResult.map((item, index) => ( <Link
<Link className={`p-2 border-t ${index === searchResult.length - 1 && "border-b"} hover:bg-gray-50 dark:hover:bg-gray-900 flex flex-col`}
className={`py-2 px-5 border-t ${ href={`/blog/${item.id}`}
index === searchResult.length - 1 && "border-b" key={nanoid()}
} hover:bg-gray-50 dark:hover:bg-gray-900 flex flex-col`} target="_blank"
href={`/blog/${item.id}`} >
key={nanoid()} <div className="my-1">
target="_blank" <div className="capitalize post-list-caption-font text-md font-bold">{item.title}</div>
> {item.summary && <div>{item.summary}</div>}
<div className="my-1 capitalize">{item.title}</div> </div>
<div className="flex space-x-2 flex-wrap"> <div className="flex space-x-2 flex-wrap">
{item.tags?.map((tagitem) => ( {item.tags?.map((tagitem) => (
<div className="text-sm text-gray-500 dark:text-gray-400" key={nanoid()}> <div className="text-sm text-gray-500 dark:text-gray-400" key={nanoid()}>
{tagitem} {tagitem}
</div> </div>
))} ))}
</div> </div>
</Link> </Link>
))} ))}
</div> </div>
</div> </div>
<div className="text-center my-3 text-gray-500 dark:text-gray-400">
<p className="mx-auto text-sm">{"For search efficiency, only the first 20 results are displayed."}</p>
</div>
</ContentContainer> </ContentContainer>
<Footer /> <Footer />
</Page> </Page>
); );
} }
/**
 * Reads the optional `q` query-string parameter on the server and passes it to
 * the page as the `query` prop.
 *
 * A repeated `q` parameter (array value) is joined with single spaces; a
 * missing parameter becomes `null`.
 */
export const getServerSideProps: GetServerSideProps<SearchPageProps> = async (context) => {
  const rawQuery = context.query.q;
  const query = Array.isArray(rawQuery) ? rawQuery.join(" ") : rawQuery;
  return { props: { query: query ?? null } };
};

View File

@@ -1,5 +1,6 @@
/** One search hit, as received by the search page from the search API. */
export type TSearchResultItem = {
  /** Post identifier; the search page links each hit to `/blog/<id>`. */
  id: string;
  /** Post title shown as the heading of the hit. */
  title: string;
  /** Summary text of the post; may be null, in which case the page renders no summary line. */
  summary: string | null;
  /** Tags of the post; may be null. */
  tags: string[] | null;
};