[fix] fix tokenizer problem

This commit is contained in:
PrinOrange
2024-08-12 10:57:33 +08:00
parent fb87786f2f
commit 8916bd99d2
3 changed files with 93 additions and 54 deletions

View File

@@ -7,10 +7,17 @@ import { getPostFileContent, sortedPosts } from "./post-process";
// The word tokenizer has known flaws, so only CJKL characters
// (Chinese, Japanese, Korean and Latin letters) are kept during
// word segmentation; this prevents the same text from being recognized repeatedly.
// Matches runs of one or more characters from: CJK ideographs (U+4E00 to U+9FFF),
// Hiragana, Katakana, Hangul syllables (U+AC00 to U+D7A3) and ASCII letters.
// NOTE(review): with the `g` flag, `.test()` is stateful (`lastIndex` persists between
// calls), so calling it once per item inside a `filter` callback can reject valid tokens.
const CJKLRecognizeRegex = /[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7A3a-zA-Z]+/g;
// Matches each single character that is NOT in the kept set: CJK ideographs (including
// Extension A and compatibility ideographs), Kana, Hangul syllables and Jamo, CJK strokes,
// ASCII letters, Latin-1 Supplement U+00C0 to U+00FF, Latin Extended-A/B, and whitespace.
// ASCII digits are not in the kept set, so they are matched as well.
// NOTE(review): confirm that stripping digits is intended.
const NonCJKLRecognizeRegex =
/[^\u4e00-\u9fa5\u3040-\u30ff\uac00-\ud7af\u1100-\u11ff\u3130-\u318f\u31c0-\u31ef\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\u0041-\u005a\u0061-\u007a\u00c0-\u00ff\u0100-\u017f\u0180-\u024f\s ]/g;
/**
 * Tokenizer handed to the search index (`tokenize: tokenizer`): turns `str` into an
 * array of tokens produced by `cutForSearch`.
 *
 * NOTE(review): `result` is assigned twice below because this view shows two versions of
 * the same statement; presumably the first is the removed line and the second its replacement.
 */
function tokenizer(str: string) {
// Version A: segment the raw string, then keep only tokens for which CJKLRecognizeRegex.test() is true.
const result = cutForSearch(str, true).filter((item) => CJKLRecognizeRegex.test(item));
// Version B: replace every character matched by NonCJKLRecognizeRegex with a space, then segment.
const result = cutForSearch(str.replace(NonCJKLRecognizeRegex, " "), true);
// Remove tokens that are empty or whitespace-only, editing the array in place.
for (let i = 0; i < result.length; i++) {
if (result[i].trim() === "") {
result.splice(i, 1);
// After splice the next element has shifted into index i, so step back to re-check it.
i--;
}
}
return result;
}
@@ -18,7 +25,7 @@ function makeSearchIndex() {
const startTime = Date.now();
let miniSearch = new minisearch({
fields: ["id", "title", "tags", "subtitle", "summary", "content"],
storeFields: ["id", "title", "tags"],
storeFields: ["id", "title", "tags", "summary"],
tokenize: tokenizer,
});
for (let index = 0; index < sortedPosts.allPostList.length; index++) {
@@ -43,4 +50,4 @@ function makeSearchIndex() {
return miniSearch;
}
// The index is built by calling makeSearchIndex() at module top level, i.e. when this module is evaluated.
// NOTE(review): the two lines below are two versions of the same export (presumably old, then new).
// NOTE(review): Object.freeze is shallow; it freezes only the returned object's own properties,
// so any nested structures it holds remain mutable.
export const SearchIndex = makeSearchIndex();
export const SearchIndex = Object.freeze(makeSearchIndex());

View File

@@ -10,52 +10,76 @@ import { Config } from "@/data/config";
import { isEmptyString } from "@/lib/utils";
import { TSearchResultItem } from "@/types/search-result";
import axios from "axios";
import { isArray } from "lodash";
import { nanoid } from "nanoid";
import { GetServerSideProps } from "next";
import Link from "next/link";
import { ChangeEvent, KeyboardEvent, useState } from "react";
import { useQuery } from "react-query";
import { useRouter } from "next/router";
import { ChangeEvent, KeyboardEvent, useEffect, useState } from "react";
export default function SearchPage() {
const [searchText, setSearchText] = useState<string>("");
type SearchPageProps = { query: string | null };
export default function SearchPage(props: SearchPageProps) {
const [searchText, setSearchText] = useState<string>(props.query ?? "");
const [searchResult, setSearchResult] = useState<TSearchResultItem[]>([]);
const [isLoading, setIsLoading] = useState<boolean>(false);
const { toast } = useToast();
const router = useRouter();
const fetchSearchAPI = async (param: string) => {
const response = (await axios.get<TSearchResultItem[]>(`/api/search/${param}`)).data;
return response;
};
const querySearch = useQuery("searchData", () => fetchSearchAPI(searchText), {
enabled: false,
onSuccess: (data) => {
setSearchResult(data);
if (data.length === 0) {
toast({ title: "Empty Result", description: "No results were found for this keyword. Try another keyword." });
useEffect(() => {
if (!isEmptyString(searchText)) {
handleMakeSearch();
}
},
onError: () => {
toast({ title: "Network Error", description: "Please try it later." });
},
});
}, []);
/**
 * Requests search results for `param` from the `/api/search/` endpoint.
 *
 * The keyword is percent-encoded before it is placed in the URL path, so characters
 * such as `/`, `?`, `#` and `%` stay part of the keyword instead of adding a path
 * segment, starting a query string or fragment, or forming a malformed escape.
 *
 * @param param - Raw keyword to search for.
 * @returns A promise resolving to the response body (the list of matching items).
 */
const fetchSearchAPI = (param: string): Promise<TSearchResultItem[]> => {
  return axios
    .get<TSearchResultItem[]>(`/api/search/${encodeURIComponent(param)}`)
    .then((response) => response.data);
};
// Copies the input element's current text into the `searchText` state on every change event.
const handleInputSearchText = ({ target }: ChangeEvent<HTMLInputElement>) => {
  const typedText = target.value;
  setSearchText(typedText);
};
// Keyboard handler typed for an <input> element: starts a search when the pressed key
// is reported as "Enter" or "Go".
// NOTE(review): the `&&` one-liner and the `if` block below express the same check twice;
// presumably they are the removed and the added form of one statement in this diff view.
const handleEnterKeySearch = (event: KeyboardEvent<HTMLInputElement>) => {
(event.key === "Go" || event.key === "Enter") && handleMakeSearch();
if (event.key === "Go" || event.key === "Enter") {
handleMakeSearch();
}
};
// Validates the current `searchText`, writes it to the `q` query parameter of the current
// route, then fetches results into `searchResult`. A toast is shown for empty input,
// too-short input, an empty result list, and a failed request.
// NOTE(review): several statements appear twice (a `searchText` form and a `searchQuery`
// form); presumably these are the removed and added lines of this diff view.
const handleMakeSearch = () => {
if (isEmptyString(searchText)) {
// Snapshot of the state value used for validation, routing and the request below.
const searchQuery = searchText;
if (isEmptyString(searchQuery)) {
toast({ title: "Enter a Keyword", description: "Please enter one keyword at least." });
return;
}
// NOTE(review): the check rejects lengths below 4 (so 4 characters pass), but the toast
// text says "at least 5"; the condition and the message disagree.
if (searchText.length < 4) {
if (searchQuery && searchQuery.length < 4) {
toast({ title: "Keywords too short", description: "Keyword length must be at least 5." });
return;
}
querySearch.refetch();
// Keep the existing query parameters and set/overwrite `q` with the keyword.
router.push({
pathname: router.pathname,
query: { ...router.query, q: searchQuery },
});
setIsLoading(true);
fetchSearchAPI(searchQuery)
.then((data) => {
setSearchResult(data);
if (data.length === 0) {
toast({
title: "Empty Result",
description: "No results were found for this keyword. Try another keyword.",
});
}
})
// Any rejection lands here, so the "Network Error" toast is shown for every kind of failure.
.catch(() => {
toast({ title: "Network Error", description: "Please try it later." });
})
// Runs after success and after failure, so the loading flag is always cleared.
.finally(() => {
setIsLoading(false);
});
};
return (
@@ -64,9 +88,7 @@ export default function SearchPage() {
<Toaster />
<NavBar />
<ContentContainer>
<h2 className={`my-10 flex justify-center text-2xl font-bold font-fang-zheng-xiao-biao-song`}>
{"SEARCH POSTS"}
</h2>
<h2 className={`my-10 flex justify-center text-2xl font-bold caption-font`}>{"SEARCH POSTS"}</h2>
<div className="flex my-10">
<Input
className="my-auto py-0"
@@ -75,23 +97,23 @@ export default function SearchPage() {
placeholder="Input the keyword"
value={searchText}
/>
<Button className="mx-3 w-32 my-auto" disabled={querySearch.isLoading} onClick={handleMakeSearch}>
{querySearch.isFetching ? "Loading" : "Search"}
<Button className="mx-3 w-32 my-auto" disabled={isLoading} onClick={handleMakeSearch}>
{isLoading ? "Loading" : "Search"}
</Button>
</div>
<div className="flex flex-col justify-center">
<div className={`min-h-full flex flex-col font-source-serif-screen`}>
{querySearch.isSuccess &&
searchResult.map((item, index) => (
<div className={`min-h-full flex flex-col content-font`}>
{searchResult.map((item, index) => (
<Link
className={`py-2 px-5 border-t ${
index === searchResult.length - 1 && "border-b"
} hover:bg-gray-50 dark:hover:bg-gray-900 flex flex-col`}
className={`p-2 border-t ${index === searchResult.length - 1 && "border-b"} hover:bg-gray-50 dark:hover:bg-gray-900 flex flex-col`}
href={`/blog/${item.id}`}
key={nanoid()}
target="_blank"
>
<div className="my-1 capitalize">{item.title}</div>
<div className="my-1">
<div className="capitalize post-list-caption-font text-md font-bold">{item.title}</div>
{item.summary && <div>{item.summary}</div>}
</div>
<div className="flex space-x-2 flex-wrap">
{item.tags?.map((tagitem) => (
<div className="text-sm text-gray-500 dark:text-gray-400" key={nanoid()}>
@@ -103,8 +125,17 @@ export default function SearchPage() {
))}
</div>
</div>
<div className="text-center my-3 text-gray-500 dark:text-gray-400">
<p className="mx-auto text-sm">{"For search efficiency, only the first 20 results are displayed."}</p>
</div>
</ContentContainer>
<Footer />
</Page>
);
}
/**
 * Reads the `q` query-string parameter and passes it to the page as `props.query`.
 * When `q` is an array, its values are joined with a single space; when `q` is
 * absent, `query` is `null`.
 */
export const getServerSideProps: GetServerSideProps<SearchPageProps> = async ({ query: urlQuery }) => {
  const rawKeyword = urlQuery.q;
  const keyword = isArray(rawKeyword) ? rawKeyword.join(" ") : rawKeyword;
  return { props: { query: keyword ?? null } };
};

View File

@@ -1,5 +1,6 @@
/** One item of the array the search page requests from `/api/search/...` and renders as a result row. */
export type TSearchResultItem = {
/** Identifier of the hit; the search page links each row to `/blog/${id}`. */
id: string;
/** Title text shown as the row's caption. */
title: string;
/** Summary text rendered under the title when it is truthy; `null` when there is none. */
summary: string | null;
/** Tags rendered under the row; `null` when there are none. */
tags: string[] | null;
};