| 
									
										
										
										
											2024-01-06 16:01:59 +08:00
										 |  |  | import { cutForSearch } from "@node-rs/jieba"; | 
					
						
							| 
									
										
										
										
											2024-01-15 11:44:48 +08:00
										 |  |  | import Colors from "colors"; | 
					
						
							| 
									
										
										
										
											2024-01-06 11:47:18 +08:00
										 |  |  | import minisearch from "minisearch"; | 
					
						
							| 
									
										
										
										
											2024-01-15 11:44:48 +08:00
										 |  |  | import sizeof from "object-sizeof"; | 
					
						
							| 
									
										
										
										
											2024-01-06 11:47:18 +08:00
										 |  |  | import { getPostFileContent, sortedPosts } from "./post-process"; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-08 21:25:34 +08:00
										 |  |  | // Due to the flaws of the word tokenizer,
 | 
					
						
							|  |  |  | // it is necessary to match CJKL symbols only
 | 
					
						
							|  |  |  | // during the word segmentation process to prevent repeated recognition.
 | 
					
						
							| 
									
										
										
										
											2024-08-12 10:57:33 +08:00
										 |  |  | const NonCJKLRecognizeRegex = | 
					
						
							|  |  |  |   /[^\u4e00-\u9fa5\u3040-\u30ff\uac00-\ud7af\u1100-\u11ff\u3130-\u318f\u31c0-\u31ef\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\u0041-\u005a\u0061-\u007a\u00c0-\u00ff\u0100-\u017f\u0180-\u024f\s ]/g; | 
					
						
							| 
									
										
										
										
											2024-01-08 21:25:34 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-06 16:01:59 +08:00
										 |  |  | function tokenizer(str: string) { | 
					
						
							| 
									
										
										
										
											2024-08-12 10:57:33 +08:00
										 |  |  |   const result = cutForSearch(str.replace(NonCJKLRecognizeRegex, " "), true); | 
					
						
							|  |  |  |   for (let i = 0; i < result.length; i++) { | 
					
						
							|  |  |  |     if (result[i].trim() === "") { | 
					
						
							|  |  |  |       result.splice(i, 1); | 
					
						
							|  |  |  |       i--; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2024-01-08 21:25:34 +08:00
										 |  |  |   return result; | 
					
						
							| 
									
										
										
										
											2024-01-06 16:01:59 +08:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2024-01-06 11:47:18 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | function makeSearchIndex() { | 
					
						
							| 
									
										
										
										
											2024-04-03 22:08:27 +08:00
										 |  |  |   const startTime = Date.now(); | 
					
						
							| 
									
										
										
										
											2024-08-14 12:57:22 +08:00
										 |  |  |   const miniSearch = new minisearch({ | 
					
						
							| 
									
										
										
										
											2024-01-06 11:47:18 +08:00
										 |  |  |     fields: ["id", "title", "tags", "subtitle", "summary", "content"], | 
					
						
							| 
									
										
										
										
											2024-08-12 10:57:33 +08:00
										 |  |  |     storeFields: ["id", "title", "tags", "summary"], | 
					
						
							| 
									
										
										
										
											2024-01-06 16:01:59 +08:00
										 |  |  |     tokenize: tokenizer, | 
					
						
							| 
									
										
										
										
											2024-01-06 11:47:18 +08:00
										 |  |  |   }); | 
					
						
							|  |  |  |   for (let index = 0; index < sortedPosts.allPostList.length; index++) { | 
					
						
							|  |  |  |     const post = sortedPosts.allPostList[index]; | 
					
						
							|  |  |  |     const content = getPostFileContent(post.id); | 
					
						
							|  |  |  |     miniSearch.add({ | 
					
						
							|  |  |  |       id: post.id, | 
					
						
							|  |  |  |       title: post.frontMatter.title, | 
					
						
							|  |  |  |       tags: post.frontMatter.tags, | 
					
						
							|  |  |  |       subtitle: post.frontMatter.subtitle, | 
					
						
							|  |  |  |       summary: post.frontMatter.summary, | 
					
						
							|  |  |  |       content: content, | 
					
						
							|  |  |  |     }); | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2024-04-03 22:08:27 +08:00
										 |  |  |   const endTime = Date.now(); | 
					
						
							| 
									
										
										
										
											2024-01-15 11:44:48 +08:00
										 |  |  |   const sizeofIndex = (sizeof(miniSearch) / 1024 ** 2).toFixed(3); | 
					
						
							| 
									
										
										
										
											2024-04-03 22:08:27 +08:00
										 |  |  |   console.log( | 
					
						
							|  |  |  |     Colors.cyan( | 
					
						
							|  |  |  |       `Search index is ready. And the size of index is ${sizeofIndex} mb. And it costs ${(endTime - startTime) / 1000} s.`, | 
					
						
							|  |  |  |     ), | 
					
						
							|  |  |  |   ); | 
					
						
							| 
									
										
										
										
											2024-01-06 11:47:18 +08:00
										 |  |  |   return miniSearch; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-12 10:57:33 +08:00
										 |  |  | export const SearchIndex = Object.freeze(makeSearchIndex()); |