import { cutForSearch } from "@node-rs/jieba";
import minisearch from "minisearch";
import { getPostFileContent, sortedPosts } from "./post-process";

// Because of flaws in the word segmenter, only segments containing CJKL
// characters are kept during word segmentation, to prevent repeated recognition.
//
// The pattern matches a run of one or more characters from these ranges:
//   \u4E00-\u9FFF  CJK Unified Ideographs
//   \u3040-\u309F  Hiragana
//   \u30A0-\u30FF  Katakana
//   \uAC00-\uD7A3  Hangul Syllables
//   a-zA-Z         ASCII Latin letters
// It is unanchored, so a string matches as soon as it contains at least one
// such character; strings made only of digits, punctuation, or whitespace do not.
//
// NOTE(review): the `g` flag makes this regex stateful (`lastIndex`) when used
// with `test()` or `exec()`. `String.prototype.match` resets that state, so
// prefer `match` (or drop the flag) if this constant gets reused elsewhere.
const CJKLRecognizeRegex = /[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7A3a-zA-Z]+/g;

/**
 * Splits a string into the tokens used by the search index.
 *
 * The text is first segmented with jieba's `cutForSearch` (called with `true`
 * as its second argument). Segments that contain no character matched by
 * `CJKLRecognizeRegex` are then discarded.
 *
 * @param str - Text to segment.
 * @returns The retained segments, in the order the segmenter produced them.
 */
function tokenizer(str: string) {
  const segments = cutForSearch(str, true);
  // `match` with a global regex always scans from the start of the string and
  // returns `null` when nothing matches, so no `lastIndex` state leaks between segments.
  return segments.filter((segment) => segment.match(CJKLRecognizeRegex) !== null);
}

/**
 * Builds a MiniSearch index containing every post in `sortedPosts.allPostList`.
 *
 * Indexed fields: `id`, `title`, `tags`, `subtitle`, `summary`, `content`.
 * Stored fields (kept alongside the index entries): `id`, `title`, `tags`.
 * Field values are split into terms with `tokenizer`.
 *
 * @returns The populated MiniSearch instance.
 */
function makeSearchIndex() {
  const searchIndex = new minisearch({
    fields: ["id", "title", "tags", "subtitle", "summary", "content"],
    storeFields: ["id", "title", "tags"],
    tokenize: tokenizer,
  });

  for (const post of sortedPosts.allPostList) {
    // Fetch the post body first, then read the front matter fields,
    // one document per post.
    const content = getPostFileContent(post.id);
    const { title, tags, subtitle, summary } = post.frontMatter;
    searchIndex.add({ id: post.id, title, tags, subtitle, summary, content });
  }

  return searchIndex;
}

/**
 * Search index over all posts.
 *
 * Built by calling `makeSearchIndex()` when this module is evaluated, so the
 * indexing work happens on first import of the module.
 */
export const SearchIndex = makeSearchIndex();