refactor(search): avoid issue with one character keyword (#447)
* refactor(search): avoid issue with one character keyword
  closes https://github.com/CaiJimmy/hugo-theme-stack/issues/184
* Remove keyword sorts
* fix typo: secion -> section
* fix(search): avoid matching html entity
* Use | operator to concatenate keywords
  Idea from https://github.com/CaiJimmy/hugo-theme-stack/pull/436
* Add missing `matchCount`
* Limit preview length
* Don't add ellipsis to title
* add comment to `processMatches`
* Initialize DOMParser only once
* Remove marker function
* Deal with blank search
* Use const keyword for constant arrays
This commit is contained in:
parent
86cbc1b682
commit
4764a92df3
@ -6,5 +6,7 @@
|
|||||||
"*"
|
"*"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"lib": ["es2020", "dom"],
|
||||||
|
"jsx": "preserve"
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -8,6 +8,11 @@ interface pageData {
|
|||||||
matchCount: number
|
matchCount: number
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface match {
|
||||||
|
start: number,
|
||||||
|
end: number
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Escape HTML tags as HTML entities
|
* Escape HTML tags as HTML entities
|
||||||
* Edited from:
|
* Edited from:
|
||||||
@ -53,77 +58,129 @@ class Search {
|
|||||||
this.bindSearchForm();
|
this.bindSearchForm();
|
||||||
}
|
}
|
||||||
|
|
||||||
private async searchKeywords(keywords: string[]) {
|
/**
|
||||||
const rawData = await this.getData();
|
* Processes search matches
|
||||||
let results: pageData[] = [];
|
* @param str original text
|
||||||
|
* @param matches array of matches
|
||||||
/// Sort keywords by their length
|
* @param ellipsis whether to add ellipsis to the end of each match
|
||||||
keywords.sort((a, b) => {
|
* @param charLimit max length of preview string
|
||||||
return b.length - a.length
|
* @param offset how many characters before and after the match to include in preview
|
||||||
|
* @returns preview string
|
||||||
|
*/
|
||||||
|
private static processMatches(str: string, matches: match[], ellipsis: boolean = true, charLimit = 140, offset = 20): string {
|
||||||
|
matches.sort((a, b) => {
|
||||||
|
return a.start - b.start;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let i = 0,
|
||||||
|
lastIndex = 0,
|
||||||
|
charCount = 0;
|
||||||
|
|
||||||
|
const resultArray: string[] = [];
|
||||||
|
|
||||||
|
while (i < matches.length) {
|
||||||
|
const item = matches[i];
|
||||||
|
|
||||||
|
/// item.start >= lastIndex (equal only for the first iteration)
|
||||||
|
/// because of the while loop that comes after, iterating over variable j
|
||||||
|
|
||||||
|
if (ellipsis && item.start - offset > lastIndex) {
|
||||||
|
resultArray.push(`${replaceHTMLEnt(str.substring(lastIndex, lastIndex + offset))} [...] `);
|
||||||
|
resultArray.push(`${replaceHTMLEnt(str.substring(item.start - offset, item.start))}`);
|
||||||
|
charCount += offset * 2;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/// If the match is too close to the end of last match, don't add ellipsis
|
||||||
|
resultArray.push(replaceHTMLEnt(str.substring(lastIndex, item.start)));
|
||||||
|
charCount += item.start - lastIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
let j = i + 1,
|
||||||
|
end = item.end;
|
||||||
|
|
||||||
|
/// Include as many matches as possible
|
||||||
|
/// [item.start, end] is the range of the match
|
||||||
|
while (j < matches.length && matches[j].start <= end) {
|
||||||
|
end = Math.max(matches[j].end, end);
|
||||||
|
++j;
|
||||||
|
}
|
||||||
|
|
||||||
|
resultArray.push(`<mark>${replaceHTMLEnt(str.substring(item.start, end))}</mark>`);
|
||||||
|
charCount += end - item.start;
|
||||||
|
|
||||||
|
i = j;
|
||||||
|
lastIndex = end;
|
||||||
|
|
||||||
|
if (ellipsis && charCount > charLimit) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add the rest of the string
|
||||||
|
if (lastIndex < str.length) {
|
||||||
|
let end = str.length;
|
||||||
|
if (ellipsis) end = Math.min(end, lastIndex + offset);
|
||||||
|
|
||||||
|
resultArray.push(`${replaceHTMLEnt(str.substring(lastIndex, end))}`);
|
||||||
|
|
||||||
|
if (ellipsis && end != str.length) {
|
||||||
|
resultArray.push(` [...]`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return resultArray.join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
private async searchKeywords(keywords: string[]) {
|
||||||
|
const rawData = await this.getData();
|
||||||
|
const results: pageData[] = [];
|
||||||
|
|
||||||
|
const regex = new RegExp(keywords.filter((v, index, arr) => {
|
||||||
|
arr[index] = escapeRegExp(v);
|
||||||
|
return v.trim() !== '';
|
||||||
|
}).join('|'), 'gi');
|
||||||
|
|
||||||
for (const item of rawData) {
|
for (const item of rawData) {
|
||||||
|
const titleMatches: match[] = [],
|
||||||
|
contentMatches: match[] = [];
|
||||||
|
|
||||||
let result = {
|
let result = {
|
||||||
...item,
|
...item,
|
||||||
preview: '',
|
preview: '',
|
||||||
matchCount: 0
|
matchCount: 0
|
||||||
}
|
}
|
||||||
|
|
||||||
let matched = false;
|
const contentMatchAll = item.content.matchAll(regex);
|
||||||
|
for (const match of Array.from(contentMatchAll)) {
|
||||||
for (const keyword of keywords) {
|
contentMatches.push({
|
||||||
if (keyword === '') continue;
|
start: match.index,
|
||||||
|
end: match.index + match[0].length
|
||||||
const regex = new RegExp(escapeRegExp(replaceHTMLEnt(keyword)), 'gi');
|
|
||||||
|
|
||||||
const contentMatch = regex.exec(result.content);
|
|
||||||
regex.lastIndex = 0; /// Reset regex
|
|
||||||
|
|
||||||
const titleMatch = regex.exec(result.title);
|
|
||||||
regex.lastIndex = 0; /// Reset regex
|
|
||||||
|
|
||||||
if (titleMatch) {
|
|
||||||
result.title = result.title.replace(regex, Search.marker);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (titleMatch || contentMatch) {
|
|
||||||
matched = true;
|
|
||||||
++result.matchCount;
|
|
||||||
|
|
||||||
let start = 0,
|
|
||||||
end = 100;
|
|
||||||
|
|
||||||
if (contentMatch) {
|
|
||||||
start = contentMatch.index - 20;
|
|
||||||
end = contentMatch.index + 80
|
|
||||||
|
|
||||||
if (start < 0) start = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result.preview.indexOf(keyword) !== -1) {
|
|
||||||
result.preview = result.preview.replace(regex, Search.marker);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (start !== 0) result.preview += `[...] `;
|
|
||||||
result.preview += `${result.content.slice(start, end).replace(regex, Search.marker)} `;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (matched) {
|
|
||||||
result.preview += '[...]';
|
|
||||||
results.push(result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Result with more matches appears first */
|
|
||||||
return results.sort((a, b) => {
|
|
||||||
return b.matchCount - a.matchCount;
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public static marker(match) {
|
const titleMatchAll = item.title.matchAll(regex);
|
||||||
return '<mark>' + match + '</mark>';
|
for (const match of Array.from(titleMatchAll)) {
|
||||||
|
titleMatches.push({
|
||||||
|
start: match.index,
|
||||||
|
end: match.index + match[0].length
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (titleMatches.length > 0) result.title = Search.processMatches(result.title, titleMatches, false);
|
||||||
|
if (contentMatches.length > 0) {
|
||||||
|
result.preview = Search.processMatches(result.content, contentMatches);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/// If there are no matches in the content, use the first 140 characters as preview
|
||||||
|
result.preview = replaceHTMLEnt(result.content.substring(0, 140));
|
||||||
|
}
|
||||||
|
|
||||||
|
result.matchCount = titleMatches.length + contentMatches.length;
|
||||||
|
if (result.matchCount > 0) results.push(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result with more matches appears first
|
||||||
|
return results.sort((a, b) => {
|
||||||
|
return b.matchCount - a.matchCount;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private async doSearch(keywords: string[]) {
|
private async doSearch(keywords: string[]) {
|
||||||
@ -150,6 +207,11 @@ class Search {
|
|||||||
/// Not fetched yet
|
/// Not fetched yet
|
||||||
const jsonURL = this.form.dataset.json;
|
const jsonURL = this.form.dataset.json;
|
||||||
this.data = await fetch(jsonURL).then(res => res.json());
|
this.data = await fetch(jsonURL).then(res => res.json());
|
||||||
|
const parser = new DOMParser();
|
||||||
|
|
||||||
|
for (const item of this.data) {
|
||||||
|
item.content = parser.parseFromString(item.content, 'text/html').body.innerText;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return this.data;
|
return this.data;
|
||||||
@ -160,7 +222,7 @@ class Search {
|
|||||||
|
|
||||||
const eventHandler = (e) => {
|
const eventHandler = (e) => {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
const keywords = this.input.value;
|
const keywords = this.input.value.trim();
|
||||||
|
|
||||||
Search.updateQueryString(keywords, true);
|
Search.updateQueryString(keywords, true);
|
||||||
|
|
||||||
@ -225,7 +287,7 @@ class Search {
|
|||||||
<a href={item.permalink}>
|
<a href={item.permalink}>
|
||||||
<div class="article-details">
|
<div class="article-details">
|
||||||
<h2 class="article-title" dangerouslySetInnerHTML={{ __html: item.title }}></h2>
|
<h2 class="article-title" dangerouslySetInnerHTML={{ __html: item.title }}></h2>
|
||||||
<secion class="article-preview" dangerouslySetInnerHTML={{ __html: item.preview }}></secion>
|
<section class="article-preview" dangerouslySetInnerHTML={{ __html: item.preview }}></section>
|
||||||
</div>
|
</div>
|
||||||
{item.image &&
|
{item.image &&
|
||||||
<div class="article-image">
|
<div class="article-image">
|
||||||
|
Loading…
Reference in New Issue
Block a user