search improvements

This commit is contained in:
Jacky Zhao 2021-10-24 23:17:13 -07:00
parent 299533a4f4
commit 6fd19069de
1 changed files with 187 additions and 176 deletions

View File

@ -1,208 +1,219 @@
<div id="search-container"> <div id="search-container">
<div> <div id="search-space">
<input autocomplete="off" id="search-bar" name="search" type="text" aria-label="Search" placeholder="Search for something..."> <input autoComplete="off" id="search-bar" name="search" type="text" aria-label="Search"
placeholder="Search for something...">
<div id="results-container"> <div id="results-container">
</div> </div>
</div> </div>
</div> </div>
<script src="https://cdn.jsdelivr.net/gh/nextapps-de/flexsearch@0.7.2/dist/flexsearch.bundle.js"></script> <script src="https://cdn.jsdelivr.net/gh/nextapps-de/flexsearch@0.7.2/dist/flexsearch.bundle.js"></script>
<script> <script>
// code from https://github.com/danestves/markdown-to-text // code from https://github.com/danestves/markdown-to-text
const removeMarkdown = ( const removeMarkdown = (
markdown, markdown,
options = { options = {
listUnicodeChar: false, listUnicodeChar: false,
stripListLeaders: true, stripListLeaders: true,
gfm: true, gfm: true,
useImgAltText: false, useImgAltText: false,
preserveLinks: false, preserveLinks: false,
} }
) => { ) => {
let output = markdown || ""; let output = markdown || "";
output = output.replace(/^(-\s*?|\*\s*?|_\s*?){3,}\s*$/gm, ""); output = output.replace(/^(-\s*?|\*\s*?|_\s*?){3,}\s*$/gm, "");
try { try {
if (options.stripListLeaders) { if (options.stripListLeaders) {
if (options.listUnicodeChar) if (options.listUnicodeChar)
output = output.replace( output = output.replace(
/^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm, /^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm,
options.listUnicodeChar + " $1" options.listUnicodeChar + " $1"
); );
else output = output.replace(/^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm, "$1"); else output = output.replace(/^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm, "$1");
} }
if (options.gfm) { if (options.gfm) {
output = output output = output
.replace(/\n={2,}/g, "\n") .replace(/\n={2,}/g, "\n")
.replace(/~{3}.*\n/g, "") .replace(/~{3}.*\n/g, "")
.replace(/~~/g, "") .replace(/~~/g, "")
.replace(/`{3}.*\n/g, ""); .replace(/`{3}.*\n/g, "");
} }
if(options.preserveLinks) { if (options.preserveLinks) {
output = output.replace(/\[(.*?)\][\[\(](.*?)[\]\)]/g, "$1 ($2)") output = output.replace(/\[(.*?)\][\[\(](.*?)[\]\)]/g, "$1 ($2)")
} }
output = output output = output
.replace(/<[^>]*>/g, "") .replace(/<[^>]*>/g, "")
.replace(/^[=\-]{2,}\s*$/g, "") .replace(/^[=\-]{2,}\s*$/g, "")
.replace(/\[\^.+?\](\: .*?$)?/g, "") .replace(/\[\^.+?\](\: .*?$)?/g, "")
.replace(/\s{0,2}\[.*?\]: .*?$/g, "") .replace(/\s{0,2}\[.*?\]: .*?$/g, "")
.replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, options.useImgAltText ? "$1" : "") .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, options.useImgAltText ? "$1" : "")
.replace(/\[(.*?)\][\[\(].*?[\]\)]/g, "$1") .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, "$1")
.replace(/^\s{0,3}>\s?/g, "") .replace(/^\s{0,3}>\s?/g, "")
.replace(/(^|\n)\s{0,3}>\s?/g, "\n\n") .replace(/(^|\n)\s{0,3}>\s?/g, "\n\n")
.replace(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/g, "") .replace(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/g, "")
.replace( .replace(
/^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} {0,}(\n)?\s{0,}$/gm, /^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} {0,}(\n)?\s{0,}$/gm,
"$1$2$3" "$1$2$3"
) )
.replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2") .replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2")
.replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2") .replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2")
.replace(/(`{3,})(.*?)\1/gm, "$2") .replace(/(`{3,})(.*?)\1/gm, "$2")
.replace(/`(.+?)`/g, "$1") .replace(/`(.+?)`/g, "$1")
.replace(/\n{2,}/g, "\n\n"); .replace(/\n{2,}/g, "\n\n");
} catch (e) { } catch (e) {
console.error(e); console.error(e);
return markdown; return markdown;
} }
return output; return output;
}; };
</script> </script>
<script> <script>
const contentIndex = new FlexSearch.Worker({ const contentIndex = new FlexSearch.Worker({
tokenize: "strict", tokenize: "reverse",
charset: "latin:advanced", charset: "latin:extra",
context: true, suggest: true,
depth: 3, })
cache: 10,
suggest: true,
})
const scrapedContent = {{$.Site.Data.contentIndex}} const scrapedContent = {{$.Site.Data.contentIndex}}
for (const [key, value] of Object.entries(scrapedContent)) { for (const [key, value] of Object.entries(scrapedContent)) {
contentIndex.add(key, value.content) contentIndex.add(key, value.content)
}
const stopwords = ['i','me','my','myself','we','our','ours','ourselves','you','your','yours','yourself','yourselves','he','him','his','himself','she','her','hers','herself','it','its','itself','they','them','their','theirs','themselves','what','which','who','whom','this','that','these','those','am','is','are','was','were','be','been','being','have','has','had','having','do','does','did','doing','a','an','the','and','but','if','or','because','as','until','while','of','at','by','for','with','about','against','between','into','through','during','before','after','above','below','to','from','up','down','in','out','on','off','over','under','again','further','then','once','here','there','when','where','why','how','all','any','both','each','few','more','most','other','some','such','no','nor','not','only','own','same','so','than','too','very','s','t','can','will','just','don','should','now']
const highlight = (content, term) => {
const highlightWindow = 15
const tokenizedTerm = term.split(/\s+/).filter(t => t !== "")
const splitText = content.split(/\s+/).filter(t => t !== "")
const includesCheck = (token) => tokenizedTerm.some(term => token.toLowerCase().includes(term.toLowerCase()))
const occurrencesIndices = splitText
.map(includesCheck)
// calculate best index
let bestSum = 0
let bestIndex = 0
for (let i = 0; i < Math.max(occurrencesIndices.length - highlightWindow, 0); i++) {
const window = occurrencesIndices.slice(i, i + highlightWindow)
const windowSum = window.reduce((total, cur) => total + cur, 0)
if (windowSum > bestSum) {
bestSum = windowSum
bestIndex = i
}
} }
const startIndex = Math.max(bestIndex - highlightWindow, 0) const highlight = (content, term) => {
const endIndex = Math.min(startIndex + 2 * highlightWindow, splitText.length) const highlightWindow = 15
const mappedText = splitText const tokenizedTerm = term.split(/\s+/).filter(t => t !== "")
.slice(startIndex, endIndex) const splitText = content.split(/\s+/).filter(t => t !== "")
.map(token => { const includesCheck = (token) => tokenizedTerm.some(term => token.toLowerCase().includes(term.toLowerCase()))
if (includesCheck(token)) {
return `<span class="search-highlight">${token}</span>`
}
return token
})
.join(" ")
.replaceAll('</span> <span class="search-highlight">', " ")
return `${startIndex === 0 ? "" : "..."}${mappedText}${endIndex === splitText.length ? "" : "..."}`
}
const resultToHTML = ({url, title, content, term}) => { const occurrencesIndices = splitText
const md = content.split("---")[2] .map(includesCheck)
const text = removeMarkdown(md)
const resultTitle = highlight(title, term) // calculate best index
const resultText = highlight(text, term) let bestSum = 0
return `<div class="result-card" id="${url}"> let bestIndex = 0
for (let i = 0; i < Math.max(occurrencesIndices.length - highlightWindow, 0); i++) {
const window = occurrencesIndices.slice(i, i + highlightWindow)
const windowSum = window.reduce((total, cur) => total + cur, 0)
if (windowSum >= bestSum) {
bestSum = windowSum
bestIndex = i
}
}
const startIndex = Math.max(bestIndex - highlightWindow, 0)
const endIndex = Math.min(startIndex + 2 * highlightWindow, splitText.length)
const mappedText = splitText
.slice(startIndex, endIndex)
.map(token => {
if (includesCheck(token)) {
return `<span class="search-highlight">${token}</span>`
}
return token
})
.join(" ")
.replaceAll('</span> <span class="search-highlight">', " ")
return `${startIndex === 0 ? "" : "..."}${mappedText}${endIndex === splitText.length ? "" : "..."}`
}
const resultToHTML = ({url, title, content, term}) => {
const md = content.split("---")[2]
const text = removeMarkdown(md)
const resultTitle = highlight(title, term)
const resultText = highlight(text, term)
return `<button class="result-card" id="${url}">
<h3>${resultTitle}</h3> <h3>${resultTitle}</h3>
<p>${resultText}</p> <p>${resultText}</p>
</div>` </button>`
} }
const source = document.getElementById('search-bar') const source = document.getElementById('search-bar')
const results = document.getElementById("results-container") const results = document.getElementById("results-container")
source.addEventListener('input', (e) => { let term
const term = e.target.value source.addEventListener("keyup", (e) => {
contentIndex.search(term, { if (e.key === "Enter") {
limit: 5, const anchor = document.getElementsByClassName("result-card")[0]
depth: 3, window.location.href = `${anchor.id}#:~:text=${encodeURIComponent(term)}`
suggest: true, }
}).then(searchResults => { })
const resultIds = [...new Set(searchResults)] source.addEventListener('input', (e) => {
const finalResults = resultIds.map(id => ({ term = e.target.value
url: id, contentIndex.search(term, {
title: scrapedContent[id].title, limit: 20,
content: scrapedContent[id].content depth: 3,
})) suggest: true,
}).then(searchResults => {
const resultIds = [...new Set(searchResults)]
const finalResults = resultIds.map(id => ({
url: id,
title: scrapedContent[id].title,
content: scrapedContent[id].content
}))
// display // display
if (finalResults.length === 0) { if (finalResults.length === 0) {
results.innerHTML = `<div class="result-card"> results.innerHTML = `<div class="result-card">
<p>No results.</p> <p>No results.</p>
</div>` </div>`
} else { } else {
results.innerHTML = finalResults results.innerHTML = finalResults
.map(result => resultToHTML({ .map(result => resultToHTML({
...result, ...result,
term, term,
})) }))
.join("\n") .join("\n")
const anchors = document.getElementsByClassName("result-card"); const anchors = document.getElementsByClassName("result-card");
[...anchors].forEach(anchor => { [...anchors].forEach(anchor => {
anchor.onclick = () => { anchor.onclick = () => {
window.location.href = `${anchor.id}#:~:text=${encodeURIComponent(term)}` window.location.href = `${anchor.id}#:~:text=${encodeURIComponent(term)}`
} }
})
}
}) })
}
}) })
})
const searchContainer = document.getElementById("search-container") const searchContainer = document.getElementById("search-container")
function openSearch() {
if (searchContainer.style.display === "none" || searchContainer.style.display === "") { function openSearch() {
source.value = "" if (searchContainer.style.display === "none" || searchContainer.style.display === "") {
results.innerHTML = "" source.value = ""
searchContainer.style.display = "block" results.innerHTML = ""
source.focus() searchContainer.style.display = "block"
} else { source.focus()
searchContainer.style.display = "none" } else {
searchContainer.style.display = "none"
}
} }
}
function closeSearch() { function closeSearch() {
searchContainer.style.display = "none" searchContainer.style.display = "none"
}
document.addEventListener('keydown', (event) => {
if (event.key === "/") {
event.preventDefault()
openSearch()
} }
if (event.key === "Escape") {
event.preventDefault()
closeSearch()
}
})
window.addEventListener('DOMContentLoaded', () => { document.addEventListener('keydown', (event) => {
const searchButton = document.getElementById("search-icon") if (event.key === "/") {
searchButton.addEventListener('click', (evt) => { event.preventDefault()
openSearch() openSearch()
}
if (event.key === "Escape") {
event.preventDefault()
closeSearch()
}
}) })
searchButton.addEventListener('keydown', (evt) => {
openSearch() window.addEventListener('DOMContentLoaded', () => {
const searchButton = document.getElementById("search-icon")
searchButton.addEventListener('click', (evt) => {
openSearch()
})
searchButton.addEventListener('keydown', (evt) => {
openSearch()
})
searchContainer.addEventListener('click', (evt) => {
closeSearch()
})
document.getElementById("search-space").addEventListener('click', (evt) => {
evt.stopPropagation()
})
}) })
})
</script> </script>