search improvements

This commit is contained in:
Jacky Zhao 2021-10-24 23:17:13 -07:00
parent 299533a4f4
commit 6fd19069de
1 changed files with 187 additions and 176 deletions

View File

@ -1,208 +1,219 @@
<div id="search-container">
<div>
<input autocomplete="off" id="search-bar" name="search" type="text" aria-label="Search" placeholder="Search for something...">
<div id="search-space">
<input autoComplete="off" id="search-bar" name="search" type="text" aria-label="Search"
placeholder="Search for something...">
<div id="results-container">
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/gh/nextapps-de/flexsearch@0.7.2/dist/flexsearch.bundle.js"></script>
<script>
// code from https://github.com/danestves/markdown-to-text
const removeMarkdown = (
markdown,
options = {
listUnicodeChar: false,
stripListLeaders: true,
gfm: true,
useImgAltText: false,
preserveLinks: false,
}
) => {
let output = markdown || "";
output = output.replace(/^(-\s*?|\*\s*?|_\s*?){3,}\s*$/gm, "");
// code from https://github.com/danestves/markdown-to-text
const removeMarkdown = (
markdown,
options = {
listUnicodeChar: false,
stripListLeaders: true,
gfm: true,
useImgAltText: false,
preserveLinks: false,
}
) => {
let output = markdown || "";
output = output.replace(/^(-\s*?|\*\s*?|_\s*?){3,}\s*$/gm, "");
try {
if (options.stripListLeaders) {
if (options.listUnicodeChar)
output = output.replace(
/^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm,
options.listUnicodeChar + " $1"
);
else output = output.replace(/^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm, "$1");
}
if (options.gfm) {
output = output
.replace(/\n={2,}/g, "\n")
.replace(/~{3}.*\n/g, "")
.replace(/~~/g, "")
.replace(/`{3}.*\n/g, "");
}
if(options.preserveLinks) {
output = output.replace(/\[(.*?)\][\[\(](.*?)[\]\)]/g, "$1 ($2)")
}
output = output
.replace(/<[^>]*>/g, "")
.replace(/^[=\-]{2,}\s*$/g, "")
.replace(/\[\^.+?\](\: .*?$)?/g, "")
.replace(/\s{0,2}\[.*?\]: .*?$/g, "")
.replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, options.useImgAltText ? "$1" : "")
.replace(/\[(.*?)\][\[\(].*?[\]\)]/g, "$1")
.replace(/^\s{0,3}>\s?/g, "")
.replace(/(^|\n)\s{0,3}>\s?/g, "\n\n")
.replace(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/g, "")
.replace(
/^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} {0,}(\n)?\s{0,}$/gm,
"$1$2$3"
)
.replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2")
.replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2")
.replace(/(`{3,})(.*?)\1/gm, "$2")
.replace(/`(.+?)`/g, "$1")
.replace(/\n{2,}/g, "\n\n");
} catch (e) {
console.error(e);
return markdown;
}
return output;
};
try {
if (options.stripListLeaders) {
if (options.listUnicodeChar)
output = output.replace(
/^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm,
options.listUnicodeChar + " $1"
);
else output = output.replace(/^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm, "$1");
}
if (options.gfm) {
output = output
.replace(/\n={2,}/g, "\n")
.replace(/~{3}.*\n/g, "")
.replace(/~~/g, "")
.replace(/`{3}.*\n/g, "");
}
if (options.preserveLinks) {
output = output.replace(/\[(.*?)\][\[\(](.*?)[\]\)]/g, "$1 ($2)")
}
output = output
.replace(/<[^>]*>/g, "")
.replace(/^[=\-]{2,}\s*$/g, "")
.replace(/\[\^.+?\](\: .*?$)?/g, "")
.replace(/\s{0,2}\[.*?\]: .*?$/g, "")
.replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, options.useImgAltText ? "$1" : "")
.replace(/\[(.*?)\][\[\(].*?[\]\)]/g, "$1")
.replace(/^\s{0,3}>\s?/g, "")
.replace(/(^|\n)\s{0,3}>\s?/g, "\n\n")
.replace(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/g, "")
.replace(
/^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} {0,}(\n)?\s{0,}$/gm,
"$1$2$3"
)
.replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2")
.replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, "$2")
.replace(/(`{3,})(.*?)\1/gm, "$2")
.replace(/`(.+?)`/g, "$1")
.replace(/\n{2,}/g, "\n\n");
} catch (e) {
console.error(e);
return markdown;
}
return output;
};
</script>
<script>
const contentIndex = new FlexSearch.Worker({
tokenize: "strict",
charset: "latin:advanced",
context: true,
depth: 3,
cache: 10,
suggest: true,
})
const contentIndex = new FlexSearch.Worker({
tokenize: "reverse",
charset: "latin:extra",
suggest: true,
})
const scrapedContent = {{$.Site.Data.contentIndex}}
for (const [key, value] of Object.entries(scrapedContent)) {
contentIndex.add(key, value.content)
}
const stopwords = ['i','me','my','myself','we','our','ours','ourselves','you','your','yours','yourself','yourselves','he','him','his','himself','she','her','hers','herself','it','its','itself','they','them','their','theirs','themselves','what','which','who','whom','this','that','these','those','am','is','are','was','were','be','been','being','have','has','had','having','do','does','did','doing','a','an','the','and','but','if','or','because','as','until','while','of','at','by','for','with','about','against','between','into','through','during','before','after','above','below','to','from','up','down','in','out','on','off','over','under','again','further','then','once','here','there','when','where','why','how','all','any','both','each','few','more','most','other','some','such','no','nor','not','only','own','same','so','than','too','very','s','t','can','will','just','don','should','now']
const highlight = (content, term) => {
const highlightWindow = 15
const tokenizedTerm = term.split(/\s+/).filter(t => t !== "")
const splitText = content.split(/\s+/).filter(t => t !== "")
const includesCheck = (token) => tokenizedTerm.some(term => token.toLowerCase().includes(term.toLowerCase()))
const occurrencesIndices = splitText
.map(includesCheck)
// calculate best index
let bestSum = 0
let bestIndex = 0
for (let i = 0; i < Math.max(occurrencesIndices.length - highlightWindow, 0); i++) {
const window = occurrencesIndices.slice(i, i + highlightWindow)
const windowSum = window.reduce((total, cur) => total + cur, 0)
if (windowSum > bestSum) {
bestSum = windowSum
bestIndex = i
}
const scrapedContent = {{$.Site.Data.contentIndex}}
for (const [key, value] of Object.entries(scrapedContent)) {
contentIndex.add(key, value.content)
}
const startIndex = Math.max(bestIndex - highlightWindow, 0)
const endIndex = Math.min(startIndex + 2 * highlightWindow, splitText.length)
const mappedText = splitText
.slice(startIndex, endIndex)
.map(token => {
if (includesCheck(token)) {
return `<span class="search-highlight">${token}</span>`
}
return token
})
.join(" ")
.replaceAll('</span> <span class="search-highlight">', " ")
return `${startIndex === 0 ? "" : "..."}${mappedText}${endIndex === splitText.length ? "" : "..."}`
}
const highlight = (content, term) => {
const highlightWindow = 15
const tokenizedTerm = term.split(/\s+/).filter(t => t !== "")
const splitText = content.split(/\s+/).filter(t => t !== "")
const includesCheck = (token) => tokenizedTerm.some(term => token.toLowerCase().includes(term.toLowerCase()))
const resultToHTML = ({url, title, content, term}) => {
const md = content.split("---")[2]
const text = removeMarkdown(md)
const resultTitle = highlight(title, term)
const resultText = highlight(text, term)
return `<div class="result-card" id="${url}">
const occurrencesIndices = splitText
.map(includesCheck)
// calculate best index
let bestSum = 0
let bestIndex = 0
for (let i = 0; i < Math.max(occurrencesIndices.length - highlightWindow, 0); i++) {
const window = occurrencesIndices.slice(i, i + highlightWindow)
const windowSum = window.reduce((total, cur) => total + cur, 0)
if (windowSum >= bestSum) {
bestSum = windowSum
bestIndex = i
}
}
const startIndex = Math.max(bestIndex - highlightWindow, 0)
const endIndex = Math.min(startIndex + 2 * highlightWindow, splitText.length)
const mappedText = splitText
.slice(startIndex, endIndex)
.map(token => {
if (includesCheck(token)) {
return `<span class="search-highlight">${token}</span>`
}
return token
})
.join(" ")
.replaceAll('</span> <span class="search-highlight">', " ")
return `${startIndex === 0 ? "" : "..."}${mappedText}${endIndex === splitText.length ? "" : "..."}`
}
const resultToHTML = ({url, title, content, term}) => {
const md = content.split("---")[2]
const text = removeMarkdown(md)
const resultTitle = highlight(title, term)
const resultText = highlight(text, term)
return `<button class="result-card" id="${url}">
<h3>${resultTitle}</h3>
<p>${resultText}</p>
</div>`
}
</button>`
}
const source = document.getElementById('search-bar')
const results = document.getElementById("results-container")
source.addEventListener('input', (e) => {
const term = e.target.value
contentIndex.search(term, {
limit: 5,
depth: 3,
suggest: true,
}).then(searchResults => {
const resultIds = [...new Set(searchResults)]
const finalResults = resultIds.map(id => ({
url: id,
title: scrapedContent[id].title,
content: scrapedContent[id].content
}))
const source = document.getElementById('search-bar')
const results = document.getElementById("results-container")
let term
source.addEventListener("keyup", (e) => {
if (e.key === "Enter") {
const anchor = document.getElementsByClassName("result-card")[0]
window.location.href = `${anchor.id}#:~:text=${encodeURIComponent(term)}`
}
})
source.addEventListener('input', (e) => {
term = e.target.value
contentIndex.search(term, {
limit: 20,
depth: 3,
suggest: true,
}).then(searchResults => {
const resultIds = [...new Set(searchResults)]
const finalResults = resultIds.map(id => ({
url: id,
title: scrapedContent[id].title,
content: scrapedContent[id].content
}))
// display
if (finalResults.length === 0) {
results.innerHTML = `<div class="result-card">
// display
if (finalResults.length === 0) {
results.innerHTML = `<div class="result-card">
<p>No results.</p>
</div>`
} else {
results.innerHTML = finalResults
.map(result => resultToHTML({
...result,
term,
}))
.join("\n")
const anchors = document.getElementsByClassName("result-card");
[...anchors].forEach(anchor => {
anchor.onclick = () => {
window.location.href = `${anchor.id}#:~:text=${encodeURIComponent(term)}`
}
} else {
results.innerHTML = finalResults
.map(result => resultToHTML({
...result,
term,
}))
.join("\n")
const anchors = document.getElementsByClassName("result-card");
[...anchors].forEach(anchor => {
anchor.onclick = () => {
window.location.href = `${anchor.id}#:~:text=${encodeURIComponent(term)}`
}
})
}
})
}
})
})
const searchContainer = document.getElementById("search-container")
function openSearch() {
if (searchContainer.style.display === "none" || searchContainer.style.display === "") {
source.value = ""
results.innerHTML = ""
searchContainer.style.display = "block"
source.focus()
} else {
searchContainer.style.display = "none"
const searchContainer = document.getElementById("search-container")
function openSearch() {
if (searchContainer.style.display === "none" || searchContainer.style.display === "") {
source.value = ""
results.innerHTML = ""
searchContainer.style.display = "block"
source.focus()
} else {
searchContainer.style.display = "none"
}
}
}
function closeSearch() {
searchContainer.style.display = "none"
}
document.addEventListener('keydown', (event) => {
if (event.key === "/") {
event.preventDefault()
openSearch()
function closeSearch() {
searchContainer.style.display = "none"
}
if (event.key === "Escape") {
event.preventDefault()
closeSearch()
}
})
window.addEventListener('DOMContentLoaded', () => {
const searchButton = document.getElementById("search-icon")
searchButton.addEventListener('click', (evt) => {
openSearch()
document.addEventListener('keydown', (event) => {
if (event.key === "/") {
event.preventDefault()
openSearch()
}
if (event.key === "Escape") {
event.preventDefault()
closeSearch()
}
})
searchButton.addEventListener('keydown', (evt) => {
openSearch()
window.addEventListener('DOMContentLoaded', () => {
const searchButton = document.getElementById("search-icon")
searchButton.addEventListener('click', (evt) => {
openSearch()
})
searchButton.addEventListener('keydown', (evt) => {
openSearch()
})
searchContainer.addEventListener('click', (evt) => {
closeSearch()
})
document.getElementById("search-space").addEventListener('click', (evt) => {
evt.stopPropagation()
})
})
})
</script>