diff --git a/app/helpers/breadcrumbs.mts b/app/helpers/breadcrumbs.mts new file mode 100644 index 00000000..3580bdc4 --- /dev/null +++ b/app/helpers/breadcrumbs.mts @@ -0,0 +1,4 @@ +export interface Breadcrumb { + title: string; + url: string; +} diff --git a/app/helpers/functions.mts b/app/helpers/functions.mts index aabfae5b..a7b3aa62 100644 --- a/app/helpers/functions.mts +++ b/app/helpers/functions.mts @@ -5,6 +5,7 @@ import { config } from '../config.mjs'; import markdownIt from 'markdown-it'; import markdownItAttrs from 'markdown-it-attrs'; import momentJs from 'moment-timezone'; +import { inspect } from 'util'; const moment = momentJs.tz.setDefault("UTC"); const md = markdownIt({ html: true, @@ -189,6 +190,8 @@ export default { printReadme, stripWebVTT, renderArchive, + config, + inspect, md, moment, }; \ No newline at end of file diff --git a/app/models/solr-doc.mts b/app/models/solr-doc.mts deleted file mode 100644 index 5f1c9991..00000000 --- a/app/models/solr-doc.mts +++ /dev/null @@ -1,9 +0,0 @@ -export interface SolrDocument { - id: string; - sha256sum: string; - url: string; - content_length: number; - content_type: string; - text: string; - _version_?: number; -} diff --git a/app/glob-slash.mts b/app/page/glob-slash.mts similarity index 100% rename from app/glob-slash.mts rename to app/page/glob-slash.mts diff --git a/app/page/router.mts b/app/page/router.mts new file mode 100644 index 00000000..5de577a0 --- /dev/null +++ b/app/page/router.mts @@ -0,0 +1,274 @@ +console.log(`Loading nm3clol-express-app directory router module...`); + +import { config } from '../config.mjs'; +import express from 'express'; +import serve from './vercel-serve.mjs'; +import path from 'path'; +import { globSync } from 'glob'; +import matter from 'gray-matter'; +import ejs from 'ejs'; +import helpers from '../helpers/functions.mjs'; +import fs from 'fs'; +import { readFile } from 'fs/promises'; +import { Breadcrumb } from '../helpers/breadcrumbs.mjs'; + +export default function () { + const pageRouter = express.Router(); + + // // Serve static files (CSS, JavaScript, images, etc.) + // app.use(serve('../public', { + // dotfiles: 'ignore', + // index: false, + // })); + + // app.get('/', (req, res) => { + // res.send('Hello World!'); + // }) + + // console.log("Setting route for /ads.txt"); + // app.get('/ads.txt', (req, res) => { + // res.setHeader("Content-Type", "text/plain"); + // res.setHeader("Cache-Control", "no-cache"); + // res.send(`google.com, pub-8937572456576531, DIRECT, f08c47fec0942fa0`); + // }); + + console.log(`Serving /robots.txt from memory.`); + pageRouter.get('/robots.txt', (req, res) => { + res.setHeader("Content-Type", "text/plain"); + res.setHeader("Cache-Control", "no-cache"); + // TODO: Implement Site Map feature and provide sitemap url in robots.txt + res.send( + `User-agent: * + Allow: / + + # TODO: Implement Site Map feature and provide sitemap url in robots.txt + #sitemap: https://no-moss-3-carbo-landfill-library.online/sitemap.xml` + );//end of res.send() for robots.txt + }); + + // Endpoints for all the site's pages. + console.log(`Scanning for pages in ${config.pagesPath} to create routes.`); + globSync('**/*.md', { + cwd: config.pagesPath, + matchBase: true, + follow: true, + }).forEach((filePath) => { + const expressRoutePathFromFilePath = (filePath: string) => { + filePath = filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); + if (!filePath.startsWith('/') && filePath.length > 0) { + filePath = `/${filePath}`; + } + return filePath; + }; + const route = expressRoutePathFromFilePath(filePath); + const fullFilePath = path.join(config.pagesPath, filePath); + let paths = route.split(path.posix.sep); + console.log(`Serving ${route} route as a page at ${fullFilePath}.`); + pageRouter.get(route, async (req, res) => { + const fm = matter.read(fullFilePath); + const fmData = { fm: fm.data, excerpt: fm.excerpt }; + const content = helpers.md.render(fm.content, fmData ); + let breadcrumbs: Breadcrumb[] = []; + paths.forEach((path, index) => { + if (index == 0) { + breadcrumbs.push({ title: config.siteName, url: '/' }); + } + else { + breadcrumbs.push({ title: fmData.fm['title']||path.replaceAll('_', ' ').replaceAll('-', ' '), url: helpers.trimSlashes(helpers.leftTrimFirstDirectory(path)) }); + } + }); + const renderData = { breadcrumbs, content, filePath, fullFilePath, paths, req, route, ...fmData }; + res.render("page", { h: helpers, ...renderData }); + }); + }); + + // console.log("Scanning for documents to create routes."); + // glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', { + // cwd: path.join(config.publicPath), + // matchBase: true, + // follow: true, + // }).forEach((filePath) => { + // const expressRoutePathFromFilePath = (filePath) => { + // return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); + // }; + // const route = expressRoutePathFromFilePath(filePath); + // const fullFilePath = path.join(config.publicPath, filePath); + // let paths = route.split(path.posix.sep); + // paths[0] = 'public'; + // console.log(`Setting route for ${route}`); + // app.get(route, async (req, res) => { + // const fm = matter.read(fullFilePath); + // const fmData = { fm: fm.data, excerpt: fm.excerpt }; + // const content = helpers.md.render(fm.content, fmData ); + // const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData }; + // res.render("page", { h: helpers, ...renderData }); + // }); + // }); + + //TODO: Rewrite this facility so that it utilizes Git index as a filesystem. + console.log("Scanning for web archive HTML documents to create routes."); + globSync('Web_Site_Archives/**/*{.htm,.html}', { + cwd: config.publicPath, + matchBase: true, + follow: true, + }).forEach((filePath) => { + const expressRoutePathFromFilePath = (filePath: string) => { + return '/' + filePath.replaceAll(path.sep, path.posix.sep); + }; + const route = expressRoutePathFromFilePath(filePath); + const fullFilePath = path.join(config.publicPath, filePath); + let paths = route.split(path.posix.sep); + paths[0] = 'public'; + console.log(`Setting route for ${route}`); + pageRouter.get(route, async (req, res) => { + const html = fs.readFileSync(fullFilePath).toString(); + const renderData = { route, filePath, fullFilePath, req, paths, html }; + res.render("archive", { h: helpers, ...renderData }); + }); + }); + + + //TODO: Rewrite this facility so that it utilizes Git index as a filesystem. + console.log("Scanning for archived videos to create routes."); + globSync(['Russell_County/Board_of_Supervisors/YouTube_Archive/**/*.info.json', 'Virginia_Energy/YouTube_Archive/**/*.info.json', 'Virginia_Governor/**/*.info.json'], { + cwd: config.publicPath, + matchBase: true, + follow: true, + }).forEach((filePath: string) => { + const expressRoutePathFromFilePath = (filePath: string) => { + return path.posix.sep+filePath.substring(0, filePath.lastIndexOf(path.sep)).replaceAll(path.sep, path.posix.sep); + }; + const dirFromFilePath = (filePath: string) => { + return filePath.substring(0, filePath.lastIndexOf(path.sep)); + } + const directory = dirFromFilePath(filePath); + let videoURL = ""+globSync("*.{mpg,mpeg,mp4,mkv,webm}", { + cwd: path.join(config.publicPath, directory), + matchBase: true, + follow: true, + }).pop(); + let subtitleURL = ""+globSync("*.en.vtt", { + cwd: path.join(config.publicPath, directory), + matchBase: true, + follow: true, + }).pop(); + let subtitleFile = path.join(config.publicPath, directory, subtitleURL); + const route = encodeURI(expressRoutePathFromFilePath(filePath)); + let paths = filePath + .substring(0, filePath.lastIndexOf(path.sep) > 0 ? filePath.lastIndexOf(path.sep) : filePath.length-1) + .split(path.sep) + .map((name, idx, aPaths) => { + let url = aPaths.slice(0, idx+1).join(path.posix.sep); + return { + name, + url, + }; + }); + const fullFilePath = path.join(config.publicPath, filePath); + console.log(`Setting route for ${route}`); + pageRouter.get(route, async (req, res) => { + if (!req.path.endsWith('/')) { + res.redirect(req.path + '/'); + } + else { + let info = JSON.parse((await readFile(fullFilePath)).toString()); + let subtitleVTT = fs.existsSync(subtitleFile) ? (await readFile(subtitleFile)).toString() : ''; + const renderData = { route, filePath, fullFilePath, req, paths, directory: path.join('public', directory), videoURL, subtitleURL, subtitleVTT, info }; + res.render("video-player", { h: helpers, ...renderData }); + } + }); + }); + + //app.get('/OCR-Encoded-PDFs/Russell-County-Web-Site_2024-02-13_19_50_Modified-With-OCR-Encoding**', rewriter.rewrite('/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding/$1')); + + console.log(`Serving /vendor/**/* route for all files in ${path.join(config.assetsPath, 'vendor')}`);; + pageRouter.get('/vendor/**/*', async (req, res) => { + await serve(req, res, { + public: config.assetsPath, + symlinks: true, + trailingSlash: true, + cleanUrls: false, + renderSingle: false, + unlisted: [ + ".DS_Store", + ".git", + "Thumbs.db", + "README*", + ], + }); + }); + + console.log(`Serving /css/*.css route for all files in ${path.join(config.assetsPath, 'css')}`);; + pageRouter.get('/css/*.css', async (req, res) => { + await serve(req, res, { + public: config.assetsPath, + symlinks: true, + trailingSlash: true, + cleanUrls: false, + renderSingle: false, + unlisted: [ + ".DS_Store", + ".git", + "Thumbs.db", + "README*", + ], + }); + }); + + console.log(`Serving /svg/*.svg route for all files in ${path.join(config.assetsPath, 'svg')}`);; + pageRouter.get('/svg/*.svg', async (req, res) => { + await serve(req, res, { + public: config.assetsPath, + symlinks: true, + trailingSlash: true, + cleanUrls: false, + renderSingle: false, + unlisted: [ + ".DS_Store", + ".git", + "Thumbs.db", + "README*", + ], + }); + }); + + //TODO: Rewrite this facility so that it utilizes Git index as a filesystem. + console.log(`Serving * default route for all files in ${config.publicPath}`);; + pageRouter.get('*', async (req, res) => { + await serve(req, res, { + public: config.publicPath, + symlinks: true, + trailingSlash: true, + cleanUrls: false, + renderSingle: false, + unlisted: [ + ".*", //dot files/folders + "Thumbs.db" + ], + redirects: [ + { + source: "/:year(\d{4})-:mo(\d{2})-:dd(\d{2})_:hh(\d{2})_:mm(\d{2})/", + destination: "/Web_Site_Archives/Russell_County_Web_Site-:year-:mo-:dd_:hh_:mm/" + }, + { + source: "/OCR-Encoded-PDFs", + destination: "/Web_Site_Archives" + }, + { + source: "/OCR-Encoded-PDFs/Russell-County-Web-Site_2024-02-13_19_50_Modified-With-OCR-Encoding.zip", + destination: "/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding.zip" + }, + { + source: "/OCR-Encoded-PDFs/Russell-County-Web-Site_2024-02-13_19_50_Modified-With-OCR-Encoding/:u(.*)", + destination: "/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding:u" + }, + { source: '/YouTube Channel', destination: '/Russell_County/Board_of_Supervisors/YouTube_Archive/@russellcountyvirginia8228' }, + // { source: '/YouTube Channel.zip', destination: '/Russell_County_BOS/YouTube_Channel.zip' }, + // { source: '/YouTube Channel/:u?', destination: '/Russell_County_BOS/YouTube_Channel/:u' }, + { source: '/Project Reclaim [WI19KR9Ogwg].mkv', destination: '/YouTube_Archives/@VADMME/Project Reclaim [WI19KR9Ogwg].mkv' }, + ] + }); + }); + + return pageRouter; +}; \ No newline at end of file diff --git a/app/vercel-serve.mts b/app/page/vercel-serve.mts similarity index 97% rename from app/vercel-serve.mts rename to app/page/vercel-serve.mts index 1dd23a06..1308e953 100644 --- a/app/vercel-serve.mts +++ b/app/page/vercel-serve.mts @@ -19,8 +19,9 @@ import parseRange from 'range-parser'; import { ServerResponse, OutgoingHttpHeader, OutgoingHttpHeaders } from 'http'; import { Request } from 'express'; import ejs from 'ejs'; -import { config } from './config.mjs'; -import helpers from './helpers/functions.mjs'; +import { config } from '../config.mjs'; +import helpers from '../helpers/functions.mjs'; +import { Breadcrumb } from '../helpers/breadcrumbs.mjs'; export interface Dictionary { [Key: string]: T; @@ -168,8 +169,22 @@ export interface ServeErrorTemplateParameters { } export const directoryTemplate = (vals: ServeDirectoryTemplateParameters) => { + let breadcrumbs: Breadcrumb[] = []; + if (vals.paths.length == 1 && helpers.getDirectoryName(vals.paths[0].name)) { + breadcrumbs.push({ title: config.siteWelcomeMessage, url: '/' }); + } + else { + vals.paths.forEach((path, index, paths) => { + if (index == 0) { + breadcrumbs.push({ title: config.siteName, url: '/' }); + } + else { + breadcrumbs.push({ title: helpers.getDirectoryName(path.name).replaceAll('_', ' '), url: `/${path.url}` }); + } + }); + } return new Promise((resolve, reject) => { - ejs.renderFile(path.join(config.viewsPath, 'directory.ejs'), { h: helpers, ...vals }, (err, str) => { + ejs.renderFile(path.join(config.viewsPath, 'directory.ejs'), { breadcrumbs, h: helpers, ...vals }, (err, str) => { if (err) { reject(err); } else { diff --git a/app/routes/search.mts b/app/routes/search.mts deleted file mode 100644 index 2a95bcb2..00000000 --- a/app/routes/search.mts +++ /dev/null @@ -1,171 +0,0 @@ -console.log(`Loading nm3clol-express-app search router module...`); - -import express from 'express'; -import { parse, toString } from 'lucene'; -import { createClient, Query } from 'solr-client'; -import { SearchResponse } from 'solr-client/dist/lib/solr.js'; -import { config } from '../config.mjs'; -import helpers from '../helpers/functions.mjs'; - -interface Dictionary { - [Key: string]: T; -} - -interface Highlight { - text: string[]; -} - -interface WithHighlighting { - highlighting: Dictionary; -} - -const router = express.Router(); - -router.get('/', (req: express.Request, res: express.Response) => { - // Extract paging parameters from request query parameters - let { q = '', page = 1, pageSize = 10 } = req.query; - // Sanitize query, with particular emphasis on one problem area where soft keyboards are creating fancy quotes but we need basic quotes - if (typeof q != "undefined") { - if (typeof q != "string") { - q = (q as string[]).join(' '); - } - q = q?.replaceAll(/[“”“”„„‟❝❞〝〞〟"❠⹂🙶🙷🙸]/g, '\"').replaceAll(/[‘’‘’'‚‛❛❜❟]/g, '\''); - } - if (page instanceof String) page = parseInt(page as string); - if (pageSize instanceof String) pageSize = parseInt(pageSize as string); - // Cap at 100 max per page - pageSize = Math.min(pageSize as number, 100); - // Calculate start offset for pagination - const start = (page as number - 1) * pageSize; - if (!q || (typeof q === 'string' && q.trim() == "")) { - res.render('search-error', { h: helpers, query: q, error: { code: 400, message: 'Search query is required.'} }); - } - else { - // Parse query - let parsedQuery = parse(q); - // Construct a Solr q field query string based on the extracted components - let qQuery = toString(parsedQuery); - // Generate a Solr query based on the query strings and additional parameters - let solrQuery = new Query().df('text').q(qQuery).start(start).rows(10).hl({ - on: true, - q: qQuery, - fl: '*', - snippets: 5, - formatter: 'simple', - simplePre: ``, - simplePost: ``, - highlightMultiTerm: true, - usePhraseHighlighter: true, - }); - // Create a Solr client - const solrClient = createClient({ host: config.solrDocsHost, port: config.solrDocsPort, core: config.solrDocsCore }); - solrClient.search(solrQuery) - .then((solrResponse: SearchResponse|WithHighlighting) => { - const solrResponseAsSearchResponse = solrResponse as SearchResponse; - const solrResponseWithHighlighting = solrResponse as WithHighlighting; - //console.log(require('util').inspect(solrResponse, { showHidden: true, depth: null, colors: true })); - // overcome broken hl simplePre/simplePost implementation - let overrideHighlighting: Dictionary = {}; - Object.keys(solrResponseWithHighlighting.highlighting).forEach((highlight_key: string) => { - overrideHighlighting[highlight_key] = solrResponseWithHighlighting.highlighting[highlight_key]; - if (overrideHighlighting[highlight_key].text && overrideHighlighting[highlight_key].text.length > 0) { - overrideHighlighting[highlight_key].text = overrideHighlighting[highlight_key].text.map( (text) => { - return text.replaceAll("", ``).replaceAll("", "") - }); - } - }); - solrResponseWithHighlighting.highlighting = overrideHighlighting; - // Calculate total number of results (needed for pagination) - const totalResults = solrResponseAsSearchResponse.response.numFound; - // Calculate total number of pages - const totalPages = Math.ceil(totalResults / pageSize); - res.render('search-results', { - h: helpers, - query: qQuery, - page, - pageSize, - totalResults, - totalPages, - solrQuery: solrQuery, - ...solrResponse - }); - // res.render('search-error', { h: helpers, query: sanitizedQuery, error: { code: 400, message: 'Search query is required.'} }); - }) - .catch(error => { - if (typeof error === 'object' && error instanceof Error) { - // check for error from throw new Error(`Request HTTP error ${response.statusCode}: ${text}`) in solr.ts from - // solr-node-client dependency - const detectRequestHttpErrorRegExLit = /^Request HTTP error (?\d{1,3}): (?\{.*\}$)/s; - const detectRequestHttpErrorRegExp = new RegExp(detectRequestHttpErrorRegExLit); - const matchRequestHttpErrorRegExpInError = error.message.match(detectRequestHttpErrorRegExp); - const statusCode = (matchRequestHttpErrorRegExpInError && matchRequestHttpErrorRegExpInError.groups && matchRequestHttpErrorRegExpInError.groups.statusCode); - const text = (matchRequestHttpErrorRegExpInError && matchRequestHttpErrorRegExpInError.groups && matchRequestHttpErrorRegExpInError.groups.text); - if (text) { - let solrRequestHttpInternalError = JSON.parse(text); - error = { - message: "Solr Client Request HTTP Error", - code: statusCode, - innerError: solrRequestHttpInternalError - }; - } - else { - error = { - message: error - }; - } - } - res.render('search-error', { h: helpers, query: qQuery, error }); - }); - } - - // // Sanitize search query to prevent code injection - // try { - // // Validate search query - // if (!query) { - // //return res.status(400).json({ error: 'q parameter is required' }); - // - // } - // else { - // // Send search query to Solr - // const response = await axios.get(solrUrl + '/select', { - // params: { - // q: `text:${sanitizedQuery}`, // Query string with field name - // hl: 'true', - // 'hl.method': 'unified', - // 'hl.fl': '*', - // 'hl.snippets': 5, - // 'hl.tag.pre': '', - // 'hl.tag.post': '', - // 'hl.usePhraseHighlighter': true, - // start, // Start offset for pagination - // rows: 10, // Number of rows to return - // wt: 'json', // Response format (JSON) - // }, - // }); - // - // // Extract search results from Solr response - // const searchResults = response.data.response.docs; - // const highlightedSnippets = response.data.highlighting; - - // // Calculate total number of results (needed for pagination) - // const totalResults = response.data.response.numFound; - - // // Calculate total number of pages - // const totalPages = Math.ceil(totalResults / pageSize); - - // // Send search results as JSON response - // //res.json('search-results', { query, searchResults, highlightedSnippets, page, pageSize, totalResults, totalPages }); - // res.render('search-results', { h: helpers, query: sanitizedQuery, searchResults, highlightedSnippets, page, pageSize, totalResults, totalPages }); - // } - // } catch (error) { - // // console.error('Error searching Solr:', error.message); - // // res.status(500).json({ error: 'Internal server error' }); - // res.render('search-error', { h: helpers, query: sanitizedQuery, error }); - // } -}); - -export default { - router, - // solrUrl, - // sanitizeQuery, -}; \ No newline at end of file diff --git a/app/search/router.mts b/app/search/router.mts new file mode 100644 index 00000000..fa2ba51a --- /dev/null +++ b/app/search/router.mts @@ -0,0 +1,189 @@ +console.log(`Loading nm3clol-express-app search router module...`); + +import express from 'express'; +import { parse, toString } from 'lucene'; +import { createClient, Query } from 'solr-client'; +import { SearchResponse } from 'solr-client/dist/lib/solr.js'; +import { config } from '../config.mjs'; +import helpers from '../helpers/functions.mjs'; +import { Breadcrumb } from '../helpers/breadcrumbs.mjs'; + +interface Dictionary { + [Key: string]: T; +} + +interface Highlight { + text: string[]; +} + +interface WithHighlighting { + highlighting: Dictionary; +} + +export default function () { + const searchRouter = express.Router(); + + searchRouter.get('/', (req: express.Request, res: express.Response) => { + // Extract paging parameters from request query parameters + let { q = '', page = 1, pageSize = 10 } = req.query; + // Sanitize query, with particular emphasis on one problem area where soft keyboards are creating fancy quotes but we need basic quotes + if (typeof q != "undefined") { + if (typeof q != "string") { + q = (q as string[]).join(' '); + } + q = q?.replaceAll(/[“”“”„„‟❝❞〝〞〟"❠⹂🙶🙷🙸]/g, '\"').replaceAll(/[‘’‘’'‚‛❛❜❟]/g, '\''); + } + if (page instanceof String) page = parseInt(page as string); + if (pageSize instanceof String) pageSize = parseInt(pageSize as string); + // Cap at 100 max per page + pageSize = Math.min(pageSize as number, 100); + // Calculate start offset for pagination + const start = (page as number - 1) * pageSize; + if (!q || (typeof q === 'string' && q.trim() == "")) { + // Build breadcrumbs + const breadcrumbs: Breadcrumb[] = [ + { title: `${config.siteName}`, url: '/' }, + { title: `Search Error`, url: req.url } + ]; + // Render ejs page to output + res.render('search-error', { breadcrumbs, h: helpers, query: q, error: { code: 400, message: 'Search query is required.'} }); + } + else { + // Parse query + let parsedQuery = parse(q); + // Construct a Solr q field query string based on the extracted components + let qQuery = toString(parsedQuery); + // Generate a Solr query based on the query strings and additional parameters + let solrQuery = new Query().df('text').q(qQuery).start(start).rows(10).hl({ + on: true, + q: qQuery, + fl: '*', + snippets: 5, + formatter: 'simple', + simplePre: ``, + simplePost: ``, + highlightMultiTerm: true, + usePhraseHighlighter: true, + }); + // Create a Solr client + const solrClient = createClient({ host: config.solrDocsHost, port: config.solrDocsPort, core: config.solrDocsCore }); + solrClient.search(solrQuery) + .then((solrResponse: SearchResponse|WithHighlighting) => { + const solrResponseAsSearchResponse = solrResponse as SearchResponse; + const solrResponseWithHighlighting = solrResponse as WithHighlighting; + //console.log(require('util').inspect(solrResponse, { showHidden: true, depth: null, colors: true })); + // overcome broken hl simplePre/simplePost implementation + let overrideHighlighting: Dictionary = {}; + Object.keys(solrResponseWithHighlighting.highlighting).forEach((highlight_key: string) => { + overrideHighlighting[highlight_key] = solrResponseWithHighlighting.highlighting[highlight_key]; + if (overrideHighlighting[highlight_key].text && overrideHighlighting[highlight_key].text.length > 0) { + overrideHighlighting[highlight_key].text = overrideHighlighting[highlight_key].text.map( (text) => { + return text.replaceAll("", ``).replaceAll("", "") + }); + } + }); + solrResponseWithHighlighting.highlighting = overrideHighlighting; + // Calculate total number of results (needed for pagination) + const totalResults = solrResponseAsSearchResponse.response.numFound; + // Calculate total number of pages + const totalPages = Math.ceil(totalResults / pageSize); + // Build breadcrumbs + let breadcrumbs: Breadcrumb[] = [ + { title: `${config.siteName}`, url: '/' }, + { title: `Search Results for ${qQuery}`, url: req.url } + ]; + // Render ejs page to output + res.render('search-results', { + breadcrumbs, + h: helpers, + query: qQuery, + page, + pageSize, + solrQuery: solrQuery, + totalResults, + totalPages, + ...solrResponse + }); + // res.render('search-error', { h: helpers, query: sanitizedQuery, error: { code: 400, message: 'Search query is required.'} }); + }) + .catch(error => { + if (typeof error === 'object' && error instanceof Error) { + // check for error from throw new Error(`Request HTTP error ${response.statusCode}: ${text}`) in solr.ts from + // solr-node-client dependency + const detectRequestHttpErrorRegExLit = /^Request HTTP error (?\d{1,3}): (?\{.*\}$)/s; + const detectRequestHttpErrorRegExp = new RegExp(detectRequestHttpErrorRegExLit); + const matchRequestHttpErrorRegExpInError = error.message.match(detectRequestHttpErrorRegExp); + const statusCode = (matchRequestHttpErrorRegExpInError && matchRequestHttpErrorRegExpInError.groups && matchRequestHttpErrorRegExpInError.groups.statusCode); + const text = (matchRequestHttpErrorRegExpInError && matchRequestHttpErrorRegExpInError.groups && matchRequestHttpErrorRegExpInError.groups.text); + if (text) { + let solrRequestHttpInternalError = JSON.parse(text); + error = { + message: "Solr Client Request HTTP Error", + code: statusCode, + innerError: solrRequestHttpInternalError + }; + } + else { + error = { + message: error + }; + } + } + // Build breadcrumbs + const breadcrumbs: Breadcrumb[] = [ + { title: `${config.siteName}`, url: '/' }, + { title: `Search Error` + (qQuery ? ` for ${qQuery}` : ``), url: req.url } + ]; + // Render ejs page to output + res.render('search-error', { breadcrumbs, error, h: helpers, query: qQuery}); + }); + } + + // // Sanitize search query to prevent code injection + // try { + // // Validate search query + // if (!query) { + // //return res.status(400).json({ error: 'q parameter is required' }); + // + // } + // else { + // // Send search query to Solr + // const response = await axios.get(solrUrl + '/select', { + // params: { + // q: `text:${sanitizedQuery}`, // Query string with field name + // hl: 'true', + // 'hl.method': 'unified', + // 'hl.fl': '*', + // 'hl.snippets': 5, + // 'hl.tag.pre': '', + // 'hl.tag.post': '', + // 'hl.usePhraseHighlighter': true, + // start, // Start offset for pagination + // rows: 10, // Number of rows to return + // wt: 'json', // Response format (JSON) + // }, + // }); + // + // // Extract search results from Solr response + // const searchResults = response.data.response.docs; + // const highlightedSnippets = response.data.highlighting; + + // // Calculate total number of results (needed for pagination) + // const totalResults = response.data.response.numFound; + + // // Calculate total number of pages + // const totalPages = Math.ceil(totalResults / pageSize); + + // // Send search results as JSON response + // //res.json('search-results', { query, searchResults, highlightedSnippets, page, pageSize, totalResults, totalPages }); + // res.render('search-results', { h: helpers, query: sanitizedQuery, searchResults, highlightedSnippets, page, pageSize, totalResults, totalPages }); + // } + // } catch (error) { + // // console.error('Error searching Solr:', error.message); + // // res.status(500).json({ error: 'Internal server error' }); + // res.render('search-error', { h: helpers, query: sanitizedQuery, error }); + // } + }); + + return searchRouter; +}; \ No newline at end of file diff --git a/app/search/solr-doc.mts b/app/search/solr-doc.mts new file mode 100644 index 00000000..7c10e40a --- /dev/null +++ b/app/search/solr-doc.mts @@ -0,0 +1,25 @@ +/** + * Needed until the conversion is completed. + */ +export interface IncorrectStyleSolrDocument { + id: string; + sha256sum: string[]; + url: string[]; + content_length: number[]; + content_type: string[]; + text: string[]; + _version_?: number; +} + +/** + * Describes Solr full-text search properties for a document file in the public repository. + */ +export interface SolrDocument { + id: string; + sha256sum: string; + url: string; + content_length: number; + content_type: string; + text: string; + _version_?: number; +} diff --git a/app/server.mts b/app/server.mts index a373038c..902105a6 100644 --- a/app/server.mts +++ b/app/server.mts @@ -2,22 +2,12 @@ console.log(`Starting up nm3clol-express-app...`); import express from 'express'; import axios from 'axios'; +import pageRouter from './page/router.mjs'; +import searchRouter from './search/router.mjs'; +import { config } from './config.mjs'; const app = express(); -import serve from './vercel-serve.mjs'; -import path from 'path'; -import { globSync } from 'glob'; -import matter from 'gray-matter'; -import ejs from 'ejs'; -import { config } from './config.mjs'; -import helpers from './helpers/functions.mjs'; -import search from './routes/search.mjs'; -import fs from 'fs'; -import { readFile } from 'fs/promises'; -import { inspect } from 'util'; -// import advancedSearch from '../routes/advanced-search'; - console.log(`Running app configuration:`, config); // Set EJS as the view engine @@ -32,256 +22,14 @@ app.use(express.json()); // Middleware to rewrite requests //app.use(rewriter); -// // Serve static files (CSS, JavaScript, images, etc.) -// app.use(serve('../public', { -// dotfiles: 'ignore', -// index: false, -// })); - -// app.get('/', (req, res) => { -// res.send('Hello World!'); -// }) - -// console.log("Setting route for /ads.txt"); -// app.get('/ads.txt', (req, res) => { -// res.setHeader("Content-Type", "text/plain"); -// res.setHeader("Cache-Control", "no-cache"); -// res.send(`google.com, pub-8937572456576531, DIRECT, f08c47fec0942fa0`); -// }); - -console.log(`Serving /robots.txt from memory.`); -app.get('/robots.txt', (req, res) => { - res.setHeader("Content-Type", "text/plain"); - res.setHeader("Cache-Control", "no-cache"); - // TODO: Implement Site Map feature and provide sitemap url in robots.txt - res.send( -`User-agent: * -Allow: / - -# TODO: Implement Site Map feature and provide sitemap url in robots.txt -#sitemap: https://no-moss-3-carbo-landfill-library.online/sitemap.xml` - );//end of res.send() for robots.txt -}); - // Search endpoints -console.log(`Serving /search using search router.`); -app.use('/search', search.router); +console.log(`Assigning /search route to search router.`); +app.use('/search', searchRouter()); // app.use('/advanced-search', advancedSearch.router); -// Endpoints for all the site's pages. -console.log(`Scanning for pages in ${config.pagesPath} to create routes.`); -globSync('**/*.md', { - cwd: config.pagesPath, - matchBase: true, - follow: true, -}).forEach((filePath) => { - const expressRoutePathFromFilePath = (filePath: string) => { - filePath = filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); - if (!filePath.startsWith('/') && filePath.length > 0) { - filePath = `/${filePath}`; - } - return filePath; - }; - const route = expressRoutePathFromFilePath(filePath); - const fullFilePath = path.join(config.pagesPath, filePath); - let paths = route.split(path.posix.sep); - console.log(`Serving ${route} route as a page at ${fullFilePath}.`); - app.get(route, async (req, res) => { - const fm = matter.read(fullFilePath); - const fmData = { fm: fm.data, excerpt: fm.excerpt }; - const content = helpers.md.render(fm.content, fmData ); - const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData }; - res.render("page", { h: helpers, ...renderData }); - }); -}); - -// console.log("Scanning for documents to create routes."); -// glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', { -// cwd: path.join(config.publicPath), -// matchBase: true, -// follow: true, -// }).forEach((filePath) => { -// const expressRoutePathFromFilePath = (filePath) => { -// return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); -// }; -// const route = expressRoutePathFromFilePath(filePath); -// const fullFilePath = path.join(config.publicPath, filePath); -// let paths = route.split(path.posix.sep); -// paths[0] = 'public'; -// console.log(`Setting route for ${route}`); -// app.get(route, async (req, res) => { -// const fm = matter.read(fullFilePath); -// const fmData = { fm: fm.data, excerpt: fm.excerpt }; -// const content = helpers.md.render(fm.content, fmData ); -// const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData }; -// res.render("page", { h: helpers, ...renderData }); -// }); -// }); - -//TODO: Rewrite this facility so that it utilizes Git index as a filesystem. -console.log("Scanning for web archive HTML documents to create routes."); -globSync('Web_Site_Archives/**/*{.htm,.html}', { - cwd: config.publicPath, - matchBase: true, - follow: true, -}).forEach((filePath) => { - const expressRoutePathFromFilePath = (filePath: string) => { - return '/' + filePath.replaceAll(path.sep, path.posix.sep); - }; - const route = expressRoutePathFromFilePath(filePath); - const fullFilePath = path.join(config.publicPath, filePath); - let paths = route.split(path.posix.sep); - paths[0] = 'public'; - console.log(`Setting route for ${route}`); - app.get(route, async (req, res) => { - const html = fs.readFileSync(fullFilePath).toString(); - const renderData = { route, filePath, fullFilePath, req, paths, html }; - res.render("archive", { h: helpers, ...renderData }); - }); -}); - - -//TODO: Rewrite this facility so that it utilizes Git index as a filesystem. -console.log("Scanning for archived videos to create routes."); -globSync(['Russell_County/Board_of_Supervisors/YouTube_Archive/**/*.info.json', 'Virginia_Energy/YouTube_Archive/**/*.info.json', 'Virginia_Governor/**/*.info.json'], { - cwd: config.publicPath, - matchBase: true, - follow: true, -}).forEach((filePath: string) => { - const expressRoutePathFromFilePath = (filePath: string) => { - return path.posix.sep+filePath.substring(0, filePath.lastIndexOf(path.sep)).replaceAll(path.sep, path.posix.sep); - }; - const dirFromFilePath = (filePath: string) => { - return filePath.substring(0, filePath.lastIndexOf(path.sep)); - } - const directory = dirFromFilePath(filePath); - let videoURL = ""+globSync("*.{mpg,mpeg,mp4,mkv,webm}", { - cwd: path.join(config.publicPath, directory), - matchBase: true, - follow: true, - }).pop(); - let subtitleURL = ""+globSync("*.en.vtt", { - cwd: path.join(config.publicPath, directory), - matchBase: true, - follow: true, - }).pop(); - let subtitleFile = path.join(config.publicPath, directory, subtitleURL); - const route = encodeURI(expressRoutePathFromFilePath(filePath)); - let paths = filePath - .substring(0, filePath.lastIndexOf(path.sep) > 0 ? filePath.lastIndexOf(path.sep) : filePath.length-1) - .split(path.sep) - .map((name, idx, aPaths) => { - let url = aPaths.slice(0, idx+1).join(path.posix.sep); - return { - name, - url, - }; - }); - const fullFilePath = path.join(config.publicPath, filePath); - console.log(`Setting route for ${route}`); - app.get(route, async (req, res) => { - if (!req.path.endsWith('/')) { - res.redirect(req.path + '/'); - } - else { - let info = JSON.parse((await readFile(fullFilePath)).toString()); - let subtitleVTT = fs.existsSync(subtitleFile) ? (await readFile(subtitleFile)).toString() : ''; - const renderData = { inspect, route, filePath, fullFilePath, req, paths, directory: path.join('public', directory), videoURL, subtitleURL, subtitleVTT, info }; - res.render("video-player", { h: helpers, ...renderData }); - } - }); -}); - -//app.get('/OCR-Encoded-PDFs/Russell-County-Web-Site_2024-02-13_19_50_Modified-With-OCR-Encoding**', rewriter.rewrite('/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding/$1')); - -console.log(`Serving /vendor/**/* route for all files in ${path.join(config.assetsPath, 'vendor')}`);; -app.get('/vendor/**/*', async (req, res) => { - await serve(req, res, { - public: config.assetsPath, - symlinks: true, - trailingSlash: true, - cleanUrls: false, - renderSingle: false, - unlisted: [ - ".DS_Store", - ".git", - "Thumbs.db", - "README*", - ], - }); -}); - -console.log(`Serving /css/*.css route for all files in ${path.join(config.assetsPath, 'css')}`);; -app.get('/css/*.css', async (req, res) => { - await serve(req, res, { - public: config.assetsPath, - symlinks: true, - trailingSlash: true, - cleanUrls: false, - renderSingle: false, - unlisted: [ - ".DS_Store", - ".git", - "Thumbs.db", - "README*", - ], - }); -}); - -console.log(`Serving /svg/*.svg route for all files in ${path.join(config.assetsPath, 'svg')}`);; -app.get('/svg/*.svg', async (req, res) => { - await serve(req, res, { - public: config.assetsPath, - symlinks: true, - trailingSlash: true, - cleanUrls: false, - renderSingle: false, - unlisted: [ - ".DS_Store", - ".git", - "Thumbs.db", - "README*", - ], - }); -}); - -//TODO: Rewrite this facility so that it utilizes Git index as a filesystem. -console.log(`Serving * default route for all files in ${config.publicPath}`);; -app.get('*', async (req, res) => { - await serve(req, res, { - public: config.publicPath, - symlinks: true, - trailingSlash: true, - cleanUrls: false, - renderSingle: false, - unlisted: [ - ".*", //dot files/folders - "Thumbs.db" - ], - redirects: [ - { - source: "/:year(\d{4})-:mo(\d{2})-:dd(\d{2})_:hh(\d{2})_:mm(\d{2})/", - destination: "/Web_Site_Archives/Russell_County_Web_Site-:year-:mo-:dd_:hh_:mm/" - }, - { - source: "/OCR-Encoded-PDFs", - destination: "/Web_Site_Archives" - }, - { - source: "/OCR-Encoded-PDFs/Russell-County-Web-Site_2024-02-13_19_50_Modified-With-OCR-Encoding.zip", - destination: "/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding.zip" - }, - { - source: "/OCR-Encoded-PDFs/Russell-County-Web-Site_2024-02-13_19_50_Modified-With-OCR-Encoding/:u(.*)", - destination: "/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding:u" - }, - { source: '/YouTube Channel', destination: '/Russell_County/Board_of_Supervisors/YouTube_Archive/@russellcountyvirginia8228' }, - // { source: '/YouTube Channel.zip', destination: '/Russell_County_BOS/YouTube_Channel.zip' }, - // { source: '/YouTube Channel/:u?', destination: '/Russell_County_BOS/YouTube_Channel/:u' }, - { source: '/Project Reclaim [WI19KR9Ogwg].mkv', destination: '/YouTube_Archives/@VADMME/Project Reclaim [WI19KR9Ogwg].mkv' }, - ] - }); -}); +// Search endpoints +console.log(`Assigning / route to page router.`); +app.use('/', pageRouter()); // Start server app.listen(config.appHttpPort, () => { diff --git a/app/TikaClient/client.mts b/app/tika/client.mts similarity index 100% rename from app/TikaClient/client.mts rename to app/tika/client.mts diff --git a/app/TikaClient/index.mts b/app/tika/index.mts similarity index 100% rename from app/TikaClient/index.mts rename to app/tika/index.mts diff --git a/app/TikaClient/types.mts b/app/tika/types.mts similarity index 100% rename from app/TikaClient/types.mts rename to app/tika/types.mts diff --git a/app/views/directory.ejs b/app/views/directory.ejs index be9660b9..03644cf9 100644 --- a/app/views/directory.ejs +++ b/app/views/directory.ejs @@ -13,25 +13,7 @@
-

- <% paths.forEach(function(value, index) { %> - <% if (h.shouldShowDirectorySeparator(index)) { %> - - <% } %> - <% if (h.shouldShowSiteWelcomeMessage(paths)) { %> -   - <%= h.getSiteWelcomeMessage() %> - <% } else if (h.shouldOmitLinkOnLastBreadcrumb(paths, index)) { %> - <%= h.trimSlashes(value.name).replaceAll('_', ' ') %> - <% } else if (index == 0) { %> - <%= h.getSiteName() %> - <% } else { %> - - <%= h.getDirectoryName(value.name).replaceAll('_', ' ') %> - - <% } %> - <% }); %> -

+ <%- include('./includes/breadcrumbs.ejs') %>
<% if (h.directoryContainsReadme(directory)) {%> @@ -54,12 +36,6 @@
<%- include('./includes/bottom-navbar.ejs') %> - - - - - - - + <%- include('./includes/bottom-scripts.ejs') %> \ No newline at end of file diff --git a/app/views/includes/bottom-scripts.ejs b/app/views/includes/bottom-scripts.ejs new file mode 100644 index 00000000..c0bfd4da --- /dev/null +++ b/app/views/includes/bottom-scripts.ejs @@ -0,0 +1,6 @@ + + + + + + diff --git a/app/views/includes/breadcrumbs.ejs b/app/views/includes/breadcrumbs.ejs new file mode 100644 index 00000000..06e8165d --- /dev/null +++ b/app/views/includes/breadcrumbs.ejs @@ -0,0 +1,12 @@ +

+ <% breadcrumbs.forEach(function(breadcrumb, index) { %> + <% if (index > 0 && index < breadcrumbs.length) { %> + + <% } %> + <% if (index === breadcrumbs.length-1) { %> + <%=breadcrumb.title%> + <% } else { %> + <%=breadcrumb.title%> + <% } %> + <% }) %> +

diff --git a/app/views/page.ejs b/app/views/page.ejs index c9b4b96e..be4d05d1 100644 --- a/app/views/page.ejs +++ b/app/views/page.ejs @@ -13,22 +13,7 @@
-

- <% paths.forEach(function(value, index) { %> - <% if (h.shouldShowDirectorySeparator({index})) { %> - - <% } %> - <% if (h.shouldOmitLinkOnLastBreadcrumb({paths, index})) { %> - <%= (typeof fm.title !== 'undefined') ? `${fm.title}` : value %> - <% } else if (index == 0) { %> - <%= h.getSiteName() %> - <% } else { %> - - <%= (value == 'public' ? h.getSiteName() : value) %> - - <% } %> - <% }) %> -

+ <%- include('./includes/breadcrumbs.ejs') %>
<% if (typeof content !== 'undefined') {%> @@ -57,12 +42,6 @@
<%- include('./includes/bottom-navbar.ejs') %> - - - - - - - + <%- include('./includes/bottom-scripts.ejs') %> \ No newline at end of file diff --git a/app/views/search-error.ejs b/app/views/search-error.ejs index 545a0cec..54adcd00 100644 --- a/app/views/search-error.ejs +++ b/app/views/search-error.ejs @@ -9,11 +9,7 @@ <%- include('./includes/no-trash-svg.ejs') %>
-

- <%=h.getSiteName()%> - - Search Error<% if ((typeof query != undefined) && query != '') { %> for <%- query %><% } %> -

+ <%- include('./includes/breadcrumbs.ejs') %>

@@ -34,12 +30,6 @@

<%- include('./includes/bottom-navbar.ejs') %> - - - - - - - + <%- include('./includes/bottom-scripts.ejs') %> diff --git a/app/views/search-results.ejs b/app/views/search-results.ejs index 88e75d9f..0ac46ae9 100644 --- a/app/views/search-results.ejs +++ b/app/views/search-results.ejs @@ -9,11 +9,7 @@ <%- include('./includes/no-trash-svg.ejs') %>
-

- <%=h.getSiteName()%> - - Search Results for <%- query %> -

+ <%- include('./includes/breadcrumbs.ejs') %>

@@ -72,12 +68,6 @@

<%- include('./includes/bottom-navbar.ejs') %> - - - - - - - + <%- include('./includes/bottom-scripts.ejs') %> diff --git a/app/views/video-player.ejs b/app/views/video-player.ejs index b48bed39..8a0bdda7 100644 --- a/app/views/video-player.ejs +++ b/app/views/video-player.ejs @@ -62,7 +62,7 @@ Video (.<%=info.ext%>) | Subtitles (.vtt)

- + <%}%> @@ -87,12 +87,6 @@
<%- include('./includes/bottom-navbar.ejs') %> - - - - - - - + <%- include('./includes/bottom-scripts.ejs') %> \ No newline at end of file diff --git a/gulpfile.mts b/gulpfile.mts index bb193e98..0691534d 100644 --- a/gulpfile.mts +++ b/gulpfile.mts @@ -6,13 +6,13 @@ import fs, { WriteStream } from 'fs'; import path from 'path'; import crypto from 'crypto'; import url from 'url'; -import { TikaClient } from './app/TikaClient/index.mjs'; +import { TikaClient } from './app/tika/index.mjs'; import { Readable, Writable } from 'stream'; import dotenv from 'dotenv'; import dotenvExpand from 'dotenv-expand'; import process from 'process'; import { config } from './app/config.mjs'; -import { SolrDocument } from './app/models/solr-doc.mjs'; +import { SolrDocument } from './app/search/solr-doc.mjs'; // import ts from 'gulp-typescript'; import run from 'gulp-run'; import { deleteAsync } from 'del';