From efdd1161eb6a8953ab74582a5ab20ea10f31f794 Mon Sep 17 00:00:00 2001 From: David Ball Date: Mon, 29 Apr 2024 01:22:14 -0400 Subject: [PATCH] Updated stylesheet to add totals to tables, updated gulpfile, updated .gitignore, updated web.config to prevent IIS from accessing documents outside public, updated video and search views to remove WEBVTT noise on the fly. --- .gitignore | 9 +++++++- app/server.js | 44 +++++++++++++++++++------------------- gulpfile.js | 26 ++++++++++++++++++++++ routes/search.js | 2 ++ static/css/nm3clol.css | 8 +++++++ views/helpers/functions.js | 18 ++++++++++++++++ views/search-results.ejs | 2 +- views/video-player.ejs | 2 +- web.config | 9 ++++++-- 9 files changed, 93 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 0c4fa7f4..256fe834 100644 --- a/.gitignore +++ b/.gitignore @@ -136,4 +136,11 @@ dist iisnode/ # transpiler artifacts -app/TikaClient/build/ \ No newline at end of file +app/TikaClient/build/ + +# cookies +youtube-cookies.txt + +# SSL certificates +*.cer +*csr.txt \ No newline at end of file diff --git a/app/server.js b/app/server.js index d7321a2b..9c5eadb6 100644 --- a/app/server.js +++ b/app/server.js @@ -86,28 +86,28 @@ glob.globSync('pages/**/*.md', { }); }); -console.log("Scanning for documents to create routes"); -glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', { - cwd: path.join(__dirname, '..', 'public'), - matchBase: true, - follow: true, -}).forEach((filePath) => { - const expressRoutePathFromFilePath = (filePath) => { - return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); - }; - const route = expressRoutePathFromFilePath(filePath); - const fullFilePath = path.join(__dirname, '..', 'public', filePath); - let paths = route.split(path.posix.sep); - paths[0] = 'public'; - console.log(`Setting route for ${route}`); - app.get(route, async (req, res) => { - const fm = matter.read(fullFilePath); - const fmData = { fm: fm.data, excerpt: fm.excerpt }; - const content = helpers.md.render(fm.content, fmData ); - const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData }; - res.render("page", { h: helpers, ...renderData }); - }); -}); +// console.log("Scanning for documents to create routes"); +// glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', { +// cwd: path.join(__dirname, '..', 'public'), +// matchBase: true, +// follow: true, +// }).forEach((filePath) => { +// const expressRoutePathFromFilePath = (filePath) => { +// return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); +// }; +// const route = expressRoutePathFromFilePath(filePath); +// const fullFilePath = path.join(__dirname, '..', 'public', filePath); +// let paths = route.split(path.posix.sep); +// paths[0] = 'public'; +// console.log(`Setting route for ${route}`); +// app.get(route, async (req, res) => { +// const fm = matter.read(fullFilePath); +// const fmData = { fm: fm.data, excerpt: fm.excerpt }; +// const content = helpers.md.render(fm.content, fmData ); +// const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData }; +// res.render("page", { h: helpers, ...renderData }); +// }); +// }); // Endpoints for all the site's YouTube videos. console.log("Scanning for archived videos to create routes"); diff --git a/gulpfile.js b/gulpfile.js index e74fe44f..53c6a53f 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -25,6 +25,32 @@ gulp.task('index:clear', async () => { }); }); +gulp.task('dbfromsolr', async () => { + let docs = await request({ + uri: `${solrUrl}/select`, + qs: { + q: '*:*', + wt: 'json', + rows: 10000, + }, + json: true + }); + docs = docs?.response?.docs?.map(({id, sha256sum, url, content_length, content_type, text, _version_}) => + { + return { + id, + url: url.join(''), + content_length: parseInt(content_length.join()), + sha256sum: sha256sum.join(''), + content_type: content_type.join(''), + text: text.join(''), + _version_, + } + }).map(doc => { + + }) +}); + async function calculateSHA256Hash(filePath) { return new Promise((resolve, reject) => { const readStream = fs.createReadStream(filePath); diff --git a/routes/search.js b/routes/search.js index 92b3d296..62a73cfa 100644 --- a/routes/search.js +++ b/routes/search.js @@ -8,6 +8,8 @@ const helpers = require('../views/helpers/functions'); router.get('/', (req, res) => { // Extract paging parameters from request query parameters let { q, page = 1, pageSize = 10 } = req.query; + // Sanitize query, with particular emphasis on one problem area where soft keyboards are creating fancy quotes but we need basic quotes + q = q.replaceAll(/[“”“”„„‟❝❞〝〞〟"❠⹂🙶🙷🙸]/g, '\"').replaceAll(/[‘’‘’'‚‛❛❜❟]/g, '\''); if (page instanceof String) page = parseInt(page); if (pageSize instanceof String) pageSize = parseInt(pageSize); // Cap at 100 max per page diff --git a/static/css/nm3clol.css b/static/css/nm3clol.css index ca3b6a35..40a4e92b 100644 --- a/static/css/nm3clol.css +++ b/static/css/nm3clol.css @@ -227,4 +227,12 @@ table { tbody, td, tfoot, th, thead, tr { font-family: 'Sometype Mono'; padding: 5pt; +} + +thead { + border-bottom: 2pt solid #222; +} + +.total { + border-top: 5pt double #222; } \ No newline at end of file diff --git a/views/helpers/functions.js b/views/helpers/functions.js index 98d9ca3f..89b5da4f 100644 --- a/views/helpers/functions.js +++ b/views/helpers/functions.js @@ -63,6 +63,23 @@ const printMarkdownFile = ({file}) => { const printReadme = ({directory}) => { return md.render(fs.readFileSync(resolveReadmeFile({directory})).toString()); }; +const stripWebVTT = (webvttText) => { + const searchHeader = "WEBVTT\nKind: captions\nLanguage: en\n\n"; + if (webvttText.startsWith(searchHeader)) { + webvttText = webvttText.substring(searchHeader.length-1); // remove WEBVTT header + webvttText = webvttText.replaceAll(' align:start position:0%', ''); // remove this align and position junk + webvttText = webvttText + .split('\n') + .map((line) => { return line.replaceAll(/.*<\d{2}:\d{2}:\d{2}.\d{3}>.*/g, '').trim() }) // remove all the animated subtitles and trim the whitespace on each line + .join('\n'); + while (webvttText.indexOf('\n\n\n') > -1) { + webvttText = webvttText.replace('\n\n\n', '\n\n'); // remove every instance of triple vertical white space, while allowing double vertical white space + } + webvttText = webvttText.replaceAll(/(\d{2}:\d{2}:\d{2}.\d{3}) --> (\d{2}:\d{2}:\d{2}.\d{3})\n(.*)\n\n(\2) --> (\d{2}:\d{2}:\d{2}.\d{3})\n\3\n/g, '$1 --> $5\n$3\n'); // remove every duplicate entry detected + webvttText = webvttText.replaceAll('\n', '
'); // convert \n to
+ } + return webvttText; +}; module.exports = { trimSlashes, @@ -75,6 +92,7 @@ module.exports = { shouldOmitLinkOnLastBreadcrumb, directoryContainsReadme, printReadme, + stripWebVTT, md, moment, }; \ No newline at end of file diff --git a/views/search-results.ejs b/views/search-results.ejs index fb25b4e3..233402d4 100644 --- a/views/search-results.ejs +++ b/views/search-results.ejs @@ -33,7 +33,7 @@
<%= doc.title %>
<% if (highlighting[doc.id] && highlighting[doc.id].text) { %> <% highlighting[doc.id].text.forEach(snippet => { %> -

<%- snippet %>

+
<%- h.stripWebVTT(snippet) %>
<% }); %> <% } else { %> diff --git a/views/video-player.ejs b/views/video-player.ejs index 19e64bf3..0836e41a 100644 --- a/views/video-player.ejs +++ b/views/video-player.ejs @@ -73,7 +73,7 @@ Search feature. Please review the Search Policy for details about the site features.

-
<%- subtitleVTT %>
+
<%- h.stripWebVTT(subtitleVTT) %>
<% } %> diff --git a/web.config b/web.config index 6fdec002..73bdd53c 100644 --- a/web.config +++ b/web.config @@ -8,9 +8,9 @@ - + @@ -22,9 +22,14 @@ + + + + + \ No newline at end of file