diff --git a/.gitignore b/.gitignore index 0c4fa7f4..256fe834 100644 --- a/.gitignore +++ b/.gitignore @@ -136,4 +136,11 @@ dist iisnode/ # transpiler artifacts -app/TikaClient/build/ \ No newline at end of file +app/TikaClient/build/ + +# cookies +youtube-cookies.txt + +# SSL certificates +*.cer +*csr.txt \ No newline at end of file diff --git a/app/server.js b/app/server.js index d7321a2b..9c5eadb6 100644 --- a/app/server.js +++ b/app/server.js @@ -86,28 +86,28 @@ glob.globSync('pages/**/*.md', { }); }); -console.log("Scanning for documents to create routes"); -glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', { - cwd: path.join(__dirname, '..', 'public'), - matchBase: true, - follow: true, -}).forEach((filePath) => { - const expressRoutePathFromFilePath = (filePath) => { - return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); - }; - const route = expressRoutePathFromFilePath(filePath); - const fullFilePath = path.join(__dirname, '..', 'public', filePath); - let paths = route.split(path.posix.sep); - paths[0] = 'public'; - console.log(`Setting route for ${route}`); - app.get(route, async (req, res) => { - const fm = matter.read(fullFilePath); - const fmData = { fm: fm.data, excerpt: fm.excerpt }; - const content = helpers.md.render(fm.content, fmData ); - const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData }; - res.render("page", { h: helpers, ...renderData }); - }); -}); +// console.log("Scanning for documents to create routes"); +// glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', { +// cwd: path.join(__dirname, '..', 'public'), +// matchBase: true, +// follow: true, +// }).forEach((filePath) => { +// const expressRoutePathFromFilePath = (filePath) => { +// return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); +// }; +// const route = expressRoutePathFromFilePath(filePath); +// const fullFilePath = path.join(__dirname, '..', 'public', filePath); +// let paths = route.split(path.posix.sep); +// paths[0] = 'public'; +// console.log(`Setting route for ${route}`); +// app.get(route, async (req, res) => { +// const fm = matter.read(fullFilePath); +// const fmData = { fm: fm.data, excerpt: fm.excerpt }; +// const content = helpers.md.render(fm.content, fmData ); +// const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData }; +// res.render("page", { h: helpers, ...renderData }); +// }); +// }); // Endpoints for all the site's YouTube videos. console.log("Scanning for archived videos to create routes"); diff --git a/gulpfile.js b/gulpfile.js index e74fe44f..53c6a53f 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -25,6 +25,32 @@ gulp.task('index:clear', async () => { }); }); +gulp.task('dbfromsolr', async () => { + let docs = await request({ + uri: `${solrUrl}/select`, + qs: { + q: '*:*', + wt: 'json', + rows: 10000, + }, + json: true + }); + docs = docs?.response?.docs?.map(({id, sha256sum, url, content_length, content_type, text, _version_}) => + { + return { + id, + url: url.join(''), + content_length: parseInt(content_length.join()), + sha256sum: sha256sum.join(''), + content_type: content_type.join(''), + text: text.join(''), + _version_, + } + }).map(doc => { + + }) +}); + async function calculateSHA256Hash(filePath) { return new Promise((resolve, reject) => { const readStream = fs.createReadStream(filePath); diff --git a/routes/search.js b/routes/search.js index 92b3d296..62a73cfa 100644 --- a/routes/search.js +++ b/routes/search.js @@ -8,6 +8,8 @@ const helpers = require('../views/helpers/functions'); router.get('/', (req, res) => { // Extract paging parameters from request query parameters let { q, page = 1, pageSize = 10 } = req.query; + // Sanitize query, with particular emphasis on one problem area where soft keyboards are creating fancy quotes but we need basic quotes + q = q.replaceAll(/[“”“”„„‟❝❞〝〞〟"❠⹂🙶🙷🙸]/g, '\"').replaceAll(/[‘’‘’'‚‛❛❜❟]/g, '\''); if (page instanceof String) page = parseInt(page); if (pageSize instanceof String) pageSize = parseInt(pageSize); // Cap at 100 max per page diff --git a/static/css/nm3clol.css b/static/css/nm3clol.css index ca3b6a35..40a4e92b 100644 --- a/static/css/nm3clol.css +++ b/static/css/nm3clol.css @@ -227,4 +227,12 @@ table { tbody, td, tfoot, th, thead, tr { font-family: 'Sometype Mono'; padding: 5pt; +} + +thead { + border-bottom: 2pt solid #222; +} + +.total { + border-top: 5pt double #222; } \ No newline at end of file diff --git a/views/helpers/functions.js b/views/helpers/functions.js index 98d9ca3f..89b5da4f 100644 --- a/views/helpers/functions.js +++ b/views/helpers/functions.js @@ -63,6 +63,23 @@ const printMarkdownFile = ({file}) => { const printReadme = ({directory}) => { return md.render(fs.readFileSync(resolveReadmeFile({directory})).toString()); }; +const stripWebVTT = (webvttText) => { + const searchHeader = "WEBVTT\nKind: captions\nLanguage: en\n\n"; + if (webvttText.startsWith(searchHeader)) { + webvttText = webvttText.substring(searchHeader.length-1); // remove WEBVTT header + webvttText = webvttText.replaceAll(' align:start position:0%', ''); // remove this align and position junk + webvttText = webvttText + .split('\n') + .map((line) => { return line.replaceAll(/.*<\d{2}:\d{2}:\d{2}.\d{3}>.*/g, '').trim() }) // remove all the animated subtitles and trim the whitespace on each line + .join('\n'); + while (webvttText.indexOf('\n\n\n') > -1) { + webvttText = webvttText.replace('\n\n\n', '\n\n'); // remove every instance of triple vertical white space, while allowing double vertical white space + } + webvttText = webvttText.replaceAll(/(\d{2}:\d{2}:\d{2}.\d{3}) --> (\d{2}:\d{2}:\d{2}.\d{3})\n(.*)\n\n(\2) --> (\d{2}:\d{2}:\d{2}.\d{3})\n\3\n/g, '$1 --> $5\n$3\n'); // remove every duplicate entry detected + webvttText = webvttText.replaceAll('\n', '
'); // convert \n to
+ } + return webvttText; +}; module.exports = { trimSlashes, @@ -75,6 +92,7 @@ module.exports = { shouldOmitLinkOnLastBreadcrumb, directoryContainsReadme, printReadme, + stripWebVTT, md, moment, }; \ No newline at end of file diff --git a/views/search-results.ejs b/views/search-results.ejs index fb25b4e3..233402d4 100644 --- a/views/search-results.ejs +++ b/views/search-results.ejs @@ -33,7 +33,7 @@
<%= doc.title %>
<% if (highlighting[doc.id] && highlighting[doc.id].text) { %> <% highlighting[doc.id].text.forEach(snippet => { %> -

<%- snippet %>

+
<%- h.stripWebVTT(snippet) %>
<% }); %> <% } else { %> diff --git a/views/video-player.ejs b/views/video-player.ejs index 19e64bf3..0836e41a 100644 --- a/views/video-player.ejs +++ b/views/video-player.ejs @@ -73,7 +73,7 @@ Search feature. Please review the Search Policy for details about the site features.

-
<%- subtitleVTT %>
+
<%- h.stripWebVTT(subtitleVTT) %>
<% } %> diff --git a/web.config b/web.config index 6fdec002..73bdd53c 100644 --- a/web.config +++ b/web.config @@ -8,9 +8,9 @@ - + @@ -22,9 +22,14 @@ + + + + + \ No newline at end of file