Updated stylesheet to add totals to tables, updated gulpfile, updated .gitignore, updated web.config to prevent IIS from accessing documents outside public, updated video and search views to remove WEBVTT noise on the fly.

This commit is contained in:
David Ball 2024-04-29 01:22:14 -04:00
parent 9dbe87e166
commit efdd1161eb
9 changed files with 93 additions and 27 deletions

9
.gitignore vendored
View File

@ -136,4 +136,11 @@ dist
iisnode/ iisnode/
# transpiler artifacts # transpiler artifacts
app/TikaClient/build/ app/TikaClient/build/
# cookies
youtube-cookies.txt
# SSL certificates
*.cer
*csr.txt

View File

@ -86,28 +86,28 @@ glob.globSync('pages/**/*.md', {
}); });
}); });
console.log("Scanning for documents to create routes"); // console.log("Scanning for documents to create routes");
glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', { // glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', {
cwd: path.join(__dirname, '..', 'public'), // cwd: path.join(__dirname, '..', 'public'),
matchBase: true, // matchBase: true,
follow: true, // follow: true,
}).forEach((filePath) => { // }).forEach((filePath) => {
const expressRoutePathFromFilePath = (filePath) => { // const expressRoutePathFromFilePath = (filePath) => {
return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); // return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep);
}; // };
const route = expressRoutePathFromFilePath(filePath); // const route = expressRoutePathFromFilePath(filePath);
const fullFilePath = path.join(__dirname, '..', 'public', filePath); // const fullFilePath = path.join(__dirname, '..', 'public', filePath);
let paths = route.split(path.posix.sep); // let paths = route.split(path.posix.sep);
paths[0] = 'public'; // paths[0] = 'public';
console.log(`Setting route for ${route}`); // console.log(`Setting route for ${route}`);
app.get(route, async (req, res) => { // app.get(route, async (req, res) => {
const fm = matter.read(fullFilePath); // const fm = matter.read(fullFilePath);
const fmData = { fm: fm.data, excerpt: fm.excerpt }; // const fmData = { fm: fm.data, excerpt: fm.excerpt };
const content = helpers.md.render(fm.content, fmData ); // const content = helpers.md.render(fm.content, fmData );
const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData }; // const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData };
res.render("page", { h: helpers, ...renderData }); // res.render("page", { h: helpers, ...renderData });
}); // });
}); // });
// Endpoints for all the site's YouTube videos. // Endpoints for all the site's YouTube videos.
console.log("Scanning for archived videos to create routes"); console.log("Scanning for archived videos to create routes");

View File

@ -25,6 +25,32 @@ gulp.task('index:clear', async () => {
}); });
}); });
/**
 * Gulp task `dbfromsolr`: fetches up to 10000 documents from the Solr
 * `select` endpoint and flattens each document's array-valued fields into
 * plain scalars.
 *
 * NOTE(review): the normalized documents are not yet stored or returned
 * anywhere; judging by the task name they are presumably meant to be written
 * to a database — confirm and implement.
 */
gulp.task('dbfromsolr', async () => {
  const result = await request({
    uri: `${solrUrl}/select`,
    qs: {
      q: '*:*',
      wt: 'json',
      rows: 10000,
    },
    json: true,
  });
  // The fields below are joined as arrays, so each is expected to be an array
  // of strings in the Solr response; a missing field will throw here.
  // eslint-disable-next-line no-unused-vars -- see NOTE(review) above
  const docs = result?.response?.docs?.map(
    ({ id, sha256sum, url, content_length, content_type, text, _version_ }) => ({
      id,
      url: url.join(''),
      // Always pass the radix so the value is parsed as a decimal number.
      content_length: Number.parseInt(content_length.join(), 10),
      sha256sum: sha256sum.join(''),
      content_type: content_type.join(''),
      text: text.join(''),
      _version_,
    }),
  );
  // The previous trailing `.map(doc => { })` had an empty callback and turned
  // every normalized document into `undefined`; it has been dropped so `docs`
  // keeps the normalized records for the follow-up persistence step.
});
async function calculateSHA256Hash(filePath) { async function calculateSHA256Hash(filePath) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const readStream = fs.createReadStream(filePath); const readStream = fs.createReadStream(filePath);

View File

@ -8,6 +8,8 @@ const helpers = require('../views/helpers/functions');
router.get('/', (req, res) => { router.get('/', (req, res) => {
// Extract paging parameters from request query parameters // Extract paging parameters from request query parameters
let { q, page = 1, pageSize = 10 } = req.query; let { q, page = 1, pageSize = 10 } = req.query;
// Sanitize query, with particular emphasis on one problem area where soft keyboards are creating fancy quotes but we need basic quotes
q = q.replaceAll(/[“”“”„„‟❝❞〝〞〟"❠⹂🙶🙷🙸]/g, '\"').replaceAll(/[‘’‘’'‚‛❛❜❟]/g, '\'');
if (page instanceof String) page = parseInt(page); if (page instanceof String) page = parseInt(page);
if (pageSize instanceof String) pageSize = parseInt(pageSize); if (pageSize instanceof String) pageSize = parseInt(pageSize);
// Cap at 100 max per page // Cap at 100 max per page

View File

@ -227,4 +227,12 @@ table {
tbody, td, tfoot, th, thead, tr { tbody, td, tfoot, th, thead, tr {
font-family: 'Sometype Mono'; font-family: 'Sometype Mono';
padding: 5pt; padding: 5pt;
}
thead {
border-bottom: 2pt solid #222;
}
.total {
border-top: 5pt double #222;
} }

View File

@ -63,6 +63,23 @@ const printMarkdownFile = ({file}) => {
const printReadme = ({directory}) => { const printReadme = ({directory}) => {
return md.render(fs.readFileSync(resolveReadmeFile({directory})).toString()); return md.render(fs.readFileSync(resolveReadmeFile({directory})).toString());
}; };
/**
 * Strips WebVTT caption noise from a subtitle document and converts it to
 * HTML line breaks for display.
 *
 * Only input that begins with the exact header
 * "WEBVTT\nKind: captions\nLanguage: en\n\n" is processed. Anything else,
 * including non-string values such as `undefined` or `null`, is returned
 * unchanged.
 *
 * @param {string} webvttText - Raw caption text.
 * @returns {string} The cleaned text with every "\n" replaced by "<br/>", or
 *   the original value when it does not start with the expected header.
 */
const stripWebVTT = (webvttText) => {
  const searchHeader = "WEBVTT\nKind: captions\nLanguage: en\n\n";
  // Guard first: calling .startsWith on a non-string would throw a TypeError.
  if (typeof webvttText !== 'string' || !webvttText.startsWith(searchHeader)) {
    return webvttText;
  }

  const cleanedLines = webvttText
    // Remove the header. The offset is length - 1, so the header's final
    // newline is kept and the result starts with a line break.
    .substring(searchHeader.length - 1)
    // Remove the cue alignment/position settings.
    .replaceAll(' align:start position:0%', '')
    .split('\n')
    // Blank out any line carrying an inline <hh:mm:ss.mmm> timestamp tag
    // (the animated subtitles) and trim whitespace on every line. The dot is
    // escaped so only a literal "." separates seconds from milliseconds.
    .map((line) => line.replaceAll(/.*<\d{2}:\d{2}:\d{2}\.\d{3}>.*/g, '').trim())
    .join('\n');

  return cleanedLines
    // Collapse runs of three or more newlines down to exactly two.
    .replaceAll(/\n{3,}/g, '\n\n')
    // Merge two back-to-back cues with identical text, where the second cue
    // starts at the first cue's end time, into one cue spanning both ranges.
    .replaceAll(
      /(\d{2}:\d{2}:\d{2}\.\d{3}) --> (\d{2}:\d{2}:\d{2}\.\d{3})\n(.*)\n\n(\2) --> (\d{2}:\d{2}:\d{2}\.\d{3})\n\3\n/g,
      '$1 --> $5\n$3\n',
    )
    // Convert \n to <br/>.
    .replaceAll('\n', '<br/>');
};
module.exports = { module.exports = {
trimSlashes, trimSlashes,
@ -75,6 +92,7 @@ module.exports = {
shouldOmitLinkOnLastBreadcrumb, shouldOmitLinkOnLastBreadcrumb,
directoryContainsReadme, directoryContainsReadme,
printReadme, printReadme,
stripWebVTT,
md, md,
moment, moment,
}; };

View File

@ -33,7 +33,7 @@
<h5><%= doc.title %></h5> <h5><%= doc.title %></h5>
<% if (highlighting[doc.id] && highlighting[doc.id].text) { %> <% if (highlighting[doc.id] && highlighting[doc.id].text) { %>
<% highlighting[doc.id].text.forEach(snippet => { %> <% highlighting[doc.id].text.forEach(snippet => { %>
<p><%- snippet %></p> <pre><%- h.stripWebVTT(snippet) %></pre>
<% }); %> <% }); %>
<% } else { %> <% } else { %>
<!-- <p>No highlight available.</p> --> <!-- <p>No highlight available.</p> -->

View File

@ -73,7 +73,7 @@
Search feature. Please review the <a href="/search-policy" target="_blank">Search Policy</a> Search feature. Please review the <a href="/search-policy" target="_blank">Search Policy</a>
for details about the site features. for details about the site features.
</p> </p>
<pre style="white-space:pre-wrap;overflow-wrap:anywhere"><%- subtitleVTT %></pre> <pre style="white-space:pre-wrap;overflow-wrap:anywhere"><%- h.stripWebVTT(subtitleVTT) %></pre>
</div> </div>
</div> </div>
<% } %> <% } %>

View File

@ -8,9 +8,9 @@
<rules> <rules>
<rule name="nodejs"> <rule name="nodejs">
<match url="(.*)" /> <match url="(.*)" />
<conditions> <!-- <conditions>
<add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" /> <add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" />
</conditions> </conditions> -->
<action type="Rewrite" url="app/server.js" /> <action type="Rewrite" url="app/server.js" />
</rule> </rule>
</rules> </rules>
@ -22,9 +22,14 @@
<add segment="node_modules" /> <add segment="node_modules" />
<add segment="iisnode" /> <add segment="iisnode" />
</hiddenSegments> </hiddenSegments>
<fileExtensions>
</fileExtensions>
</requestFiltering> </requestFiltering>
</security> </security>
<httpErrors errorMode="Detailed" /> <httpErrors errorMode="Detailed" />
<urlCompression doDynamicCompression="false" /> <urlCompression doDynamicCompression="false" />
<staticContent>
<mimeMap fileExtension=".md" mimeType="text/markdown" />
</staticContent>
</system.webServer> </system.webServer>
</configuration> </configuration>