Updated stylesheet to add totals to tables, updated gulpfile, updated .gitignore, updated web.config to prevent IIS from accessing documents outside public, updated video and search views to remove WEBVTT noise on the fly.

This commit is contained in:
David Ball 2024-04-29 01:22:14 -04:00
parent 9dbe87e166
commit efdd1161eb
9 changed files with 93 additions and 27 deletions

9
.gitignore vendored
View File

@ -136,4 +136,11 @@ dist
iisnode/
# transpiler artifacts
app/TikaClient/build/
app/TikaClient/build/
# cookies
youtube-cookies.txt
# SSL certificates
*.cer
*csr.txt

View File

@ -86,28 +86,28 @@ glob.globSync('pages/**/*.md', {
});
});
console.log("Scanning for documents to create routes");
glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', {
cwd: path.join(__dirname, '..', 'public'),
matchBase: true,
follow: true,
}).forEach((filePath) => {
const expressRoutePathFromFilePath = (filePath) => {
return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep);
};
const route = expressRoutePathFromFilePath(filePath);
const fullFilePath = path.join(__dirname, '..', 'public', filePath);
let paths = route.split(path.posix.sep);
paths[0] = 'public';
console.log(`Setting route for ${route}`);
app.get(route, async (req, res) => {
const fm = matter.read(fullFilePath);
const fmData = { fm: fm.data, excerpt: fm.excerpt };
const content = helpers.md.render(fm.content, fmData );
const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData };
res.render("page", { h: helpers, ...renderData });
});
});
// console.log("Scanning for documents to create routes");
// glob.globSync('**/*{.pdf,.docx,.xlsx,.pptx,.doc,.xls,.ppt}', {
// cwd: path.join(__dirname, '..', 'public'),
// matchBase: true,
// follow: true,
// }).forEach((filePath) => {
// const expressRoutePathFromFilePath = (filePath) => {
// return filePath.substring(0, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep);
// };
// const route = expressRoutePathFromFilePath(filePath);
// const fullFilePath = path.join(__dirname, '..', 'public', filePath);
// let paths = route.split(path.posix.sep);
// paths[0] = 'public';
// console.log(`Setting route for ${route}`);
// app.get(route, async (req, res) => {
// const fm = matter.read(fullFilePath);
// const fmData = { fm: fm.data, excerpt: fm.excerpt };
// const content = helpers.md.render(fm.content, fmData );
// const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData };
// res.render("page", { h: helpers, ...renderData });
// });
// });
// Endpoints for all the site's YouTube videos.
console.log("Scanning for archived videos to create routes");

View File

@ -25,6 +25,32 @@ gulp.task('index:clear', async () => {
});
});
/**
 * Gulp task `dbfromsolr`: fetches up to 10000 documents from the Solr
 * `select` endpoint and flattens each document's multi-valued (array)
 * fields into plain scalar values.
 *
 * NOTE(review): the normalised `docs` array is built but not yet written
 * anywhere — presumably a database import step is still to be added.
 */
gulp.task('dbfromsolr', async () => {
  const solrResponse = await request({
    uri: `${solrUrl}/select`,
    qs: {
      q: '*:*', // match every indexed document
      wt: 'json',
      rows: 10000,
    },
    json: true,
  });
  // Solr returns these fields as arrays; collapse each into a single value.
  // The previous trailing `.map(doc => { })` had an empty callback and thereby
  // replaced every normalised document with `undefined`; it has been dropped.
  // eslint-disable-next-line no-unused-vars
  const docs = solrResponse?.response?.docs?.map(
    ({ id, sha256sum, url, content_length, content_type, text, _version_ }) => ({
      id,
      url: url.join(''),
      // Explicit radix so the length is always parsed as base 10.
      content_length: Number.parseInt(content_length.join(), 10),
      sha256sum: sha256sum.join(''),
      content_type: content_type.join(''),
      text: text.join(''),
      _version_,
    }),
  );
  // TODO: persist `docs` once the import target is defined.
});
async function calculateSHA256Hash(filePath) {
return new Promise((resolve, reject) => {
const readStream = fs.createReadStream(filePath);

View File

@ -8,6 +8,8 @@ const helpers = require('../views/helpers/functions');
router.get('/', (req, res) => {
// Extract paging parameters from request query parameters
let { q, page = 1, pageSize = 10 } = req.query;
// Sanitize query, with particular emphasis on one problem area where soft keyboards are creating fancy quotes but we need basic quotes
q = q.replaceAll(/[“”“”„„‟❝❞〝〞〟"❠⹂🙶🙷🙸]/g, '\"').replaceAll(/[‘’‘’'‚‛❛❜❟]/g, '\'');
if (page instanceof String) page = parseInt(page);
if (pageSize instanceof String) pageSize = parseInt(pageSize);
// Cap at 100 max per page

View File

@ -227,4 +227,12 @@ table {
tbody, td, tfoot, th, thead, tr {
font-family: 'Sometype Mono';
padding: 5pt;
}
thead {
border-bottom: 2pt solid #222;
}
.total {
border-top: 5pt double #222;
}

View File

@ -63,6 +63,23 @@ const printMarkdownFile = ({file}) => {
const printReadme = ({directory}) => {
return md.render(fs.readFileSync(resolveReadmeFile({directory})).toString());
};
/**
 * Reduces a WebVTT caption document to compact HTML for inline display.
 *
 * Only text that begins with the exact header
 * "WEBVTT\nKind: captions\nLanguage: en\n\n" is processed; any other value
 * (including non-strings such as `null`/`undefined`) is returned unchanged.
 *
 * Processing steps:
 *  1. drop the header,
 *  2. drop the " align:start position:0%" cue settings,
 *  3. blank out lines carrying inline `<hh:mm:ss.mmm>` timestamps and trim
 *     every line,
 *  4. collapse runs of three or more newlines down to two,
 *  5. merge two consecutive cues that share the same text and whose
 *     end/start timestamps meet,
 *  6. convert newlines to `<br/>`.
 *
 * @param {string} webvttText - Raw WebVTT text.
 * @returns {string} HTML-ready caption text, or the input unchanged when it
 *   is not a string or lacks the expected header.
 */
const stripWebVTT = (webvttText) => {
  const searchHeader = "WEBVTT\nKind: captions\nLanguage: en\n\n";
  // Guard clause: previously a non-string argument threw on `.startsWith`.
  if (typeof webvttText !== 'string' || !webvttText.startsWith(searchHeader)) {
    return webvttText;
  }
  const cleaned = webvttText
    // NOTE(review): `length - 1` keeps the header's final newline, so the
    // result starts with "<br/>". Preserved to avoid changing rendered
    // output — confirm whether the leading break is intended.
    .substring(searchHeader.length - 1)
    .replaceAll(' align:start position:0%', '')
    .split('\n')
    // The "." before the milliseconds is escaped so only real timestamps
    // match (unescaped, it matched any character).
    .map((line) => line.replaceAll(/.*<\d{2}:\d{2}:\d{2}\.\d{3}>.*/g, '').trim())
    .join('\n')
    // Allow at most one blank line between cues.
    .replaceAll(/\n{3,}/g, '\n\n')
    // A cue immediately followed by one with identical text, starting where
    // the first ended, becomes a single cue spanning both time ranges.
    .replaceAll(
      /(\d{2}:\d{2}:\d{2}\.\d{3}) --> (\d{2}:\d{2}:\d{2}\.\d{3})\n(.*)\n\n\2 --> (\d{2}:\d{2}:\d{2}\.\d{3})\n\3\n/g,
      '$1 --> $4\n$3\n',
    );
  return cleaned.replaceAll('\n', '<br/>');
};
module.exports = {
trimSlashes,
@ -75,6 +92,7 @@ module.exports = {
shouldOmitLinkOnLastBreadcrumb,
directoryContainsReadme,
printReadme,
stripWebVTT,
md,
moment,
};

View File

@ -33,7 +33,7 @@
<h5><%= doc.title %></h5>
<% if (highlighting[doc.id] && highlighting[doc.id].text) { %>
<% highlighting[doc.id].text.forEach(snippet => { %>
<p><%- snippet %></p>
<pre><%- h.stripWebVTT(snippet) %></pre>
<% }); %>
<% } else { %>
<!-- <p>No highlight available.</p> -->

View File

@ -73,7 +73,7 @@
Search feature. Please review the <a href="/search-policy" target="_blank">Search Policy</a>
for details about the site features.
</p>
<pre style="white-space:pre-wrap;overflow-wrap:anywhere"><%- subtitleVTT %></pre>
<pre style="white-space:pre-wrap;overflow-wrap:anywhere"><%- h.stripWebVTT(subtitleVTT) %></pre>
</div>
</div>
<% } %>

View File

@ -8,9 +8,9 @@
<rules>
<rule name="nodejs">
<match url="(.*)" />
<conditions>
<!-- <conditions>
<add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" />
</conditions>
</conditions> -->
<action type="Rewrite" url="app/server.js" />
</rule>
</rules>
@ -22,9 +22,14 @@
<add segment="node_modules" />
<add segment="iisnode" />
</hiddenSegments>
<fileExtensions>
</fileExtensions>
</requestFiltering>
</security>
<httpErrors errorMode="Detailed" />
<urlCompression doDynamicCompression="false" />
<staticContent>
<mimeMap fileExtension=".md" mimeType="text/markdown" />
</staticContent>
</system.webServer>
</configuration>