From 7f2119bb0d46078428f8beedffec216d481e35bd Mon Sep 17 00:00:00 2001 From: David Ball Date: Sun, 10 Mar 2024 06:39:02 -0400 Subject: [PATCH] Initial state of the development and production site as of 2024-03-10. --- .gitignore | 139 ++++ README.md | 0 app/TikaClient/src/client.ts | 102 +++ app/TikaClient/src/index.ts | 2 + app/TikaClient/src/types.ts | 81 ++ app/glob-slash.js | 9 + app/search.js | 83 ++ app/server.js | 164 ++++ app/vercel-serve.js | 783 ++++++++++++++++++ gulpfile.js | 191 +++++ package.json | 79 ++ pages/comment-policy.md | 97 +++ pages/privacy-policy.md | 268 ++++++ pages/search-policy.md | 73 ++ public/README.md | 31 + .../@russellcountyvirginia8228/README.md | 6 + .../README.md | 12 + tsconfig.build.json | 4 + tsconfig.json | 26 + views/directory.ejs | 61 ++ views/error.ejs | 122 +++ views/helpers/functions.js | 72 ++ views/includes/bottom-navbar.ejs | 12 + views/includes/common-head.ejs | 155 ++++ views/includes/top-navbar.ejs | 17 + views/page.ejs | 65 ++ views/search-error.ejs | 42 + views/search-results.ejs | 78 ++ web.config | 28 + 29 files changed, 2802 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 app/TikaClient/src/client.ts create mode 100644 app/TikaClient/src/index.ts create mode 100644 app/TikaClient/src/types.ts create mode 100644 app/glob-slash.js create mode 100644 app/search.js create mode 100644 app/server.js create mode 100644 app/vercel-serve.js create mode 100644 gulpfile.js create mode 100644 package.json create mode 100644 pages/comment-policy.md create mode 100644 pages/privacy-policy.md create mode 100644 pages/search-policy.md create mode 100644 public/README.md create mode 100644 public/Russell_County_BOS/YouTube_Archive/@russellcountyvirginia8228/README.md create mode 100644 public/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding/README.md create mode 100644 tsconfig.build.json create mode 100644 tsconfig.json create mode 100644 views/directory.ejs create mode 100644 views/error.ejs create mode 100644 views/helpers/functions.js create mode 100644 views/includes/bottom-navbar.ejs create mode 100644 views/includes/common-head.ejs create mode 100644 views/includes/top-navbar.ejs create mode 100644 views/page.ejs create mode 100644 views/search-error.ejs create mode 100644 views/search-results.ejs create mode 100644 web.config diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..0c4fa7f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,139 @@ +# Mac Finder artifacts +.DS_Store + +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# 
Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp +.cache + +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +# iisnode +iisnode/ + +# transpiler artifacts +app/TikaClient/build/ \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 00000000..e69de29b diff --git a/app/TikaClient/src/client.ts b/app/TikaClient/src/client.ts new file mode 100644 index 00000000..8ec24379 --- /dev/null +++ b/app/TikaClient/src/client.ts @@ -0,0 +1,102 @@ +import { ReadStream, WriteStream } from 'fs' +import fetch from 'node-fetch' +let join = (...args: String[]) => { + let output = ""; + args.forEach((arg) => { + output += arg; + }) + return output; +}; +import { ContentResource, MetadataResource } from './types' + +export interface TikaClientOptions { + host: string +} + +export type TikaContentType = 'text/plain' | 'text/xml' | 'text/html' | 'application/json' + +export interface TikaGetTextOptions { + filename?: string + contentType: TikaContentType +} + +export class TikaClient { + private metaUrl: string + private tikaUrl: string + + constructor(private options: TikaClientOptions) { + this.metaUrl = join(options.host, '/meta') + this.tikaUrl = join(options.host, '/tika') + } + + async getMetadata(body: ReadStream): Promise { + const response = await fetch(this.metaUrl, { + method: 'PUT', + body, + headers: { 'Accept': 'application/json', 'Content-Disposition': 'attachment' } + }) + return response.json() + } + + async getContent(body: ReadStream, contentType: Exclude, filename?: string): Promise + async getContent(body: ReadStream, contentType: Extract, filename?: string): Promise + async getContent(body: ReadStream, contentType: TikaContentType, filename?: string): Promise { + const response = await fetch(this.tikaUrl, { + method: 'PUT', + body, + headers: { + 'Accept': contentType, + 'Content-Disposition': `attachment${filename ? `; filename=${filename}` : ''}` + } + }) + return (contentType === 'application/json') ? response.json() : response.text() + } + + async getStream(body: ReadStream, contentType: TikaContentType, filename?: string): Promise { + const response = await fetch(this.tikaUrl, { + method: 'PUT', + body, + headers: { + 'Accept': contentType, + 'Content-Disposition': `attachment${filename ? 
`; filename=${filename}` : ''}` + } + }) + return response.body + } + + async pipe(readStream: ReadStream, writeStream: WriteStream, contentType: TikaContentType = 'text/plain', filename?: string): Promise { + const tikaStream = await this.getStream(readStream, contentType, filename) + return new Promise((resolve, reject) => { + const stream = tikaStream.pipe(writeStream) + stream.on('error', (error) => { reject(error) }) + stream.on('finish', () => { resolve() }) + }) + } + + private async getResource(resource: string, accept: 'application/json'): Promise + private async getResource(resource: string, accept: 'text/plain'): Promise + private async getResource(resource: string, accept: 'application/json' | 'text/plain' = 'application/json'): Promise { + const response = await fetch(join(this.options.host, resource), { + method: 'GET', + headers: { 'Accept': accept } + }) + if (accept === 'text/plain') { + return response.text() as T + } else { + return response.json() + } + } + + getMimeTypes() { return this.getResource('/mime-types', 'application/json') } + + getDetectors() { return this.getResource('/detectors', 'application/json') } + + getParsers() { return this.getResource('/parsers', 'application/json') } + + getDetailedParsers() { return this.getResource('/parsers/details', 'application/json') } + + async getVersion() { + const response = await this.getResource('/version', 'text/plain') + return response.trim() + } +} \ No newline at end of file diff --git a/app/TikaClient/src/index.ts b/app/TikaClient/src/index.ts new file mode 100644 index 00000000..0b28ed7f --- /dev/null +++ b/app/TikaClient/src/index.ts @@ -0,0 +1,2 @@ +export * from './client' +export * from './types' \ No newline at end of file diff --git a/app/TikaClient/src/types.ts b/app/TikaClient/src/types.ts new file mode 100644 index 00000000..1ad11bb8 --- /dev/null +++ b/app/TikaClient/src/types.ts @@ -0,0 +1,81 @@ +export interface MetadataResource { + 'pdf:unmappedUnicodeCharsPerPage': string[] + 'pdf:PDFVersion': string + 'xmp:CreatorTool': string + 'pdf:hasXFA': string + 'access_permission:modify_annotations': string + 'access_permission:can_print_degraded': string + 'X-TIKA:Parsed-By-Full-Set': string[] + 'pdf:num3DAnnotations': string + 'dcterms:created': string + 'language': string + 'dcterms:modified': string + 'dc:format': string + 'pdf:docinfo:creator_tool': string + 'pdf:overallPercentageUnmappedUnicodeChars': string + 'access_permission:fill_in_form': string + 'pdf:docinfo:modified': string + 'pdf:hasCollection': string + 'pdf:encrypted': string + 'pdf:containsNonEmbeddedFont': string + 'Content-Length': string + 'pdf:hasMarkedContent': string + 'Content-Type': string + 'pdf:producer': string + 'pdf:totalUnmappedUnicodeChars': string + 'access_permission:extract_for_accessibility': string + 'access_permission:assemble_document': string + 'xmpTPg:NPages': string + 'pdf:hasXMP': string + 'pdf:charsPerPage': string[] + 'access_permission:extract_content': string + 'access_permission:can_print': string + 'X-TIKA:Parsed-By': string[] + 'pdf:annotationTypes': string + 'access_permission:can_modify': string + 'pdf:docinfo:producer': string + 'pdf:docinfo:created': string + 'pdf:annotationSubtypes': string + 'pdf:containsDamagedFont': string + } + + export interface ContentResource { + 'pdf:unmappedUnicodeCharsPerPage': string[] + 'pdf:PDFVersion': string + 'xmp:CreatorTool': string + 'pdf:hasXFA': string + 'access_permission:modify_annotations': string + 'access_permission:can_print_degraded': string + 
'X-TIKA:Parsed-By-Full-Set': string[] + 'pdf:num3DAnnotations': string + 'dcterms:created': string + 'dcterms:modified': string + 'dc:format': string + 'pdf:docinfo:creator_tool': string + 'pdf:overallPercentageUnmappedUnicodeChars': string + 'access_permission:fill_in_form': string + 'pdf:docinfo:modified': string + 'pdf:hasCollection': string + 'pdf:encrypted': string + 'pdf:containsNonEmbeddedFont': string + 'Content-Length': string + 'pdf:hasMarkedContent': string + 'Content-Type': string + 'pdf:producer': string + 'pdf:totalUnmappedUnicodeChars': string + 'access_permission:extract_for_accessibility': string + 'access_permission:assemble_document': string + 'xmpTPg:NPages': string + 'pdf:hasXMP': string + 'pdf:charsPerPage': string[] + 'access_permission:extract_content': string + 'access_permission:can_print': string + 'X-TIKA:Parsed-By': string[] + 'X-TIKA:content': string + 'pdf:annotationTypes': string + 'access_permission:can_modify': string + 'pdf:docinfo:producer': string + 'pdf:docinfo:created': string + 'pdf:annotationSubtypes': string + 'pdf:containsDamagedFont': string + } \ No newline at end of file diff --git a/app/glob-slash.js b/app/glob-slash.js new file mode 100644 index 00000000..cc924e58 --- /dev/null +++ b/app/glob-slash.js @@ -0,0 +1,9 @@ +/* ! The MIT License (MIT) Copyright (c) 2014 Scott Corgan */ + +// This is adopted from https://github.com/scottcorgan/glob-slash/ + +const path = require('path'); +const normalize = value => path.posix.normalize(path.posix.join('/', value)); + +module.exports = value => (value.charAt(0) === '!' ? `!${normalize(value.substr(1))}` : normalize(value)); +module.exports.normalize = normalize; \ No newline at end of file diff --git a/app/search.js b/app/search.js new file mode 100644 index 00000000..9fa56a7d --- /dev/null +++ b/app/search.js @@ -0,0 +1,83 @@ +const express = require('express'); +const axios = require('axios'); +const app = express(); +const path = require('path'); + +// Set EJS as the view engine +app.set('view engine', 'ejs'); + +// Specify the views directory +app.set('views', path.join(__dirname, 'views')); + +// Middleware to parse JSON request body +app.use(express.json()); + +// Serve static files (CSS, JavaScript, images, etc.) 
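
For reference, a minimal usage sketch of the `TikaClient` defined in `app/TikaClient/src/client.ts` above (note the listing's generic type parameters appear flattened: `Promise<MetadataResource>` and `Exclude<TikaContentType, 'application/json'>` read as bare `Promise`/`Exclude`). The host URL and file name below are placeholders, and the import path assumes the transpiled output in `TikaClient/build`, the same path the gulpfile uses; the stream is opened twice because a `ReadStream` cannot be replayed.

```js
// Sketch only: extract metadata and plain text from a local PDF with the
// TikaClient above. Host, file name, and build path are assumptions.
const { createReadStream } = require('fs');
const { TikaClient } = require('./TikaClient/build');

async function extractExample() {
  const client = new TikaClient({ host: 'http://localhost:9998' });
  console.log(await client.getVersion()); // e.g. "Apache Tika 2.9.1"

  const meta = await client.getMetadata(createReadStream('sample.pdf'));
  console.log(meta['Content-Type'], meta['xmpTPg:NPages']);

  // 'text/plain' resolves to a string; 'application/json' would resolve
  // to a ContentResource (see types.ts above).
  const text = await client.getContent(createReadStream('sample.pdf'), 'text/plain', 'sample.pdf');
  console.log(text.slice(0, 200));
}

extractExample().catch(console.error);
```
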
+app.use(express.static('public')); + +// Search endpoint +app.get('/search', async (req, res) => { + try { + // Extract search query from request query parameters + const { q, page = 1, pageSize = 10 } = req.query; + const query = q; + + // Validate search query + if (!query) { + return res.status(400).json({ error: 'q parameter is required' }); + } + + // Calculate start offset for pagination + const start = (page - 1) * pageSize; + + // Sanitize search query to prevent code injection + const sanitizedQuery = sanitizeQuery(query); + + // Send search query to Solr + const response = await axios.get(solrUrl + '/select', { + params: { + q: `text:${sanitizedQuery}`, // Query string with field name + hl: 'true', + 'hl.method': 'unified', + 'hl.fl': '*', + 'hl.snippets': 5, + 'hl.tag.pre': '', + 'hl.tag.post': '', + 'hl.usePhraseHighlighter': true, + start, // Start offset for pagination + rows: 10, // Number of rows to return + wt: 'json', // Response format (JSON) + }, + }); + + // Extract search results from Solr response + const searchResults = response.data.response.docs; + const highlightedSnippets = response.data.highlighting; + + // Calculate total number of results (needed for pagination) + const totalResults = response.data.response.numFound; + + // Calculate total number of pages + const totalPages = Math.ceil(totalResults / pageSize); + + // Send search results as JSON response + //res.json({ searchResults, highlightedSnippets }); + res.render('search-results', { query, searchResults, highlightedSnippets, page, pageSize, totalResults, totalPages }); + } catch (error) { + console.error('Error searching Solr:', error.message); + res.status(500).json({ error: 'Internal server error' }); + } +}); + +// Function to sanitize search query to prevent code injection +function sanitizeQuery(query) { + // Remove any characters that are not alphanumeric or whitespace + return query.replace(/[^\w\s"]/gi, ''); +} + +// Start server +const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance +const PORT = process.env.PORT || 3000; +app.listen(PORT, () => { + console.log(`Server is running on port ${PORT}`); +}); diff --git a/app/server.js b/app/server.js new file mode 100644 index 00000000..7fb675b5 --- /dev/null +++ b/app/server.js @@ -0,0 +1,164 @@ +const express = require('express'); +const axios = require('axios'); +const app = express(); +const serve = require('./vercel-serve'); +const path = require('path'); +const glob = require('glob'); +const matter = require('gray-matter'); +const ejs = require('ejs'); +const helpers = require('../views/helpers/functions'); + +// Port number for HTTP server +const port = process.env.PORT||3000; + +// Solr instance URL +const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; + +// Set EJS as the view engine +app.set('view engine', 'ejs'); + +// Specify the views directory +app.set('views', path.join(__dirname, '..', 'views')); + +// Middleware to parse JSON request body +app.use(express.json()); + +// Middleware to rewrite requests +//app.use(rewriter); + +// // Serve static files (CSS, JavaScript, images, etc.) +// app.use(serve('../public', { +// dotfiles: 'ignore', +// index: false, +// })); + +// app.get('/', (req, res) => { +// res.send('Hello World!'); +// }) + +// Endpoints for all the site's pages. 
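
Three small notes on the `/search` handlers (the one in `search.js` above and its sibling in `server.js` below). First, both send `rows: 10` to Solr regardless of `pageSize`, so `pageSize` currently affects only the `start` offset; `rows: pageSize` would make the pagination math agree. Second, the empty `hl.tag.pre`/`hl.tag.post` values look like stripped inline markup (typically `<em>`/`</em>`); as written, highlight snippets carry no markers. Third, in the `server.js` variant the empty-query branch renders `search-error` but does not `return`, so the Solr request still runs and a second render is attempted. Separately, `sanitizeQuery` strips everything outside `[\w\s"]`, which blocks Lucene-operator injection but also drops characters users may legitimately type; a sketch of an escaping alternative, not wired into either handler:

```js
// Sketch: escape rather than strip the Lucene query-parser's special
// characters (+ - ! ( ) { } [ ] ^ " ~ * ? : \ / & |).
function escapeSolrQuery(query) {
  return query.replace(/[+\-!(){}[\]^"~*?:\\\/&|]/g, '\\$&');
}

escapeSolrQuery('landfill (carbo)'); // => 'landfill \\(carbo\\)'
```
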
+glob.globSync('pages/**/*.md', { + cwd: path.join(__dirname, '..'), + matchBase: true, + follow: true, +}).forEach((filePath) => { + const expressRoutePathFromFilePath = (filePath) => { + return filePath.substring('pages'.length, filePath.length - path.extname(filePath).length).replaceAll(path.sep, path.posix.sep); + }; + const route = expressRoutePathFromFilePath(filePath); + const fullFilePath = path.join(__dirname, '..', filePath); + let paths = route.split(path.posix.sep); + paths[0] = 'public'; + app.get(route, async (req, res) => { + const fm = matter.read(fullFilePath); + const fmData = { fm: fm.data, excerpt: fm.excerpt }; + const content = helpers.md.render(fm.content, fmData ); + const renderData = { content, route, filePath, fullFilePath, req, paths, ...fmData }; + res.render("page", { h: helpers, ...renderData }); + }); +}); + +// Search endpoint +app.get('/search', async (req, res) => { + // Extract search query from request query parameters + let { q, page = 1, pageSize = 10 } = req.query; + pageSize = Math.min(pageSize, 100); // cap at 100 + const query = q; + // Calculate start offset for pagination + const start = (page - 1) * pageSize; + + // Sanitize search query to prevent code injection + const sanitizedQuery = sanitizeQuery(query); + try { + // Validate search query + if (!query) { + //return res.status(400).json({ error: 'q parameter is required' }); + res.render('search-error', { h: helpers, query: sanitizedQuery, error: { code: 400, message: 'Search query is required.'} }); + } + + // Send search query to Solr + const response = await axios.get(solrUrl + '/select', { + params: { + q: `text:${sanitizedQuery}`, // Query string with field name + hl: 'true', + 'hl.method': 'unified', + 'hl.fl': '*', + 'hl.snippets': 5, + 'hl.tag.pre': '', + 'hl.tag.post': '', + 'hl.usePhraseHighlighter': true, + start, // Start offset for pagination + rows: 10, // Number of rows to return + wt: 'json', // Response format (JSON) + }, + }); + + // Extract search results from Solr response + const searchResults = response.data.response.docs; + const highlightedSnippets = response.data.highlighting; + + // Calculate total number of results (needed for pagination) + const totalResults = response.data.response.numFound; + + // Calculate total number of pages + const totalPages = Math.ceil(totalResults / pageSize); + + // Send search results as JSON response + //res.json('search-results', { query, searchResults, highlightedSnippets, page, pageSize, totalResults, totalPages }); + res.render('search-results', { h: helpers, query: sanitizedQuery, searchResults, highlightedSnippets, page, pageSize, totalResults, totalPages }); + } catch (error) { + // console.error('Error searching Solr:', error.message); + // res.status(500).json({ error: 'Internal server error' }); + res.render('search-error', { h: helpers, query: sanitizedQuery, error }); + } +}); + +// Function to sanitize search query to prevent code injection +function sanitizeQuery(query) { + // Remove any characters that are not alphanumeric or whitespace + return query.replace(/[^\w\s*,."]/gi, ''); +} + +//app.get('/OCR-Encoded-PDFs/Russell-County-Web-Site_2024-02-13_19_50_Modified-With-OCR-Encoding**', rewriter.rewrite('/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding/$1')); + +app.get('*', async (req, res) => { + await serve(req, res, { + public: path.join(__dirname, '..', 'public'), + symlinks: true, + trailingSlash: true, + cleanUrls: false, + renderSingle: false, + unlisted: [ + ".DS_Store", + 
".git", + "README*" + ], + redirects: [ + { + source: "/:year(\d{4})-:mo(\d{2})-:dd(\d{2})_:hh(\d{2})_:mm(\d{2})/", + destination: "/Web_Site_Archives/Russell_County_Web_Site-:year-:mo-:dd_:hh_:mm/" + }, + { + source: "/OCR-Encoded-PDFs", + destination: "/Web_Site_Archives" + }, + { + source: "/OCR-Encoded-PDFs/Russell-County-Web-Site_2024-02-13_19_50_Modified-With-OCR-Encoding.zip", + destination: "/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding.zip" + }, + { + source: "/OCR-Encoded-PDFs/Russell-County-Web-Site_2024-02-13_19_50_Modified-With-OCR-Encoding/:u(.*)", + destination: "/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding:u" + }, + { source: '/YouTube Channel', destination: '/Russell_County_BOS/YouTube_Channel' }, + { source: '/YouTube Channel.zip', destination: '/Russell_County_BOS/YouTube_Channel.zip' }, + { source: '/YouTube Channel/:u?', destination: '/Russell_County_BOS/YouTube_Channel/:u' }, + { source: '/Project Reclaim [WI19KR9Ogwg].mkv', destination: '/YouTube_Archives/@VADMME/Project Reclaim [WI19KR9Ogwg].mkv' }, + ] + }); +}); + +// Start server +app.listen(port, () => { + console.log(`no-moss-3-carbo-landfill-library.online app listening on port ${port}`); +}); diff --git a/app/vercel-serve.js b/app/vercel-serve.js new file mode 100644 index 00000000..7f684562 --- /dev/null +++ b/app/vercel-serve.js @@ -0,0 +1,783 @@ +// Adapted from https://raw.githubusercontent.com/vercel/serve-handler/main/src/index.js + +// Native +const {promisify} = require('util'); +const path = require('path'); +const {createHash} = require('crypto'); +const {realpath, lstat, createReadStream, readdir} = require('fs'); + +// Packages +const url = require('fast-url-parser'); +const slasher = require('./glob-slash'); +const minimatch = require('minimatch'); +const pathToRegExp = require('path-to-regexp'); +const mime = require('mime-types'); +const bytes = require('bytes'); +const contentDisposition = require('content-disposition'); +const isPathInside = require('path-is-inside'); +const parseRange = require('range-parser'); + +// Convert to EJS +const ejs = require('ejs'); +const helpers = require('../views/helpers/functions'); +// Original jst: +// const directoryTemplate = require('./views/directory'); +// const errorTemplate = require('./views/error'); +const directoryTemplate = (vals) => { + return new Promise((resolve, reject) => { + ejs.renderFile(path.join(__dirname, '..', 'views', 'directory.ejs'), { h: helpers, ...vals }, (err, str) => { + if (err) { + reject(err); + } else { + resolve(str); + } + }); + }); +}; +// const errorTemplate = (vals) => { +// return new Promise((resolve, reject) => { +// ejs.renderFile("views/error.ejs", { h: helpers, ...vals }, (err, str) => { +// if (err) { +// reject(err); +// } else { +// resolve(str); +// } +// }); +// }); +// }; + +const etags = new Map(); + +const calculateSha = (handlers, absolutePath) => + new Promise((resolve, reject) => { + const hash = createHash('sha1'); + hash.update(path.extname(absolutePath)); + hash.update('-'); + const rs = handlers.createReadStream(absolutePath); + rs.on('error', reject); + rs.on('data', buf => hash.update(buf)); + rs.on('end', () => { + const sha = hash.digest('hex'); + resolve(sha); + }); + }); + +const sourceMatches = (source, requestPath, allowSegments) => { + const keys = []; + const slashed = slasher(source); + const resolvedPath = path.posix.resolve(requestPath); + + let results = null; + + if (allowSegments) { + const 
normalized = slashed.replace('*', '(.*)'); + const expression = pathToRegExp(normalized, keys); + + results = expression.exec(resolvedPath); + + if (!results) { + // clear keys so that they are not used + // later with empty results. this may + // happen if minimatch returns true + keys.length = 0; + } + } + + if (results || minimatch(resolvedPath, slashed)) { + return { + keys, + results + }; + } + + return null; +}; + +const toTarget = (source, destination, previousPath) => { + const matches = sourceMatches(source, previousPath, true); + + if (!matches) { + return null; + } + + const {keys, results} = matches; + + const props = {}; + const {protocol} = url.parse(destination); + const normalizedDest = protocol ? destination : slasher(destination); + const toPath = pathToRegExp.compile(normalizedDest); + + for (let index = 0; index < keys.length; index++) { + const {name} = keys[index]; + props[name] = results[index + 1]; + } + + return toPath(props); +}; + +const applyRewrites = (requestPath, rewrites = [], repetitive) => { + // We need to copy the array, since we're going to modify it. + const rewritesCopy = rewrites.slice(); + + // If the method was called again, the path was already rewritten + // so we need to make sure to return it. + const fallback = repetitive ? requestPath : null; + + if (rewritesCopy.length === 0) { + return fallback; + } + + for (let index = 0; index < rewritesCopy.length; index++) { + const {source, destination} = rewrites[index]; + const target = toTarget(source, destination, requestPath); + + if (target) { + // Remove rules that were already applied + rewritesCopy.splice(index, 1); + + // Check if there are remaining ones to be applied + return applyRewrites(slasher(target), rewritesCopy, true); + } + } + + return fallback; +}; + +const ensureSlashStart = target => (target.startsWith('/') ? target : `/${target}`); + +const shouldRedirect = (decodedPath, {redirects = [], trailingSlash}, cleanUrl) => { + const slashing = typeof trailingSlash === 'boolean'; + const defaultType = 301; + const matchHTML = /(\.html|\/index)$/g; + + if (redirects.length === 0 && !slashing && !cleanUrl) { + return null; + } + + // By stripping the HTML parts from the decoded + // path *before* handling the trailing slash, we make + // sure that only *one* redirect occurs if both + // config options are used. 
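
`toTarget` above compiles each `source` with `path-to-regexp`, copies the named captures into `props`, and feeds them to the compiled `destination`; the `redirects` table in `app/server.js` depends on exactly this round trip. One caveat for that table: in an ordinary JavaScript string literal `\d` collapses to `d`, so a source written as `:year(\d{4})` reaches `path-to-regexp` as `:year(d{4})`; `\\d` (or `[0-9]`) is likely intended. A minimal illustration using one of the simpler rules:

```js
// Sketch: the source -> destination resolution performed by toTarget above,
// using the pinned path-to-regexp@2.2.1 API.
const pathToRegExp = require('path-to-regexp');

const keys = [];
const expression = pathToRegExp('/YouTube Channel/:u?', keys);
const results = expression.exec('/YouTube Channel/clip.mkv');
// results[1] === 'clip.mkv', keys[0].name === 'u'

const props = {};
keys.forEach((key, index) => { props[key.name] = results[index + 1]; });

const toPath = pathToRegExp.compile('/Russell_County_BOS/YouTube_Channel/:u');
toPath(props); // => '/Russell_County_BOS/YouTube_Channel/clip.mkv'
```
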
+ if (cleanUrl && matchHTML.test(decodedPath)) { + decodedPath = decodedPath.replace(matchHTML, ''); + if (decodedPath.indexOf('//') > -1) { + decodedPath = decodedPath.replace(/\/+/g, '/'); + } + return { + target: ensureSlashStart(decodedPath), + statusCode: defaultType + }; + } + + if (slashing) { + const {ext, name} = path.parse(decodedPath); + const isTrailed = decodedPath.endsWith('/'); + const isDotfile = name.startsWith('.'); + + let target = null; + + if (!trailingSlash && isTrailed) { + target = decodedPath.slice(0, -1); + } else if (trailingSlash && !isTrailed && !ext && !isDotfile) { + target = `${decodedPath}/`; + } + + if (decodedPath.indexOf('//') > -1) { + target = decodedPath.replace(/\/+/g, '/'); + } + + if (target) { + return { + target: ensureSlashStart(target), + statusCode: defaultType + }; + } + } + + // This is currently the fastest way to + // iterate over an array + for (let index = 0; index < redirects.length; index++) { + const {source, destination, type} = redirects[index]; + const target = toTarget(source, destination, decodedPath); + + if (target) { + return { + target, + statusCode: type || defaultType + }; + } + } + + return null; +}; + +const appendHeaders = (target, source) => { + for (let index = 0; index < source.length; index++) { + const {key, value} = source[index]; + target[key] = value; + } +}; + +const getHeaders = async (handlers, config, current, absolutePath, stats) => { + const {headers: customHeaders = [], etag = false} = config; + const related = {}; + const {base} = path.parse(absolutePath); + const relativePath = path.relative(current, absolutePath); + + if (customHeaders.length > 0) { + // By iterating over all headers and never stopping, developers + // can specify multiple header sources in the config that + // might match a single path. + for (let index = 0; index < customHeaders.length; index++) { + const {source, headers} = customHeaders[index]; + + if (sourceMatches(source, slasher(relativePath))) { + appendHeaders(related, headers); + } + } + } + + let defaultHeaders = {}; + + if (stats) { + defaultHeaders = { + 'Content-Length': stats.size, + // Default to "inline", which always tries to render in the browser, + // if that's not working, it will save the file. But to be clear: This + // only happens if it cannot find a appropiate value. 
+ 'Content-Disposition': contentDisposition(base, { + type: 'inline' + }), + 'Accept-Ranges': 'bytes' + }; + + if (etag) { + let [mtime, sha] = etags.get(absolutePath) || []; + if (Number(mtime) !== Number(stats.mtime)) { + sha = await calculateSha(handlers, absolutePath); + etags.set(absolutePath, [stats.mtime, sha]); + } + defaultHeaders['ETag'] = `"${sha}"`; + } else { + defaultHeaders['Last-Modified'] = stats.mtime.toUTCString(); + } + + const contentType = mime.contentType(base); + + if (contentType) { + defaultHeaders['Content-Type'] = contentType; + } + } + + const headers = Object.assign(defaultHeaders, related); + + for (const key in headers) { + if (headers.hasOwnProperty(key) && headers[key] === null) { + delete headers[key]; + } + } + + return headers; +}; + +const applicable = (decodedPath, configEntry) => { + if (typeof configEntry === 'boolean') { + return configEntry; + } + + if (Array.isArray(configEntry)) { + for (let index = 0; index < configEntry.length; index++) { + const source = configEntry[index]; + + if (sourceMatches(source, decodedPath)) { + return true; + } + } + + return false; + } + + return true; +}; + +const getPossiblePaths = (relativePath, extension) => [ + path.join(relativePath, `index${extension}`), + relativePath.endsWith('/') ? relativePath.replace(/\/$/g, extension) : (relativePath + extension) +].filter(item => path.basename(item) !== extension); + +const findRelated = async (current, relativePath, rewrittenPath, originalStat) => { + const possible = rewrittenPath ? [rewrittenPath] : getPossiblePaths(relativePath, '.html'); + + let stats = null; + + for (let index = 0; index < possible.length; index++) { + const related = possible[index]; + const absolutePath = path.join(current, related); + + try { + stats = await originalStat(absolutePath); + } catch (err) { + if (err.code !== 'ENOENT' && err.code !== 'ENOTDIR') { + throw err; + } + } + + if (stats) { + return { + stats, + absolutePath + }; + } + } + + return null; +}; + +const canBeListed = (excluded, file) => { + const slashed = slasher(file); + let whether = true; + + for (let mark = 0; mark < excluded.length; mark++) { + const source = excluded[mark]; + + if (sourceMatches(source, slashed)) { + whether = false; + break; + } + } + + return whether; +}; + +const renderDirectory = async (current, acceptsJSON, handlers, methods, config, paths) => { + const {directoryListing, trailingSlash, unlisted = [], renderSingle} = config; + const slashSuffix = typeof trailingSlash === 'boolean' ? (trailingSlash ? '/' : '') : '/'; + const {relativePath, absolutePath} = paths; + + const excluded = [ + '.DS_Store', + '.git', + ...unlisted + ]; + + if (!applicable(relativePath, directoryListing) && !renderSingle) { + return {}; + } + + let files = await handlers.readdir(absolutePath); + + const canRenderSingle = renderSingle && (files.length === 1); + + for (let index = 0; index < files.length; index++) { + const file = files[index]; + + const filePath = path.resolve(absolutePath, file); + const details = path.parse(filePath); + + // It's important to indicate that the `stat` call was + // spawned by the directory listing, as Now is + // simulating those calls and needs to special-case this. 
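
Stepping back to `getHeaders` above: when the config sets `etag: true` (the default is `false`, which falls back to `Last-Modified`), the validator is a SHA-1 over the file's extension plus its bytes, cached in the `etags` map until the file's `mtime` changes; a client echoing it via `If-None-Match` triggers the `304` short-circuit near the end of this module. The hash in isolation, as a standalone sketch:

```js
// Sketch: the validator calculateSha computes and etags caches by mtime.
const { createHash } = require('crypto');
const { createReadStream } = require('fs');
const path = require('path');

function shaFor(absolutePath) {
  return new Promise((resolve, reject) => {
    const hash = createHash('sha1');
    hash.update(path.extname(absolutePath));
    hash.update('-');
    createReadStream(absolutePath)
      .on('error', reject)
      .on('data', (buf) => hash.update(buf))
      .on('end', () => resolve(`"${hash.digest('hex')}"`)); // quoted, as sent in the ETag header
  });
}
```
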
+ let stats = null; + + if (methods.lstat) { + stats = await handlers.lstat(filePath, true); + } else { + stats = await handlers.lstat(filePath); + } + + details.relative = path.join(relativePath, details.base); + + if (stats.isDirectory()) { + details.base += slashSuffix; + details.relative += slashSuffix; + details.type = 'folder'; + } else { + if (canRenderSingle) { + return { + singleFile: true, + absolutePath: filePath, + stats + }; + } + + details.ext = details.ext.split('.')[1] || 'txt'; + details.type = 'file'; + + details.size = bytes(stats.size, { + unitSeparator: ' ', + decimalPlaces: 0 + }); + } + + details.title = details.base; + + if (canBeListed(excluded, file)) { + files[index] = details; + } else { + delete files[index]; + } + } + + const toRoot = path.relative(current, absolutePath); + const directory = path.join(path.basename(current), toRoot, slashSuffix); + const pathParts = directory.split(path.sep).filter(Boolean); + + // Sort to list directories first, then sort alphabetically + files = files.sort((a, b) => { + const aIsDir = a.type === 'directory' || a.type === 'folder'; + const bIsDir = b.type === 'directory' || b.type === 'folder'; + + /* istanbul ignore next */ + if (aIsDir && !bIsDir) { + return -1; + } + + if ((bIsDir && !aIsDir) || (a.base > b.base)) { + return 1; + } + + /* istanbul ignore next */ + if (a.base < b.base) { + return -1; + } + + /* istanbul ignore next */ + return 0; + }).filter(Boolean); + + // Add parent directory to the head of the sorted files array + if (toRoot.length > 0) { + const directoryPath = [...pathParts].slice(1); + const relative = path.join('/', ...directoryPath, '..', slashSuffix); + + files.unshift({ + type: 'directory', + base: '..', + relative, + title: relative, + ext: '' + }); + } + + const subPaths = []; + + for (let index = 0; index < pathParts.length; index++) { + const parents = []; + const isLast = index === (pathParts.length - 1); + + let before = 0; + + while (before <= index) { + parents.push(pathParts[before]); + before++; + } + + parents.shift(); + + subPaths.push({ + name: pathParts[index] + (isLast ? slashSuffix : '/'), + url: index === 0 ? '' : parents.join('/') + slashSuffix + }); + } + + const spec = { + files, + directory, + paths: subPaths + }; + + const output = acceptsJSON ? 
JSON.stringify(spec) : await directoryTemplate(spec); + + return {directory: output}; +}; + +const sendError = async (absolutePath, response, acceptsJSON, current, handlers, config, spec) => { + const {err: original, message, code, statusCode} = spec; + + /* istanbul ignore next */ + if (original && process.env.NODE_ENV !== 'test') { + console.error(original); + } + + response.statusCode = statusCode; + + if (acceptsJSON) { + response.setHeader('Content-Type', 'application/json; charset=utf-8'); + + response.end(JSON.stringify({ + error: { + code, + message + } + })); + + return; + } + + let stats = null; + + const errorPage = path.join(current, `${statusCode}.html`); + + try { + stats = await handlers.lstat(errorPage); + } catch (err) { + if (err.code !== 'ENOENT') { + console.error(err); + } + } + + if (stats) { + let stream = null; + + try { + stream = await handlers.createReadStream(errorPage); + + const headers = await getHeaders(handlers, config, current, errorPage, stats); + + response.writeHead(statusCode, headers); + stream.pipe(response); + + return; + } catch (err) { + console.error(err); + } + } + + const headers = await getHeaders(handlers, config, current, absolutePath, null); + headers['Content-Type'] = 'text/html; charset=utf-8'; + + response.writeHead(statusCode, headers); + response.end(await errorTemplate({statusCode, message})); +}; + +const internalError = async (...args) => { + const lastIndex = args.length - 1; + const err = args[lastIndex]; + + args[lastIndex] = { + statusCode: 500, + code: 'internal_server_error', + message: 'A server error has occurred', + err + }; + + return sendError(...args); +}; + +const getHandlers = methods => Object.assign({ + lstat: promisify(lstat), + realpath: promisify(realpath), + createReadStream, + readdir: promisify(readdir), + sendError +}, methods); + +module.exports = async (request, response, config = {}, methods = {}) => { + const cwd = process.cwd(); + const current = config.public ? path.resolve(cwd, config.public) : cwd; + const handlers = getHandlers(methods); + + let relativePath = null; + let acceptsJSON = null; + + if (request.headers.accept) { + acceptsJSON = request.headers.accept.includes('application/json'); + } + + try { + relativePath = decodeURIComponent(url.parse(request.url).pathname); + } catch (err) { + return sendError('/', response, acceptsJSON, current, handlers, config, { + statusCode: 400, + code: 'bad_request', + message: 'Bad Request' + }); + } + + let absolutePath = path.join(current, relativePath); + + // Prevent path traversal vulnerabilities. We could do this + // by ourselves, but using the package covers all the edge cases. + if (!isPathInside(absolutePath, current)) { + return sendError(absolutePath, response, acceptsJSON, current, handlers, config, { + statusCode: 400, + code: 'bad_request', + message: 'Bad Request' + }); + } + + const cleanUrl = applicable(relativePath, config.cleanUrls); + const redirect = shouldRedirect(relativePath, config, cleanUrl); + + if (redirect) { + response.writeHead(redirect.statusCode, { + Location: encodeURI(redirect.target) + }); + + response.end(); + return; + } + + let stats = null; + + // It's extremely important that we're doing multiple stat calls. This one + // right here could technically be removed, but then the program + // would be slower. Because for directories, we always want to see if a related file + // exists and then (after that), fetch the directory itself if no + // related file was found. 
However (for files, of which most have extensions), we should + // always stat right away. + // + // When simulating a file system without directory indexes, calculating whether a + // directory exists requires loading all the file paths and then checking if + // one of them includes the path of the directory. As that's a very + // performance-expensive thing to do, we need to ensure it's not happening if not really necessary. + + if (path.extname(relativePath) !== '') { + try { + stats = await handlers.lstat(absolutePath); + } catch (err) { + if (err.code !== 'ENOENT' && err.code !== 'ENOTDIR') { + return internalError(absolutePath, response, acceptsJSON, current, handlers, config, err); + } + } + } + + const rewrittenPath = applyRewrites(relativePath, config.rewrites); + + if (!stats && (cleanUrl || rewrittenPath)) { + try { + const related = await findRelated(current, relativePath, rewrittenPath, handlers.lstat); + + if (related) { + ({stats, absolutePath} = related); + } + } catch (err) { + if (err.code !== 'ENOENT' && err.code !== 'ENOTDIR') { + return internalError(absolutePath, response, acceptsJSON, current, handlers, config, err); + } + } + } + + if (!stats) { + try { + stats = await handlers.lstat(absolutePath); + } catch (err) { + if (err.code !== 'ENOENT' && err.code !== 'ENOTDIR') { + return internalError(absolutePath, response, acceptsJSON, current, handlers, config, err); + } + } + } + + if (stats && stats.isDirectory()) { + let directory = null; + let singleFile = null; + + try { + const related = await renderDirectory(current, acceptsJSON, handlers, methods, config, { + relativePath, + absolutePath + }); + + if (related.singleFile) { + ({stats, absolutePath, singleFile} = related); + } else { + ({directory} = related); + } + } catch (err) { + if (err.code !== 'ENOENT') { + return internalError(absolutePath, response, acceptsJSON, current, handlers, config, err); + } + } + + if (directory) { + const contentType = acceptsJSON ? 'application/json; charset=utf-8' : 'text/html; charset=utf-8'; + + response.statusCode = 200; + response.setHeader('Content-Type', contentType); + response.end(directory); + + return; + } + + if (!singleFile) { + // The directory listing is disabled, so we want to + // render a 404 error. + stats = null; + } + } + + const isSymLink = stats && stats.isSymbolicLink(); + + // There are two scenarios in which we want to reply with + // a 404 error: Either the path does not exist, or it is a + // symlink while the `symlinks` option is disabled (which it is by default). + if (!stats || (!config.symlinks && isSymLink)) { + // allow for custom 404 handling + return handlers.sendError(absolutePath, response, acceptsJSON, current, handlers, config, { + statusCode: 404, + code: 'not_found', + message: 'The requested path could not be found' + }); + } + + // If we figured out that the target is a symlink, we need to + // resolve the symlink and run a new `stat` call just for the + // target of that symlink. + if (isSymLink) { + absolutePath = await handlers.realpath(absolutePath); + stats = await handlers.lstat(absolutePath); + } + + const streamOpts = {}; + + // TODO ? 
if-range + if (request.headers.range && stats.size) { + const range = parseRange(stats.size, request.headers.range); + + if (typeof range === 'object' && range.type === 'bytes') { + const {start, end} = range[0]; + + streamOpts.start = start; + streamOpts.end = end; + + response.statusCode = 206; + } else { + response.statusCode = 416; + response.setHeader('Content-Range', `bytes */${stats.size}`); + } + } + + // TODO ? multiple ranges + + let stream = null; + + try { + stream = await handlers.createReadStream(absolutePath, streamOpts); + } catch (err) { + return internalError(absolutePath, response, acceptsJSON, current, handlers, config, err); + } + + const headers = await getHeaders(handlers, config, current, absolutePath, stats); + + // eslint-disable-next-line no-undefined + if (streamOpts.start !== undefined && streamOpts.end !== undefined) { + headers['Content-Range'] = `bytes ${streamOpts.start}-${streamOpts.end}/${stats.size}`; + headers['Content-Length'] = streamOpts.end - streamOpts.start + 1; + } + + // We need to check for `headers.ETag` being truthy first, otherwise it will + // match `undefined` being equal to `undefined`, which is true. + // + // Checking for `undefined` and `null` is also important, because `Range` can be `0`. + // + // eslint-disable-next-line no-eq-null + if (request.headers.range == null && headers.ETag && headers.ETag === request.headers['if-none-match']) { + response.statusCode = 304; + response.end(); + + return; + } + + response.writeHead(response.statusCode || 200, headers); + stream.pipe(response); +}; \ No newline at end of file diff --git a/gulpfile.js b/gulpfile.js new file mode 100644 index 00000000..51d3946c --- /dev/null +++ b/gulpfile.js @@ -0,0 +1,191 @@ +const gulp = require('gulp'); +const request = require('request-promise-native'); +const axios = require('axios'); +const glob = require('glob'); +const fs = require('fs'); +const path = require('path'); +const crypto = require('crypto'); +const url = require('url') +const { TikaClient } = require('./app/TikaClient/build'); +const { Readable, Writable } = require('stream'); + +const relPathToFiles = './public'; +const baseUrl = 'https://no-moss-3-carbo-landfill-library.online'; // URL of the document to download and index +const tikaUrl = 'http://solr.services.cleveland.daball.me:9998'; // URL of the Tika instance +const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance + +// Task to clear out previous Solr data +gulp.task('index:clear', async () => { + await request({ + uri: `${solrUrl}/update?commit=true`, + method: 'POST', + body: { delete: { query: '*:*' } }, // Delete all documents + json: true, + }); +}); + +async function calculateSHA256Hash(filePath) { + return new Promise((resolve, reject) => { + const readStream = fs.createReadStream(filePath); + const hash = crypto.createHash('sha256'); + + readStream.on('data', (chunk) => { + hash.update(chunk); + }); + + readStream.on('end', () => { + const sha256Hash = hash.digest('hex'); + resolve(sha256Hash); + }); + + readStream.on('error', (error) => { + reject(error); + }); + }); +} + +// Function to retrieve metadata of a file from Solr +async function retrieveMetadataFromSolr(url) { + // Retrieve metadata from Solr based on the file URL or unique identifier + // const response = await axios.get(`${solrUrl}/select?q=id:"${encodeURIComponent(url)}"&fl=${encodeURIComponent('sha256sum, content_length')}`, { + // responseType: 'json' + // }); + const fl = encodeURIComponent("sha256sum, 
content_length"); + const q = encodeURIComponent("id:")+"\""+encodeURIComponent(url)+"\"";//encodeURIComponent(`id:"${url}"`); + const uri = `${solrUrl}/select?q=${q}&fl=${fl}`; + const response = await request({ uri: `${uri}`, json: true }); + return response && response.response && response.response.docs && response.response.docs[0]; +} + +async function indexDocumentInSolr(document) { + try { + // Send document to Solr using the Solr REST API or a Solr client library + // Example code to send document using Axios: + await axios.post(solrUrl + '/update/json/docs', document, { + params: { + commit: true, // Commit changes immediately + }, + }); + } catch (error) { + throw new Error('Error indexing document in Solr: ' + error.message); + } +} + +function extToMime(file_name) { + switch (path.extname(file_name)) { + case '.htm': + case '.html': + return 'text/html'; + case '.pdf': + return 'application/pdf'; + case '.md': + case '.txt': + default: + return 'text/plain'; + } +} + + +// Task to index files into Solr +gulp.task('index:docs', async () => { + let globs = [ + 'Potesta_&_Associates/**/*.{pdf, docx, jpg, png, txt}', + // 'Russell_County_BOS/Documents/**/*.{pdf, docx, jpg, png, txt}', + 'Russell_County_BOS/Meetings/**/*.{pdf, docx, jpg, png, txt}', + 'Russell_County_BOS/Ordinances/**/*.{pdf, docx, jpg, png, txt}', + 'Russell_County_IDA/Meetings/**/*.{pdf, docx, jpg, png, txt}', + 'Russell_County_Tourism/Agenda/**/*.{pdf, docx, jpg, png, txt}', + 'Russell_County_Tourism/Minutes/**/*.{pdf, docx, jpg, png, txt}', + 'United_Mine_Workers_of_America/**/*.{pdf, docx, jpg, png, txt}', + 'Virginia_Energy/**/*.{pdf, docx, jpg, png, txt}', + ]; + // Use glob to match files in the local directories + let files = []; + let cwd = path.resolve(__dirname, relPathToFiles.replaceAll('/', path.sep)); + globs.forEach(async (globPattern) => { + files = files.concat(glob.globSync(globPattern, { + cwd, + matchBase: true, + follow: true, + })); + }); + console.log(`Found ${files.length} files to index using ${globs.length} glob patterns.`); + // Loop through each file and process them + for (let f = 0; f < files.length; f++) { + const file = files[f]; + console.log(`${f+1}/${files.length}: ${file}`); + + const fileFullPath = path.join(cwd, file); + + const url = `https://no-moss-3-carbo-landfill-library.online/${file.replaceAll(path.sep, '/')}`; + console.log('URL: ' + url); + + // Retrieve metadata of the file from Solr (if it exists) + const metadata = await retrieveMetadataFromSolr(url); + + // Calculate file size + const stats = fs.statSync(fileFullPath); + const fileSize = stats.size; + + // Calculate SHA256 checksum + // const checksum = crypto.createHash('sha256').update(fileContents).digest('hex'); + const checksum = await calculateSHA256Hash(fileFullPath); + + // Compare metadata + if (!metadata || parseInt(metadata.content_length[0]) != fileSize || metadata.sha256sum[0] != checksum) { + // Metadata mismatch or file not found in Solr, proceed with indexing + console.log(`Processing text from file using Tika.`); + const client = new TikaClient({ host: tikaUrl }); + const version = await client.getVersion(); + console.info(`Tika Server Version: ${version}`); + + // Create a Readable stream for the file contents + let f = fs.createReadStream(fileFullPath); + + // Create a writable stream to capture the extracted text content into a string + let extractedText = ''; + const writableStream = new Writable({ + write(chunk, encoding, callback) { + extractedText += chunk.toString(); // Append the chunk to 
the extracted text + callback(); + } + }); + + // Use the TikaClient's pipe method to extract text content + await client.pipe(f, writableStream, 'text/plain', path.basename(file)); + console.log("Extracted Text:", extractedText); + + // Create Solr document + const solrDocument = { + id: url, // Replace with a unique identifier for the document + text: extractedText, // Add the extracted text content + sha256sum: checksum, // Add the checksum + //html: response.data, + url: url, + content_length: fileSize, + content_type: extToMime(url), + // Add additional fields as needed (e.g., title, author, etc.) + }; + + // Send document to Solr for indexing + // Index the file with its text content and metadata + console.log(`Indexing ${url}`); + await indexDocumentInSolr(solrDocument); + + // Continue + console.log(`Done.`); + } else { + // Metadata matches, skip the file + console.log(`Skipping file '${file}' as metadata matches existing metadata in Solr index.`); + } + } +}); + +// Task to optionally run both clearing and indexing +gulp.task('index:reindex', gulp.series('index:clear', 'index:docs')); + +// Default task to run indexing +gulp.task('index', gulp.series('index:docs')); + +// Default task to run indexing +gulp.task('default', gulp.series('index')); diff --git a/package.json b/package.json new file mode 100644 index 00000000..5282619a --- /dev/null +++ b/package.json @@ -0,0 +1,79 @@ +{ + "name": "express", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1", + "transpile:ts": "tsc -project tsconfig.build.json", + "index": "gulp index", + "index:clear": "gulp index:clear", + "index:docs": "gulp index:docs", + "index:reindex": "gulp index:reindex" + }, + "author": "", + "license": "ISC", + "dependencies": { + "axios": "^1.6.7", + "bytes": "3.0.0", + "cheerio": "^1.0.0-rc.12", + "content-disposition": "0.5.2", + "ejs": "^3.1.9", + "express": "^4.18.3", + "fast-url-parser": "1.1.3", + "glob": "^10.3.10", + "gray-matter": "^4.0.3", + "gulp": "^4.0.2", + "gulp-if": "^3.0.0", + "js-yaml": "^4.1.0", + "markdown-it": "^14.0.0", + "markdown-it-attrs": "^4.1.6", + "mime-types": "2.1.18", + "minimatch": "3.1.2", + "moment": "^2.30.1", + "moment-timezone": "^0.5.45", + "multer": "^1.4.5-lts.1", + "needle": "^3.3.1", + "node-fetch": "^2", + "path-is-inside": "1.0.2", + "path-to-regexp": "2.2.1", + "range-parser": "1.2.0", + "request-promise-native": "^1.0.9", + "ssh2-sftp-client": "^10.0.3", + "superagent": "^8.1.2", + "through2": "^4.0.2", + "tika-js": "^1.0.2", + "tslib": "^2.6.2", + "xml2js": "^0.6.2" + }, + "devDependencies": { + "@swc-node/register": "^1.6.8", + "@swc/cli": "^0.1.63", + "@swc/core": "^1.3.104", + "@swc/helpers": "^0.5.3", + "@swc/register": "^0.1.10", + "@types/needle": "^3.3.0", + "@types/node": "^20.11.5", + "@types/node-fetch": "^2.6.11", + "@types/superagent": "^8.1.4", + "@typescript-eslint/eslint-plugin": "^6.19.0", + "@typescript-eslint/parser": "^6.19.0", + "@zeit/eslint-config-node": "0.2.13", + "@zeit/git-hooks": "0.1.4", + "ava": "2.2.0", + "commander": "2.15.1", + "eslint": "^8.56.0", + "eslint-plugin-eslint-plugin": "^5.2.1", + "eslint-plugin-import": "^2.29.1", + "fs-extra": "6.0.1", + "micro": "9.3.2", + "node-fetch": "2.6.1", + "nyc": "14.1.1", + "request": "2.87.0", + "sleep-promise": "6.0.0", + "test-listen": "1.1.0", + "ts-node": "^10.9.2", + "tslib": "^2.6.2", + "typescript": "^5.3.3" + } +} diff --git a/pages/comment-policy.md b/pages/comment-policy.md new file mode 100644 
index 00000000..148c6807 --- /dev/null +++ b/pages/comment-policy.md @@ -0,0 +1,97 @@ +--- +title: Comment Policy +docDate: 2024-03-09 +--- + +## Online Comments Section Policy + +No Moss 3 Carbo Landfill Online Library, operated by David Allen Ball ("David A. Ball"), respects +the freedom of speech of its users. I encourage open and respectful discussions among its users. +In order to facilitate open dialogue, this website embeds into its comments section feature a +third-party comment service provider, specifically Disqus. To ensure a positive and constructive +environment, I would like to ask that all users adhere to the following guidelines in this +Comment Policy ("Policy"): + +### Respectful Conduct + +Please be courteous and respectful towards others. Personal attacks, offensive language, hate speech, +harassment, and bullying will not be tolerated. + +### Stay on Topic + +Keep comments relevant to the content of the post or article. Off-topic comments may be removed at +the discretion of the moderators or myself. + +### No Spam or Self-Promotion + +Avoid posting spam, advertisements, or self-promotional content. Comments solely aimed at promoting +products, services, businesses, or websites may be removed at the discretion of the moderators or +myself. + +### Avoid Plagiarism + +Respect copyright laws and intellectual property rights. Do not plagiarize or reproduce content without +proper attribution or permission. + +### Constructive Criticism + +Constructive criticism and feedback are welcome, but please express your opinions in a constructive and +respectful manner. + +### Be Mindful of Privacy + +Do not share personal information or private details about yourself or others in the comments. + +### Moderation + +Comments are subject to moderation. The moderators and myself reserve the right to remove comments that +violate these guidelines or are otherwise deemed inappropriate. + +### Discretion to Disable Comments + +David A. Ball reserves the right to disable or remove the online comment feature at any time, without +prior notice or explanation. This may occur in situations where maintaining the comment section becomes +impractical or undesirable, such as in cases of overwhelming spam, persistent violations of community +guidelines, or technical issues. + +### Storage of Comment Data + +Please be aware that comment data, including text, user information, and any other data provided by users, +is stored and managed by Disqus, the commenting platform used by No Moss 3 Carbo Landfill Online Library. +By participating in the comment section, you acknowledge and agree to +[Disqus' Privacy policy](https://disqus.com/privacy-policy/) and any other terms of service required by +Disqus. Disqus integrates with other login providers and social networks at their own discretion and your +use of these features constitutes your agreement with their policies. + +### Continued Use + +By using the comments section provided by No Moss 3 Carbo Landfill Online Library, users acknowledge and +agree to the terms outlined in this Comment Policy and the [Privacy Policy](/privacy). +Continued use of the comments section feature implies acceptance of these terms and conditions. + +### No Guarantee or Warranty + +No Moss 3 Carbo Landfill Online Library or David A. Ball does not provide any guarantee or warranty +regarding the availability, functionality, or content of the comment feature. 
Users acknowledge and
+accept that the comment feature may be subject to interruptions, errors, or removal without notice.
+
+### Modification of Policy
+
+No Moss 3 Carbo Landfill Online Library reserves the right to modify or update this Comment Policy
+at any time without prior notice. Users are advised to review this policy periodically for any changes.
+
+### Effective Date
+
+This Policy is effective as of March 9, 2024, and applies to all users accessing the comments
+section functionality provided by No Moss 3 Carbo Landfill Online Library.
+
+### Disclaimer
+
+The views expressed in the comments are those of the individual commenters and do not necessarily reflect
+the views of No Moss 3 Carbo Landfill Online Library, David A. Ball, the Disqus service, or any of our
+affiliates.
+
+By participating in the comment feature, you agree to abide by these guidelines. Failure to comply may
+result in the removal of comments and, in severe cases, the suspension of commenting privileges.
+
+Thank you for contributing to a respectful and engaging community!
\ No newline at end of file
diff --git a/pages/privacy-policy.md b/pages/privacy-policy.md
new file mode 100644
index 00000000..f062ee21
--- /dev/null
+++ b/pages/privacy-policy.md
@@ -0,0 +1,268 @@
+---
+title: Privacy Policy
+docDate: 2024-03-09
+---
+
+## Online Privacy Policy Agreement
+
+No Moss 3 Carbo Landfill Online Library, operated by David Allen Ball ("David A. Ball"), prioritizes
+the privacy of its users. This Privacy Policy ("Policy") outlines how I collect and utilize personal
+information from visitors to No Moss 3 Carbo Landfill Online Library and users of my online services.
+It is designed to provide clarity on what information is collected, how it is used, and the measures
+taken to safeguard user privacy. By reading this Policy, you will gain insight into my practices
+and my commitment to upholding privacy standards.
+
+I reserve the right to modify this Policy at any time. To stay informed of any updates, I recommend
+checking this page periodically. If, at any point, I decide to utilize personally identifiable
+information in a manner significantly different from what was originally stated, any individuals
+affected will be promptly notified via email. They will then have the option to consent to or decline
+the use of their information in this revised manner.
+
+This Policy governs any and all data collection and usage by David A. Ball, specifically concerning
+the operation of the No Moss 3 Carbo Landfill Online Library online services. By accessing or using
+the online library and its services, you acknowledge and agree that certain anonymous data may be
+collected to improve the functionality and performance of the website, such as aggregated usage
+statistics and search criteria. Additionally, users have the option to voluntarily submit personal
+information, such as feedback about the website or resources they wish to add to the collection.
+Any voluntary information provided will be handled in accordance with this Policy and used solely
+for the stated purposes. For more information on the types of data collected and how they are used,
+please refer to the sections below.
+
+Please be aware that this Policy solely governs the collection and use of information by David A.
+Ball. It does not extend to companies or individuals beyond my control or employment. When visiting
+websites mentioned or linked to, it's important to review their respective privacy policies before
+disclosing any information. I highly recommend reviewing the privacy policies and statements of any
+website you visit regularly to comprehend how they collect, use, and share information.
+
+This Policy provides detailed information on the following aspects:
+
+1. The types of personally identifiable information collected from you through No Moss 3 Carbo
+   Landfill Online Library;
+2. The purposes for collecting personally identifiable information and the legal basis for such
+   collection;
+3. How the collected information is used and with whom it may be shared;
+4. Your available choices regarding the use of your data; and
+5. The security measures implemented to safeguard against the misuse of your information.
+
+### Information I Collect
+
+It is always up to you whether to disclose personally identifiable information to me, although if
+you elect not to do so, I reserve the right not to provide you with certain services. This website
+collects two types of information: personally identifiable information and non-personally
+identifiable information. The types of information collected may include, but are not limited to:
+
+1. Voluntarily provided information which may include personally identifiable information
+   including, but not limited to, your:
+   * Name
+   * Address
+   * Email address
+   * Phone number
+   * Social media profiles
+   * Message subjects
+   * Message contents
+   * Message attachments
+   {.p-10}
+
+2. Information automatically collected when visiting No Moss 3 Carbo Landfill Online Library, which
+   may include cookies, third-party tracking technologies, specifically Google Analytics, and server
+   logs, including, but not limited to, your:
+   * IP address
+   * Browser type
+   * Internet Service Provider (ISP)
+   * Referring/exit pages
+   * Date/time stamp
+   * Clickstream data
+   * Search queries within the website
+   * Other information related to your visit to the website
+
+Additionally, David A. Ball may occasionally collect non-personal anonymous demographic information,
+such as age, gender, household income, political affiliation, race, and religion, along with details
+like the type of browser you're using, your IP address, or the type of operating system. This
+information helps in delivering and maintaining high-quality service.
+
+David A. Ball may also find it necessary, on occasion, to monitor websites that my users visit to
+better understand the preferences of my users or the general public regarding popular services and
+products.
+
+Please rest assured that this site will only collect personal information that you knowingly and
+willingly provide to me by way of surveys, completed forms, and emails. The intent of this website
+is to use personal information solely for the purpose for which it was requested, and any additional
+uses will be specifically provided for in this Policy.
+
+### Why I Collect Information and For How Long
+
+I collect information to better understand your needs and provide you with the best service possible,
+including allowing you to submit content for publication on the website. I reserve the right to publish
+or not publish any submissions. Additionally, I may use the information for the following purposes:
+
+* To improve my website and services.
+* To personalize your experience.
+* To facilitate the submission and publication of user-uploaded content.
+* To analyze and transform content into full-text searchable metadata.
+* To send periodic emails regarding updates, news, or other information related to the website.
+* To administer contests, promotions, surveys, questionnaires, or other site features.
+
+The data I collect from you will be stored for no longer than necessary, and the length of time
+it is retained will be determined based on various factors. These factors include the length of
+time your personal information remains relevant, the necessity to fulfill duties and obligations,
+any applicable limitation periods for claims, retention periods prescribed by law or recommended
+by regulators, professional bodies, or associations, the type of contract I have with you, the
+existence of your consent, and my legitimate interest in keeping such information as stated in
+this Policy. Additionally, analytics data collected through third-party services like Google
+Analytics may be retained indefinitely for the purpose of improving No Moss 3 Carbo Landfill
+Online Library's performance, enhancing user experience, and analyzing trends over time. Such
+data may be subject to the terms and conditions of the third-party service provider and governed
+by their respective privacy policies.
+
+### Use of Information Collected
+
+David A. Ball does not now, and will not in the future, sell, rent, or lease any of my user lists
+and/or names to any third parties.
+
+David A. Ball may collect and use personal information to assist in the operation of No Moss 3 Carbo
+Landfill Online Library and to ensure delivery of the services you need and request. At times, I may
+find it necessary to use personally identifiable information to keep you informed of other possible
+products and/or services that may be available to you from No Moss 3 Carbo Landfill Online Library.
+
+David A. Ball may also contact you regarding surveys and/or research questionnaires related to your
+opinion of current or potential future services that may be offered.
+
+Additionally, David A. Ball may need to contact you regarding any submissions you make to the website.
+
+### Disclosure of Information
+
+David A. Ball may use or disclose the information provided by you under the following
+circumstances:
+
+* As necessary to provide the services offered on the website.
+* In other ways described in this Policy or to which you have otherwise consented.
+* In aggregate with other information in such a way that your identity cannot reasonably
+  be determined.
+* As required by law, or in response to a subpoena or search warrant.
+* To outside auditors who have agreed to keep the information confidential.
+* As necessary to enforce any applicable terms of service.
+* As necessary to maintain, safeguard, and preserve all the rights and property of David A. Ball.
+
+### Non-Marketing Purposes
+
+David A. Ball greatly respects your privacy. I reserve the right to contact you
+if needed for non-marketing purposes, such as bug alerts, security breaches, account issues,
+and/or changes in David A. Ball products and services. These communications are essential for
+ensuring the security, functionality, and quality of my services. In certain circumstances, I
+may use this website, newspapers, or other public means to post a notice.
+
+### Children under the age of 13
+
+David A. Ball's website is not directed to, and does not knowingly collect personally identifiable
+information from, children under the age of thirteen (13). If it is determined that such
+information has been inadvertently collected on anyone under the age of thirteen (13), I shall
+immediately take the necessary steps to ensure that such information is deleted from my system's
+database, or in the alternative, that verifiable parental consent is obtained for the use and
+storage of such information. Anyone under the age of thirteen (13) must seek and obtain parent
+or guardian permission to use this website.
+
+If a parent or guardian becomes aware that their child has provided personal information without
+their consent, they should contact me immediately to address the issue at the contact information
+provided below.
+
+### Unsubscribe or Opt-Out
+
+All users and visitors to No Moss 3 Carbo Landfill Online Library have the option to
+discontinue receiving communications from me by way of email or newsletters. To unsubscribe,
+please send an email to unsubscribe@daball.me expressing your wish to unsubscribe. If you wish
+to opt out of communications from any third-party websites, you must go to that specific website
+to unsubscribe or opt out. David A. Ball will continue to adhere to this Policy with respect to
+any personal information previously collected.
+
+### Links to Other Websites
+
+No Moss 3 Carbo Landfill Online Library contains links to affiliate and other websites.
+David A. Ball does not claim nor accept responsibility for the privacy policies, practices,
+and procedures of other such websites. Therefore, I encourage all users and visitors to be
+aware when they leave my website and to read the privacy statements of each and every website
+that collects personally identifiable information.
+The aforementioned Privacy Policy agreement applies solely to the information collected by
+No Moss 3 Carbo Landfill Online Library.
+
+### Storage of Comment Data
+
+No Moss 3 Carbo Landfill Online Library incorporates a third-party comment service,
+specifically Disqus. Please be aware that comment data, including text, user information,
+and any other data provided by users, is stored and managed by Disqus, the commenting platform
+used by No Moss 3 Carbo Landfill Online Library. By participating in the comment section, you
+also acknowledge and agree to [Disqus' Privacy Policy](https://disqus.com/privacy-policy/).
+
+### Security
+
+David A. Ball shall take every reasonable precaution to maintain adequate physical,
+procedural, and technical security with respect to my offices and information storage facilities
+so as to prevent any loss, misuse, unauthorized access, disclosure, or modification of the user's
+personal information under my control.
+
+This website also uses Secure Sockets Layer (SSL) for authentication and private communications
+in an effort to build users' trust and confidence in the internet and website use by providing
+simple and secure access and communication of credit card and personal information.
+
+### Changes to Privacy Policy Agreement
+
+David A. Ball reserves the right to update and/or change the terms of this privacy policy, and
+as such, I will post any changes to the No Moss 3 Carbo Landfill Online Library Privacy Policy
+page, which can be accessed through the website homepage at
+[https://no-moss-3-carbo-landfill-library.online](https://no-moss-3-carbo-landfill-library.online/),
+so that my users and/or visitors are always aware of the type of information I collect, how
+it will be used, and under what circumstances, if any, I may disclose such information. If at
+any point in time David A. Ball decides to make use of any personally identifiable information
+on file, in a manner vastly different from that which was stated when this information was
+initially collected, the user or users shall be promptly notified by email. Users at that time
+shall have the option as to whether to permit the use of their information in this separate manner.
+
+### Effective Date
+
+This Policy is effective as of March 9, 2024, and applies to all users accessing the website
+and services provided by No Moss 3 Carbo Landfill Online Library.
+
+### Acceptance of Terms
+
+Your use of this website constitutes acceptance of the terms and conditions outlined in the
+above Privacy Policy agreement. If you do not agree with these terms and conditions, please
+refrain from using this website and its services. Continued use of the No Moss 3 Carbo Landfill
+Online Library website after any updates or changes to these terms and conditions indicates your
+acceptance of such modifications.
+
+### How to Contact Me
+
+If you have any questions or concerns regarding the Privacy Policy Agreement related to the
+No Moss 3 Carbo Landfill Online Library website, please feel free to contact me at the following:
+
+
+ + +**David A. Ball**
+[daball.me](https://daball.me)
+50 Ball Dr
+Cleveland, VA 24225-7145
+[david@daball.me](mailto:david@daball.me)
+[(276) 336-1797](tel:+12763361797)
+
\ No newline at end of file
diff --git a/pages/search-policy.md b/pages/search-policy.md
new file mode 100644
index 00000000..148fcacf
--- /dev/null
+++ b/pages/search-policy.md
@@ -0,0 +1,73 @@
+---
+title: Search Policy
+docDate: 2024-03-09
+---
+
+## Online Search Policy Agreement
+
+The search functionality provided by No Moss 3 Carbo Landfill Online Library, operated by David Allen Ball
+("David A. Ball"), is intended to enhance user experience by enabling the retrieval of information contained
+within documents stored on the website. This Search Policy ("Policy") outlines the intended functions of the
+search feature as well as some limitations of the service. This Policy may not cover all of the limitations
+of the search feature.
+
+### Optical Character Recognition (OCR)
+
+This service utilizes Optical Character Recognition (OCR) technology to extract text from images embedded
+in PDF files, which are sourced mostly from the public domain. This website relies heavily upon Apache Tika,
+PDFKit, and Tesseract to perform this function; an illustrative sketch of this extraction step appears later
+in this Policy.
+
+While some initial effort is made to ensure the accuracy and completeness of the OCR process, it is important
+to note that OCR results may not always be error-free. Inaccuracies may occur due to various factors, including,
+but not limited to, the quality of the original document, variations in font styles, image resolution, image tilt,
+skew, image visibility, color, etc.
+
+In some cases, I may choose to correct any anomalies discovered during the OCR process and index the changes
+based on these corrections. I cannot guarantee the completeness of any metadata, even if partially corrected.
+
+### YouTube Auto-Generated Subtitles
+
+This service relies heavily upon YouTube's automatic subtitles feature and utilizes `youtube-dl` to copy
+YouTube videos along with any published subtitles and auto-generated subtitles.
+
+The publisher of the subtitles or the YouTube generator of the automatic subtitles is responsible for that
+content.
+
+### No Guarantee or Warranty
+
+No Moss 3 Carbo Landfill Online Library does not provide any guarantee or warranty regarding the accuracy,
+reliability, or suitability of the OCR results obtained through the search functionality. Users acknowledge and
+accept that the accuracy of OCR-generated text may vary and should exercise discretion when relying on search results.
+
+### User Responsibility
+
+Users are solely responsible for verifying the accuracy and relevance of the information retrieved through the
+search functionality. It is recommended to cross-reference OCR-generated text with the original source documents
+whenever possible.
+
+### Limitation of Liability
+
+In no event shall No Moss 3 Carbo Landfill Online Library or its affiliates be liable for any damages, losses,
+or liabilities arising from the use of the search functionality or reliance on OCR-generated text, including,
+but not limited to, indirect, incidental, or consequential damages.
+
+### Continued Use
+
+By using the search functionality provided by No Moss 3 Carbo Landfill Online Library, users acknowledge and agree
+to the terms outlined in this Search Policy and the [Privacy Policy](/privacy). Continued use of the search service
+implies acceptance of these terms and conditions.
+
+### Feedback and Reporting
+
+Users are encouraged to provide feedback regarding any inaccuracies or issues encountered during the search process.
+To report errors or share feedback, please contact david@daball.me.
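+
+For readers curious about the mechanics, the sketch below is an editor's illustration, not this
+website's actual code. It assumes a locally running Apache Tika server (default port 9998), whose
+`/tika` endpoint accepts a document via HTTP PUT and returns the extracted plain text, and it
+assumes Node.js 18 or later for the global `fetch`. The helper name `extractPdfText` is hypothetical.
+
+```js
+// Illustrative sketch only: send a PDF to a local Apache Tika server and
+// read back the plain text it extracts. When Tika is configured with
+// Tesseract, that text includes OCR output for scanned page images.
+const fs = require('fs');
+
+async function extractPdfText(pdfPath, tikaUrl = 'http://localhost:9998/tika') {
+  const body = fs.readFileSync(pdfPath); // raw PDF bytes
+  const response = await fetch(tikaUrl, {
+    method: 'PUT',
+    headers: { 'Content-Type': 'application/pdf', 'Accept': 'text/plain' },
+    body,
+  });
+  if (!response.ok) throw new Error(`Tika returned HTTP ${response.status}`);
+  return response.text(); // plain text, ready for full-text indexing
+}
+
+// Example: print the first 200 extracted characters of a hypothetical file.
+// extractPdfText('minutes.pdf').then((text) => console.log(text.slice(0, 200)));
+```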
+
+### Modification of Policy
+
+No Moss 3 Carbo Landfill Online Library reserves the right to modify or update this Search Policy at any time without
+prior notice. Users are advised to review this policy periodically for any changes.
+
+### Effective Date
+
+This Policy is effective as of March 9, 2024, and applies to all users accessing the search
+functionality provided by No Moss 3 Carbo Landfill Online Library.
\ No newline at end of file
diff --git a/public/README.md b/public/README.md
new file mode 100644
index 00000000..dc6c9646
--- /dev/null
+++ b/public/README.md
@@ -0,0 +1,31 @@
+# Contents
+
+## [Potesta & Associates](/Potesta_&_Associates)
+
+## [Russell County Board of Supervisors](/Russell_County_BOS)
++ ### [Meetings](/Russell_County_BOS/Meetings)
+  + #### [Agenda Packets](/Russell_County_BOS/Meetings/Agenda_Packets)
+  + #### [Agendas](/Russell_County_BOS/Meetings/Agendas)
+  + #### [Minutes](/Russell_County_BOS/Meetings/Minutes)
++ ### [Ordinances](/Russell_County_BOS/Ordinances)
+  + #### [Solid Waste](/Russell_County_BOS/Ordinances/Solid_Waste)
++ ### [YouTube Archive](/Russell_County_BOS/YouTube_Archive)
+  + #### [@russellcountyvirginia8228](/Russell_County_BOS/YouTube_Archive/@russellcountyvirginia8228)
+
+## [Russell County Industrial Development Authority](/Russell_County_IDA)
++ ### [Meetings](/Russell_County_IDA/Meetings)
+  + #### [Agenda Packets](/Russell_County_IDA/Meetings/Agenda_Packets)
+
+## [Russell County Tourism](/Russell_County_Tourism)
++ ### [Meetings](/Russell_County_Tourism/Meetings)
+  + #### [Agenda](/Russell_County_Tourism/Meetings/Agenda)
+  + #### [Minutes](/Russell_County_Tourism/Meetings/Minutes)
+
+## [United Mine Workers of America](/United_Mine_Workers_of_America)
++ ### [Art](/United_Mine_Workers_of_America/Art)
++ ### [Press Releases](/United_Mine_Workers_of_America/Press_Releases)
+
+## [Virginia Energy](/Virginia_Energy)
++ ### [Russell County Reclamation, LLC](/Virginia_Energy/Russell_County_Reclamation_LLC)
++ ### [YouTube Archive](/Virginia_Energy/YouTube_Archive)
+  + #### [@VADMME](/Virginia_Energy/YouTube_Archive/@VADMME)
diff --git a/public/Russell_County_BOS/YouTube_Archive/@russellcountyvirginia8228/README.md b/public/Russell_County_BOS/YouTube_Archive/@russellcountyvirginia8228/README.md
new file mode 100644
index 00000000..7af04dde
--- /dev/null
+++ b/public/Russell_County_BOS/YouTube_Archive/@russellcountyvirginia8228/README.md
@@ -0,0 +1,6 @@
+# YouTube Video Archives
+
+These videos are archives of the videos released to the YouTube channel for Russell County Board of Supervisors.
+
+The original channel for these videos is
+[@russellcountyvirginia8228](https://www.youtube.com/@russellcountyvirginia8228).
diff --git a/public/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding/README.md b/public/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding/README.md
new file mode 100644
index 00000000..7ba20184
--- /dev/null
+++ b/public/Web_Site_Archives/Russell_County_Web_Site-2024-02-13_19_50_Modified_With_OCR_Encoding/README.md
@@ -0,0 +1,12 @@
+# About this folder
+
+The PDF files stored in this folder have been modified from their original archived versions.
+They have been run through the open-source Tesseract OCR software in order to encode optically
+recognized characters as plain text, which is searchable using other software tools.
+These files do not match the binary versions as released by Russell County and are provided to the
+community here as a resource you can use to index and search them yourself.
+
+Consider referring back to the original versions of the files if you find something interesting
+and want to make sure it hasn't been altered in a way that changes the meaning of the documents.
+I cannot assure the quality of these OCR-encoded PDFs. I have run them through both ClamAV and
+Avast Antivirus. Use them at your own risk. They are here for your convenience.
diff --git a/tsconfig.build.json b/tsconfig.build.json
new file mode 100644
index 00000000..c8629880
--- /dev/null
+++ b/tsconfig.build.json
@@ -0,0 +1,4 @@
+{
+  "extends": "./tsconfig.json",
+  "exclude": ["TikaClient/src/test.ts"]
+}
\ No newline at end of file
diff --git a/tsconfig.json b/tsconfig.json
new file mode 100644
index 00000000..1a3a3d19
--- /dev/null
+++ b/tsconfig.json
@@ -0,0 +1,26 @@
+{
+  "compilerOptions": {
+    "rootDir": "app/TikaClient/src",
+    "outDir": "app/TikaClient/build",
+    "module": "commonjs",
+    "target": "esnext",
+    "sourceMap": false,
+    "declaration": true,
+    "experimentalDecorators": true,
+    "emitDecoratorMetadata": true,
+    "moduleResolution": "node",
+    "isolatedModules": false,
+    "suppressImplicitAnyIndexErrors": false,
+    "noImplicitAny": true,
+    "strictNullChecks": true,
+    "noUnusedLocals": false,
+    "noUnusedParameters": false,
+    "allowSyntheticDefaultImports": true,
+    "importHelpers": true,
+    "noEmit": false,
+    "esModuleInterop": true,
+    "resolveJsonModule": true,
+    "lib": ["es7", "dom", "ESNext.AsyncIterable"]
+  },
+  "include": ["app/TikaClient/src"]
+}
\ No newline at end of file
diff --git a/views/directory.ejs b/views/directory.ejs
new file mode 100644
index 00000000..902e73e7
--- /dev/null
+++ b/views/directory.ejs
@@ -0,0 +1,61 @@
+
+
+
+  <%=h.getDirectoryTitle({directory})%>
+  <%- include('./includes/common-head.ejs') %>
+
+
+
+
+  <%- include('./includes/top-navbar.ejs') %>
+
+
+

+   + <% paths.forEach(function(value, index) { %> + <% if (h.shouldShowDirectorySeparator({index})) { %> + + <% } %> + <% if (h.shouldShowWelcomeBanner({paths})) { %> + Welcome to <%= h.getDirectoryTitle({directory}) %> + <% } else if (h.shouldOmitLinkOnLastBreadcrumb({paths, index})) { %> + <%= h.trimSlashes({path: value.name}) %> + <% } else { %> + + <%= h.getDirectoryTitle({directory: value.name}) %> + + <% } %> + <% }); %> +

+
+ + <% if (h.directoryContainsReadme({directory})) {%> +
+
+ <%- h.printReadme({directory}) %> +
+
+ <% } %> + +
    + <% files.forEach(function(value, index) { %> +
  • + +
  • + <% }); %> +
+
+ + <%- include('./includes/bottom-navbar.ejs') %> + + + + + + + + + \ No newline at end of file diff --git a/views/error.ejs b/views/error.ejs new file mode 100644 index 00000000..6fe37907 --- /dev/null +++ b/views/error.ejs @@ -0,0 +1,122 @@ + + + + + + + + + +
+
+ <%= statusCode %> +

<%= message %>

+
+
+ + + diff --git a/views/helpers/functions.js b/views/helpers/functions.js new file mode 100644 index 00000000..0cf88fa2 --- /dev/null +++ b/views/helpers/functions.js @@ -0,0 +1,72 @@ +const path = require('path'); +const fs = require('fs'); +const process = require('process'); +const markdownit = require('markdown-it'); +var markdownItAttrs = require('markdown-it-attrs'); +const md = markdownit({ + html: true, + linkify: true, + typographer: true, +}).use( + markdownItAttrs, { + // optional, these are default options + leftDelimiter: '{', + rightDelimiter: '}', + allowedAttributes: [] // empty array = all attributes are allowed + } +); +const moment = require('moment-timezone').tz.setDefault("UTC"); + +const getSiteName = () => { + return 'No Moss 3 Carbo Landfill Online Library'; +} + +const trimSlashes = ({path}) => { + return path.replace(/^[\/\\]|[\/\\]$/g, ''); +}; +const getDirectoryTitle = ({directory}) => { + directory = trimSlashes({path: directory}); + let title = trimSlashes({path: directory.replace("public", "")}).replaceAll(path.sep, path.posix.sep); + return (directory=="public") ? getSiteName() : `${title} Listing - ${getSiteName()}`; +}; +const getWelcomeBanner = ({directory}) => { + return trimSlashes({path: directory.replace("public", `Welcome to ${getSiteName()}`)}); +}; +const shouldShowDirectorySeparator = ({index}) => (index > 0); +const shouldShowWelcomeBanner = ({paths}) => (paths.length == 1); +const shouldOmitLinkOnLastBreadcrumb = ({paths, index}) => (index == paths.length-1); + +const resolveReadmeFile = ({directory}) => { + const resolveFile = (file) => { + const pathToFile = path.join(process.cwd(), "..", directory, file) + return fs.existsSync(pathToFile) ? pathToFile : ""; + }; + return ( + resolveFile("README.md") || + resolveFile("README.txt") || + resolveFile("README") || + resolveFile("README.html") || + "" + ); +}; +const directoryContainsReadme = ({directory}) => resolveReadmeFile({directory}); +const printMarkdownFile = ({file}) => { + +}; +const printReadme = ({directory}) => { + return md.render(fs.readFileSync(resolveReadmeFile({directory})).toString()); +}; + +module.exports = { + trimSlashes, + getSiteName, + getDirectoryTitle, + getWelcomeBanner, + shouldShowDirectorySeparator, + shouldShowWelcomeBanner, + shouldOmitLinkOnLastBreadcrumb, + directoryContainsReadme, + printReadme, + md, + moment, +}; \ No newline at end of file diff --git a/views/includes/bottom-navbar.ejs b/views/includes/bottom-navbar.ejs new file mode 100644 index 00000000..76429c02 --- /dev/null +++ b/views/includes/bottom-navbar.ejs @@ -0,0 +1,12 @@ + diff --git a/views/includes/common-head.ejs b/views/includes/common-head.ejs new file mode 100644 index 00000000..b3cc194f --- /dev/null +++ b/views/includes/common-head.ejs @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/views/includes/top-navbar.ejs b/views/includes/top-navbar.ejs new file mode 100644 index 00000000..450eab85 --- /dev/null +++ b/views/includes/top-navbar.ejs @@ -0,0 +1,17 @@ + diff --git a/views/page.ejs b/views/page.ejs new file mode 100644 index 00000000..6b29efe7 --- /dev/null +++ b/views/page.ejs @@ -0,0 +1,65 @@ + + + + <%= (typeof fm.title !== 'undefined') ? `${fm.title} - ${h.getSiteName()}` : h.getSiteName() %> + <%- include('./includes/common-head.ejs') %> + + + + + <%- include('./includes/top-navbar.ejs') %> + +
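+  <%# Breadcrumb trail below: each path segment becomes a crumb; 'public' renders as the site name and the last crumb is left unlinked (helpers in views/helpers/functions.js). %>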
+
+

+   + <% paths.forEach(function(value, index) { %> + <% if (h.shouldShowDirectorySeparator({index})) { %> + + <% } %> + <% if (h.shouldOmitLinkOnLastBreadcrumb({paths, index})) { %> + <%= (typeof fm.title !== 'undefined') ? `${fm.title}` : value %> + <% } else { %> + + <%= (value == 'public' ? h.getSiteName() : value) %> + + <% } %> + <% }) %> +

+
+ + <% if (typeof content !== 'undefined') {%> +
+
+

<%= (typeof fm.title !== 'undefined') ? fm.title : require('path').basename(filePath) %>

+

+ + <%if (typeof fm.docDate !== 'undefined') { %>Document Date: <%= h.moment(fm.docDate).format('MMMM D, YYYY') %><% } %> + +

+ <%- content %> +
+
+ <% } %> + +
    + <% if (typeof files !== 'undefined') files.forEach(function(value, index) { %> +
  • + +
  • + <% }) %> +
+
+ + <%- include('./includes/bottom-navbar.ejs') %> + + + + + + + + + \ No newline at end of file diff --git a/views/search-error.ejs b/views/search-error.ejs new file mode 100644 index 00000000..4036815c --- /dev/null +++ b/views/search-error.ejs @@ -0,0 +1,42 @@ + + + + Search Error for <%- query %> - <%- h.getSiteName() %> + <%- include('./includes/common-head.ejs') %> + + + <%- include('./includes/top-navbar.ejs') %> +
+
+

+  
+ No Moss 3 Carbo Landfill Online Library
+
+ Search Error<% if ((typeof query !== 'undefined') && query != '') { %> for <%= query %><% } %>

+
+
+

+ Disclaimer: Use of the search feature is subject to both the Search + Policy and the Privacy Policy. +

+
+
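+ <%# Note: typeof always yields a string, so the guards below compare it with the string 'undefined'. %>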
+ <% if (typeof error !== 'undefined') {%>

An error occurred while attempting to perform a search.

+ <% if (typeof query !== 'undefined') {%>

Search Query: <%= query %>

<% } %>
+ <% if (typeof error.code !== 'undefined') {%>

Error Code: <%= error.code %>

<% } %>
+ <% if (typeof error.message !== 'undefined') {%>

Error Message: <%= error.message %>

<% } %> + <% } %> +
+ + <%- include('./includes/bottom-navbar.ejs') %> + + + + + + + + + diff --git a/views/search-results.ejs b/views/search-results.ejs new file mode 100644 index 00000000..f8005298 --- /dev/null +++ b/views/search-results.ejs @@ -0,0 +1,78 @@ + + + + Search Results for <%- query %> - <%- h.getSiteName() %> + <%- include('./includes/common-head.ejs') %> + + + <%- include('./includes/top-navbar.ejs') %> +
+
+

+  
+ No Moss 3 Carbo Landfill Online Library
+
+ Search Results for <%= query %>

+
+
+

+ Disclaimer: Use of the search feature is subject to both the Search + Policy and the Privacy Policy. +

+
+
+ + <% if (searchResults.length === 0) { %> +

No documents found matching the search query.

+ <% } else { %> +
    + <% searchResults.forEach(result => { %> +
  • +
    <%= result.title %>
    + <% if (highlightedSnippets[result.id] && highlightedSnippets[result.id].text) { %> + <% highlightedSnippets[result.id].text.forEach(snippet => { %> +

    <%- snippet %>

    + <% }); %> + <% } else { %> +

    No snippet available

    + <% } %> + <%= result.url %> +
  • + <% }); %> +
+ <% } %> +
+ + + +
+ + <%- include('./includes/bottom-navbar.ejs') %> + + + + + + + + + diff --git a/web.config b/web.config new file mode 100644 index 00000000..0860aaa7 --- /dev/null +++ b/web.config @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file