Compare commits

...

5 Commits

22 changed files with 45654 additions and 0 deletions

115
index/build-meetings-dir.js Normal file
View File

@ -0,0 +1,115 @@
const path = require('path');
let meetingDocs = require('../bos-tourism-meetings.json');
let meetings = [];
let meetingsFiles = [];
const yaml = require('js-yaml');
const cheerio = require('cheerio');
/**
 * Convert a 12-hour clock string such as "6:00 PM" into a zero-padded
 * 24-hour "HH_MM" fragment for use in file names.
 *
 * @param {string} hh_mm_ampm - Time in "H:MM AM|PM" form; falsy values are allowed.
 * @returns {string} "HH_MM", or "" when no time was supplied.
 */
function convertTimeStr(hh_mm_ampm) {
  if (!hh_mm_ampm) return "";
  const [clock, meridiem = ""] = hh_mm_ampm.trim().split(/\s+/);
  const [rawHours, rawMinutes] = clock.split(":");
  let hours = Number.parseInt(rawHours, 10);
  // A time without a minutes part (e.g. "6 PM") is treated as on the hour.
  const minutes = Number.parseInt(rawMinutes, 10) || 0;
  const suffix = meridiem.toUpperCase();
  // 12 PM is already noon and 12 AM is midnight; only 1-11 PM shift by 12.
  if (suffix === "PM" && hours < 12) {
    hours += 12;
  } else if (suffix === "AM" && hours === 12) {
    hours = 0;
  }
  return String(hours).padStart(2, "0") + "_" + String(minutes).padStart(2, "0");
}
/**
 * Map an agency key to its display name.
 *
 * @param {string} agency - "Russell_BOS" or "Russell_Tourism".
 * @returns {string|undefined} The long name, or undefined for any other key.
 */
function getAgencyLongName(agency) {
  if (agency === "Russell_BOS") {
    return "Russell County Board of Supervisors";
  }
  if (agency === "Russell_Tourism") {
    return "Russell County Tourism Advisory Committee";
  }
  return undefined;
}
/**
 * Build the meeting key "<date>_<HH_MM>-<agency>-<meeting name>" for a document.
 *
 * Side effect: a short agency code ("BOS" / "Tourism") on the document is
 * rewritten in place to its "Russell_*" form, so later reads of doc.Agency
 * see the long key.
 *
 * @param {object} doc - Record with MeetingDate, MeetingTime, Agency and MeetingName.
 * @returns {string} The key, with spaces in the meeting name replaced by underscores.
 */
function getKeyName(doc) {
  const shortCode = doc.Agency;
  if (shortCode == "BOS") {
    doc.Agency = "Russell_BOS";
  } else if (shortCode == "Tourism") {
    doc.Agency = "Russell_Tourism";
  }
  return `${doc.MeetingDate}_${convertTimeStr(doc.MeetingTime)}-${doc.Agency}-${doc.MeetingName.replaceAll(" ", "_")}`;
}
/**
 * Resolve the markdown file path for a meeting key inside the Astro
 * content collection (../astro/src/content/meetings).
 *
 * @param {string} keyName - Meeting key as produced by getKeyName.
 * @returns {string} Relative path ending in "<keyName>.md".
 */
function getMeetingFilePath(keyName) {
  const meetingsDir = path.join("..", "astro", "src", "content", "meetings");
  return path.join(meetingsDir, `${keyName}.md`);
}
// Build one markdown-file descriptor per distinct meeting and attach every
// document that belongs to it. Meetings are keyed by their target file name;
// the previous indexOf(keyName) check compared a string against an array of
// objects, so it never matched and each document produced a duplicate entry.
const meetingsByFileName = new Map();
meetingDocs.forEach(doc => {
  // getKeyName also normalises doc.Agency, which getAgencyLongName relies on below.
  const fileName = getMeetingFilePath(getKeyName(doc));
  let meeting = meetingsByFileName.get(fileName);
  if (!meeting) {
    meeting = {
      fileName,
      mdContent: {
        frontMatter: {
          title: getAgencyLongName(doc.Agency) + " - " + doc.MeetingName + " - " + doc.MeetingDate + " @ " + doc.MeetingTime + "",
          meetingDate: doc.MeetingDate,
          attachments: [],
        },
        content: "",
      }
    };
    meetingsByFileName.set(fileName, meeting);
  }
  meeting.mdContent.frontMatter.attachments.push({
    title: doc.DocumentCategory,
    // Strip every apostrophe, matching how the copy commands name the source file.
    relativePath: doc.Host + doc.AgendaPath + doc.AgendaFile.replaceAll('\'', ''),
  });
});
// Map preserves insertion order, so meetings stay in first-seen document order.
meetingsFiles = [...meetingsByFileName.values()];
// Remove at most one leading and one trailing "/" from a string.
const trimSlashes = (str) => {
  const withoutLeading = str.startsWith('/') ? str.slice(1) : str;
  return withoutLeading.endsWith('/') ? withoutLeading.slice(0, -1) : withoutLeading;
};
/**
 * Determine the extension to give a copied document.
 *
 * Uses the extension of the file name (apostrophes removed first). A few
 * known extension-less files are special-cased by their name suffix.
 *
 * @param {string} file - Source file name.
 * @returns {string} Extension including the leading dot, or "" if unknown.
 */
function getFileExt(file) {
  const detected = path.extname(file.replaceAll('\'', ''));
  if (detected) {
    return detected;
  }
  if (file.endsWith("RC Tourism Committee Meeting Minutes - July 18 2017 Regular")) {
    return ".pdf";
  }
  const docxSuffixes = ['_05222023-162', '_03062023-33', '_03062023-157'];
  if (docxSuffixes.some((suffix) => file.endsWith(suffix))) {
    return ".docx";
  }
  return detected;
}
// Print a batch-style script to stdout: one `copy` command per document that
// copies the archived file to "<meeting key>-<category><ext>" and appends the
// command's output to copy-files.log.
console.log("@echo off");
for (const doc of meetingDocs) {
  const keyName = getKeyName(doc);
  const sourcePath = path.join(
    "..",
    "..",
    "Web_Site_Archives",
    "Russell_County_Web_Site-latest",
    "russellcountyva.us",
    doc.Host,
    // AgendaPath uses "/" separators; convert them to the platform separator.
    trimSlashes(doc.AgendaPath).replaceAll("/", path.sep),
    doc.AgendaFile.replaceAll('\'', ''),
  );
  const targetName = keyName + "-" + doc.DocumentCategory.replaceAll(" ", "_") + getFileExt(doc.AgendaFile);
  console.log(`copy  "${sourcePath}" "${targetName}"  >> copy-files.log`);
}
// meetingsFiles.forEach(file => {
// //console.log("cp " file.fileName);
// // console.log("---\n"+yaml.dump(file.mdContent.frontMatter)+"\n---");
// });

View File

@ -0,0 +1,144 @@
const axios = require('axios');
const fs = require('fs');
const cheerio = require('cheerio');
// Read the list of files (one path per line). Splitting on \r?\n keeps a
// CRLF-terminated list from leaving "\r" on every line, which would make
// every extension check below fail; blank lines are dropped.
const fileLines = fs.readFileSync('file_list.txt', 'utf8')
  .split(/\r?\n/)
  .filter(line => line !== '');
// Keep only files whose extension (case-insensitive) is in this list
const allowedExtensions = ['.txt', '.html', '.htm', '.md', '.pdf']; // Add more extensions as needed
const filesToIndex = fileLines.filter(line => {
  const dotIndex = line.lastIndexOf('.');
  if (dotIndex === -1) return false; // no extension at all
  return allowedExtensions.includes(line.substring(dotIndex).toLowerCase());
});
/**
 * Stream the response body of a GET request into a local file.
 *
 * @param {string} url - Address to download.
 * @param {string} filePath - Local path to write (overwritten if it exists).
 * @returns {Promise<void>} Resolves once the file has been fully written;
 *   rejects if the request, the response stream, or the file write fails.
 */
async function downloadFile(url, filePath) {
  // Request first: a failed request no longer leaves an open, empty write stream behind.
  const response = await axios({
    url,
    method: 'GET',
    responseType: 'stream'
  });
  return new Promise((resolve, reject) => {
    const writer = fs.createWriteStream(filePath);
    writer.on('finish', resolve);
    writer.on('error', reject);
    // pipe() does not forward source errors, so watch the response stream too.
    response.data.on('error', (error) => {
      writer.destroy();
      reject(error);
    });
    response.data.pipe(writer);
  });
}
/**
 * Download one document, send it to Tika, and index the extracted text in Solr.
 *
 * Failures are logged and swallowed, so the returned promise never rejects.
 *
 * @param {string} url - Document URL; also used as the Solr document id.
 * @param {string} solrUrl - Solr core URL, passed to Tika in a request header.
 *   Indexing itself goes through indexDocumentInSolr.
 * @returns {Promise<void>}
 */
async function extractAndIndexWithTika(url, solrUrl) {
  try {
    // Fixed temp file name: overlapping calls would overwrite each other's download.
    const tempFilePath = 'tempfile';
    console.log(`Downloading ${url}`);
    await downloadFile(url, tempFilePath);
    console.log(`Downloaded ${url}.`);
    // Read file contents
    const fileData = fs.readFileSync(tempFilePath);
    // Make request to Tika
    // NOTE(review): Content-Type is hard-coded to PDF although the file list
    // also admits .txt/.html/.htm/.md -- confirm this is intended.
    const response = await axios.put(tikaUrl, fileData, {
      headers: {
        // 'Content-Type': 'application/octet-stream',
        'Content-Type': 'application/pdf',
        'X-Tika-Output-Format': 'solr',
        'X-Tika-SolrUrl': solrUrl
      }
    });
    console.log('Tika response:', response.data);
    // Reduce the Tika response to whitespace-normalised body text
    const textContent = sanitizeIndexData(extractTextFromHtml(response.data));
    // Create Solr document
    const solrDocument = {
      // The URL is the unique id. This used to read `documentUrl`, a variable
      // scoped to the caller's loop, which threw a ReferenceError here.
      id: url,
      text: textContent, // Add the extracted text content
      html: response.data,
      url: url,
      content_length: textContent.length,
      content_type: "application/pdf",
      // Add additional fields as needed (e.g., title, author, etc.)
    };
    // Send document to Solr for indexing
    await indexDocumentInSolr(solrDocument);
    console.log('Document indexed successfully:', solrDocument.id);
  } catch (error) {
    console.error('Error extracting text with Tika:', error.message);
  }
}
/**
 * Return the trimmed text content of the <body> of an HTML document.
 *
 * @param {string|Buffer} html - Markup to parse with Cheerio.
 * @returns {string} Body text with leading/trailing whitespace removed.
 */
function extractTextFromHtml(html) {
  const parsedDocument = cheerio.load(html);
  return parsedDocument('body').text().trim();
}
/**
 * Post a single document to Solr's JSON update handler and commit immediately.
 *
 * @param {object} document - Fields of the Solr document to add.
 * @returns {Promise<void>}
 * @throws {Error} Wrapping the underlying request error, which stays
 *   available as `error.cause`.
 */
async function indexDocumentInSolr(document) {
  try {
    await axios.post(solrUrl + '/update/json/docs', document, {
      params: {
        commit: true, // Commit changes immediately
      },
    });
  } catch (error) {
    // Keep the original error (stack, HTTP response) reachable for debugging.
    throw new Error('Error indexing document in Solr: ' + error.message, { cause: error });
  }
}
/**
 * Normalise whitespace for indexing: every run of whitespace becomes one
 * space, and leading/trailing spaces are removed.
 *
 * @param {string} data - Raw extracted text.
 * @returns {string} Whitespace-normalised text.
 */
function sanitizeIndexData(data) {
  // Collapsing runs already leaves no consecutive spaces, so one pass is enough.
  const collapsed = data.replace(/\s+/g, ' ');
  return collapsed.trim();
}
/**
 * Delete every document from the Solr core with a match-all delete query,
 * then commit.
 *
 * Failures are logged and swallowed, so the returned promise never rejects.
 *
 * @returns {Promise<void>}
 */
async function clearSolrIndex() {
  try {
    // Send delete query to Solr to delete all documents
    const response = await axios.post(solrUrl + '/update', {
      delete: {
        query: '*:*'
      },
      commit: {}
    }, {
      headers: {
        'Content-Type': 'application/json'
      }
    });
    // responseHeader.status is Solr's result code (0 means success), not a
    // count of deleted documents, so report it as a status.
    console.log('Solr index cleared (response status ' + response.data.responseHeader.status + ')');
  } catch (error) {
    console.error('Error clearing Solr index:', error.message);
  }
}
// Service endpoints
const tikaUrl = 'http://solr.services.cleveland.daball.me:9998/tika'; // URL of the Tika instance
const baseUrl = 'https://russell-county-archives.daball.me'; // Base URL; each listed file path is appended to it
const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance
// Clear the index, then index the files one at a time. Both steps are
// awaited: the clear must finish before documents are added, and
// extractAndIndexWithTika downloads into a single shared temp file, so
// overlapping runs would corrupt each other's input.
(async () => {
  await clearSolrIndex();
  for (const line of filesToIndex) {
    await extractAndIndexWithTika(baseUrl + line, solrUrl);
  }
})().catch((error) => {
  console.error('Indexing run failed:', error);
  process.exitCode = 1;
});

22431
index/file_list.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,288 @@
const gulp = require('gulp');
//const shell = require('gulp-shell');
const axios = require('axios');
const fs = require('fs');
const os = require ("os");
const path = require('path');
const crypto = require('crypto');
const cheerio = require('cheerio');
const SftpClient = require('ssh2-sftp-client');
const { TikaClient } = require('tika-js');
const baseUrl = 'https://russell-county-archives.daball.me'; // Base URL; each listed file path is appended to it
const sftpBasePath = 'david@caddy.services.cleveland.daball.me:/srv/www/russell-county-archives.daball.me/archives'; // SSH path
const tikaUrl = 'http://solr.services.cleveland.daball.me:9998'; // URL of the Tika instance
const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance
// Read the list of files (one path per line). Splitting on \r?\n keeps a
// CRLF-terminated list from leaving "\r" on every line, which would make
// every extension check below fail; blank lines are dropped.
const fileLines = fs.readFileSync('file_list.txt', 'utf8')
  .split(/\r?\n/)
  .filter(line => line !== '');
// Keep only files whose extension (case-insensitive) is in this list
const allowedExtensions = ['.txt', '.html', '.htm', '.md', '.pdf'];
const filesToIndex = fileLines.filter(line => {
  const dotIndex = line.lastIndexOf('.');
  if (dotIndex === -1) return false; // no extension at all
  return allowedExtensions.includes(line.substring(dotIndex).toLowerCase());
});
/**
 * Guess a MIME type from a file name or URL by its extension.
 *
 * The extension is compared case-insensitively so that names such as
 * "REPORT.PDF" are typed the same way as "report.pdf".
 *
 * @param {string} file_name - File name, path or URL.
 * @returns {string} 'text/html', 'application/pdf', or 'text/plain' for
 *   anything else (including .md, .txt and extension-less names).
 */
function extToMime(file_name) {
  switch (path.extname(file_name).toLowerCase()) {
    case '.htm':
    case '.html':
      return 'text/html';
    case '.pdf':
      return 'application/pdf';
    case '.md':
    case '.txt':
    default:
      return 'text/plain';
  }
}
/**
 * Read a private key file as UTF-8 text.
 *
 * @param {string} keyFilePath - Path of the key file.
 * @returns {string|null} The key with surrounding whitespace trimmed, or
 *   null (after logging the error) when the file cannot be read.
 */
const readSshRsaKey = (keyFilePath) => {
  let contents;
  try {
    contents = fs.readFileSync(keyFilePath, 'utf8');
  } catch (error) {
    console.error('Error reading SSH RSA key:', error);
    return null;
  }
  return contents.trim();
};
// Load the current user's default private key (~/.ssh/id_rsa) once, when this module is loaded.
// sshRsaKey is null if the file could not be read (readSshRsaKey logs the error).
const defaultKeyFilePath = path.join(os.homedir(), ".ssh", "id_rsa");
const sshRsaKey = readSshRsaKey(defaultKeyFilePath);
// const downloadFileWithRsync = (sourceUrl, destinationPath) => {
// return new Promise((resolve, reject) => {
// const rsyncCommand = `rsync -av --progress "${sourceUrl}" "${destinationPath}"`;
// exec(rsyncCommand, (error, stdout, stderr) => {
// if (error) {
// console.error('Error downloading file with rsync:', stderr);
// reject(error);
// } else {
// console.log('File downloaded successfully:', stdout);
// resolve();
// }
// });
// });
// };
/**
 * Split a "user@host:/remote/path" string into its parts.
 *
 * @param {string} sftpUrl - Location in scp-like "user@host:path" form.
 * @returns {{username: string, host: string, path: string}} The parsed parts.
 * @throws {Error} 'Invalid SFTP URL format' when the string does not match.
 */
const parseSftpUrl = (sftpUrl) => {
  const pattern = /^(?<username>[^@]+)@(?<host>[^:]+):(?<path>.+)$/;
  const parsed = sftpUrl.match(pattern);
  if (!parsed) {
    throw new Error('Invalid SFTP URL format');
  }
  const { username, host, path: remotePath } = parsed.groups;
  return { username, host, path: remotePath };
};
/**
 * Download one file over SFTP using the private key loaded into sshRsaKey.
 *
 * The transfer is awaited before the connection is closed. Previously the
 * get() promise was not awaited, so the `finally` block ended the session
 * while the download was still running, and failures were swallowed.
 *
 * @param {string} remotePath - Source in "user@host:/path" form.
 * @param {string} localPath - Local file to write.
 * @param {object} [options] - Currently unused; kept for interface compatibility.
 * @returns {Promise<void>} Resolves when the file has been written.
 * @throws Rethrows (after logging) any parse, connection or transfer error.
 */
const downloadFileWithSftp = async (remotePath, localPath, options = {}) => {
  const sftp = new SftpClient();
  let connected = false;
  try {
    // Connect to the SFTP server
    const sftpUrlParts = parseSftpUrl(remotePath);
    await sftp.connect({
      host: sftpUrlParts.host,
      username: sftpUrlParts.username,
      privateKey: sshRsaKey,
    });
    connected = true;
    // Download the file; with a string destination the client writes the
    // local file itself and the promise settles when the transfer is done.
    await sftp.get(sftpUrlParts.path, localPath);
  } catch (error) {
    console.error('Error downloading file:', error);
    // Propagate so callers do not go on to read a missing or partial file.
    throw error;
  } finally {
    // Disconnect from the SFTP server (only if a session was opened)
    if (connected) {
      await sftp.end();
    }
  }
};
/**
 * Stream the response body of a GET request into a local file.
 *
 * An SFTP-based alternative exists in downloadFileWithSftp; this function
 * always uses HTTP.
 *
 * @param {string} url - Address to download.
 * @param {string} filePath - Local path to write (overwritten if it exists).
 * @returns {Promise<void>} Resolves once the file has been fully written;
 *   rejects if the request, the response stream, or the file write fails.
 */
async function downloadFile(url, filePath) {
  console.log(`Downloading: ${url}`);
  // Request first: a failed request no longer leaves an open, empty write stream behind.
  const response = await axios({
    url,
    method: 'GET',
    responseType: 'stream'
  });
  await new Promise((resolve, reject) => {
    const writer = fs.createWriteStream(filePath, { start: 0 });
    writer.on('finish', resolve);
    writer.on('error', reject);
    // pipe() does not forward source errors, so watch the response stream too.
    response.data.on('error', (error) => {
      writer.destroy();
      reject(error);
    });
    response.data.pipe(writer);
  });
  // Logged only after the write stream has finished, i.e. when the file is really complete.
  console.log(`Download complete: ${url} => ${filePath}`);
}
// Unimplemented stub: the body is empty, so the returned promise always resolves to undefined.
// NOTE(review): presumably meant to hold the Solr sha256sum lookup that
// extractAndIndexWithTika currently performs inline -- implement or remove.
async function getSolrIndexedFileChecksum(url) {
}
/**
 * Download one document, skip it if Solr already holds the same SHA-256
 * checksum for its URL, otherwise extract its text with Tika and index it.
 *
 * Failures are logged and swallowed, so the returned promise never rejects.
 * The function works through the fixed files 'tempfile' and 'output.txt',
 * so calls must not overlap.
 *
 * @param {string} url - Document URL; also used as the Solr document id.
 * @param {string} solrUrl - Solr core URL used for the checksum lookup.
 * @returns {Promise<void>}
 */
async function extractAndIndexWithTika(url, solrUrl) {
  try {
    const tempFilePath = 'tempfile';
    await downloadFile(url, tempFilePath);
    const fileContent = fs.readFileSync(tempFilePath);
    const checksum = crypto.createHash('sha256').update(fileContent).digest('hex');
    // Query Solr to check if the file is already indexed. The query goes
    // through `params` so the URL inside it is percent-encoded; quotes and
    // backslashes are escaped for the quoted Solr phrase.
    const escapedId = url.replace(/(["\\])/g, '\\$1');
    const solrChecksumResponse = await axios.get(`${solrUrl}/select`, {
      params: {
        q: `id:"${escapedId}"`,
        fl: 'sha256sum',
      },
    });
    const storedChecksum = solrChecksumResponse.data.response.docs[0]?.sha256sum;
    // NOTE(review): the field may come back as a one-element array if the
    // core treats it as multi-valued -- confirm against the schema.
    const solrChecksum = Array.isArray(storedChecksum) ? storedChecksum[0] : storedChecksum;
    if (solrChecksum && solrChecksum === checksum) {
      // This used to interpolate an undefined `filePath`, which threw a
      // ReferenceError that was then reported as a Tika error.
      console.log(`File ${url} hasn't changed. Skipping.`);
      return;
    }
    // Make request to Tika
    console.log(`Processing ${url}`);
    const client = new TikaClient({ host: tikaUrl });
    const version = await client.getVersion()
    console.info(`version: ${version}`)
    // NOTE(review): this debug print appears to run a full extraction in
    // addition to the pipe() below -- consider removing it.
    console.info(extToMime(url), await client.getContent(fs.createReadStream(tempFilePath), extToMime(url), path.basename(url)));
    await client.pipe(fs.createReadStream(tempFilePath), fs.createWriteStream('output.txt'), 'text/plain', tempFilePath);
    const fileData = fs.readFileSync('output.txt');
    // Reduce the Tika output to whitespace-normalised body text
    const textContent = sanitizeIndexData(extractTextFromHtml(fileData));
    // Create Solr document
    const solrDocument = {
      id: url, // The document URL is the unique identifier
      text: textContent, // Add the extracted text content
      sha256sum: checksum, // Add the checksum
      url: url,
      content_type: extToMime(url),
      // Add additional fields as needed (e.g., title, author, etc.)
    };
    // Send document to Solr for indexing
    console.log(`Indexing ${url}`);
    await indexDocumentInSolr(solrDocument);
    console.log('Document indexed successfully:', solrDocument.id);
  } catch (error) {
    console.error('Error extracting text with Tika:', error.message);
  }
}
/**
 * Return the trimmed text content of the <body> of an HTML document.
 *
 * @param {string|Buffer} html - Markup to parse with Cheerio.
 * @returns {string} Body text with leading/trailing whitespace removed.
 */
function extractTextFromHtml(html) {
  const parsedDocument = cheerio.load(html);
  return parsedDocument('body').text().trim();
}
/**
 * Post a single document to Solr's JSON update handler and commit immediately.
 *
 * @param {object} document - Fields of the Solr document to add.
 * @returns {Promise<void>}
 * @throws {Error} Wrapping the underlying request error, which stays
 *   available as `error.cause`.
 */
async function indexDocumentInSolr(document) {
  try {
    await axios.post(solrUrl + '/update/json/docs', document, {
      params: {
        commit: true, // Commit changes immediately
      },
    });
  } catch (error) {
    // Keep the original error (stack, HTTP response) reachable for debugging.
    throw new Error('Error indexing document in Solr: ' + error.message, { cause: error });
  }
}
/**
 * Normalise whitespace for indexing: every run of whitespace becomes one
 * space, and leading/trailing spaces are removed.
 *
 * @param {string} data - Raw extracted text.
 * @returns {string} Whitespace-normalised text.
 */
function sanitizeIndexData(data) {
  // Collapsing runs already leaves no consecutive spaces, so one pass is enough.
  const collapsed = data.replace(/\s+/g, ' ');
  return collapsed.trim();
}
/**
 * Delete every document from the Solr core with a match-all delete query,
 * then commit.
 *
 * Failures are logged and swallowed, so the returned promise never rejects.
 *
 * @returns {Promise<void>}
 */
async function clearSolrIndex() {
  try {
    // Send delete query to Solr to delete all documents
    const response = await axios.post(solrUrl + '/update', {
      delete: {
        query: '*:*'
      },
      commit: {}
    }, {
      headers: {
        'Content-Type': 'application/json'
      }
    });
    // responseHeader.status is Solr's result code (0 means success), not a
    // count of deleted documents, so report it as a status.
    console.log('Solr index cleared (response status ' + response.data.responseHeader.status + ')');
  } catch (error) {
    console.error('Error clearing Solr index:', error.message);
  }
}
// Task `index:clear`: empties the Solr search index.
async function index_clear() {
  await clearSolrIndex();
}
index_clear.displayName = 'index:clear';
gulp.task(index_clear);
// Task `index:index`: runs every listed file through Tika and into Solr,
// one file at a time.
async function index_index() {
  for (const line of filesToIndex) {
    const documentUrl = baseUrl + line;
    await extractAndIndexWithTika(documentUrl, solrUrl);
  }
}
index_index.displayName = 'index:index';
gulp.task(index_index);
// Composite tasks: `index:reindex` clears first, `index` only adds/updates.
gulp.task('index:reindex', gulp.series('index:clear', 'index:index'));
gulp.task('index', gulp.series('index:index'));
// Running gulp with no task name runs `index`.
gulp.task('default', gulp.series('index'));

194
index/gulpfile.js Normal file
View File

@ -0,0 +1,194 @@
const gulp = require('gulp');
const gulpif = require('gulp-if');
const through2 = require('through2');
//const shell = require('gulp-shell');
const axios = require('axios');
const fs = require('fs');
const os = require ("os");
const path = require('path');
const crypto = require('crypto');
const cheerio = require('cheerio');
const { TikaClient } = require('tika-js');
const baseUrl = 'https://no-moss-3-carbo-landfill-library.online'; // Base URL; the index:index task appends each file path to it to form a document URL
const tikaUrl = 'http://solr.services.cleveland.daball.me:9998'; // URL of the Tika instance
const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance
/**
 * Guess a MIME type from a file name or URL by its extension.
 *
 * The extension is compared case-insensitively so that names such as
 * "REPORT.PDF" are typed the same way as "report.pdf".
 *
 * @param {string} file_name - File name, path or URL.
 * @returns {string} 'text/html', 'application/pdf', or 'text/plain' for
 *   anything else (including .md, .txt and extension-less names).
 */
function extToMime(file_name) {
  switch (path.extname(file_name).toLowerCase()) {
    case '.htm':
    case '.html':
      return 'text/html';
    case '.pdf':
      return 'application/pdf';
    case '.md':
    case '.txt':
    default:
      return 'text/plain';
  }
}
/**
 * Build a predicate that tests whether a file object has the given extension.
 *
 * @param {string} extname - Extension to match, including the leading dot.
 * @returns {(file: {extname: string}) => boolean} Predicate comparing
 *   `file.extname` with `extname`.
 */
function isFileExt(extname) {
  // The original returned an undefined snake_case identifier here, which
  // threw a ReferenceError on every call.
  return (file) => file.extname === extname;
}
/**
 * Send a local file to Tika and write the plain-text result to 'output.txt'.
 *
 * @param {string} file - Path of the file to extract.
 * @returns {Promise<void>} Settles when the client's pipe() call settles.
 */
async function tikaReadPdf(file) {
  const client = new TikaClient({ host: tikaUrl });
  const file_reader = fs.createReadStream(file);
  // The last argument is the file name passed to the client. It used to be
  // `tempFilePath`, which does not exist in this scope (ReferenceError).
  await client.pipe(file_reader, fs.createWriteStream('output.txt'), 'text/plain', path.basename(file));
}
// Work-in-progress gulp pipeline over the PDFs under public/Russell_County_IDA.
// NOTE(review): as written this cannot run:
//  - `src` is not among the names required at the top of this file (gulp is
//    only available as `gulp`), so this presumably should be gulp.src;
//  - the same glob is listed twice;
//  - gulpif() is called without the stream to apply when the condition matches;
//  - the stream is not returned, so gulp could not tell when the task ends.
// The intended per-file action is not visible here -- confirm before finishing.
function index() {
src([
"public/Russell_County_IDA/**.pdf",
"public/Russell_County_IDA/**.pdf",
//['.txt', '.html', '.htm', '.md', '.pdf']
])
.pipe(gulpif(isFileExt(".pdf"), ))
}
/**
 * Download one document, skip it if Solr already holds the same SHA-256
 * checksum for its URL, otherwise extract its text with Tika and index it.
 *
 * Failures are logged and swallowed, so the returned promise never rejects.
 * The function works through the fixed files 'tempfile' and 'output.txt',
 * so calls must not overlap.
 *
 * NOTE(review): downloadFile is not among the definitions visible in this
 * file -- confirm it is provided before relying on this function.
 *
 * @param {string} url - Document URL; also used as the Solr document id.
 * @param {string} solrUrl - Solr core URL used for the checksum lookup.
 * @returns {Promise<void>}
 */
async function extractAndIndexWithTika(url, solrUrl) {
  try {
    const tempFilePath = 'tempfile';
    await downloadFile(url, tempFilePath);
    const fileContent = fs.readFileSync(tempFilePath);
    const checksum = crypto.createHash('sha256').update(fileContent).digest('hex');
    // Query Solr to check if the file is already indexed. The query goes
    // through `params` so the URL inside it is percent-encoded; quotes and
    // backslashes are escaped for the quoted Solr phrase.
    const escapedId = url.replace(/(["\\])/g, '\\$1');
    const solrChecksumResponse = await axios.get(`${solrUrl}/select`, {
      params: {
        q: `id:"${escapedId}"`,
        fl: 'sha256sum',
      },
    });
    const storedChecksum = solrChecksumResponse.data.response.docs[0]?.sha256sum;
    // NOTE(review): the field may come back as a one-element array if the
    // core treats it as multi-valued -- confirm against the schema.
    const solrChecksum = Array.isArray(storedChecksum) ? storedChecksum[0] : storedChecksum;
    if (solrChecksum && solrChecksum === checksum) {
      // This used to interpolate an undefined `filePath`, which threw a
      // ReferenceError that was then reported as a Tika error.
      console.log(`File ${url} hasn't changed. Skipping.`);
      return;
    }
    // Make request to Tika
    console.log(`Processing ${url}`);
    const client = new TikaClient({ host: tikaUrl });
    const version = await client.getVersion()
    console.info(`version: ${version}`)
    // NOTE(review): this debug print appears to run a full extraction in
    // addition to the pipe() below -- consider removing it.
    console.info(extToMime(url), await client.getContent(fs.createReadStream(tempFilePath), extToMime(url), path.basename(url)));
    await client.pipe(fs.createReadStream(tempFilePath), fs.createWriteStream('output.txt'), 'text/plain', tempFilePath);
    const fileData = fs.readFileSync('output.txt');
    // Reduce the Tika output to whitespace-normalised body text
    const textContent = sanitizeIndexData(extractTextFromHtml(fileData));
    // Create Solr document
    const solrDocument = {
      id: url, // The document URL is the unique identifier
      text: textContent, // Add the extracted text content
      sha256sum: checksum, // Add the checksum
      url: url,
      content_type: extToMime(url),
      // Add additional fields as needed (e.g., title, author, etc.)
    };
    // Send document to Solr for indexing
    console.log(`Indexing ${url}`);
    await indexDocumentInSolr(solrDocument);
    console.log('Document indexed successfully:', solrDocument.id);
  } catch (error) {
    console.error('Error extracting text with Tika:', error.message);
  }
}
/**
 * Return the trimmed text content of the <body> of an HTML document.
 *
 * @param {string|Buffer} html - Markup to parse with Cheerio.
 * @returns {string} Body text with leading/trailing whitespace removed.
 */
function extractTextFromHtml(html) {
  const parsedDocument = cheerio.load(html);
  return parsedDocument('body').text().trim();
}
/**
 * Post a single document to Solr's JSON update handler and commit immediately.
 *
 * @param {object} document - Fields of the Solr document to add.
 * @returns {Promise<void>}
 * @throws {Error} Wrapping the underlying request error, which stays
 *   available as `error.cause`.
 */
async function indexDocumentInSolr(document) {
  try {
    await axios.post(solrUrl + '/update/json/docs', document, {
      params: {
        commit: true, // Commit changes immediately
      },
    });
  } catch (error) {
    // Keep the original error (stack, HTTP response) reachable for debugging.
    throw new Error('Error indexing document in Solr: ' + error.message, { cause: error });
  }
}
/**
 * Normalise whitespace for indexing: every run of whitespace becomes one
 * space, and leading/trailing spaces are removed.
 *
 * @param {string} data - Raw extracted text.
 * @returns {string} Whitespace-normalised text.
 */
function sanitizeIndexData(data) {
  // Collapsing runs already leaves no consecutive spaces, so one pass is enough.
  const collapsed = data.replace(/\s+/g, ' ');
  return collapsed.trim();
}
/**
 * Delete every document from the Solr core with a match-all delete query,
 * then commit.
 *
 * Failures are logged and swallowed, so the returned promise never rejects.
 *
 * @returns {Promise<void>}
 */
async function clearSolrIndex() {
  try {
    // Send delete query to Solr to delete all documents
    const response = await axios.post(solrUrl + '/update', {
      delete: {
        query: '*:*'
      },
      commit: {}
    }, {
      headers: {
        'Content-Type': 'application/json'
      }
    });
    // responseHeader.status is Solr's result code (0 means success), not a
    // count of deleted documents, so report it as a status.
    console.log('Solr index cleared (response status ' + response.data.responseHeader.status + ')');
  } catch (error) {
    console.error('Error clearing Solr index:', error.message);
  }
}
// Clears SOLR search index
const index_clear = async function() {
  await clearSolrIndex();
};
index_clear.displayName = 'index:clear';
gulp.task(index_clear);
// Reindexes SOLR search index with plaintext results from Tika
const index_index = async function() {
  // `filesToIndex` was referenced here without being defined anywhere in
  // this file. The list is loaded when the task runs, so other tasks
  // (e.g. index:clear) do not need file_list.txt to exist.
  const allowedExtensions = ['.txt', '.html', '.htm', '.md', '.pdf'];
  const filesToIndex = fs.readFileSync('file_list.txt', 'utf8')
    .split(/\r?\n/) // tolerate CRLF line endings
    .filter(line => allowedExtensions.includes(path.extname(line).toLowerCase()));
  // One file at a time: extractAndIndexWithTika works through shared temp files.
  for (const line of filesToIndex) {
    const documentUrl = baseUrl + line;
    await extractAndIndexWithTika(documentUrl, solrUrl);
  }
};
index_index.displayName = 'index:index';
gulp.task(index_index);
// Define index tasks
gulp.task('index:reindex', gulp.series('index:clear', 'index:index'));
gulp.task('index', gulp.series('index:index'));
// Define a default task (optional)
gulp.task('default', gulp.series('index'));

0
index/output.txt Normal file
View File

5128
index/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

29
index/package.json Normal file
View File

@ -0,0 +1,29 @@
{
"name": "example_tika_solr_index",
"version": "1.0.0",
"description": "",
"main": "example_tika_upload.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"index": "gulp index",
"reindex": "gulp index:reindex"
},
"author": "",
"license": "ISC",
"dependencies": {
"axios": "^1.6.7",
"cheerio": "^1.0.0-rc.12",
"ejs": "^3.1.9",
"express": "^4.18.3",
"gulp-if": "^3.0.0",
"js-yaml": "^4.1.0",
"ssh2-sftp-client": "^10.0.3",
"through2": "^4.0.2",
"tika-js": "^1.0.2",
"tslib": "^2.6.2",
"xml2js": "^0.6.2"
},
"devDependencies": {
"gulp": "^4.0.2"
}
}

81
index/search_solr.js Normal file
View File

@ -0,0 +1,81 @@
const express = require('express');
const axios = require('axios');
const app = express();
const path = require('path');
// Render responses with EJS templates
app.set('view engine', 'ejs');
// Templates live in the "views" directory next to this script
app.set('views', path.join(__dirname, 'views'));
// Middleware to parse JSON request bodies
app.use(express.json());
// Serve static files (CSS, JavaScript, images, etc.) from the "public" directory
app.use(express.static('public'));
// Search endpoint: GET /search?query=...&page=N&pageSize=M
// Renders the "search-results" view with one page of highlighted Solr hits.
// Responds 400 for a missing or unusable query and 500 when the Solr request fails.
app.get('/search', async (req, res) => {
  // Upper bound on rows requested from Solr in one page.
  const MAX_PAGE_SIZE = 100;
  // Parse a query-string value as a positive integer; fall back when it is
  // missing, non-numeric or less than 1.
  const toPositiveInt = (value, fallback) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };
  try {
    // Extract search query from request query parameters
    const { query } = req.query;
    // Query-string values are not guaranteed to be strings (e.g. a repeated
    // parameter), so anything but a non-blank string is rejected.
    if (typeof query !== 'string' || query.trim() === '') {
      return res.status(400).json({ error: 'Query parameter is required' });
    }
    // page/pageSize arrive as strings; normalise them to numbers so the
    // offset, the row count and the template's page comparisons agree.
    const page = toPositiveInt(req.query.page, 1);
    const pageSize = Math.min(toPositiveInt(req.query.pageSize, 10), MAX_PAGE_SIZE);
    // Calculate start offset for pagination
    const start = (page - 1) * pageSize;
    // Sanitize search query to prevent code injection
    const sanitizedQuery = sanitizeQuery(query);
    if (sanitizedQuery.trim() === '') {
      // Nothing searchable is left, so a bare "text:" would be sent to Solr.
      return res.status(400).json({ error: 'Query contains no searchable characters' });
    }
    // Send search query to Solr
    const response = await axios.get(solrUrl + '/select', {
      params: {
        q: `text:${sanitizedQuery}`, // Query string with field name
        hl: 'true',
        'hl.method': 'unified',
        'hl.fl': '*',
        'hl.snippets': 5,
        'hl.tag.pre': '<b class=\"result-highlight\">',
        'hl.tag.post': '</b>',
        start, // Start offset for pagination
        rows: pageSize, // Was hard-coded to 10, which ignored the requested page size
        wt: 'json', // Response format (JSON)
      },
    });
    // Extract search results from Solr response
    const searchResults = response.data.response.docs;
    const highlightedSnippets = response.data.highlighting;
    // Calculate total number of results (needed for pagination)
    const totalResults = response.data.response.numFound;
    // Calculate total number of pages
    const totalPages = Math.ceil(totalResults / pageSize);
    res.render('search-results', { query, searchResults, highlightedSnippets, page, pageSize, totalResults, totalPages });
  } catch (error) {
    console.error('Error searching Solr:', error.message);
    res.status(500).json({ error: 'Internal server error' });
  }
});
/**
 * Reduce a user-supplied search string to characters that are safe to embed
 * in the Solr query.
 *
 * Keeps word characters (letters, digits, underscore), whitespace and double
 * quotes (for phrase searches); everything else is removed. If the quotes
 * are unbalanced they are all removed, so an unterminated phrase is never
 * sent to the query parser.
 *
 * @param {string} query - Raw search text.
 * @returns {string} Sanitised search text (possibly empty).
 */
function sanitizeQuery(query) {
  let sanitized = String(query).replace(/[^\w\s"]/g, '');
  const quoteCount = sanitized.split('"').length - 1;
  if (quoteCount % 2 !== 0) {
    sanitized = sanitized.replace(/"/g, '');
  }
  return sanitized;
}
// Solr core queried by the /search route. It is declared after the route,
// which is fine because the handler only reads it when a request arrives.
const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance
// Start server on the port given by the PORT environment variable, defaulting to 3000
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
console.log(`Server is running on port ${PORT}`);
});

View File

@ -0,0 +1,88 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Search Results</title>
<!-- Bootstrap CSS -->
<link href="https://daball.me/vendor/bootstrap/css/bootstrap.min.css" rel="stylesheet">
<link href="https://fonts.googleapis.com/css?family=Saira+Extra+Condensed:100,200,300,400,500,600,700,800,900" rel="stylesheet">
<link href="https://fonts.googleapis.com/css?family=Open+Sans:300,300i,400,400i,600,600i,700,700i,800,800i" rel="stylesheet">
<link href="https://daball.me/vendor/font-awesome/css/font-awesome.min.css" rel="stylesheet">
<link href="https://daball.me/vendor/devicons/css/devicons.min.css" rel="stylesheet">
<link href="https://daball.me/vendor/devicon/devicon.min.css" rel="stylesheet">
<link href="https://daball.me/vendor/simple-line-icons/css/simple-line-icons.css" rel="stylesheet">
<link href="https://daball.me/layouts/blog/css/blog.min.css" rel="stylesheet">
<style type="text/css"><!--
.result-highlight { background-color: #FBF719; font-weight: normal; }
// --></style>
</head>
<body>
<div class="navbar navbar-expand-lg fixed-top navbar-dark bg-primary">
<div class="container">
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarResponsive" aria-controls="navbarResponsive" aria-expanded="false" aria-label="Toggle navigation"><span class="navbar-toggler-icon"></span></button>
<div class="collapse navbar-collapse" id="navbarResponsive">
<a class="navbar-brand" href="/">David A. Ball</a>
<ul class="navbar-nav">
<li class="nav-item"><a class="nav-link" href="https://russell-county-archives.daball.me/">Russell County Archives</a></li>
</ul>
<!-- Search form -->
<form class="d-flex ms-auto">
<input class="form-control me-2" type="search" placeholder="Search" aria-label="Search" value="<%= query %>" name="query">
<button class="btn btn-outline-success" type="submit">Search</button>
</form>
</div>
</div>
</div>
<div class="container">
<h1 class="mt-5">Search Results</h1>
<div id="searchResults" class="mt-3">
<!-- Search results will be dynamically populated here -->
<ul class="list-group">
<%# Indexed documents may carry no title field, so the heading falls back to the URL, then the id. %>
<% searchResults.forEach(result => { %>
<% const snippets = highlightedSnippets && highlightedSnippets[result.id] && highlightedSnippets[result.id].text; %>
<li class="list-group-item">
<h5><%= result.title || result.url || result.id %></h5>
<% if (snippets) { %>
<% snippets.forEach(snippet => { %>
<%# Emitted unescaped so the highlight tags render. NOTE(review): the snippet text comes from indexed documents; confirm the Solr highlighter escapes it. %>
<p><%- snippet %></p>
<% }); %>
<% } else { %>
<p>No snippet available</p>
<% } %>
<a href="<%= result.url %>"><%= result.url %></a>
</li>
<% }); %>
</ul>
</div>
</div>
<!-- Pagination controls -->
<nav aria-label="Search results pagination">
<%# page can arrive as a string, so comparisons and arithmetic use a numeric copy. The query is URL-encoded so characters such as ampersand, hash or plus survive inside the link. %>
<% const currentPage = Number(page); %>
<% const encodedQuery = encodeURIComponent(query); %>
<ul class="pagination justify-content-center mt-4">
<% if (currentPage > 1) { %>
<li class="page-item">
<a class="page-link" href="/search?query=<%= encodedQuery %>&page=<%= currentPage - 1 %>&pageSize=<%= pageSize %>">Previous</a>
</li>
<% } %>
<% for (let i = 1; i <= totalPages; i++) { %>
<li class="page-item <%= i === currentPage ? 'active' : '' %>">
<a class="page-link" href="/search?query=<%= encodedQuery %>&page=<%= i %>&pageSize=<%= pageSize %>"><%= i %></a>
</li>
<% } %>
<% if (currentPage < totalPages) { %>
<li class="page-item">
<a class="page-link" href="/search?query=<%= encodedQuery %>&page=<%= currentPage + 1 %>&pageSize=<%= pageSize %>">Next</a>
</li>
<% } %>
</ul>
</nav>
</div>
<!-- Bootstrap JS (optional, if you need Bootstrap JS features) -->
<script src="https://daball.me/vendor/jquery/jquery.min.js"></script>
<script src="https://daball.me/vendor/popper.js/dist/popper.min.js"></script>
<script src="https://daball.me/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
<script src="https://daball.me/vendor/jquery-easing/jquery.easing.min.js"></script>
<script src="https://daball.me/layouts/blog/js/blog.min.js"></script>
</body>
</html>

17123
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
public/Russell_County/Documents/Kids%20Fishinge311.jpg (Stored with Git LFS) Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.