Added defunct code in the index/ folder.
This commit is contained in:
parent 41a7a811a8
commit 5d80a4ec81
index/build-meetings-dir.js — 115 lines (new file)
@@ -0,0 +1,115 @@
// Builds one markdown stub per meeting for the Astro site and prints a
// Windows batch script that copies each source document into place.
const path = require('path');
let meetingDocs = require('../bos-tourism-meetings.json');
let meetings = [];
let meetingsFiles = [];
const yaml = require('js-yaml');
const cheerio = require('cheerio');

// Converts an "H:MM AM/PM" string to a zero-padded 24-hour "HH_MM" string.
function convertTimeStr(hh_mm_ampm) {
  if (!hh_mm_ampm) return "";
  let space_parts = hh_mm_ampm.split(" ");
  let time_parts = space_parts[0].split(":").map((time_part) => parseInt(time_part));
  // 12 PM is already hour 12; only shift 1-11 PM.
  if (space_parts[1] == "PM" && time_parts[0] != 12) time_parts[0] += 12;
  return (time_parts[0] < 10 ? "0" : "") + time_parts[0] + "_" + (time_parts[1] < 10 ? "0" : "") + time_parts[1];
}

function getAgencyLongName(agency) {
  switch (agency) {
    case "Russell_BOS": return "Russell County Board of Supervisors";
    case "Russell_Tourism": return "Russell County Tourism Advisory Committee";
  }
}

// Normalizes the agency name and builds a key of the form
// "<date>_<HH_MM>-<agency>-<meeting_name>".
function getKeyName(doc) {
  if (doc.Agency == "BOS") doc.Agency = "Russell_BOS";
  else if (doc.Agency == "Tourism") doc.Agency = "Russell_Tourism";
  return doc["MeetingDate"] + "_" + convertTimeStr(doc["MeetingTime"]) + "-" + doc["Agency"] + "-" + doc["MeetingName"].replaceAll(" ", "_");
}

function getMeetingFilePath(keyName) {
  return path.join("..", "astro", "src", "content", "meetings", keyName + ".md");
}

// Create one entry per distinct meeting file.
meetingDocs.forEach(doc => {
  let keyName = getKeyName(doc);
  let fileName = getMeetingFilePath(keyName);
  // meetingsFiles holds objects, so dedupe on fileName rather than
  // searching the array for the bare key string.
  if (meetingsFiles.findIndex(file => file.fileName == fileName) == -1) {
    meetingsFiles.push({
      fileName,
      mdContent: {
        frontMatter: {
          title: getAgencyLongName(doc.Agency) + " - " + doc.MeetingName + " - " + doc.MeetingDate + " @ " + doc.MeetingTime,
          meetingDate: doc.MeetingDate,
          attachments: [],
        },
        content: "",
      }
    });
  }
});

// Go back through and add all of the documents to the meetings in the files.
meetingsFiles = meetingsFiles.map(file => {
  meetingDocs.forEach(doc => {
    let keyName = getKeyName(doc);
    let fileName = getMeetingFilePath(keyName);
    if (fileName == file.fileName) {
      file.mdContent.frontMatter.attachments.push({
        title: doc.DocumentCategory,
        relativePath: doc.Host + doc.AgendaPath + doc.AgendaFile.replaceAll('\'', ''),
      });
    }
  });
  return file;
});

const trimSlashes = (str) => str.replace(/^\/|\/$/g, '');

// Resolves the file extension, with hard-coded fallbacks for a few
// archive files that lack one.
function getFileExt(file) {
  let ext = path.extname(file.replaceAll('\'', ''));
  if (!ext) {
    if (file.endsWith("RC Tourism Committee Meeting Minutes - July 18 2017 Regular")) {
      return ".pdf";
    }
    if (file.endsWith('_05222023-162') || file.endsWith('_03062023-33') || file.endsWith('_03062023-157')) {
      return ".docx";
    }
  }
  return ext;
}

// Emit a Windows batch script (one copy command per document) to stdout.
console.log("@echo off");
meetingDocs.forEach(doc => {
  let keyName = getKeyName(doc);
  let fileName = getMeetingFilePath(keyName);
  console.log(
    "copy " +
    " \"" +
    path.join(
      "..",
      "..",
      "Web_Site_Archives",
      "Russell_County_Web_Site-latest",
      "russellcountyva.us",
      doc.Host,
      trimSlashes(doc.AgendaPath).replaceAll("/", path.sep),
      doc.AgendaFile.replaceAll('\'', ''),
    ) +
    "\" \"" +
    getKeyName(doc) + "-" + doc.DocumentCategory.replaceAll(" ", "_") +
    getFileExt(doc.AgendaFile) +
    "\" " +
    " >> copy-files.log"
  );
});

// meetingsFiles.forEach(file => {
//   //console.log("cp " + file.fileName);
//   // console.log("---\n" + yaml.dump(file.mdContent.frontMatter) + "\n---");
// });
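For reference, a minimal sketch of the naming convention the script produces. The record below is hypothetical; the field names match what build-meetings-dir.js reads, but the actual date and time formats in bos-tourism-meetings.json are assumptions.

// Hypothetical record, shaped like the fields the script reads:
const sampleDoc = {
  MeetingDate: "2023-03-06",  // assumed date format
  MeetingTime: "6:00 PM",
  Agency: "Tourism",
  MeetingName: "Regular Meeting",
};
// getKeyName(sampleDoc)   => "2023-03-06_18_00-Russell_Tourism-Regular_Meeting"
// getMeetingFilePath(...) => ../astro/src/content/meetings/2023-03-06_18_00-Russell_Tourism-Regular_Meeting.md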
index/example_tika_upload.js — 144 lines (new file)
@@ -0,0 +1,144 @@
const axios = require('axios');
const fs = require('fs');
const cheerio = require('cheerio');

// Read the list of files from the text file
const fileLines = fs.readFileSync('file_list.txt', 'utf8').split('\n');

// Filter the list to include only files with certain file extensions
const allowedExtensions = ['.txt', '.html', '.htm', '.md', '.pdf']; // Add more extensions as needed
const filesToIndex = fileLines.filter(line => {
  const extension = line.substring(line.lastIndexOf('.')).toLowerCase();
  return allowedExtensions.includes(extension);
});

async function downloadFile(url, filePath) {
  const writer = fs.createWriteStream(filePath);

  const response = await axios({
    url,
    method: 'GET',
    responseType: 'stream'
  });

  response.data.pipe(writer);

  return new Promise((resolve, reject) => {
    writer.on('finish', resolve);
    writer.on('error', reject);
  });
}

async function extractAndIndexWithTika(url, solrUrl) {
  try {
    const tempFilePath = 'tempfile';
    console.log(`Downloading ${url}`);
    await downloadFile(url, tempFilePath);
    console.log(`Downloaded ${url}.`);

    // Read file contents
    const fileData = fs.readFileSync(tempFilePath);

    // Make request to Tika
    const response = await axios.put(tikaUrl, fileData, {
      headers: {
        // 'Content-Type': 'application/octet-stream',
        'Content-Type': 'application/pdf',
        'X-Tika-Output-Format': 'solr',
        'X-Tika-SolrUrl': solrUrl
      }
    });

    console.log('Tika response:', response.data);
    // Parse the HTML response from Tika
    const textContent = sanitizeIndexData(extractTextFromHtml(response.data));

    // Create Solr document
    const solrDocument = {
      id: url, // Unique identifier for the document
      text: textContent, // Add the extracted text content
      html: response.data,
      url: url,
      content_length: textContent.length,
      content_type: "application/pdf",
      // Add additional fields as needed (e.g., title, author, etc.)
    };

    // Send document to Solr for indexing
    await indexDocumentInSolr(solrDocument);

    console.log('Document indexed successfully:', solrDocument.id);

  } catch (error) {
    console.error('Error extracting text with Tika:', error.message);
  }
}

function extractTextFromHtml(html) {
  // Parse HTML using Cheerio
  const $ = cheerio.load(html);

  // Extract text content from HTML
  const textContent = $('body').text().trim();

  return textContent;
}

async function indexDocumentInSolr(document) {
  try {
    // Send document to Solr using the Solr REST API or a Solr client library
    // Example code to send document using Axios:
    await axios.post(solrUrl + '/update/json/docs', document, {
      params: {
        commit: true, // Commit changes immediately
      },
    });
  } catch (error) {
    throw new Error('Error indexing document in Solr: ' + error.message);
  }
}

function sanitizeIndexData(data) {
  // Convert all whitespace characters to spaces
  let sanitizedData = data.replace(/\s+/g, ' ');

  // Collapse runs of double spaces until none remain
  while (sanitizedData !== (sanitizedData = sanitizedData.replace(/  /g, ' '))) {}

  return sanitizedData.trim(); // Trim leading and trailing spaces
}

async function clearSolrIndex() {
  try {
    // Send delete query to Solr to delete all documents
    const response = await axios.post(solrUrl + '/update', {
      delete: {
        query: '*:*'
      },
      commit: {}
    }, {
      headers: {
        'Content-Type': 'application/json'
      }
    });

    console.log('Delete request returned status ' + response.data.responseHeader.status);
  } catch (error) {
    console.error('Error clearing Solr index:', error.message);
  }
}

// Example usage
const tikaUrl = 'http://solr.services.cleveland.daball.me:9998/tika'; // URL of the Tika instance
const baseUrl = 'https://russell-county-archives.daball.me'; // Base URL of the documents to download and index
const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance

// Call the function to clear the Solr index
clearSolrIndex();

// Note: these calls are not awaited, so every file downloads concurrently
// and all of them share the same 'tempfile' path.
for (let l = 0; l < filesToIndex.length; l++) {
  let line = filesToIndex[l];
  let documentUrl = baseUrl + line;
  extractAndIndexWithTika(documentUrl, solrUrl);
}
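One caveat on the loop above: since extractAndIndexWithTika is never awaited, the concurrent calls race on the shared 'tempfile'. A minimal sequential sketch, using only names already defined in this file:

// Sketch: process the list one file at a time to avoid the shared-tempfile race.
(async () => {
  for (const line of filesToIndex) {
    await extractAndIndexWithTika(baseUrl + line, solrUrl);
  }
})();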
index/file_list.txt — 22431 lines (new file)
File diff suppressed because it is too large.
index/gulpfile backup copy.js — 288 lines (new file)
@@ -0,0 +1,288 @@
const gulp = require('gulp');
//const shell = require('gulp-shell');
const axios = require('axios');
const fs = require('fs');
const os = require("os");
const path = require('path');
const crypto = require('crypto');
const cheerio = require('cheerio');
const SftpClient = require('ssh2-sftp-client');
const { TikaClient } = require('tika-js');

const baseUrl = 'https://russell-county-archives.daball.me'; // Base URL of the documents to download and index
const sftpBasePath = 'david@caddy.services.cleveland.daball.me:/srv/www/russell-county-archives.daball.me/archives'; // SSH path
const tikaUrl = 'http://solr.services.cleveland.daball.me:9998'; // URL of the Tika instance
const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance

// Read the list of files from the text file
const fileLines = fs.readFileSync('file_list.txt', 'utf8').split('\n');

// Filter the list to include only files with certain file extensions
const allowedExtensions = ['.txt', '.html', '.htm', '.md', '.pdf'];
const filesToIndex = fileLines.filter(line => {
  const extension = line.substring(line.lastIndexOf('.')).toLowerCase();
  return allowedExtensions.includes(extension);
});

// Maps a file extension to the MIME type Tika should be told to expect.
function extToMime(file_name) {
  switch (path.extname(file_name)) {
    case '.htm':
    case '.html':
      return 'text/html';
    case '.pdf':
      return 'application/pdf';
    case '.md':
    case '.txt':
    default:
      return 'text/plain';
  }
}

const readSshRsaKey = (keyFilePath) => {
  try {
    // Read the contents of the SSH RSA key file
    const key = fs.readFileSync(keyFilePath, 'utf8');
    return key.trim(); // Trim whitespace from the key
  } catch (error) {
    console.error('Error reading SSH RSA key:', error);
    return null;
  }
};

// Example usage
const defaultKeyFilePath = path.join(os.homedir(), ".ssh", "id_rsa");
const sshRsaKey = readSshRsaKey(defaultKeyFilePath);

// const downloadFileWithRsync = (sourceUrl, destinationPath) => {
//   return new Promise((resolve, reject) => {
//     const rsyncCommand = `rsync -av --progress "${sourceUrl}" "${destinationPath}"`;
//     exec(rsyncCommand, (error, stdout, stderr) => {
//       if (error) {
//         console.error('Error downloading file with rsync:', stderr);
//         reject(error);
//       } else {
//         console.log('File downloaded successfully:', stdout);
//         resolve();
//       }
//     });
//   });
// };

// Splits a "user@host:/path" SFTP URL into its parts.
const parseSftpUrl = (sftpUrl) => {
  const regex = /^(?<username>[^@]+)@(?<host>[^:]+):(?<path>.+)$/;
  const match = sftpUrl.match(regex);
  if (match) {
    return {
      username: match.groups.username,
      host: match.groups.host,
      path: match.groups.path
    };
  } else {
    throw new Error('Invalid SFTP URL format');
  }
};

const downloadFileWithSftp = async (remotePath, localPath, options = {}) => {
  const sftp = new SftpClient();
  try {
    // Connect to the SFTP server
    const sftpUrlParts = parseSftpUrl(remotePath);

    await sftp.connect({
      host: sftpUrlParts.host,
      username: sftpUrlParts.username,
      privateKey: sshRsaKey,
    });

    // Download the file; await so the transfer finishes before we disconnect
    const writer = fs.createWriteStream(localPath, { start: 0 });
    await sftp.get(sftpUrlParts.path, writer);

    //console.log('File downloaded successfully');
    return new Promise((resolve, reject) => {
      writer.on('finish', resolve);
      writer.on('error', reject);
    });
  } catch (error) {
    console.error('Error downloading file:', error);
  } finally {
    // Disconnect from the SFTP server
    await sftp.end();
  }
};

async function downloadFile(url, filePath) {
  // if (url.startsWith(baseUrl)) {
  //   //downloadFileWithRsync(url.replace(baseUrl, sftpBasePath), filePath);
  //   const sftpUrl = url.replace(baseUrl, sftpBasePath);
  //   console.log(`Downloading: ${sftpUrl}`);
  //   downloadFileWithSftp(sftpUrl, filePath);
  //   console.log(`Download complete: ${sftpUrl} => ${filePath}`);
  // }
  // else {
  console.log(`Downloading: ${url}`);
  const writer = fs.createWriteStream(filePath, { start: 0 });

  const response = await axios({
    url,
    method: 'GET',
    responseType: 'stream'
  });

  response.data.pipe(writer);

  return new Promise((resolve, reject) => {
    writer.on('finish', () => {
      console.log(`Download complete: ${url} => ${filePath}`);
      resolve();
    });
    writer.on('error', reject);
  });
  // }
}

// Stub: intended to fetch the stored checksum for a URL; never implemented.
async function getSolrIndexedFileChecksum(url) {

}

async function extractAndIndexWithTika(url, solrUrl) {
  try {
    const tempFilePath = 'tempfile';
    await downloadFile(url, tempFilePath);
    const fileContent = fs.readFileSync(tempFilePath);
    const checksum = crypto.createHash('sha256').update(fileContent).digest('hex');

    // Query Solr to check if the file is already indexed
    const solrChecksumResponse = await axios.get(`${solrUrl}/select?q=id:"${url}"&fl=sha256sum`);
    const solrChecksum = solrChecksumResponse.data.response.docs[0]?.sha256sum;

    if (solrChecksum && solrChecksum === checksum) {
      console.log(`File ${url} hasn't changed. Skipping.`);
      return;
    }

    // Make request to Tika
    console.log(`Processing ${url}`);
    // const response = await axios.post(tikaUrl + '/form', formData, {
    //   headers: {
    //     // 'Content-Type': 'application/octet-stream',
    //     //'Content-Type': extToMime(url),
    //     //'Content-Length': fs.
    //     'Content-Type': 'multipart/form-data',
    //     'X-Tika-Output-Format': 'solr',
    //     //'X-Tika-SolrUrl': solrUrl
    //   },
    //   timeout: 40000000
    // });
    const client = new TikaClient({ host: tikaUrl });
    const version = await client.getVersion();
    console.info(`version: ${version}`);
    console.info(extToMime(url), await client.getContent(fs.createReadStream(tempFilePath), extToMime(url), path.basename(url)));
    await client.pipe(fs.createReadStream(tempFilePath), fs.createWriteStream('output.txt'), 'text/plain', tempFilePath);

    //console.log('Tika response:', fs.readFileSync('output.txt'));
    const fileData = fs.readFileSync('output.txt');
    //const contentLength = await fs.stat(tempFilePath).size;

    const textContent = sanitizeIndexData(extractTextFromHtml(fileData));

    // Create Solr document
    const solrDocument = {
      id: url, // Unique identifier for the document
      text: textContent, // Add the extracted text content
      sha256sum: checksum, // Add the checksum
      //html: response.data,
      url: url,
      //content_length: contentLength,
      content_type: extToMime(url),
      // Add additional fields as needed (e.g., title, author, etc.)
    };

    // Send document to Solr for indexing
    console.log(`Indexing ${url}`);
    await indexDocumentInSolr(solrDocument);

    console.log('Document indexed successfully:', solrDocument.id);

  } catch (error) {
    console.error('Error extracting text with Tika:', error.message);
  }
}

function extractTextFromHtml(html) {
  // Parse HTML using Cheerio
  const $ = cheerio.load(html);

  // Extract text content from HTML
  const textContent = $('body').text().trim();

  return textContent;
}

async function indexDocumentInSolr(document) {
  try {
    // Send document to Solr using the Solr REST API or a Solr client library
    // Example code to send document using Axios:
    await axios.post(solrUrl + '/update/json/docs', document, {
      params: {
        commit: true, // Commit changes immediately
      },
    });
  } catch (error) {
    throw new Error('Error indexing document in Solr: ' + error.message);
  }
}

function sanitizeIndexData(data) {
  // Convert all whitespace characters to spaces
  let sanitizedData = data.replace(/\s+/g, ' ');

  // Collapse runs of double spaces until none remain
  while (sanitizedData !== (sanitizedData = sanitizedData.replace(/  /g, ' '))) {}

  return sanitizedData.trim(); // Trim leading and trailing spaces
}

async function clearSolrIndex() {
  try {
    // Send delete query to Solr to delete all documents
    const response = await axios.post(solrUrl + '/update', {
      delete: {
        query: '*:*'
      },
      commit: {}
    }, {
      headers: {
        'Content-Type': 'application/json'
      }
    });

    console.log('Delete request returned status ' + response.data.responseHeader.status);
  } catch (error) {
    console.error('Error clearing Solr index:', error.message);
  }
}

// Clears SOLR search index
const index_clear = async function() {
  await clearSolrIndex();
};
index_clear.displayName = 'index:clear';
gulp.task(index_clear);

// Reindexes SOLR search index with plaintext results from Tika
const index_index = async function() {
  for (let l = 0; l < filesToIndex.length; l++) {
    let line = filesToIndex[l];
    let documentUrl = baseUrl + line;
    await extractAndIndexWithTika(documentUrl, solrUrl);
  }
};
index_index.displayName = 'index:index';
gulp.task(index_index);

// Define index tasks
gulp.task('index:reindex', gulp.series('index:clear', 'index:index'));
gulp.task('index', gulp.series('index:index'));
// Define a default task (optional)
gulp.task('default', gulp.series('index'));
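For clarity, a small worked example of the "user@host:/path" format parseSftpUrl expects, built from the sftpBasePath defined above ('Some_Agenda.pdf' is a hypothetical file name):

const parts = parseSftpUrl(sftpBasePath + '/Some_Agenda.pdf');
// parts => {
//   username: 'david',
//   host: 'caddy.services.cleveland.daball.me',
//   path: '/srv/www/russell-county-archives.daball.me/archives/Some_Agenda.pdf'
// }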
index/gulpfile.js — 194 lines (new file)
@@ -0,0 +1,194 @@
const gulp = require('gulp');
const gulpif = require('gulp-if');
const through2 = require('through2');
//const shell = require('gulp-shell');
const axios = require('axios');
const fs = require('fs');
const os = require("os");
const path = require('path');
const crypto = require('crypto');
const cheerio = require('cheerio');
const { TikaClient } = require('tika-js');

const baseUrl = 'https://no-moss-3-carbo-landfill-library.online'; // Base URL of the documents to download and index
const tikaUrl = 'http://solr.services.cleveland.daball.me:9998'; // URL of the Tika instance
const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance

// NOTE: this file is defunct as committed: downloadFile and filesToIndex
// are referenced below but never defined here (they live in
// "gulpfile backup copy.js").

// Maps a file extension to the MIME type Tika should be told to expect.
function extToMime(file_name) {
  switch (path.extname(file_name)) {
    case '.htm':
    case '.html':
      return 'text/html';
    case '.pdf':
      return 'application/pdf';
    case '.md':
    case '.txt':
    default:
      return 'text/plain';
  }
}

// Returns a predicate for gulp-if that matches vinyl files by extension.
function isFileExt(extname) {
  const fileExtnameMatchesExtname = function(file) {
    return file.extname == extname;
  };
  return fileExtnameMatchesExtname;
}

async function tikaReadPdf(file) {
  const client = new TikaClient({ host: tikaUrl });
  const file_reader = fs.createReadStream(file);
  await client.pipe(file_reader, fs.createWriteStream('output.txt'), 'text/plain', file);
}

function index() {
  gulp.src([
    "public/Russell_County_IDA/**.pdf",
    //['.txt', '.html', '.htm', '.md', '.pdf']
  ])
  // Incomplete: the gulp-if branch was never given a transform stream
  // (see the sketch after this file).
  .pipe(gulpif(isFileExt(".pdf"), ));
}

async function extractAndIndexWithTika(url, solrUrl) {
  try {
    const tempFilePath = 'tempfile';
    await downloadFile(url, tempFilePath);
    const fileContent = fs.readFileSync(tempFilePath);
    const checksum = crypto.createHash('sha256').update(fileContent).digest('hex');

    // Query Solr to check if the file is already indexed
    const solrChecksumResponse = await axios.get(`${solrUrl}/select?q=id:"${url}"&fl=sha256sum`);
    const solrChecksum = solrChecksumResponse.data.response.docs[0]?.sha256sum;

    if (solrChecksum && solrChecksum === checksum) {
      console.log(`File ${url} hasn't changed. Skipping.`);
      return;
    }

    // Make request to Tika
    console.log(`Processing ${url}`);
    // const response = await axios.post(tikaUrl + '/form', formData, {
    //   headers: {
    //     // 'Content-Type': 'application/octet-stream',
    //     //'Content-Type': extToMime(url),
    //     //'Content-Length': fs.
    //     'Content-Type': 'multipart/form-data',
    //     'X-Tika-Output-Format': 'solr',
    //     //'X-Tika-SolrUrl': solrUrl
    //   },
    //   timeout: 40000000
    // });
    const client = new TikaClient({ host: tikaUrl });
    const version = await client.getVersion();
    console.info(`version: ${version}`);
    console.info(extToMime(url), await client.getContent(fs.createReadStream(tempFilePath), extToMime(url), path.basename(url)));
    await client.pipe(fs.createReadStream(tempFilePath), fs.createWriteStream('output.txt'), 'text/plain', tempFilePath);

    //console.log('Tika response:', fs.readFileSync('output.txt'));
    const fileData = fs.readFileSync('output.txt');
    //const contentLength = await fs.stat(tempFilePath).size;

    const textContent = sanitizeIndexData(extractTextFromHtml(fileData));

    // Create Solr document
    const solrDocument = {
      id: url, // Unique identifier for the document
      text: textContent, // Add the extracted text content
      sha256sum: checksum, // Add the checksum
      //html: response.data,
      url: url,
      //content_length: contentLength,
      content_type: extToMime(url),
      // Add additional fields as needed (e.g., title, author, etc.)
    };

    // Send document to Solr for indexing
    console.log(`Indexing ${url}`);
    await indexDocumentInSolr(solrDocument);

    console.log('Document indexed successfully:', solrDocument.id);

  } catch (error) {
    console.error('Error extracting text with Tika:', error.message);
  }
}

function extractTextFromHtml(html) {
  // Parse HTML using Cheerio
  const $ = cheerio.load(html);

  // Extract text content from HTML
  const textContent = $('body').text().trim();

  return textContent;
}

async function indexDocumentInSolr(document) {
  try {
    // Send document to Solr using the Solr REST API or a Solr client library
    // Example code to send document using Axios:
    await axios.post(solrUrl + '/update/json/docs', document, {
      params: {
        commit: true, // Commit changes immediately
      },
    });
  } catch (error) {
    throw new Error('Error indexing document in Solr: ' + error.message);
  }
}

function sanitizeIndexData(data) {
  // Convert all whitespace characters to spaces
  let sanitizedData = data.replace(/\s+/g, ' ');

  // Collapse runs of double spaces until none remain
  while (sanitizedData !== (sanitizedData = sanitizedData.replace(/  /g, ' '))) {}

  return sanitizedData.trim(); // Trim leading and trailing spaces
}

async function clearSolrIndex() {
  try {
    // Send delete query to Solr to delete all documents
    const response = await axios.post(solrUrl + '/update', {
      delete: {
        query: '*:*'
      },
      commit: {}
    }, {
      headers: {
        'Content-Type': 'application/json'
      }
    });

    console.log('Delete request returned status ' + response.data.responseHeader.status);
  } catch (error) {
    console.error('Error clearing Solr index:', error.message);
  }
}

// Clears SOLR search index
const index_clear = async function() {
  await clearSolrIndex();
};
index_clear.displayName = 'index:clear';
gulp.task(index_clear);

// Reindexes SOLR search index with plaintext results from Tika
const index_index = async function() {
  for (let l = 0; l < filesToIndex.length; l++) {
    let line = filesToIndex[l];
    let documentUrl = baseUrl + line;
    await extractAndIndexWithTika(documentUrl, solrUrl);
  }
};
index_index.displayName = 'index:index';
gulp.task(index_index);

// Define index tasks
gulp.task('index:reindex', gulp.series('index:clear', 'index:index'));
gulp.task('index', gulp.series('index:index'));
// Define a default task (optional)
gulp.task('default', gulp.series('index'));
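The index() function above leaves the gulp-if branch empty. A sketch of what the missing transform might look like, assuming the intent was to run each matched PDF through the tikaReadPdf helper defined in this file; this is a guess at the intent, not the author's implementation:

// Hypothetical object-mode transform that hands each matched PDF to Tika.
const tikaPdfTransform = () => through2.obj(function (file, _enc, cb) {
  tikaReadPdf(file.path)
    .then(() => cb(null, file))  // pass the vinyl file through unchanged
    .catch(cb);
});

// e.g. gulp.src("public/Russell_County_IDA/**.pdf")
//          .pipe(gulpif(isFileExt(".pdf"), tikaPdfTransform()));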
index/output.txt — 0 lines (new, empty file)
index/package-lock.json — 5128 lines (generated, new file)
File diff suppressed because it is too large.
index/package.json — 29 lines (new file)
@@ -0,0 +1,29 @@
{
  "name": "example_tika_solr_index",
  "version": "1.0.0",
  "description": "",
  "main": "example_tika_upload.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1",
    "index": "gulp index",
    "reindex": "gulp index:reindex"
  },
  "author": "",
  "license": "ISC",
  "dependencies": {
    "axios": "^1.6.7",
    "cheerio": "^1.0.0-rc.12",
    "ejs": "^3.1.9",
    "express": "^4.18.3",
    "gulp-if": "^3.0.0",
    "js-yaml": "^4.1.0",
    "ssh2-sftp-client": "^10.0.3",
    "through2": "^4.0.2",
    "tika-js": "^1.0.2",
    "tslib": "^2.6.2",
    "xml2js": "^0.6.2"
  },
  "devDependencies": {
    "gulp": "^4.0.2"
  }
}
index/search_solr.js — 81 lines (new file)
@@ -0,0 +1,81 @@
const express = require('express');
const axios = require('axios');
const app = express();
const path = require('path');

// Set EJS as the view engine
app.set('view engine', 'ejs');

// Specify the views directory
app.set('views', path.join(__dirname, 'views'));

// Middleware to parse JSON request body
app.use(express.json());

// Serve static files (CSS, JavaScript, images, etc.)
app.use(express.static('public'));

// Search endpoint
app.get('/search', async (req, res) => {
  try {
    // Extract search query from request query parameters
    const { query, page = 1, pageSize = 10 } = req.query;

    // Validate search query
    if (!query) {
      return res.status(400).json({ error: 'Query parameter is required' });
    }

    // Calculate start offset for pagination
    const start = (page - 1) * pageSize;

    // Sanitize search query to prevent query injection
    const sanitizedQuery = sanitizeQuery(query);

    // Send search query to Solr
    const response = await axios.get(solrUrl + '/select', {
      params: {
        q: `text:${sanitizedQuery}`, // Query string with field name
        hl: 'true',
        'hl.method': 'unified',
        'hl.fl': '*',
        'hl.snippets': 5,
        'hl.tag.pre': '<b class="result-highlight">',
        'hl.tag.post': '</b>',
        start, // Start offset for pagination
        rows: pageSize, // Number of rows to return per page
        wt: 'json', // Response format (JSON)
      },
    });

    // Extract search results from Solr response
    const searchResults = response.data.response.docs;
    const highlightedSnippets = response.data.highlighting;

    // Total number of results (needed for pagination)
    const totalResults = response.data.response.numFound;

    // Calculate total number of pages
    const totalPages = Math.ceil(totalResults / pageSize);

    // Render the results page
    //res.json({ searchResults, highlightedSnippets });
    res.render('search-results', { query, searchResults, highlightedSnippets, page, pageSize, totalResults, totalPages });
  } catch (error) {
    console.error('Error searching Solr:', error.message);
    res.status(500).json({ error: 'Internal server error' });
  }
});

// Strips any character that is not alphanumeric, whitespace, or a double
// quote, to keep Solr query syntax out of user input.
function sanitizeQuery(query) {
  return query.replace(/[^\w\s"]/gi, '');
}

// Start server
const solrUrl = 'http://solr.services.cleveland.daball.me:8983/solr/my_core'; // URL of your Solr instance
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
  console.log(`Server is running on port ${PORT}`);
});
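For reference, the shape of the Solr request the /search handler issues, with hypothetical inputs (query 'landfill', page 2, pageSize 10):

// sanitizeQuery keeps word characters, whitespace, and double quotes:
sanitizeQuery('coal ash; DROP');  // => 'coal ash DROP'
// start = (2 - 1) * 10 = 10, so the handler sends roughly:
//   GET {solrUrl}/select?q=text:landfill&hl=true&hl.method=unified
//       &hl.fl=*&hl.snippets=5&start=10&rows=10&wt=json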
index/views/search-results.ejs — 88 lines (new file)
@@ -0,0 +1,88 @@
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Search Results</title>
  <!-- Bootstrap CSS -->
  <link href="https://daball.me/vendor/bootstrap/css/bootstrap.min.css" rel="stylesheet">
  <link href="https://fonts.googleapis.com/css?family=Saira+Extra+Condensed:100,200,300,400,500,600,700,800,900" rel="stylesheet">
  <link href="https://fonts.googleapis.com/css?family=Open+Sans:300,300i,400,400i,600,600i,700,700i,800,800i" rel="stylesheet">
  <link href="https://daball.me/vendor/font-awesome/css/font-awesome.min.css" rel="stylesheet">
  <link href="https://daball.me/vendor/devicons/css/devicons.min.css" rel="stylesheet">
  <link href="https://daball.me/vendor/devicon/devicon.min.css" rel="stylesheet">
  <link href="https://daball.me/vendor/simple-line-icons/css/simple-line-icons.css" rel="stylesheet">
  <link href="https://daball.me/layouts/blog/css/blog.min.css" rel="stylesheet">
  <style>
    .result-highlight { background-color: #FBF719; font-weight: normal; }
  </style>
</head>
<body>
  <div class="navbar navbar-expand-lg fixed-top navbar-dark bg-primary">
    <div class="container">
      <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarResponsive" aria-controls="navbarResponsive" aria-expanded="false" aria-label="Toggle navigation"><span class="navbar-toggler-icon"></span></button>
      <div class="collapse navbar-collapse" id="navbarResponsive">
        <a class="navbar-brand" href="/">David A. Ball</a>
        <ul class="navbar-nav">
          <li class="nav-item"><a class="nav-link" href="https://russell-county-archives.daball.me/">Russell County Archives</a></li>
        </ul>
        <!-- Search form (repopulated with the current query) -->
        <form class="d-flex ms-auto">
          <input class="form-control me-2" type="search" placeholder="Search" aria-label="Search" value="<%= query %>" name="query">
          <button class="btn btn-outline-success" type="submit">Search</button>
        </form>
      </div>
    </div>
  </div>
  <div class="container">
    <h1 class="mt-5">Search Results</h1>
    <div id="searchResults" class="mt-3">
      <!-- Search results rendered from the Solr response -->
      <ul class="list-group">
        <% searchResults.forEach(result => { %>
          <li class="list-group-item">
            <h5><%= result.title %></h5>
            <% if (highlightedSnippets[result.id] && highlightedSnippets[result.id].text) { %>
              <% highlightedSnippets[result.id].text.forEach(snippet => { %>
                <p><%- snippet %></p>
              <% }); %>
            <% } else { %>
              <p>No snippet available</p>
            <% } %>
            <a href="<%= result.url %>"><%= result.url %></a>
          </li>
        <% }); %>
      </ul>
    </div>

    <!-- Pagination controls -->
    <nav aria-label="Search results pagination">
      <ul class="pagination justify-content-center mt-4">
        <% if (page > 1) { %>
          <li class="page-item">
            <a class="page-link" href="/search?query=<%= query %>&page=<%= page - 1 %>&pageSize=<%= pageSize %>">Previous</a>
          </li>
        <% } %>
        <% for (let i = 1; i <= totalPages; i++) { %>
          <li class="page-item <%= i === page ? 'active' : '' %>">
            <a class="page-link" href="/search?query=<%= query %>&page=<%= i %>&pageSize=<%= pageSize %>"><%= i %></a>
          </li>
        <% } %>
        <% if (page < totalPages) { %>
          <li class="page-item">
            <a class="page-link" href="/search?query=<%= query %>&page=<%= parseInt(page) + 1 %>&pageSize=<%= pageSize %>">Next</a>
          </li>
        <% } %>
      </ul>
    </nav>
  </div>

  <!-- Bootstrap JS (optional, if you need Bootstrap JS features) -->
  <script src="https://daball.me/vendor/jquery/jquery.min.js"></script>
  <script src="https://daball.me/vendor/popper.js/dist/popper.min.js"></script>
  <script src="https://daball.me/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
  <script src="https://daball.me/vendor/jquery-easing/jquery.easing.min.js"></script>
  <script src="https://daball.me/layouts/blog/js/blog.min.js"></script>
</body>
</html>