// nm3clol-express-app/routes/search.js
//
// 148 lines
// 6.9 KiB
// JavaScript

const express = require('express');
const router = express.Router();
const { parse, toString } = require('lucene');
const { createClient, Query } = require('solr-client');
// Connection settings (host, port, core name) for the Solr documents core.
// Each value is read from its SOLR_DOCS_* environment variable, falling back to the literal default when unset or empty.
const solrConfig = { host: process.env.SOLR_DOCS_HOST||'solr', port: process.env.SOLR_DOCS_PORT||8983, core: process.env.SOLR_DOCS_CORE_NAME||'nm3clol_core' };
const helpers = require('../views/helpers/functions');
// Typographic quote variants (soft keyboards often emit these) that must be folded to plain ASCII quotes
// before the query is handed to the Lucene parser.
// The `u` flag is required: without it the astral characters (🙶🙷🙸) are matched one UTF-16 surrogate at a
// time, which emits two quotes per character and corrupts unrelated emoji sharing the same high surrogate.
const FANCY_DOUBLE_QUOTES = /[“”“”„„‟❝❞〝〞〟"❠⹂🙶🙷🙸]/gu;
const FANCY_SINGLE_QUOTES = /[‘’‘’'‚‛❛❜❟]/gu;

// Paging limits.
const DEFAULT_PAGE_SIZE = 10;
const MAX_PAGE_SIZE = 100;

// Markup that replaces Solr's default <em>…</em> highlight wrapper.
const HIGHLIGHT_OPEN_TAG = `<b class="result-highlight">`;
const HIGHLIGHT_CLOSE_TAG = `</b>`;

// Shape of the message thrown by the solr-client dependency on a non-2xx response:
// `Request HTTP error ${statusCode}: ${jsonBody}`.
const SOLR_HTTP_ERROR = /^Request HTTP error (?<statusCode>\d{1,3}): (?<text>\{.*\}$)/s;

/**
 * Parse a query-string value as a positive base-10 integer.
 * @param {*} value - Raw value from `req.query` (usually a string or undefined).
 * @param {number} fallback - Returned when `value` is missing, not numeric, or less than 1.
 * @returns {number} A positive integer.
 */
function toPositiveInt(value, fallback) {
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Replace Solr's default <em> highlight tags in every document's `text` snippets.
 * @param {Object<string, Object>|undefined} highlighting - The `highlighting` section of a Solr response.
 * @returns {Object<string, Object>} A new map with the same keys; empty when `highlighting` is absent.
 */
function restyleHighlights(highlighting) {
  const restyled = {};
  for (const [docId, fields] of Object.entries(highlighting ?? {})) {
    const snippets = fields?.text;
    restyled[docId] = Array.isArray(snippets) && snippets.length > 0
      ? {
        ...fields,
        text: snippets.map((snippet) => snippet
          .replaceAll('<em>', HIGHLIGHT_OPEN_TAG)
          .replaceAll('</em>', HIGHLIGHT_CLOSE_TAG)),
      }
      : fields;
  }
  return restyled;
}

/**
 * Convert whatever the Solr request rejected with into the `{ code, message, innerError? }`
 * shape handed to the `search-error` view.
 * @param {*} error - Rejection reason from the Solr client (or an error raised while rendering).
 * @returns {*} A plain error description; non-Error values are passed through unchanged.
 */
function toRenderableError(error) {
  if (!(error instanceof Error)) {
    return error;
  }
  const match = error.message.match(SOLR_HTTP_ERROR);
  if (!match) {
    return { code: 500, message: error.message };
  }
  const { statusCode, text } = match.groups;
  let innerError;
  try {
    innerError = JSON.parse(text);
  } catch {
    // The body looked like JSON but is not valid; keep the raw text rather than failing inside the error path.
    innerError = text;
  }
  return {
    message: 'Solr Client Request HTTP Error',
    code: Number(statusCode),
    innerError,
  };
}

/**
 * GET / — run a full-text search against Solr and render the results page.
 *
 * Query parameters:
 *   q        - Lucene-syntax search query (required).
 *   page     - 1-based page number (default 1).
 *   pageSize - results per page (default 10, capped at 100).
 *
 * Renders `search-results` on success and `search-error` when the query is missing,
 * cannot be parsed, or the Solr request fails.
 */
router.get('/', async (req, res) => {
  // A repeated or nested `q` parameter arrives as an array/object; treat anything but a string as missing.
  const rawQuery = typeof req.query.q === 'string' ? req.query.q : '';
  // Sanitize query: soft keyboards create fancy quotes but the parser needs basic quotes.
  const q = rawQuery.replaceAll(FANCY_DOUBLE_QUOTES, '"').replaceAll(FANCY_SINGLE_QUOTES, '\'');

  if (q.trim() === '') {
    res.render('search-error', { h: helpers, query: q, error: { code: 400, message: 'Search query is required.' } });
    return;
  }

  // Extract paging parameters; query-string values are strings and must be parsed explicitly.
  const page = toPositiveInt(req.query.page, 1);
  const pageSize = Math.min(toPositiveInt(req.query.pageSize, DEFAULT_PAGE_SIZE), MAX_PAGE_SIZE);
  // Calculate start offset for pagination
  const start = (page - 1) * pageSize;

  // Round-trip the query through the Lucene parser to get a normalized Solr q string.
  let qQuery;
  try {
    qQuery = toString(parse(q));
  } catch (parseError) {
    res.render('search-error', {
      h: helpers,
      query: q,
      error: { code: 400, message: `Invalid search query: ${parseError.message}` },
    });
    return;
  }

  // Generate a Solr query based on the query string and paging parameters.
  // NOTE(review): the highlight settings are nested under `options`; presumably solr-client reads them from
  // the top level of the argument, which would explain why simplePre/simplePost have no effect and the
  // <em> tags are rewritten after the fact — confirm against the installed solr-client version.
  const solrQuery = new Query().df('text').q(qQuery).start(start).rows(pageSize).hl({ options: {
    on: true,
    q: qQuery,
    fl: '*',
    snippets: 5,
    formatter: 'simple',
    simplePre: HIGHLIGHT_OPEN_TAG,
    simplePost: HIGHLIGHT_CLOSE_TAG,
    highlightMultiTerm: true,
    usePhraseHighlighter: true,
  }});

  try {
    // Connect using the environment-driven settings rather than a hard-coded host/core.
    const solrClient = createClient(solrConfig);
    const solrResponse = await solrClient.search(solrQuery);
    // overcome broken hl simplePre/simplePost implementation
    solrResponse.highlighting = restyleHighlights(solrResponse.highlighting);
    // Calculate total number of results and pages (needed for pagination)
    const totalResults = solrResponse.response.numFound;
    const totalPages = Math.ceil(totalResults / pageSize);
    res.render('search-results', {
      h: helpers,
      query: qQuery,
      page,
      pageSize,
      totalResults,
      totalPages,
      solrQuery,
      ...solrResponse,
    });
  } catch (error) {
    res.render('search-error', { h: helpers, query: qQuery, error: toRenderableError(error) });
  }
});
// Public surface of this module: the search router is the only export.
module.exports = { router: router };