forked from nm3clol/nm3clol-express-app
Updated the directory view to add front matter support in README.md. Updated the file scanner logic; a conversion utility for previous scans is in progress.
parent 2f6afa9210
commit 03d2cfa816
@@ -6,6 +6,7 @@ import markdownIt from 'markdown-it';
 import markdownItAttrs from 'markdown-it-attrs';
 import momentJs from 'moment-timezone';
 import { inspect } from 'util';
+import matter from 'gray-matter';
 const moment = momentJs.tz.setDefault("UTC");
 const md = markdownIt({
   html: true,
@@ -77,10 +78,19 @@ const resolveReadmeFile: (directory: string) => string|undefined = (directory) =
 const directoryContainsReadme = (directory: string) => resolveReadmeFile(directory);
 // const printMarkdownFile = (file) => {
 // };
+const readmeFm = (directory: string) => {
+  const readmeFile = resolveReadmeFile(directory);
+  if (readmeFile) {
+    return matter.read(readmeFile).data;
+  }
+};
 const printReadme = (directory: string) => {
   const readmeFile = resolveReadmeFile(directory);
   if (readmeFile) {
-    return md.render(fs.readFileSync(readmeFile).toString());
+    const fm = matter.read(readmeFile);
+    const fmData = { fm: fm.data, excerpt: fm.excerpt };
+    const content = md.render(fm.content, fmData );
+    return content;
   }
 };
 const stripWebVTT = (webvttText: string) => {
@@ -187,6 +197,7 @@ export default {
   shouldShowSiteWelcomeMessage,
   shouldOmitLinkOnLastBreadcrumb,
   directoryContainsReadme,
+  readmeFm,
   printReadme,
   stripWebVTT,
   renderArchive,
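For context on the helper changes above: gray-matter splits a markdown file into its YAML front matter and its body, so the new readmeFm helper can expose just the metadata while printReadme now renders only the body (the YAML header no longer leaks into the rendered HTML). A minimal sketch of that behavior; the README path and field values here are illustrative, not taken from the repository:

import matter from 'gray-matter';

// Hypothetical README.md that begins with:
// ---
// title: Example Folder
// docDate: 2024-06-19
// ---
// Body text...
const fm = matter.read('/some/folder/README.md'); // illustrative path
console.log(fm.data);    // { title: 'Example Folder', docDate: ... } -- what readmeFm() returns
console.log(fm.content); // the markdown body without the YAML header -- what printReadme() renders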
@@ -1,4 +1,4 @@
-import { ReadStream, WriteStream } from 'fs'
+import { ReadStream } from 'fs'
 import fetch from 'node-fetch'
 let join = (...args: String[]) => {
   let output = "";
@@ -8,6 +8,7 @@ let join = (...args: String[]) => {
   return output;
 };
 import { ContentResource, MetadataResource } from './types.mjs'
+import { Writable } from 'stream';

 export interface TikaClientOptions {
   host: string
@@ -64,7 +65,7 @@ export class TikaClient {
     return response.body
   }

-  async pipe(readStream: ReadStream, writeStream: WriteStream, contentType: TikaContentType = 'text/plain', filename?: string): Promise<void> {
+  async pipe(readStream: ReadStream, writeStream: Writable, contentType: TikaContentType = 'text/plain', filename?: string): Promise<void> {
     const tikaStream = await this.getStream(readStream, contentType, filename)
     return new Promise((resolve, reject) => {
       const stream = tikaStream.pipe(writeStream)
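Widening the pipe() signature from fs.WriteStream to stream.Writable lets callers hand in any writable sink, not just a file stream. A rough usage sketch, assuming a TikaClient instance named tika and an input file path that are both illustrative:

import fs from 'fs';
import { Writable } from 'stream';

let extractedText = '';
const sink = new Writable({
  write(chunk, encoding, callback) {
    extractedText += chunk.toString(); // accumulate Tika's plain-text output in memory
    callback();
  },
});
// Pipe a document through Tika and capture the extracted text as a string.
await tika.pipe(fs.createReadStream('some-file.pdf'), sink, 'text/plain');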
@@ -19,6 +19,14 @@
 <% if (h.directoryContainsReadme(directory)) {%>
 <div class="row p-4 pb-0 pe-lg-0 pt-lg-5 align-items-center rounded-3 border shadow-lg">
   <div class="col-lg-12 p-3 p-lg-5 pt-lg-3">
+    <% if (typeof h.readmeFm(directory) !== 'undefined' && h.readmeFm(directory).title) { %>
+    <h1 class="title"><%= h.readmeFm(directory).title %></h1>
+    <p>
+      <small>
+        <%if (typeof h.readmeFm(directory).docDate !== 'undefined') { %><b>Document Date:</b> <%= h.moment(h.readmeFm(directory).docDate).format('MMMM D, YYYY') %><% } %>
+      </small>
+    </p>
+    <% } %>
    <%- h.printReadme(directory) %>
   </div>
 </div>
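The new template block only renders when the directory's README.md carries front matter with a title (and optionally a docDate). A sketch of what such a README could contain, expressed with gray-matter's stringify; the field values are made up for illustration:

import matter from 'gray-matter';

// Produces README.md contents like:
// ---
// title: Example Document
// docDate: 2024-06-19
// ---
// Extracted or hand-written markdown text...
const readme = matter.stringify('Extracted or hand-written markdown text...', {
  title: 'Example Document',
  docDate: '2024-06-19',
});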
gulpfile.mts (49 changed lines)
@@ -16,6 +16,9 @@ import { SolrDocument } from './app/search/solr-doc.mjs';
 // import ts from 'gulp-typescript';
 import run from 'gulp-run';
 import { deleteAsync } from 'del';
+import { fileURLToPath } from 'url';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);

 gulp.task('build:typescript:compile', () => {
   // const tsProject = ts.createProject('tsconfig.build.json');
@@ -229,7 +232,7 @@ gulp.task('index:laws', async () => {

   // Create a writable stream to capture the extracted text content into a string
   let extractedText = '';
-  const writableStream = new WriteStream({
+  const writableStream = new Writable({
     write(chunk, encoding, callback) {
       extractedText += chunk.toString(); // Append the chunk to the extracted text
       callback();
@@ -271,21 +274,29 @@ gulp.task('index:docs', async () => {
   //let scanExts = ''; //set to empty string to scan all
   let scanExts = '.{pdf,docx,pptx,xlsx,jpg,png,txt,mkv}';
   let globs = [
-    `Amys_Drop_Box/**/*${scanExts}`,
-    `CRS_Reports/**/*${scanExts}`,
-    `Mine_Safety_and_Health_Administration/**/*${scanExts}`,
-    `Potesta_&_Associates/**/*${scanExts}`,
-    `Russell_County/**/*${scanExts}`,
-    `Russell_County_Reclamation_LLC/**/*${scanExts}`,
-    `Tobacco_Region_Revitalization_Commission/**/*${scanExts}`,
-    `United_Mine_Workers_of_America/**/*${scanExts}`,
-    `Virginia_Energy/**/*${scanExts}`,
+    `2024-02-16 FOIA Response/*${scanExts}`, `2024-02-16 FOIA Response/**/*${scanExts}`,
+    `Amys_Drop_Box/*${scanExts}`, `Amys_Drop_Box/**/*${scanExts}`,
+    `CRS_Reports/*${scanExts}`, `CRS_Reports/**/*${scanExts}`,
+    `Cumberland_Plateau_Planning_District_Commission/*${scanExts}`, `Cumberland_Plateau_Planning_District_Commission/**/*${scanExts}`,
+    `David_Eaton_District_4_-_Supervisor_Screenshot_Collection/*${scanExts}`, `David_Eaton_District_4_-_Supervisor_Screenshot_Collection/**/*${scanExts}`,
+    `Environmental_Protection_Agency/*${scanExts}`, `Environmental_Protection_Agency/**/*${scanExts}`,
+    `Gentry_Locke/*${scanExts}`, `Gentry_Locke/**/*${scanExts}`,
+    `Mine_Safety_and_Health_Administration/*${scanExts}`, `Mine_Safety_and_Health_Administration/**/*${scanExts}`,
+    `News/*${scanExts}`, `News/**/*${scanExts}`,
+    `Potesta_&_Associates/*${scanExts}`, `Potesta_&_Associates/**/*${scanExts}`,
+    `Russell_County/*${scanExts}`, `Russell_County/**/*${scanExts}`,
+    `Scott_County/*${scanExts}`, `Scott_County/**/*${scanExts}`,
+    `Tobacco_Region_Revitalization_Commission/*${scanExts}`, `Tobacco_Region_Revitalization_Commission/**/*${scanExts}`,
+    `United_Mine_Workers_of_America/*${scanExts}`, `United_Mine_Workers_of_America/**/*${scanExts}`,
+    `Virginia_Energy/*${scanExts}`, `Virginia_Energy/**/*${scanExts}`,
+    `Virginia_Governor/*${scanExts}`, `Virginia_Governor/**/*${scanExts}`,
+    `Virginia_Tech/*${scanExts}`, `Virginia_Tech/**/*${scanExts}`,
     // I want to put Virginia Law in its own search category first.
     // `Virginia_Law_Library/**/*${scanExts}`,
   ];
   // Use glob to match files in the local directories
   let files: string[] = [];
-  let cwd = path.resolve(__dirname, config.publicPath.replaceAll('/', path.sep));
+  let cwd = path.resolve(__dirname, '..', config.publicPath.replaceAll('/', path.sep));
   globs.forEach(async (globPattern) => {
     files = files.concat(glob.globSync(globPattern, {
       cwd,
@@ -343,7 +354,7 @@ gulp.task('index:docs', async () => {
     // Create a Readable stream for the file contents
     let f = fs.createReadStream(fileFullPath);
     // Create a writable stream to capture the extracted text content into a string
-    const writableStream = new WriteStream({
+    const writableStream = new Writable({
       write(chunk, encoding, callback) {
         extractedText += chunk.toString(); // Append the chunk to the extracted text
         callback();
@@ -376,13 +387,13 @@ gulp.task('index:docs', async () => {
     };

     // Save a copy of the text into a file
-    const fileName = fileFullPath + ".md";
-    try {
-      fs.writeFileSync(fileName, extractedText);
-      console.log(`File ${fileName} has been written successfully!`);
-    } catch (err) {
-      console.error('Error writing to file', err);
-    }
+    // const fileName = fileFullPath + ".md";
+    // try {
+    //   fs.writeFileSync(fileName, extractedText);
+    //   console.log(`File ${fileName} has been written successfully!`);
+    // } catch (err) {
+    //   console.error('Error writing to file', err);
+    // }

     // Send document to Solr for indexing
     // Index the file with its text content and metadata
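Both writable-stream fixes above swap new WriteStream({...}) for new Writable({...}). WriteStream comes from 'fs' and is normally obtained via fs.createWriteStream, so constructing it directly with a custom write handler does not give a working in-memory sink; stream.Writable does. A self-contained sketch of the capture pattern the gulpfile uses (the sample input here is illustrative; in the gulpfile the data comes from the Tika client):

import { Writable, Readable } from 'stream';

let extractedText = '';
const writableStream = new Writable({
  write(chunk, encoding, callback) {
    extractedText += chunk.toString(); // append each chunk of extracted text
    callback();
  },
});

Readable.from(['hello ', 'world']).pipe(writableStream).on('finish', () => {
  console.log(extractedText); // "hello world"
});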
one-time-conversion.mts (new file, 223 lines)
@@ -0,0 +1,223 @@
+/**
+ * Operation 1: Clear orphan index entries which have no file in public. Log output. Keep this gulp operation for later, call it 'index:clean-missing'.
+ *
+ * 1. Connect to Solr.
+ * 2. Query * for all the documents. Return all the records and all the fields.
+ * 3. For each of the records, check that the file exists. If the file does not exist on the filesystem, delete the index record.
+ *
+ * Operation 2: Save the stored records to the `${pathToFile}.md` file.
+ *
+ * 4. Check if the new file exists. If it exists, exit the procedure.
+ * 5. If it does not exist, then create it.
+ * 6. Write the contents to the file. Particularly, every file should have a YAML header. In that YAML header I want the following information stored:
+ * ---
+ * type: document
+ * title: document title
+ * mimeType: text/plain, application/vnd-word, etc.; use known good list
+ * tags:
+ *   - customized based on path, perhaps one tag per folder name
+ * path: relative path to document file
+ * sha256sum: checksum for SHA2-256 checksum
+ * sha1sum: checksum for SHA1 checksum
+ * ---
+ * ...document plain text from the OCR operations.
+ *
+ * Operation 3: Save the Solr record without arrays into a second Solr instance.
+ *
+ * 7. Connect to second Solr instance.
+ * 8. Store document correctly according to the new schema without additional cardinality of the search fields.
+ * 9. Index all information in the .md (frontmatter and plaintext) schema into the Solr schema.
+ *
+ * Operation 4: Switch all code to use new Solr schema.
+ *
+ * 10. Update references to Solr so they use the new instance.
+ *
+ * Operation 5: Convert indexing code so that the Tika->.md file generation occurs in one operation and the .md->Solr scan occurs in another.
+ *
+ * 11. Run both operations to reindex the site.
+ */
+
+/** OPERATION 1: */
+
+import axios from 'axios';
+import { config } from './app/config.mjs';
+import { SolrDocument, IncorrectStyleSolrDocument } from './app/search/solr-doc.mjs';
+import request from 'request-promise-native';
+import fs from 'fs';
+import path from 'path';
+import fm from 'gray-matter';
+
+// Function to retrieve metadata of a file from Solr
+async function getAllSolrDocuments(start: number = 0, rows: number = 10) {
+  const res = await axios({ method: 'get', url: `${config.solrDocsUrl}/select?q=*:*&start=${start}&rows=${rows}`, responseType: 'json' })
+  return res.data.response.docs;
+}
+
+async function removeSolrDocument(id: string) {
+  const res = await axios({ method: 'post', url: `${config.solrDocsUrl}/update/json`, responseType: 'json', data: { commit: {}, delete: [ id ] } });
+  return res.data;
+}
+
+const existingSolrDocuments: IncorrectStyleSolrDocument[] = (await getAllSolrDocuments(0, 65535));
+const validSolrDocuments: StoredSolrDocument[] = [];
+const stripLeftText = "https://no-moss-3-carbo-landfill-library.online/";
+
+interface StoredSolrDocument extends SolrDocument {
+  relativePathToFile: string;
+  absolutePathToFile: string;
+  doesExist: boolean;
+};
+
+for (const oldSolrDoc of existingSolrDocuments) {
+  const solrDoc: StoredSolrDocument = {
+    id: oldSolrDoc.id,
+    content_length: oldSolrDoc.content_length && oldSolrDoc.content_length.length ? oldSolrDoc.content_length[0] : 0,
+    content_type: oldSolrDoc.content_type && oldSolrDoc.content_type.length ? oldSolrDoc.content_type[0] : '',
+    sha256sum: oldSolrDoc.sha256sum && oldSolrDoc.sha256sum.length ? oldSolrDoc.sha256sum[0] : '',
+    text: oldSolrDoc.text && oldSolrDoc.text.length ? oldSolrDoc.text[0] : '',
+    url: oldSolrDoc.url && oldSolrDoc.url.length ? oldSolrDoc.url[0] : '',
+    _version_: oldSolrDoc._version_,
+    relativePathToFile: '',
+    absolutePathToFile: '',
+    doesExist: false,
+  };
+  solrDoc.relativePathToFile = (solrDoc.url.startsWith(stripLeftText) ? solrDoc.url.substring(stripLeftText.length) : solrDoc.url).replaceAll('/', path.sep);
+  solrDoc.absolutePathToFile = path.resolve(path.join(config.publicPath, solrDoc.relativePathToFile));
+  solrDoc.doesExist = fs.existsSync(solrDoc.absolutePathToFile);
+  // build the correct object
+  if (!solrDoc.doesExist) {
+    await removeSolrDocument(solrDoc.id).then((response) => {
+      console.log(`Removed doc from search index because the file doesn't exist on the volume:`);
+      console.log(`  id: ${solrDoc.id}`);
+      console.log(`  url: ${solrDoc.url}`);
+      console.log(`  path: ${solrDoc.absolutePathToFile}`);
+      console.log(`  Response:`, response);
+    })
+  }
+  else {
+    validSolrDocuments.push(solrDoc);
+  }
+};
+
+/** OPERATION 2: */
+import moment from 'moment';
+import { mkdir } from 'fs/promises';
+function resolveTitleFromFileName(fileName: string) {
+  return path.basename(fileName, path.extname(fileName)).replaceAll('_', ' ')
+}
+
+function resolveDateFromFileName(fileName: string) {
+  // style 1: convert: 2024-06-19_00_25_00 to: 2024-06-19 12:25:00 am
+  const m1 = 'YYYY-MM-DD hh:mm:ss a';
+  const re1 = /^(\d{4})-(\d{2})-(\d{2})_(\d{2})_(\d{2})_(\d{2}).*/g;
+  // style 2: convert: 2024-06-19_00_25 to: 2024-06-19 12:25:00 am
+  const m2 = m1;
+  const re2 = /^(\d{4})-(\d{2})-(\d{2})_(\d{2})_(\d{2}).*/g;
+  // style 3: convert: 2024-06-19 to: 2024-06-19
+  const re3 = /^(\d{4})-(\d{2})-(\d{2}).*/g;
+  // style 4: convert: 2024-06 to: 2024-06
+  const re4 = /^(\d{4})-(\d{2}).*/g;
+  // style 5: convert: 2024 to: 2024
+  const re5 = /^(\d{4}).*/g;
+  if (fileName.search(re1) >= 0) {
+    return moment(fileName.replace(re1, `$1-$2-$3 $4:$5:$6`)).format(m1);
+  }
+  else if (fileName.search(re2) >= 0) {
+    return moment(fileName.replace(re2, `$1-$2-$3 $4:$5`)).format(m2);
+  }
+  else if (fileName.search(re3) >= 0) {
+    return fileName.replace(re3, `$1-$2-$3`);
+  }
+  else if (fileName.search(re4) >= 0) {
+    return fileName.replace(re4, `$1-$2`);
+  }
+  else if (fileName.search(re5) >= 0) {
+    return fileName.replace(re5, `$1`);
+  }
+}
+
+function resolveTagsFromRelativeFilePath(relFilePath: string) {
+  let tags = relFilePath.split(path.sep);
+  const fileName = tags.pop()!;
+  const dateTag = resolveDateFromFileName(fileName!);
+  if (dateTag) {
+    // style 3: convert: 2024-06-19 to: 2024-06-19
+    const re3 = /^(\d{4})-(\d{2})-(\d{2}).*/g;
+    // style 4: convert: 2024-06 to: 2024-06
+    const re4 = /^(\d{4})-(\d{2}).*/g;
+    // style 5: convert: 2024 to: 2024
+    const re5 = /^(\d{4}).*/g;
+    if (fileName.search(re5) >= 0) {
+      tags.push(fileName.replace(re5, `$1`));
+    }
+    if (fileName.search(re4) >= 0) {
+      tags.push(fileName.replace(re4, `$1-$2`));
+    }
+    if (fileName.search(re3) >= 0) {
+      tags.push(fileName.replace(re3, `$1-$2-$3`));
+    }
+  }
+  return tags;
+}
+
+import crypto from 'crypto';
+async function calculateSHA1Hash(filePath: string) {
+  return new Promise<string>((resolve, reject) => {
+    const readStream = fs.createReadStream(filePath);
+    const hash = crypto.createHash('SHA1');
+
+    readStream.on('data', (chunk) => {
+      hash.update(chunk);
+    });
+
+    readStream.on('end', () => {
+      const sha1Hash: string = hash.digest('hex');
+      resolve(sha1Hash);
+    });
+
+    readStream.on('error', (error) => {
+      reject(error);
+    });
+  });
+}
+
+for (const solrDoc of validSolrDocuments) {
+  //guard: don't write this file for video files, all of which conveniently contain the string YouTube_Archive
+  if (solrDoc.relativePathToFile.indexOf('YouTube_Archive') >= 0 || solrDoc.relativePathToFile.indexOf('2024-03-20-Governor_Youngkin_Listening_Session') >= 0) {
+    console.log(`This Solr document is for a YouTube_Archive video, which uses a different metadata technique. Skipping creation of markdown file in folder.`);
+    continue;
+  }
+  //guard: don't write this
+  const mdFolderPath = path.join(path.dirname(solrDoc.absolutePathToFile), path.basename(solrDoc.absolutePathToFile, path.extname(solrDoc.absolutePathToFile)));
+  if (fs.existsSync(mdFolderPath)) {
+    console.log(`Directory at ${mdFolderPath} already exists. Continuing check for markdown file in folder.`);
+  }
+  else {
+    //create folder
+    await mkdir(mdFolderPath);
+    console.log(`Directory at ${mdFolderPath} created.`);
+  }
+  const mdFilePath = path.join(mdFolderPath, 'README.md');
+  // if (fs.existsSync(mdFilePath)) {
+  //   console.log(`Markdown file at ${mdFilePath} already exists. Skipping creation of markdown file in folder.`);
+  //   continue;
+  // }
+  // else {
+  console.log(solrDoc.id);
+  console.log(`Saving new markdown file at ${mdFilePath}:`);
+  const mdFileContents = fm.stringify(solrDoc.text, {
+    type: 'document',
+    title: resolveTitleFromFileName(solrDoc.absolutePathToFile),
+    file: path.posix.join('..', path.basename(solrDoc.absolutePathToFile)),
+    tags: resolveTagsFromRelativeFilePath(solrDoc.relativePathToFile),
+    docDate: resolveDateFromFileName(path.basename(solrDoc.absolutePathToFile))||null,
+    contentType: solrDoc.content_type,
+    contentLength: solrDoc.content_length,
+    sha256sum: solrDoc.sha256sum,
+    sha1sum: await calculateSHA1Hash(solrDoc.absolutePathToFile),
+  });
+  let ws = fs.createWriteStream(mdFilePath);
+  ws.write(mdFileContents);
+  ws.close();
+  // }
+}
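To make the date parsing in one-time-conversion.mts concrete, here is roughly what resolveDateFromFileName returns for each filename style its comments describe. The filenames are made up for illustration, and the sketch assumes the helper is in scope in the same module:

console.log(resolveDateFromFileName('2024-06-19_00_25_00_Meeting_Recording.mkv')); // ~ '2024-06-19 12:25:00 am'
console.log(resolveDateFromFileName('2024-06-19_Board_Minutes.pdf'));              // '2024-06-19'
console.log(resolveDateFromFileName('2024_Annual_Report.pdf'));                    // '2024'
console.log(resolveDateFromFileName('Board_Minutes.pdf'));                         // undefined (no leading date)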
@@ -10,10 +10,10 @@
     "test": "echo \"Error: no test specified\" && exit 1",
     "transpile:ts": "tsc -project tsconfig.pre-build.json",
     "index": "tsc -project tsconfig.pre-build.json && gulp -f dist/gulpfile.mjs index",
-    "index:clear": "tsc -project tsconfig.json && gulp -f dist/gulpfile.mjs index:clear",
-    "index:docs": "tsc -project tsconfig.json && gulp -f dist/gulpfile.mjs index:docs",
-    "index:laws": "tsc -project tsconfig.json && gulp -f dist/gulpfile.mjs index:laws",
-    "index:reindex": "tsc -project tsconfig.json && gulp -f dist/gulpfile.mjs index:reindex"
+    "index:clear": "tsc -project tsconfig.pre-build.json && gulp -f dist/gulpfile.mjs index:clear",
+    "index:docs": "tsc -project tsconfig.pre-build.json && gulp -f dist/gulpfile.mjs index:docs",
+    "index:laws": "tsc -project tsconfig.pre-build.json && gulp -f dist/gulpfile.mjs index:laws",
+    "index:reindex": "tsc -project tsconfig.pre-build.json && gulp -f dist/gulpfile.mjs index:reindex"
   },
   "author": "",
   "license": "ISC",