forked from nm3clol/nm3clol-express-app
103 lines
3.5 KiB
TypeScript
103 lines
3.5 KiB
TypeScript
import { ReadStream } from 'fs'
|
|
import fetch from 'node-fetch'
|
|
/**
 * Concatenates the given string fragments in order, without inserting any
 * separator between them.
 *
 * @param args Fragments to glue together, e.g. a host followed by a resource path.
 * @returns The fragments joined end to end; an empty string when called with no arguments.
 */
const join = (...args: string[]): string => args.join('')
|
|
import { ContentResource, MetadataResource } from './types.mjs'
|
|
import { Writable } from 'stream';
|
|
|
|
/** Settings accepted by the `TikaClient` constructor. */
export interface TikaClientOptions {
  /**
   * Base URL that resource paths such as `/meta` and `/tika` are appended to.
   * The client concatenates it verbatim with the path.
   */
  host: string
}
|
|
|
|
/** Media types that callers may request as the format of a response. */
export type TikaContentType =
  | 'application/json'
  | 'text/html'
  | 'text/plain'
  | 'text/xml'
|
|
|
|
/**
 * Option bag pairing an optional file name with the desired response format.
 *
 * NOTE(review): nothing in this file references this interface; confirm it is
 * still consumed elsewhere before extending it.
 */
export interface TikaGetTextOptions {
  /** Name of the uploaded file, when known. */
  filename?: string
  /** Format the response should be returned in. */
  contentType: TikaContentType
}
|
|
|
|
/**
 * Thin HTTP client for the REST endpoints of a Tika server.
 *
 * Every request goes to `options.host` followed by a fixed resource path
 * (`/meta`, `/tika`, `/mime-types`, ...). The two parts are glued together by
 * plain concatenation, so `host` is used exactly as given.
 *
 * All methods reject with an `Error` when the server answers with a non-2xx
 * status, instead of handing the error payload back as if it were content.
 */
export class TikaClient {
  private readonly metaUrl: string
  private readonly tikaUrl: string

  /**
   * @param options Client settings; `options.host` is the base URL of the server.
   */
  constructor(private readonly options: TikaClientOptions) {
    this.metaUrl = join(options.host, '/meta')
    this.tikaUrl = join(options.host, '/tika')
  }

  /**
   * Uploads a document to the `/meta` endpoint and returns the parsed JSON reply.
   *
   * @param body Stream with the document bytes to upload.
   * @returns The JSON payload of the response.
   * @throws Error when the response status is not 2xx.
   */
  async getMetadata(body: ReadStream): Promise<MetadataResource> {
    const response = await this.put(this.metaUrl, body, 'application/json')
    return (await response.json()) as MetadataResource
  }

  /**
   * Uploads a document to the `/tika` endpoint and returns the whole reply.
   *
   * @param body Stream with the document bytes to upload.
   * @param contentType Value sent in the `Accept` header. With
   *   `'application/json'` the reply is parsed as JSON; otherwise it is
   *   returned as text.
   * @param filename Optional file name, forwarded in `Content-Disposition`.
   * @returns The response text, or the parsed JSON payload.
   * @throws Error when the response status is not 2xx.
   */
  getContent(body: ReadStream, contentType: Exclude<TikaContentType, 'application/json'>, filename?: string): Promise<string>
  getContent(body: ReadStream, contentType: Extract<TikaContentType, 'application/json'>, filename?: string): Promise<ContentResource>
  async getContent(body: ReadStream, contentType: TikaContentType, filename?: string): Promise<string | ContentResource> {
    const response = await this.put(this.tikaUrl, body, contentType, filename)
    if (contentType === 'application/json') {
      return (await response.json()) as ContentResource
    }
    return response.text()
  }

  /**
   * Uploads a document to the `/tika` endpoint and exposes the reply as a stream.
   *
   * @param body Stream with the document bytes to upload.
   * @param contentType Value sent in the `Accept` header.
   * @param filename Optional file name, forwarded in `Content-Disposition`.
   * @returns The readable body of the response.
   * @throws Error when the response status is not 2xx or the response has no body.
   */
  async getStream(body: ReadStream, contentType: TikaContentType, filename?: string): Promise<NodeJS.ReadableStream> {
    const response = await this.put(this.tikaUrl, body, contentType, filename)
    const stream = response.body
    if (stream == null) {
      throw new Error(`Tika request failed: PUT ${this.tikaUrl} returned no response body`)
    }
    return stream
  }

  /**
   * Uploads a document to the `/tika` endpoint and pipes the reply into `writeStream`.
   *
   * @param readStream Stream with the document bytes to upload.
   * @param writeStream Destination for the response body.
   * @param contentType Value sent in the `Accept` header; defaults to `'text/plain'`.
   * @param filename Optional file name, forwarded in `Content-Disposition`.
   * @returns A promise that resolves once `writeStream` emits `finish` and
   *   rejects when either stream emits `error`. `writeStream` is not destroyed
   *   on failure; closing it is left to the caller.
   */
  async pipe(readStream: ReadStream, writeStream: Writable, contentType: TikaContentType = 'text/plain', filename?: string): Promise<void> {
    const tikaStream = await this.getStream(readStream, contentType, filename)
    return new Promise<void>((resolve, reject) => {
      // `.pipe()` does not forward source errors to the destination, so the
      // response stream needs its own listener; without it a failed download
      // would leave this promise pending forever.
      tikaStream.on('error', reject)
      writeStream.on('error', reject)
      writeStream.on('finish', () => resolve())
      tikaStream.pipe(writeStream)
    })
  }

  /** Fetches `/mime-types` and returns the parsed JSON payload. */
  getMimeTypes(): Promise<unknown> {
    return this.getResource('/mime-types', 'application/json')
  }

  /** Fetches `/detectors` and returns the parsed JSON payload. */
  getDetectors(): Promise<unknown> {
    return this.getResource('/detectors', 'application/json')
  }

  /** Fetches `/parsers` and returns the parsed JSON payload. */
  getParsers(): Promise<unknown> {
    return this.getResource('/parsers', 'application/json')
  }

  /** Fetches `/parsers/details` and returns the parsed JSON payload. */
  getDetailedParsers(): Promise<unknown> {
    return this.getResource('/parsers/details', 'application/json')
  }

  /** Fetches `/version` as plain text and returns it with surrounding whitespace removed. */
  async getVersion(): Promise<string> {
    const response = await this.getResource('/version', 'text/plain')
    return response.trim()
  }

  /**
   * Issues a GET for `resource` relative to the configured host.
   *
   * @param resource Path appended verbatim to `options.host`.
   * @param accept Value of the `Accept` header; also selects whether the reply
   *   is read as text or parsed as JSON.
   * @throws Error when the response status is not 2xx.
   */
  private getResource<T>(resource: string, accept: 'application/json'): Promise<T>
  private getResource(resource: string, accept: 'text/plain'): Promise<string>
  private async getResource(resource: string, accept: 'application/json' | 'text/plain' = 'application/json'): Promise<unknown> {
    const url = join(this.options.host, resource)
    const response = TikaClient.ensureOk('GET', url, await fetch(url, {
      method: 'GET',
      headers: { 'Accept': accept }
    }))
    return accept === 'text/plain' ? response.text() : response.json()
  }

  /**
   * Sends `body` with a PUT request; shared by every upload method so the
   * header construction and the status check live in one place.
   */
  private async put(url: string, body: ReadStream, accept: TikaContentType, filename?: string) {
    const response = await fetch(url, {
      method: 'PUT',
      body,
      headers: {
        'Accept': accept,
        'Content-Disposition': `attachment${filename ? `; filename=${filename}` : ''}`
      }
    })
    return TikaClient.ensureOk('PUT', url, response)
  }

  /** Returns `response` unchanged when its status is 2xx, otherwise throws. */
  private static ensureOk<R extends { ok: boolean, status: number, statusText: string }>(method: string, url: string, response: R): R {
    if (!response.ok) {
      throw new Error(`Tika request failed: ${method} ${url} responded with ${response.status} ${response.statusText}`)
    }
    return response
  }
}