import {
  createDownloader,
  CreateDownloaderParams,
  Kanjidic,
  JMdict,
  JMnedict,
  KRadFile,
  RadKFile,
  createXmlToJsonLines,
} from '@modal-sh/murasaki-core';
import { createWriteStream, readFileSync, writeFileSync } from 'fs';
import { PassThrough } from 'stream';

/** Contract of the service that fetches datasets and records them locally. */
export interface InitService {
  /**
   * Downloads the dataset described by `params`.
   *
   * @param params - Downloader parameters; `params.type` selects the dataset.
   * @returns The manifest contents after the dataset's entry has been updated.
   */
  downloadDataset(params: CreateDownloaderParams): Promise<ManifestData>;
}

/** Bookkeeping stored in the manifest for one dataset (`Date.now()` values). */
interface ManifestDatasetEntry {
  createdAt: number;
  lastUpdatedAt: number;
}

/** Manifest contents, keyed by dataset type; a dataset may have no entry yet. */
type ManifestData = Partial<Record<string, ManifestDatasetEntry>>;

/**
 * File-backed {@link InitService}: writes each dataset to `<type>.jsonl` and
 * tracks creation/update timestamps in the `.murasaki.json` manifest. Both
 * paths are relative, so they resolve against the current working directory.
 */
export class InitServiceImpl implements InitService {
  private readonly manifestFilename = '.murasaki.json' as const;

  private readonly manifestFileEncoding = 'utf-8' as const;

  private readonly data: ManifestData;

  /** Loads the existing manifest, or starts from an empty one. */
  constructor() {
    this.data = this.loadManifest();
  }

  /**
   * Reads the manifest from disk.
   *
   * Best effort by design: a missing, unreadable or malformed manifest yields
   * an empty one instead of throwing.
   *
   * @returns The parsed manifest, or `{}` when no usable manifest exists.
   */
  private loadManifest(): ManifestData {
    try {
      const manifestJson = readFileSync(this.manifestFilename, this.manifestFileEncoding);
      const parsed: unknown = JSON.parse(manifestJson);
      // Valid JSON can still be `null`, an array or a primitive; only a plain
      // object can be indexed by dataset type later on.
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        return parsed as ManifestData;
      }
    } catch {
      // Fall through to the empty manifest below.
    }
    return {};
  }

  /**
   * Writes the in-memory manifest to disk, replacing the previous file.
   *
   * @returns The manifest that was written.
   * @throws Whatever `writeFileSync` throws when the file cannot be written.
   */
  private commitDatasetMetadata(): ManifestData {
    writeFileSync(this.manifestFilename, JSON.stringify(this.data), {
      encoding: this.manifestFileEncoding,
      flag: 'w',
    });
    return this.data;
  }

  /**
   * Picks the stream placed between the download and the output file.
   *
   * Kanjidic is split on `character` elements, JMdict and JMnedict on `entry`
   * elements; every other dataset type is passed through unchanged.
   *
   * @param type - Dataset type taken from the downloader parameters.
   * @returns The parser stream for `type`, or a `PassThrough`.
   */
  private static getParserStreams(type: CreateDownloaderParams['type']) {
    switch (type) {
      case Kanjidic.SOURCE_ID:
        return createXmlToJsonLines({ entryTagName: 'character' });
      case JMnedict.SOURCE_ID:
      case JMdict.SOURCE_ID:
        return createXmlToJsonLines({ entryTagName: 'entry' });
      default:
        return new PassThrough();
    }
  }

  /**
   * Downloads a dataset, pipes it through its parser into `<type>.jsonl`, and
   * records the timestamps in the manifest once the file is fully written.
   *
   * @param params - Downloader parameters; `params.type` selects the parser,
   *   the output filename and the manifest key.
   * @returns The manifest contents after the dataset's entry has been updated.
   * @throws Error when the download, the parsing, the file write or the
   *   manifest commit fails; the underlying error is attached as `cause`.
   */
  async downloadDataset(params: CreateDownloaderParams): Promise<ManifestData> {
    // NOTE(review): assumes the downloader is a Node.js readable stream
    // (`on` / `destroy` / `pipe`) — confirm against murasaki-core.
    const downloader = await createDownloader(params);
    const datasetName = String(params.type);

    return new Promise<ManifestData>((resolve, reject) => {
      const parserStream = InitServiceImpl.getParserStreams(params.type);
      const out = createWriteStream(`${datasetName}.jsonl`);

      // `.pipe()` neither forwards errors nor closes the other streams, so on
      // any failure the whole chain is torn down explicitly. Rejecting more
      // than once is harmless: the promise settles on the first call.
      const fail = (message: string, cause: unknown): void => {
        downloader.destroy();
        parserStream.destroy();
        out.destroy();
        reject(new Error(message, { cause }));
      };

      downloader.on('error', (error: unknown) => {
        fail(`Error when downloading dataset: ${datasetName}`, error);
      });

      parserStream.on('error', (error: unknown) => {
        try {
          // Best effort: persist the manifest as it currently stands.
          this.commitDatasetMetadata();
        } catch {
          // noop
        }
        fail(`Error when parsing dataset: ${datasetName}`, error);
      });

      out.on('error', (error: unknown) => {
        fail(`Error when writing dataset: ${datasetName}`, error);
      });

      out.on('finish', () => {
        const now = Date.now();
        const previous = this.data[datasetName];
        this.data[datasetName] = {
          ...previous,
          // Keep the original creation time when the dataset is re-downloaded.
          createdAt: previous?.createdAt ?? now,
          lastUpdatedAt: now,
        };

        try {
          this.commitDatasetMetadata();
        } catch (error) {
          reject(
            new Error(`Error when committing metadata for dataset: ${datasetName}`, {
              cause: error,
            }),
          );
          return;
        }
        resolve(this.data);
      });

      downloader.pipe(parserStream).pipe(out);
    });
  }
}