Browse Source

Integrate core download features

Expose core download features for Web API.
master
TheoryOfNekomata 11 months ago
parent
commit
c20e339b64
3 changed files with 55 additions and 53 deletions
  1. +2
    -0
      .gitignore
  2. +7
    -2
      src/modules/init/InitController.ts
  3. +46
    -51
      src/modules/init/InitService.ts

+ 2
- 0
.gitignore View File

@@ -106,3 +106,5 @@ dist

.npmrc
types/
.murasaki.json
*.jsonl

+ 7
- 2
src/modules/init/InitController.ts View File

@@ -1,6 +1,7 @@
import {RouteHandlerMethod} from 'fastify';
import {CreateDownloaderParams} from '@modal-sh/murasaki-core';
import {InitService, InitServiceImpl} from './InitService';
import {constants} from 'http2';

export interface InitController {
downloadDataset: RouteHandlerMethod;
@@ -12,7 +13,11 @@ export class InitControllerImpl implements InitController {
}

/**
 * Fastify route handler that triggers a dataset download.
 *
 * The request body is cast (not validated) to `CreateDownloaderParams`, handed to
 * `initService.downloadDataset`, and the resolved value is sent as the reply.
 *
 * NOTE(review): this span looks like a rendered diff whose +/- markers were lost.
 * The first two statements appear to be the removed lines and the try/catch their
 * replacement — confirm against the actual file.
 */
readonly downloadDataset: RouteHandlerMethod = async (request, reply) => {
// NOTE(review): presumably the pre-change lines; they sit outside the try below.
const result = await this.initService.downloadDataset(request.body as CreateDownloaderParams);
reply.send(result);
try {
const result = await this.initService.downloadDataset(request.body as CreateDownloaderParams);
reply.send(result);
} catch (err) {
// Anything thrown inside the try is answered with HTTP 500, using the caught value as the payload.
reply.status(constants.HTTP_STATUS_INTERNAL_SERVER_ERROR).send(err);
}
};
}

+ 46
- 51
src/modules/init/InitService.ts View File

@@ -8,7 +8,8 @@ import {
RadKFile,
createXmlToJsonLines,
} from '@modal-sh/murasaki-core';
import { createWriteStream, readFileSync } from 'fs';
import { createWriteStream, readFileSync, writeFileSync } from 'fs';
import { PassThrough } from 'stream';

export interface InitService {
downloadDataset(params: CreateDownloaderParams): Promise<ManifestData>;
@@ -22,7 +23,6 @@ interface ManifestDatasetEntry {
type ManifestData = Record<string, ManifestDatasetEntry>;

export class InitServiceImpl implements InitService {
private manifestWriteStream?: NodeJS.WritableStream;

private readonly manifestFilename = '.murasaki.json' as const;

@@ -40,71 +40,66 @@ export class InitServiceImpl implements InitService {
}
}

private async commitDatasetMetadata(): Promise<ManifestData> {
return new Promise<ManifestData>((resolve, reject) => {
this.manifestWriteStream = createWriteStream(this.manifestFilename, {
flags: 'w',
});
this.manifestWriteStream.on('error', reject);
this.manifestWriteStream.write(JSON.stringify(this.data));
this.manifestWriteStream.end(() => {
resolve(this.data);
});
});
/**
 * Synchronously writes the in-memory manifest to the manifest file as JSON,
 * replacing whatever the file held before.
 *
 * @returns The manifest data that was just written.
 * @throws Whatever `writeFileSync` throws when the file cannot be written.
 */
private commitDatasetMetadata(): ManifestData {
  const manifestPath = this.manifestFilename;
  const serializedManifest = JSON.stringify(this.data);

  writeFileSync(manifestPath, serializedManifest, { flag: 'w' });

  return this.data;
}

/**
 * Builds the stream used to process a dataset of the given type.
 *
 * Kanjidic gets an XML-to-JSON-lines converter keyed on the `character` tag;
 * JMnedict and JMdict get one keyed on the `entry` tag. Every other type gets
 * a plain `PassThrough`.
 *
 * @param type - Dataset source identifier taken from the downloader parameters.
 * @returns A newly created stream for that dataset type.
 */
private static getParserStreams(type: CreateDownloaderParams['type']) {
  if (type === Kanjidic.SOURCE_ID) {
    return createXmlToJsonLines({ entryTagName: 'character' });
  }

  if (type === JMnedict.SOURCE_ID || type === JMdict.SOURCE_ID) {
    return createXmlToJsonLines({ entryTagName: 'entry' });
  }

  return new PassThrough();
}

/**
 * Downloads a dataset and writes it to `${params.type}.jsonl`.
 *
 * A downloader is created from `params` and piped into a write stream. When that
 * stream emits `finish`, the dataset's manifest entry gets `createdAt` (kept if
 * already set) and `lastUpdatedAt`, and the manifest is committed.
 *
 * NOTE(review): this span looks like a rendered diff whose +/- markers were lost,
 * so superseded and replacement statements appear side by side (duplicate keys in
 * the manifest entry, `.then` on a synchronous commit, a switch that returns before
 * the final pipe). Confirm against the actual file before relying on any one path.
 *
 * @param params - Downloader parameters; `params.type` selects the parser and names the output file.
 * @returns A promise for the manifest data.
 */
async downloadDataset(params: CreateDownloaderParams): Promise<ManifestData> {
const downloader = await createDownloader(params);

return new Promise<ManifestData>((resolve, reject) => {
const parserStream = InitServiceImpl.getParserStreams(params.type);

const out = createWriteStream(`${params.type}.jsonl`);

// A parser failure still attempts to persist the current manifest, then rejects with the original error as `cause`.
parserStream.on('error', (error) => {
try {
this.commitDatasetMetadata();
} catch {
// noop
}

reject(new Error(`Error when parsing dataset: ${params.type as unknown as string}`, { cause: error }));
});

// Runs once the output file stream has finished.
out.on('finish', () => {
const now = Date.now();
this.data[params.type] = {
...this.data[params.type],
createdAt: this.data[params.type].createdAt ?? now,
...(this.data[params.type] ?? {}),
createdAt: this.data[params.type]?.createdAt ?? now,
lastUpdatedAt: now,
};

this.commitDatasetMetadata()
.then(resolve)
.catch(reject);
try {
this.commitDatasetMetadata();
resolve(this.data);
} catch {
reject(new Error(`Error when committing metadata for dataset: ${params.type as unknown as string}`));
}
});

// NOTE(review): presumably the pre-change wiring, which chose the parser inline per dataset type.
switch (params.type) {
case Kanjidic.SOURCE_ID: {
const jsonlParser = createXmlToJsonLines({
entryTagName: 'character',
});

downloader
.pipe(jsonlParser)
.pipe(out);
} return;
case JMnedict.SOURCE_ID:
case JMdict.SOURCE_ID: {
const jsonlParser = createXmlToJsonLines({
entryTagName: 'entry',
});

downloader
.pipe(jsonlParser)
.pipe(out);
} return;
case KRadFile.SOURCE_ID:
case RadKFile.SOURCE_ID:
downloader.pipe(out);
return;
default:
break;
}

this.commitDatasetMetadata()
.then(() => {
reject(new Error(`Unknown dataset: ${params.type as unknown as string}`));
})
.catch(reject);
// Download -> parser chosen by getParserStreams -> output file.
downloader
.pipe(parserStream)
.pipe(out);
});
}
}

Loading…
Cancel
Save