Browse Source

Update tests

Include tests for other sources.
master
TheoryOfNekomata 1 year ago
parent
commit
3bd1eeb49b
6 changed files with 300 additions and 100 deletions
  1. +18
    -1
      package.json
  2. +27
    -26
      src/streams.ts
  3. +0
    -73
      test/index.test.ts
  4. +85
    -0
      test/sources/jmdict.test.ts
  5. +85
    -0
      test/sources/jmnedict.test.ts
  6. +85
    -0
      test/sources/kanjidic.test.ts

+ 18
- 1
package.json View File

@@ -1,5 +1,5 @@
{
"name": "murasaki-core",
"name": "@modal-sh/murasaki-core",
"version": "0.0.0",
"files": [
"dist",
@@ -49,5 +49,22 @@
"dependencies": {
"fetch-ponyfill": "^7.1.0",
"xml-js": "^1.6.11"
},
"types": "./dist/types/index.d.ts",
"main": "./dist/cjs/production/index.js",
"module": "./dist/esm/production/index.js",
"exports": {
".": {
"development": {
"require": "./dist/cjs/development/index.js",
"import": "./dist/esm/development/index.js"
},
"require": "./dist/cjs/production/index.js",
"import": "./dist/esm/production/index.js",
"types": "./dist/types/index.d.ts"
}
},
"typesVersions": {
"*": {}
}
}

+ 27
- 26
src/streams.ts View File

@@ -15,33 +15,34 @@ class XmlToJsonLinesTransformStream extends Transform {

/**
 * Buffers incoming XML text and pushes one JSON line for every complete
 * `<entryTagName>…</entryTagName>` element that has been received so far.
 *
 * Text in front of the next opening tag is discarded. An element whose
 * closing tag has not arrived yet stays in `this.charactersToParse` until a
 * later chunk completes it.
 *
 * @param chunk - Bytes of the XML document; decoded here as UTF-8.
 * @param _encoding - Unused.
 * @param callback - Called exactly once per chunk: with an `Error` when the
 *   conversion throws, otherwise with an empty payload.
 */
// eslint-disable-next-line no-underscore-dangle
_transform(chunk: Buffer, _encoding: BufferEncoding, callback: TransformCallback) {
  try {
    const openTag = `<${this.options.entryTagName}>`;
    const closeTag = `</${this.options.entryTagName}>`;

    // NOTE(review): decoding chunk-by-chunk can split a multi-byte UTF-8
    // sequence that straddles a chunk boundary. A StringDecoder kept on the
    // instance would avoid that — confirm against how the stream is fed.
    const chunkStr = chunk.toString('utf-8');
    const buffered = typeof this.charactersToParse === 'string'
      ? this.charactersToParse + chunkStr
      : chunkStr;

    // Everything before `cursor` has been emitted or is known to be droppable.
    let cursor = 0;
    for (;;) {
      const openIndex = buffered.indexOf(openTag, cursor);
      if (openIndex === -1) {
        // No entry starts in the remaining text. Keep only a tail short
        // enough to hold an opening tag that was cut off by the chunk boundary.
        cursor = Math.max(cursor, buffered.length - (openTag.length - 1));
        break;
      }

      // Search for the closing tag after the opening tag so a stray closing
      // tag earlier in the buffer is never paired with this entry.
      const closeIndex = buffered.indexOf(closeTag, openIndex + openTag.length);
      if (closeIndex === -1) {
        // The entry is incomplete; keep it from its opening tag onwards.
        cursor = openIndex;
        break;
      }

      const entryEnd = closeIndex + closeTag.length;
      const xml = buffered
        .slice(openIndex, entryEnd)
        .replace(/&(.+?);/g, '$1'); // FIXME better handling of XML entities??? This makes the pipe hang for some reason
      const json = xml2json(xml, { compact: true });
      this.push(`${json}\n`);
      cursor = entryEnd;
    }

    this.charactersToParse = buffered.slice(cursor);
    callback(null, '');
  } catch (err) {
    // Narrow instead of asserting: anything thrown that is not an Error is wrapped.
    callback(err instanceof Error ? err : new Error(String(err)));
  }
}
}



+ 0
- 73
test/index.test.ts View File

@@ -1,73 +0,0 @@
import { createReadStream, createWriteStream } from 'fs';
import { describe, it, expect } from 'vitest';
import { createDownloader, Kanjidic, JMdict, createXmlToJsonLines } from '../src';

/**
 * Manual end-to-end checks for downloading the source dictionaries and
 * converting them to JSON lines. Both nested suites are skipped.
 *
 * Every promise rejects on a stream error so a failed download or a missing
 * input file fails the test immediately instead of waiting for the timeout.
 */
describe('downloader', () => {
  describe.skip('kanjidic', () => {
    it.skip('works', async () => {
      const readStream = await createDownloader({
        type: Kanjidic.SOURCE_ID,
      });

      return new Promise<void>((resolve, reject) => {
        const out = createWriteStream('kanjidic2.xml');

        readStream.on('error', reject);
        out.on('error', reject);
        out.on('finish', () => {
          resolve();
        });

        readStream.pipe(out);
      });
    });

    it.skip('converts XML to JSON', () => new Promise<void>((resolve, reject) => {
      const readStream = createReadStream('kanjidic2.xml', { encoding: 'utf-8' });
      const transform = createXmlToJsonLines({
        entryTagName: 'character',
      });
      const out = createWriteStream('kanjidic2.jsonl');

      // `pipe` does not forward errors, so each stage needs its own handler.
      readStream.on('error', reject);
      transform.on('error', reject);
      out.on('error', reject);
      out.on('finish', () => {
        resolve();
      });

      readStream
        .pipe(transform)
        .pipe(out);
    }));
  });

  describe.skip('jmdict', () => {
    it('works', async () => {
      const readStream = await createDownloader({
        type: JMdict.SOURCE_ID,
      });

      return new Promise<void>((resolve, reject) => {
        const out = createWriteStream('jmdict.xml');

        readStream.on('error', reject);
        out.on('error', reject);
        out.on('finish', () => {
          resolve();
        });

        readStream.pipe(out);
      });
    }, { timeout: 300000 });

    // NOTE(review): this reads 'jmdict.full.xml', not the 'jmdict.xml' written
    // by the download test above — confirm which file is intended.
    it('converts XML to JSON', () => new Promise<void>((resolve, reject) => {
      const readStream = createReadStream('jmdict.full.xml', { encoding: 'utf-8' });
      const transform = createXmlToJsonLines({
        entryTagName: 'entry',
      });
      const out = createWriteStream('jmdict.jsonl');

      // `pipe` does not forward errors, so each stage needs its own handler.
      readStream.on('error', reject);
      transform.on('error', reject);
      out.on('error', reject);
      out.on('finish', () => {
        resolve();
      });

      readStream
        .pipe(transform)
        .pipe(out);
    }), { timeout: 300000 });
  });
});

+ 85
- 0
test/sources/jmdict.test.ts View File

@@ -0,0 +1,85 @@
import { afterAll, describe, it } from 'vitest';
import { createReadStream, createWriteStream } from 'fs';
import { unlink } from 'fs/promises';
import { createDownloader, createXmlToJsonLines, JMdict } from '../../src';

// Path the downloader test writes the raw XML to.
const DOWNLOAD_FILENAME = 'jmdict.xml' as const;
// Path the converter test writes the JSON lines to.
const PROCESS_OUTPUT_FILENAME = 'jmdict.jsonl' as const;

// FIXME use tests that do not download the source data

/**
 * End-to-end suite for the JMdict source: download the XML, then convert the
 * downloaded file to JSON lines. Skipped as a whole; both generated files are
 * removed once the suite has finished.
 */
describe.skip('jmdict', () => {
  // Deletes a generated file, ignoring any failure (e.g. it was never created).
  const removeQuietly = async (path: string): Promise<void> => {
    try {
      await unlink(path);
    } catch {
      // noop
    }
  };

  afterAll(() => removeQuietly(DOWNLOAD_FILENAME));

  afterAll(() => removeQuietly(PROCESS_OUTPUT_FILENAME));

  describe('downloader', () => {
    it('downloads the source data', async () => {
      const source = await createDownloader({ type: JMdict.SOURCE_ID });

      await new Promise<void>((resolve, reject) => {
        const sink = createWriteStream(DOWNLOAD_FILENAME);

        // Fail on an error from either end; succeed once the file is fully written.
        source.on('error', (err) => reject(err));
        sink.on('error', (err) => reject(err));
        sink.on('finish', () => resolve());

        source.pipe(sink);
      });
    }, { timeout: 30000 });
  });

  describe('converter', () => {
    it('converts XML to JSON', async () => {
      await new Promise<void>((resolve, reject) => {
        const fail = (err: Error) => reject(err);

        const source = createReadStream(DOWNLOAD_FILENAME, { encoding: 'utf-8' });
        source.on('error', fail);

        const toJsonLines = createXmlToJsonLines({ entryTagName: 'entry' });
        toJsonLines.on('error', fail);

        const sink = createWriteStream(PROCESS_OUTPUT_FILENAME);
        sink.on('error', fail);
        sink.on('finish', () => resolve());

        source.pipe(toJsonLines).pipe(sink);
      });
    }, { timeout: 30000 });
  });
});

+ 85
- 0
test/sources/jmnedict.test.ts View File

@@ -0,0 +1,85 @@
import { afterAll, describe, it } from 'vitest';
import { createReadStream, createWriteStream } from 'fs';
import { unlink } from 'fs/promises';
import { createDownloader, createXmlToJsonLines, JMnedict } from '../../src';

// Path the downloader test writes the raw XML to.
const DOWNLOAD_FILENAME = 'jmnedict.xml' as const;
// Path the converter test writes the JSON lines to.
const PROCESS_OUTPUT_FILENAME = 'jmnedict.jsonl' as const;

// FIXME use tests that do not download the source data

/**
 * End-to-end suite for the JMnedict source: download the XML, then convert
 * the downloaded file to JSON lines. Skipped as a whole; both generated files
 * are removed once the suite has finished.
 */
describe.skip('jmnedict', () => {
  // Deletes a generated file, ignoring any failure (e.g. it was never created).
  const removeQuietly = async (path: string): Promise<void> => {
    try {
      await unlink(path);
    } catch {
      // noop
    }
  };

  afterAll(() => removeQuietly(DOWNLOAD_FILENAME));

  afterAll(() => removeQuietly(PROCESS_OUTPUT_FILENAME));

  describe('downloader', () => {
    it('downloads the source data', async () => {
      const source = await createDownloader({ type: JMnedict.SOURCE_ID });

      await new Promise<void>((resolve, reject) => {
        const sink = createWriteStream(DOWNLOAD_FILENAME);

        // Fail on an error from either end; succeed once the file is fully written.
        source.on('error', (err) => reject(err));
        sink.on('error', (err) => reject(err));
        sink.on('finish', () => resolve());

        source.pipe(sink);
      });
    }, { timeout: 30000 });
  });

  describe('converter', () => {
    it('converts XML to JSON', async () => {
      await new Promise<void>((resolve, reject) => {
        const fail = (err: Error) => reject(err);

        const source = createReadStream(DOWNLOAD_FILENAME, { encoding: 'utf-8' });
        source.on('error', fail);

        const toJsonLines = createXmlToJsonLines({ entryTagName: 'entry' });
        toJsonLines.on('error', fail);

        const sink = createWriteStream(PROCESS_OUTPUT_FILENAME);
        sink.on('error', fail);
        sink.on('finish', () => resolve());

        source.pipe(toJsonLines).pipe(sink);
      });
    }, { timeout: 45000 });
  });
});

+ 85
- 0
test/sources/kanjidic.test.ts View File

@@ -0,0 +1,85 @@
import { describe, it, afterAll } from 'vitest';
import { createReadStream, createWriteStream } from 'fs';
import { unlink } from 'fs/promises';
import { createDownloader, createXmlToJsonLines, Kanjidic } from '../../src';

// Path the downloader test writes the raw XML to.
const DOWNLOAD_FILENAME = 'kanjidic2.xml' as const;
// Path the converter test writes the JSON lines to.
const PROCESS_OUTPUT_FILENAME = 'kanjidic2.jsonl' as const;

// FIXME use tests that do not download the source data

/**
 * End-to-end suite for the Kanjidic source: download the XML, then convert
 * the downloaded file to JSON lines. Skipped as a whole; both generated files
 * are removed once the suite has finished.
 */
describe.skip('kanjidic', () => {
  // Deletes a generated file, ignoring any failure (e.g. it was never created).
  const removeQuietly = async (path: string): Promise<void> => {
    try {
      await unlink(path);
    } catch {
      // noop
    }
  };

  afterAll(() => removeQuietly(DOWNLOAD_FILENAME));

  afterAll(() => removeQuietly(PROCESS_OUTPUT_FILENAME));

  describe('downloader', () => {
    it('downloads the source data', async () => {
      const source = await createDownloader({ type: Kanjidic.SOURCE_ID });

      await new Promise<void>((resolve, reject) => {
        const sink = createWriteStream(DOWNLOAD_FILENAME);

        // Fail on an error from either end; succeed once the file is fully written.
        source.on('error', (err) => reject(err));
        sink.on('error', (err) => reject(err));
        sink.on('finish', () => resolve());

        source.pipe(sink);
      });
    });
  });

  describe('converter', () => {
    it('converts XML to JSON', async () => {
      await new Promise<void>((resolve, reject) => {
        const fail = (err: Error) => reject(err);

        const source = createReadStream(DOWNLOAD_FILENAME, { encoding: 'utf-8' });
        source.on('error', fail);

        const toJsonLines = createXmlToJsonLines({ entryTagName: 'character' });
        toJsonLines.on('error', fail);

        const sink = createWriteStream(PROCESS_OUTPUT_FILENAME);
        sink.on('error', fail);
        sink.on('finish', () => resolve());

        source.pipe(toJsonLines).pipe(sink);
      });
    });
  });
});

Loading…
Cancel
Save