Browse Source

Add JMnedict, KRADFILE and RADKFILE datasets

Include other WWWJDIC datasets in the supported sources.
master
TheoryOfNekomata 1 year ago
parent
commit
edd6c07324
11 changed files with 102 additions and 6 deletions
  1. +16
    -4
      src/index.ts
  2. +1
    -0
      src/sources/jmnedict/common.ts
  3. +25
    -0
      src/sources/jmnedict/downloader.ts
  4. +2
    -0
      src/sources/jmnedict/index.ts
  5. +1
    -0
      src/sources/kradfile/common.ts
  6. +25
    -0
      src/sources/kradfile/downloader.ts
  7. +2
    -0
      src/sources/kradfile/index.ts
  8. +1
    -0
      src/sources/radkfile/common.ts
  9. +25
    -0
      src/sources/radkfile/downloader.ts
  10. +2
    -0
      src/sources/radkfile/index.ts
  11. +2
    -2
      test/index.test.ts

+ 16
- 4
src/index.ts View File

@@ -1,18 +1,30 @@
import * as KanjidicImpl from './sources/kanjidic';
import * as JmdictImpl from './sources/jmdict';
import * as JMdictImpl from './sources/jmdict';
import * as JMnedictImpl from './sources/jmnedict';
import * as RadKFileImpl from './sources/radkfile';
import * as KRadFileImpl from './sources/kradfile';

const SUPPORTED_SOURCES = [
KanjidicImpl,
JmdictImpl,
JMdictImpl,
JMnedictImpl,
RadKFileImpl,
KRadFileImpl,
] as const;

export type CreateDownloaderParams = (
KanjidicImpl.CreateDownloaderParams
| JmdictImpl.CreateDownloaderParams
| JMdictImpl.CreateDownloaderParams
| JMnedictImpl.CreateDownloaderParams
| RadKFileImpl.CreateDownloaderParams
| KRadFileImpl.CreateDownloaderParams
);

export * as Kanjidic from './sources/kanjidic';
export * as Jmdict from './sources/jmdict';
export * as JMdict from './sources/jmdict';
export * as JMnedict from './sources/jmnedict';
export * as RadKFile from './sources/radkfile';
export * as KRadFile from './sources/kradfile';
export * from './streams';

export const createDownloader = (params: CreateDownloaderParams) => {


+ 1
- 0
src/sources/jmnedict/common.ts View File

@@ -0,0 +1 @@
/** Literal identifier of the JMnedict source; `CreateDownloaderParams.type` is typed as this value. */
export const SOURCE_ID = 'jmnedict' as const;

+ 25
- 0
src/sources/jmnedict/downloader.ts View File

@@ -0,0 +1,25 @@
import fetchPonyfill from 'fetch-ponyfill';
import { PassThrough } from 'stream';
import { createGunzip } from 'zlib';
import { SOURCE_ID } from './common';

/** Options describing a JMnedict download request. */
export interface CreateDownloaderParams {
/** Discriminator selecting this source; always the literal value of `SOURCE_ID`. */
type: typeof SOURCE_ID;
/** Optional download URL; `createDownloader` falls back to its default source URL when omitted. */
url?: string;
}

/** Location of the gzipped JMnedict XML file used when the caller supplies no `url`. */
const DEFAULT_SOURCE_URL = 'http://ftp.edrdg.org/pub/Nihongo/JMnedict.xml.gz' as const;

/**
 * Fetches the gzipped JMnedict file and returns a stream of its decompressed contents.
 *
 * @param params - Downloader options; `url` overrides the default source URL.
 * @returns The gunzip stream that the response body is piped into.
 * @throws Error when the HTTP response is not OK.
 */
export const createDownloader = async (params: Omit<CreateDownloaderParams, 'type'>) => {
  const { url = DEFAULT_SOURCE_URL } = params;
  const { fetch } = fetchPonyfill();

  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download: ${url}`);
  }

  // NOTE(review): this cast assumes the ponyfill exposes the body as a Node.js
  // stream (as it does when backed by node-fetch) - confirm for other runtimes.
  const rawStream = response.body as unknown as PassThrough;
  const gunzip = createGunzip();

  // `pipe()` does not forward source errors to its destination. Without this,
  // a failure in the middle of the download would leave the returned stream
  // neither errored nor ended, and consumers could wait forever.
  rawStream.once('error', (error: Error) => {
    gunzip.destroy(error);
  });

  return rawStream.pipe(gunzip);
};

+ 2
- 0
src/sources/jmnedict/index.ts View File

@@ -0,0 +1,2 @@
export * from './common';
export * from './downloader';

+ 1
- 0
src/sources/kradfile/common.ts View File

@@ -0,0 +1 @@
/** Literal identifier of the KRADFILE source; `CreateDownloaderParams.type` is typed as this value. */
export const SOURCE_ID = 'kradfile' as const;

+ 25
- 0
src/sources/kradfile/downloader.ts View File

@@ -0,0 +1,25 @@
import fetchPonyfill from 'fetch-ponyfill';
import { PassThrough } from 'stream';
import { createGunzip } from 'zlib';
import { SOURCE_ID } from './common';

/** Options describing a KRADFILE download request. */
export interface CreateDownloaderParams {
/** Discriminator selecting this source; always the literal value of `SOURCE_ID`. */
type: typeof SOURCE_ID;
/** Optional download URL; `createDownloader` falls back to its default source URL when omitted. */
url?: string;
}

/** Location of the gzipped KRADFILE file used when the caller supplies no `url`. */
const DEFAULT_SOURCE_URL = 'http://ftp.edrdg.org/pub/Nihongo/kradfile.gz' as const;

/**
 * Fetches the gzipped KRADFILE file and returns a stream of its decompressed contents.
 *
 * @param params - Downloader options; `url` overrides the default source URL.
 * @returns The gunzip stream that the response body is piped into.
 * @throws Error when the HTTP response is not OK.
 */
export const createDownloader = async (params: Omit<CreateDownloaderParams, 'type'>) => {
  const { url = DEFAULT_SOURCE_URL } = params;
  const { fetch } = fetchPonyfill();

  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download: ${url}`);
  }

  // NOTE(review): this cast assumes the ponyfill exposes the body as a Node.js
  // stream (as it does when backed by node-fetch) - confirm for other runtimes.
  const rawStream = response.body as unknown as PassThrough;
  const gunzip = createGunzip();

  // `pipe()` does not forward source errors to its destination. Without this,
  // a failure in the middle of the download would leave the returned stream
  // neither errored nor ended, and consumers could wait forever.
  rawStream.once('error', (error: Error) => {
    gunzip.destroy(error);
  });

  return rawStream.pipe(gunzip);
};

+ 2
- 0
src/sources/kradfile/index.ts View File

@@ -0,0 +1,2 @@
export * from './common';
export * from './downloader';

+ 1
- 0
src/sources/radkfile/common.ts View File

@@ -0,0 +1 @@
/** Literal identifier of the RADKFILE source; `CreateDownloaderParams.type` is typed as this value. */
export const SOURCE_ID = 'radkfile' as const;

+ 25
- 0
src/sources/radkfile/downloader.ts View File

@@ -0,0 +1,25 @@
import fetchPonyfill from 'fetch-ponyfill';
import { PassThrough } from 'stream';
import { createGunzip } from 'zlib';
import { SOURCE_ID } from './common';

/** Options describing a RADKFILE download request. */
export interface CreateDownloaderParams {
/** Discriminator selecting this source; always the literal value of `SOURCE_ID`. */
type: typeof SOURCE_ID;
/** Optional download URL; `createDownloader` falls back to its default source URL when omitted. */
url?: string;
}

/** Location of the gzipped RADKFILE file used when the caller supplies no `url`. */
const DEFAULT_SOURCE_URL = 'http://ftp.edrdg.org/pub/Nihongo/radkfile.gz' as const;

/**
 * Fetches the gzipped RADKFILE file and returns a stream of its decompressed contents.
 *
 * @param params - Downloader options; `url` overrides the default source URL.
 * @returns The gunzip stream that the response body is piped into.
 * @throws Error when the HTTP response is not OK.
 */
export const createDownloader = async (params: Omit<CreateDownloaderParams, 'type'>) => {
  const { url = DEFAULT_SOURCE_URL } = params;
  const { fetch } = fetchPonyfill();

  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download: ${url}`);
  }

  // NOTE(review): this cast assumes the ponyfill exposes the body as a Node.js
  // stream (as it does when backed by node-fetch) - confirm for other runtimes.
  const rawStream = response.body as unknown as PassThrough;
  const gunzip = createGunzip();

  // `pipe()` does not forward source errors to its destination. Without this,
  // a failure in the middle of the download would leave the returned stream
  // neither errored nor ended, and consumers could wait forever.
  rawStream.once('error', (error: Error) => {
    gunzip.destroy(error);
  });

  return rawStream.pipe(gunzip);
};

+ 2
- 0
src/sources/radkfile/index.ts View File

@@ -0,0 +1,2 @@
export * from './common';
export * from './downloader';

+ 2
- 2
test/index.test.ts View File

@@ -1,6 +1,6 @@
import { createReadStream, createWriteStream } from 'fs';
import { describe, it, expect } from 'vitest';
import { createDownloader, Kanjidic, Jmdict, createXmlToJsonLines } from '../src';
import { createDownloader, Kanjidic, JMdict, createXmlToJsonLines } from '../src';

describe('downloader', () => {
describe.skip('kanjidic', () => {
@@ -40,7 +40,7 @@ describe('downloader', () => {
describe.skip('jmdict', () => {
it('works', async () => {
const readStream = await createDownloader({
type: Jmdict.SOURCE_ID,
type: JMdict.SOURCE_ID,
});

return new Promise<void>((resolve) => {


Loading…
Cancel
Save