From edd6c0732434a4c11aa7b5266ae7d13d73edaac0 Mon Sep 17 00:00:00 2001 From: TheoryOfNekomata Date: Sat, 6 May 2023 11:01:42 +0800 Subject: [PATCH] Add JMnedict and KRADFILE datasets Include other WWWJDIC datasets to sources. --- src/index.ts | 20 ++++++++++++++++---- src/sources/jmnedict/common.ts | 1 + src/sources/jmnedict/downloader.ts | 25 +++++++++++++++++++++++++ src/sources/jmnedict/index.ts | 2 ++ src/sources/kradfile/common.ts | 1 + src/sources/kradfile/downloader.ts | 25 +++++++++++++++++++++++++ src/sources/kradfile/index.ts | 2 ++ src/sources/radkfile/common.ts | 1 + src/sources/radkfile/downloader.ts | 25 +++++++++++++++++++++++++ src/sources/radkfile/index.ts | 2 ++ test/index.test.ts | 4 ++-- 11 files changed, 102 insertions(+), 6 deletions(-) create mode 100644 src/sources/jmnedict/common.ts create mode 100644 src/sources/jmnedict/downloader.ts create mode 100644 src/sources/jmnedict/index.ts create mode 100644 src/sources/kradfile/common.ts create mode 100644 src/sources/kradfile/downloader.ts create mode 100644 src/sources/kradfile/index.ts create mode 100644 src/sources/radkfile/common.ts create mode 100644 src/sources/radkfile/downloader.ts create mode 100644 src/sources/radkfile/index.ts diff --git a/src/index.ts b/src/index.ts index b26a1e8..77d3f65 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,18 +1,30 @@ import * as KanjidicImpl from './sources/kanjidic'; -import * as JmdictImpl from './sources/jmdict'; +import * as JMdictImpl from './sources/jmdict'; +import * as JMnedictImpl from './sources/jmnedict'; +import * as RadKFileImpl from './sources/radkfile'; +import * as KRadFileImpl from './sources/kradfile'; const SUPPORTED_SOURCES = [ KanjidicImpl, - JmdictImpl, + JMdictImpl, + JMnedictImpl, + RadKFileImpl, + KRadFileImpl, ] as const; export type CreateDownloaderParams = ( KanjidicImpl.CreateDownloaderParams - | JmdictImpl.CreateDownloaderParams + | JMdictImpl.CreateDownloaderParams + | JMnedictImpl.CreateDownloaderParams + | RadKFileImpl.CreateDownloaderParams + | KRadFileImpl.CreateDownloaderParams ); export * as Kanjidic from './sources/kanjidic'; -export * as Jmdict from './sources/jmdict'; +export * as JMdict from './sources/jmdict'; +export * as JMnedict from './sources/jmnedict'; +export * as RadKFile from './sources/radkfile'; +export * as KRadFile from './sources/kradfile'; export * from './streams'; export const createDownloader = (params: CreateDownloaderParams) => { diff --git a/src/sources/jmnedict/common.ts b/src/sources/jmnedict/common.ts new file mode 100644 index 0000000..0a1b4d2 --- /dev/null +++ b/src/sources/jmnedict/common.ts @@ -0,0 +1 @@ +export const SOURCE_ID = 'jmnedict' as const; diff --git a/src/sources/jmnedict/downloader.ts b/src/sources/jmnedict/downloader.ts new file mode 100644 index 0000000..da322e4 --- /dev/null +++ b/src/sources/jmnedict/downloader.ts @@ -0,0 +1,25 @@ +import fetchPonyfill from 'fetch-ponyfill'; +import { PassThrough } from 'stream'; +import { createGunzip } from 'zlib'; +import { SOURCE_ID } from './common'; + +export interface CreateDownloaderParams { + type: typeof SOURCE_ID; + url?: string; +} + +const DEFAULT_SOURCE_URL = 'http://ftp.edrdg.org/pub/Nihongo/JMnedict.xml.gz' as const; + +export const createDownloader = async (params: Omit) => { + const { url = DEFAULT_SOURCE_URL } = params; + const { fetch } = fetchPonyfill(); + + const response = await fetch(url); + if (!response.ok) { + throw new Error(`Failed to download: ${url}`); + } + + const rawStream = response.body as unknown as PassThrough; + return rawStream + .pipe(createGunzip()); +}; diff --git a/src/sources/jmnedict/index.ts b/src/sources/jmnedict/index.ts new file mode 100644 index 0000000..22fa288 --- /dev/null +++ b/src/sources/jmnedict/index.ts @@ -0,0 +1,2 @@ +export * from './common'; +export * from './downloader'; diff --git a/src/sources/kradfile/common.ts b/src/sources/kradfile/common.ts new file mode 100644 index 0000000..fb029ca --- /dev/null +++ b/src/sources/kradfile/common.ts @@ -0,0 +1 @@ +export const SOURCE_ID = 'kradfile' as const; diff --git a/src/sources/kradfile/downloader.ts b/src/sources/kradfile/downloader.ts new file mode 100644 index 0000000..33040af --- /dev/null +++ b/src/sources/kradfile/downloader.ts @@ -0,0 +1,25 @@ +import fetchPonyfill from 'fetch-ponyfill'; +import { PassThrough } from 'stream'; +import { createGunzip } from 'zlib'; +import { SOURCE_ID } from './common'; + +export interface CreateDownloaderParams { + type: typeof SOURCE_ID; + url?: string; +} + +const DEFAULT_SOURCE_URL = 'http://ftp.edrdg.org/pub/Nihongo/kradfile.gz' as const; + +export const createDownloader = async (params: Omit) => { + const { url = DEFAULT_SOURCE_URL } = params; + const { fetch } = fetchPonyfill(); + + const response = await fetch(url); + if (!response.ok) { + throw new Error(`Failed to download: ${url}`); + } + + const rawStream = response.body as unknown as PassThrough; + return rawStream + .pipe(createGunzip()); +}; diff --git a/src/sources/kradfile/index.ts b/src/sources/kradfile/index.ts new file mode 100644 index 0000000..22fa288 --- /dev/null +++ b/src/sources/kradfile/index.ts @@ -0,0 +1,2 @@ +export * from './common'; +export * from './downloader'; diff --git a/src/sources/radkfile/common.ts b/src/sources/radkfile/common.ts new file mode 100644 index 0000000..94209ac --- /dev/null +++ b/src/sources/radkfile/common.ts @@ -0,0 +1 @@ +export const SOURCE_ID = 'radkfile' as const; diff --git a/src/sources/radkfile/downloader.ts b/src/sources/radkfile/downloader.ts new file mode 100644 index 0000000..fe0185f --- /dev/null +++ b/src/sources/radkfile/downloader.ts @@ -0,0 +1,25 @@ +import fetchPonyfill from 'fetch-ponyfill'; +import { PassThrough } from 'stream'; +import { createGunzip } from 'zlib'; +import { SOURCE_ID } from './common'; + +export interface CreateDownloaderParams { + type: typeof SOURCE_ID; + url?: string; +} + +const DEFAULT_SOURCE_URL = 'http://ftp.edrdg.org/pub/Nihongo/radkfile.gz' as const; + +export const createDownloader = async (params: Omit) => { + const { url = DEFAULT_SOURCE_URL } = params; + const { fetch } = fetchPonyfill(); + + const response = await fetch(url); + if (!response.ok) { + throw new Error(`Failed to download: ${url}`); + } + + const rawStream = response.body as unknown as PassThrough; + return rawStream + .pipe(createGunzip()); +}; diff --git a/src/sources/radkfile/index.ts b/src/sources/radkfile/index.ts new file mode 100644 index 0000000..22fa288 --- /dev/null +++ b/src/sources/radkfile/index.ts @@ -0,0 +1,2 @@ +export * from './common'; +export * from './downloader'; diff --git a/test/index.test.ts b/test/index.test.ts index b36868b..e75377e 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -1,6 +1,6 @@ import { createReadStream, createWriteStream } from 'fs'; import { describe, it, expect } from 'vitest'; -import { createDownloader, Kanjidic, Jmdict, createXmlToJsonLines } from '../src'; +import { createDownloader, Kanjidic, JMdict, createXmlToJsonLines } from '../src'; describe('downloader', () => { describe.skip('kanjidic', () => { @@ -40,7 +40,7 @@ describe('downloader', () => { describe.skip('jmdict', () => { it('works', async () => { const readStream = await createDownloader({ - type: Jmdict.SOURCE_ID, + type: JMdict.SOURCE_ID, }); return new Promise((resolve) => {