Minor refactor

Make event names consistent and improve the API.
1 年之前 · 9252b037ca
--- a/.env.example
+++ b/.env.example
@@ -1 +1,5 @@
 # OpenAI API key.
 OPENAI_API_KEY=

 # OpenAI organization ID.
 OPENAI_ORGANIZATION_ID=
--- a/README.md
+++ b/README.md
@@ -0,0 +1,14 @@
 # webvideo-transcript-summary-core

 This is the core SDK for summarizing transcripts for Web videos.

 ## Setup

 1. Install dependencies.
 2. Copy `.env.example` to `.env` and fill in the correct values.
 3. Refer to `prompt-template.hbs` and create the following lists of prompts:
   * `prompts/normalize-transcript-text.hbs` is needed for adding proper punctuation to the transcript text.
   * `prompts/summarize-transcript.hbs` is needed to perform the actual summarization.
 4. Run `npm run build` to build the project.

 Use `npm link` to use it in your own project.
--- a/package.json
+++ b/package.json
@@ -49,5 +49,22 @@
  "dependencies": {
    "fetch-ponyfill": "^7.1.0",
    "handlebars": "^4.7.7"
  },
  "types": "./dist/types/index.d.ts",
  "main": "./dist/cjs/production/index.js",
  "module": "./dist/esm/production/index.js",
  "exports": {
    ".": {
      "development": {
        "require": "./dist/cjs/development/index.js",
        "import": "./dist/esm/development/index.js"
      },
      "require": "./dist/cjs/production/index.js",
      "import": "./dist/esm/production/index.js",
      "types": "./dist/types/index.d.ts"
    }
  },
  "typesVersions": {
    "*": {}
  }
 }
--- a/src/common.ts
+++ b/src/common.ts
@@ -1,18 +1,19 @@
 type ProcessEvent = { type: string, phase: string, command?: string };
 /**
  * Payload passed to `'process'` listeners while a summarization run advances.
  */
 export type ProcessEvent = {
  // Name of the step being reported, e.g. 'extract-data' or 'fetch-transcript'.
  processType: string,
  // Stage within the step, e.g. 'start' or 'success'.
  phase: string,
  // NOTE(review): no emitter visible here sets this field — confirm its intended use.
  command?: string,
  // Output of the step serialized as a string, when the step produces one.
  content?: string,
  // Content type describing `content`, e.g. 'application/json' or 'text/plain'.
  contentType?: string,
 };

 // Listener signature for 'process' events.
 type ProcessEventCallback = (event: ProcessEvent) => void;

 // Listener signature for 'error' events; the argument is the Error itself.
 type ErrorEventCallback = (event: Error) => void;

 // Payload of a 'success' event: a body of unspecified type tagged with its content type.
 type SuccessEvent = { contentType: string, content: unknown };

 // Listener signature for 'success' events.
 type SuccessEventCallback = (event: SuccessEvent) => void;

 /**
  * Event emitter returned to callers that want to run a summarization and
  * observe it through typed `on` overloads, one per supported event name.
  */
 export interface SummarizerEventEmitter extends NodeJS.EventEmitter {
  // NOTE(review): presumably starts the run and triggers the events below — confirm against implementations.
  process(): void;
  // Progress notification; the payload describes the step and its phase.
  on(eventType: 'process', callback: ProcessEventCallback): this;
  // Failure notification; the payload is the Error.
  on(eventType: 'error', callback: ErrorEventCallback): this;
  // Final result notification; the payload carries a content body and its content type.
  on(eventType: 'success', callback: SuccessEventCallback): this;
  // Notification without a payload. NOTE(review): presumably marks the end of the run — confirm.
  on(eventType: 'end', callback: () => void): this;
 }

--- a/src/index.ts
+++ b/src/index.ts
@@ -37,3 +37,5 @@ export const createSummarizer = (params: CreateSummarizerParams): SummarizerEven

  throw new TypeError(`Invalid video type: "${videoType}". Valid values are: ${JSON.stringify(Object.values(VideoType))}`);
 };

 export * from './common';
--- a/src/video-types/youtube/index.ts
+++ b/src/video-types/youtube/index.ts
@@ -4,7 +4,7 @@ import {
  retrieveVideoId,
  getVideoPage,
  extractDataFromPage,
  fetchTranscriptItems, TranscriptResponse,
  fetchTranscriptItems,
 } from './transcript';
 import { normalizeTranscriptText, summarizeTranscript } from '../../summarizer';

@@ -23,10 +23,9 @@ export class YouTubeSummarizerEventEmitter extends EventEmitter implements Summa
      ...config
    } = this.params;
    const identifier = retrieveVideoId(url);
    let transcripts: TranscriptResponse[] = [];

    this.emit('process', {
      type: 'extract-data',
      processType: 'extract-data',
      phase: 'download-page',
    });

@@ -34,26 +33,26 @@ export class YouTubeSummarizerEventEmitter extends EventEmitter implements Summa
      .then((videoPageBody) => {
        const pageData = extractDataFromPage(videoPageBody);
        this.emit('process', {
          type: 'extract-data',
          processType: 'extract-data',
          phase: 'success',
        });

        this.emit('process', {
          type: 'fetch-transcript',
          processType: 'fetch-transcript',
          phase: 'start',
        });
        return fetchTranscriptItems(pageData, config);
      })
      .then((transcript) => {
        this.emit('process', {
          type: 'fetch-transcript',
          processType: 'fetch-transcript',
          phase: 'success',
          content: JSON.stringify(transcript),
          contentType: 'application/json',
        });

        transcripts = transcript;

        this.emit('process', {
          type: 'normalize-caption',
          processType: 'normalize-caption',
          phase: 'start',
        });

@@ -65,12 +64,14 @@ export class YouTubeSummarizerEventEmitter extends EventEmitter implements Summa
      })
      .then((normalizedCaption) => {
        this.emit('process', {
          type: 'normalize-caption',
          processType: 'normalize-transcript',
          phase: 'success',
          content: normalizedCaption,
          contentType: 'text/plain',
        });

        this.emit('process', {
          type: 'summarize-caption',
          processType: 'summarize-transcript',
          phase: 'start',
        });

@@ -78,14 +79,12 @@ export class YouTubeSummarizerEventEmitter extends EventEmitter implements Summa
      })
      .then((summary) => {
        this.emit('process', {
          type: 'summarize-caption',
          processType: 'summarize-transcript',
          phase: 'success',
          data: summary,
          contentType: 'text/plain',
        });

        this.emit('success', {
          contentType: 'application/json',
          content: JSON.stringify({ transcripts, summary }),
        });
        this.emit('end');
      })
      .catch((error) => {
--- a/src/video-types/youtube/transcript.ts
+++ b/src/video-types/youtube/transcript.ts
@@ -123,6 +123,20 @@ interface VideoPageData {
  clickTrackingParams?: string;
 }

 /**
  * Shape of one raw transcript entry as read by `fetchTranscriptItems`.
  *
  * Each entry wraps a list of cue renderers. A renderer carries the caption
  * text plus its duration and start offset, both delivered as strings
  * (the consumer parses them as base-10 integers).
  */
 export interface Cue {
  transcriptCueGroupRenderer: {
    cues: Array<{
      transcriptCueRenderer: {
        // Caption text of the cue.
        cue: { simpleText: string };
        // Numeric string; presumably milliseconds, going by the field name.
        durationMs: string;
        // Numeric string; presumably milliseconds, going by the field name.
        startOffsetMs: string;
      };
    }>;
  };
 }

 export const extractDataFromPage = (page: string): VideoPageData => ({
  innerTubeApiKey: extractInnterTubeApiKeyFromPage(page),
  serializedShareEntity: extractSerializedShareEntityFromPage(page),
@@ -217,16 +231,18 @@ export const fetchTranscriptItems = async (pageData: VideoPageData, config?: Tra
    .body
    .transcriptBodyRenderer;

  return transcripts.map((cue) => ({
  return transcripts.map((cue: Cue) => ({
    text: cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer
      .cue.simpleText,
    duration: parseInt(
      cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer
        .durationMs,
      10,
    ),
    offset: parseInt(
      cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer
        .startOffsetMs,
      10,
    ),
  })) as TranscriptResponse[];
 };
--- a/test/index.test.ts
+++ b/test/index.test.ts
@@ -1,7 +1,16 @@
 import { config } from 'dotenv';
 import { writeFileSync } from 'fs';
 import { beforeAll, describe, it } from 'vitest';
 import { config } from 'dotenv';
 import { createSummarizer, VideoType } from '../src';
 import { writeFileSync } from 'fs';

 /**
  * Writes the text of every transcript item to `filename`, one item per line.
  *
  * `content` is a JSON string holding either a bare array of transcript items
  * (the shape carried by the 'fetch-transcript' success event) or an object
  * whose `transcripts` property is such an array.
  *
  * @param filename Path of the file to (over)write.
  * @param content JSON-encoded transcript data.
  * @throws SyntaxError when `content` is not valid JSON.
  * @throws TypeError when the parsed JSON holds no transcript array.
  */
 const writeTranscript = (filename: string, content: string) => {
  const parsed: unknown = JSON.parse(content);

  // The emitter serializes the transcript items directly as an array; the
  // wrapped `{ transcripts, summary }` form is still accepted so existing
  // callers keep working.
  let items: unknown = parsed;
  if (!Array.isArray(parsed) && typeof parsed === 'object' && parsed !== null) {
    items = (parsed as { transcripts?: unknown }).transcripts;
  }

  if (!Array.isArray(items)) {
    throw new TypeError(
      'Transcript content must be a JSON array of items or an object with a "transcripts" array.',
    );
  }

  const transcriptText = (items as { text: string }[]).map((t) => t.text).join('\n');
  writeFileSync(filename, transcriptText);
 };

 describe('blah', () => {
  beforeAll(() => {
@@ -15,14 +24,22 @@ describe('blah', () => {
      openaiApiKey: process.env.OPENAI_API_KEY as string,
    });

    summarizer.on('success', (data) => {
      const transcripts = JSON.parse(data.content as string) as {
        transcripts: { text: string }[],
        summary: string,
      };
      const transcriptText = transcripts.transcripts.map((t) => t.text).join('\n');
      writeFileSync('transcript.txt', transcriptText);
      writeFileSync('summary.txt', transcripts.summary);
    summarizer.on('process', (data) => {
      if (data.phase === 'success') {
        switch (data.processType) {
          case 'fetch-transcript':
            writeTranscript('transcript.txt', data.content as string);
            break;
          case 'normalize-transcript':
            writeFileSync('normalized.txt', data.content as string);
            break;
          case 'summarize-transcript':
            writeFileSync('summary.txt', data.content as string);
            break;
          default:
            break;
        }
      }
    });

    summarizer.on('error', (err) => {