Browse Source

Minor refactor

Make event names consistent and improve the API.
master
TheoryOfNekomata 1 year ago
parent
commit
9252b037ca
8 changed files with 103 additions and 33 deletions
  1. +4
    -0
      .env.example
  2. +14
    -0
      README.md
  3. +17
    -0
      package.json
  4. +7
    -6
      src/common.ts
  5. +2
    -0
      src/index.ts
  6. +15
    -16
      src/video-types/youtube/index.ts
  7. +17
    -1
      src/video-types/youtube/transcript.ts
  8. +27
    -10
      test/index.test.ts

+ 4
- 0
.env.example View File

@@ -1 +1,5 @@
# OpenAI API key.
OPENAI_API_KEY=

# OpenAI organization ID.
OPENAI_ORGANIZATION_ID=

+ 14
- 0
README.md View File

@@ -0,0 +1,14 @@
# webvideo-transcript-summary-core

This is the core SDK for summarizing transcripts of web videos.

## Setup

1. Install dependencies.
2. Copy `.env.example` to `.env` and fill in the correct values.
3. Refer to `prompt-template.hbs` and create lists of prompts:
* `prompts/normalize-transcript-text.hbs` is needed to add proper punctuation to the transcript text.
* `prompts/summarize-transcript.hbs` is needed to perform actual summarization.
4. Run `npm run build` to build the project.

Use `npm link` to use it in your own project.

+ 17
- 0
package.json View File

@@ -49,5 +49,22 @@
"dependencies": {
"fetch-ponyfill": "^7.1.0",
"handlebars": "^4.7.7"
},
"types": "./dist/types/index.d.ts",
"main": "./dist/cjs/production/index.js",
"module": "./dist/esm/production/index.js",
"exports": {
".": {
"development": {
"require": "./dist/cjs/development/index.js",
"import": "./dist/esm/development/index.js"
},
"require": "./dist/cjs/production/index.js",
"import": "./dist/esm/production/index.js",
"types": "./dist/types/index.d.ts"
}
},
"typesVersions": {
"*": {}
}
}

+ 7
- 6
src/common.ts View File

@@ -1,18 +1,19 @@
type ProcessEvent = { type: string, phase: string, command?: string };
/**
 * Payload passed to `process` event listeners (see `ProcessEventCallback`).
 *
 * Each event names the step being reported and the phase that step has
 * reached; some events additionally carry a textual payload.
 */
export type ProcessEvent = {
  /** Name of the step being reported, e.g. `'fetch-transcript'`. */
  processType: string;
  /** Phase of the step, e.g. `'start'` or `'success'`. */
  phase: string;
  /** NOTE(review): not set by any emitter visible here — confirm intended use. */
  command?: string;
  /** Textual payload produced by the step, when there is one. */
  content?: string;
  /** Media type describing `content`, e.g. `'text/plain'` or `'application/json'`. */
  contentType?: string;
};

type ProcessEventCallback = (event: ProcessEvent) => void;

type ErrorEventCallback = (event: Error) => void;

type SuccessEvent = { contentType: string, content: unknown };

type SuccessEventCallback = (event: SuccessEvent) => void;

/**
 * Event emitter contract implemented by summarizers.
 *
 * Listeners are registered through the typed `on` overloads below; each
 * overload pairs an event name with the callback signature for that event.
 */
export interface SummarizerEventEmitter extends NodeJS.EventEmitter {
// NOTE(review): presumably starts the summarization run — confirm against the implementing class.
process(): void;
// Step-by-step progress notifications; the callback receives a ProcessEvent.
on(eventType: 'process', callback: ProcessEventCallback): this;
// Failure notification; the callback receives the Error.
on(eventType: 'error', callback: ErrorEventCallback): this;
// Final result notification; the callback receives a SuccessEvent.
on(eventType: 'success', callback: SuccessEventCallback): this;
// Emitted with no arguments.
on(eventType: 'end', callback: () => void): this;
}



+ 2
- 0
src/index.ts View File

@@ -37,3 +37,5 @@ export const createSummarizer = (params: CreateSummarizerParams): SummarizerEven

throw new TypeError(`Invalid video type: "${videoType}". Valid values are: ${JSON.stringify(Object.values(VideoType))}`);
};

export * from './common';

+ 15
- 16
src/video-types/youtube/index.ts View File

@@ -4,7 +4,7 @@ import {
retrieveVideoId,
getVideoPage,
extractDataFromPage,
fetchTranscriptItems, TranscriptResponse,
fetchTranscriptItems,
} from './transcript';
import { normalizeTranscriptText, summarizeTranscript } from '../../summarizer';

@@ -23,10 +23,9 @@ export class YouTubeSummarizerEventEmitter extends EventEmitter implements Summa
...config
} = this.params;
const identifier = retrieveVideoId(url);
let transcripts: TranscriptResponse[] = [];

this.emit('process', {
type: 'extract-data',
processType: 'extract-data',
phase: 'download-page',
});

@@ -34,26 +33,26 @@ export class YouTubeSummarizerEventEmitter extends EventEmitter implements Summa
.then((videoPageBody) => {
const pageData = extractDataFromPage(videoPageBody);
this.emit('process', {
type: 'extract-data',
processType: 'extract-data',
phase: 'success',
});

this.emit('process', {
type: 'fetch-transcript',
processType: 'fetch-transcript',
phase: 'start',
});
return fetchTranscriptItems(pageData, config);
})
.then((transcript) => {
this.emit('process', {
type: 'fetch-transcript',
processType: 'fetch-transcript',
phase: 'success',
content: JSON.stringify(transcript),
contentType: 'application/json',
});

transcripts = transcript;

// Announce the start of transcript normalization. The process name must match
// the one used by the corresponding success event ('normalize-transcript') so
// listeners can pair the start and success phases of the same step.
this.emit('process', {
type: 'normalize-caption',
processType: 'normalize-transcript',
phase: 'start',
});

@@ -65,12 +64,14 @@ export class YouTubeSummarizerEventEmitter extends EventEmitter implements Summa
})
.then((normalizedCaption) => {
this.emit('process', {
type: 'normalize-caption',
processType: 'normalize-transcript',
phase: 'success',
content: normalizedCaption,
contentType: 'text/plain',
});

this.emit('process', {
type: 'summarize-caption',
processType: 'summarize-transcript',
phase: 'start',
});

@@ -78,14 +79,12 @@ export class YouTubeSummarizerEventEmitter extends EventEmitter implements Summa
})
.then((summary) => {
// Report the finished summary. ProcessEvent carries its payload in `content`
// (described by `contentType`), which is the field listeners read.
this.emit('process', {
type: 'summarize-caption',
processType: 'summarize-transcript',
phase: 'success',
content: summary,
contentType: 'text/plain',
});

this.emit('success', {
contentType: 'application/json',
content: JSON.stringify({ transcripts, summary }),
});
this.emit('end');
})
.catch((error) => {


+ 17
- 1
src/video-types/youtube/transcript.ts View File

@@ -123,6 +123,20 @@ interface VideoPageData {
clickTrackingParams?: string;
}

/**
 * Shape of a single transcript entry as consumed by `fetchTranscriptItems`.
 *
 * Only the fields read by the mapping code are described: the text of the
 * cue plus its duration and start offset, both delivered as strings and
 * parsed with `parseInt(..., 10)` by the consumer.
 */
export interface Cue {
  transcriptCueGroupRenderer: {
    /** Cue list for the group; the consumer reads the first element. */
    cues: Array<{
      transcriptCueRenderer: {
        cue: { simpleText: string };
        durationMs: string;
        startOffsetMs: string;
      };
    }>;
  };
}

export const extractDataFromPage = (page: string): VideoPageData => ({
innerTubeApiKey: extractInnterTubeApiKeyFromPage(page),
serializedShareEntity: extractSerializedShareEntityFromPage(page),
@@ -217,16 +231,18 @@ export const fetchTranscriptItems = async (pageData: VideoPageData, config?: Tra
.body
.transcriptBodyRenderer;

return transcripts.map((cue) => ({
return transcripts.map((cue: Cue) => ({
text: cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer
.cue.simpleText,
duration: parseInt(
cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer
.durationMs,
10,
),
offset: parseInt(
cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer
.startOffsetMs,
10,
),
})) as TranscriptResponse[];
};

+ 27
- 10
test/index.test.ts View File

@@ -1,7 +1,16 @@
import { config } from 'dotenv';
import { writeFileSync } from 'fs';
import { beforeAll, describe, it } from 'vitest';
import { config } from 'dotenv';
import { createSummarizer, VideoType } from '../src';
import { writeFileSync } from 'fs';

/**
 * Writes the text of every transcript entry to `filename`, one entry per line.
 *
 * `content` is a JSON string in one of two shapes:
 * - an array of entries (`[{ text }, ...]`), which is what the
 *   `fetch-transcript` process event carries, or
 * - an object holding the entries under `transcripts`
 *   (`{ transcripts: [{ text }, ...] }`), the shape originally expected here.
 *
 * @param filename - Path of the file to write.
 * @param content - JSON-encoded transcript payload.
 * @throws SyntaxError if `content` is not valid JSON.
 * @throws TypeError if the payload contains no transcript array.
 */
const writeTranscript = (filename: string, content: string) => {
  const parsed: unknown = JSON.parse(content);
  // Accept the bare array emitted by the summarizer as well as the wrapped form.
  const entries: unknown = Array.isArray(parsed)
    ? parsed
    : (parsed as { transcripts?: unknown } | null)?.transcripts;
  if (!Array.isArray(entries)) {
    throw new TypeError(
      'Transcript content must be a JSON array or an object with a "transcripts" array.',
    );
  }
  const transcriptText = entries
    .map((entry: { text: string }) => entry.text)
    .join('\n');
  writeFileSync(filename, transcriptText);
};

describe('blah', () => {
beforeAll(() => {
@@ -15,14 +24,22 @@ describe('blah', () => {
openaiApiKey: process.env.OPENAI_API_KEY as string,
});

summarizer.on('success', (data) => {
const transcripts = JSON.parse(data.content as string) as {
transcripts: { text: string }[],
summary: string,
};
const transcriptText = transcripts.transcripts.map((t) => t.text).join('\n');
writeFileSync('transcript.txt', transcriptText);
writeFileSync('summary.txt', transcripts.summary);
summarizer.on('process', (data) => {
if (data.phase === 'success') {
switch (data.processType) {
case 'fetch-transcript':
writeTranscript('transcript.txt', data.content as string);
break;
case 'normalize-transcript':
writeFileSync('normalized.txt', data.content as string);
break;
case 'summarize-transcript':
writeFileSync('summary.txt', data.content as string);
break;
default:
break;
}
}
});

summarizer.on('error', (err) => {


Loading…
Cancel
Save