Make structure more extensible. Also use mio-ai for consuming OpenAI endpoints.master
@@ -1,5 +1,14 @@ | |||||
{ | { | ||||
"root": true, | "root": true, | ||||
"rules": { | |||||
"@typescript-eslint/no-unsafe-argument": "off", | |||||
"@typescript-eslint/no-unsafe-member-access": "off", | |||||
"@typescript-eslint/no-unsafe-assignment": "off", | |||||
"@typescript-eslint/no-unsafe-call": "off", | |||||
"@typescript-eslint/no-unsafe-return": "off", | |||||
"@typescript-eslint/no-namespace": "off", | |||||
"@typescript-eslint/restrict-template-expressions": "off" | |||||
}, | |||||
"extends": [ | "extends": [ | ||||
"lxsmnsyc/typescript" | "lxsmnsyc/typescript" | ||||
], | ], | ||||
@@ -47,6 +47,7 @@ | |||||
"access": "public" | "access": "public" | ||||
}, | }, | ||||
"dependencies": { | "dependencies": { | ||||
"@modal-sh/mio-ai": "link:../../../openai-utils", | |||||
"fetch-ponyfill": "^7.1.0", | "fetch-ponyfill": "^7.1.0", | ||||
"handlebars": "^4.7.7" | "handlebars": "^4.7.7" | ||||
}, | }, | ||||
@@ -1,3 +1,3 @@ | |||||
{ | { | ||||
"target": "es2018" | |||||
} | |||||
"target": "esnext" | |||||
} |
@@ -1,35 +0,0 @@ | |||||
export type ProcessEvent = { | |||||
processType: string, | |||||
phase: string, | |||||
command?: string, | |||||
content?: string, | |||||
contentType?: string, | |||||
}; | |||||
export type ProcessEventCallback = (event: ProcessEvent) => void; | |||||
export type ErrorEventCallback = (event: Error) => void; | |||||
export interface SummarizerProcessParams { | |||||
url: string; | |||||
language?: string; | |||||
country?: string; | |||||
} | |||||
export interface SummarizerEventEmitter extends NodeJS.EventEmitter { | |||||
process<T extends SummarizerProcessParams>(params: T): void; | |||||
on(eventType: 'process', callback: ProcessEventCallback): this; | |||||
on(eventType: 'error', callback: ErrorEventCallback): this; | |||||
on(eventType: 'end', callback: () => void): this; | |||||
} | |||||
export interface OpenAiParams { | |||||
apiKey: string; | |||||
organizationId?: string; | |||||
model?: string; | |||||
temperature?: number; | |||||
} | |||||
export interface CreateBaseSummarizerParams { | |||||
openAiParams: OpenAiParams; | |||||
} |
@@ -1,33 +1,44 @@ | |||||
import { SummarizerEventEmitter } from './common'; | |||||
import { | |||||
CreateYouTubeSummarizerParams, | |||||
YouTubeSummarizerEventEmitter, | |||||
} from './video-types/youtube'; | |||||
export enum VideoType { | |||||
YOUTUBE = 'youtube', | |||||
} | |||||
export interface CreateSummarizerParams extends CreateYouTubeSummarizerParams { | |||||
type: VideoType; | |||||
} | |||||
export const createSummarizer = (params: CreateSummarizerParams): SummarizerEventEmitter => { | |||||
const { | |||||
type: videoType, | |||||
openAiParams, | |||||
} = params; | |||||
switch (videoType as string) { | |||||
case VideoType.YOUTUBE: | |||||
return new YouTubeSummarizerEventEmitter({ | |||||
openAiParams, | |||||
}); | |||||
default: | |||||
break; | |||||
import { OpenAi } from '@modal-sh/mio-ai'; | |||||
import { SummarizerEventEmitter, SummarizerEventEmitterImpl } from './summarizer'; | |||||
import * as YouTube from './video-types/youtube'; | |||||
// Video-type modules that createTranscriptFetcher searches; a module is
// selected when its VIDEO_TYPE equals the requested `type`.
const SUPPORTED_VIDEO_TYPES = [ | |||||
YouTube, | |||||
] as const; | |||||
// Parameters accepted by createTranscriptFetcher; currently only the YouTube
// module contributes a member.
export type CreateTranscriptFetcherParams = ( | |||||
YouTube.CreateTranscriptFetcherParams | |||||
); | |||||
// Alias of the YouTube module's SummarizerProcessParams.
export type SummarizerProcessParams = ( | |||||
YouTube.SummarizerProcessParams | |||||
); | |||||
// Literal type of the supported video type identifier(s).
export type VideoType = typeof YouTube.VIDEO_TYPE; | |||||
export * from './summarizer'; | |||||
export * from './transcript'; | |||||
export * as YouTube from './video-types/youtube'; | |||||
export const createTranscriptFetcher = (params: CreateTranscriptFetcherParams) => { | |||||
const { type: videoType } = params; | |||||
const theVideoTypeModule = SUPPORTED_VIDEO_TYPES | |||||
.find((videoTypeModule) => videoTypeModule.VIDEO_TYPE === videoType); | |||||
if (!theVideoTypeModule) { | |||||
const validVideoTypes = SUPPORTED_VIDEO_TYPES.map((videoTypeModule) => videoTypeModule.VIDEO_TYPE).join(', '); | |||||
throw new TypeError(`Invalid video type: "${videoType}". Valid values are: ${validVideoTypes}`); | |||||
} | } | ||||
throw new TypeError(`Invalid video type: "${videoType}". Valid values are: ${JSON.stringify(Object.values(VideoType))}`); | |||||
// shadow the original method for protection | |||||
return (...transcriptFetcherParams: Parameters<typeof theVideoTypeModule.getRawTranscript>) => ( | |||||
theVideoTypeModule.getRawTranscript(...transcriptFetcherParams) | |||||
); | |||||
}; | }; | ||||
export * from './common'; | |||||
/**
 * Builds a summarizer backed by the given OpenAI configuration.
 *
 * @param params - Configuration handed unchanged to `SummarizerEventEmitterImpl`.
 * @returns The new instance, exposed through the `SummarizerEventEmitter` interface.
 */
export const createSummarizer = (params: OpenAi.Configuration): SummarizerEventEmitter => {
  const summarizer: SummarizerEventEmitter = new SummarizerEventEmitterImpl(params);
  return summarizer;
};
// Re-exports mio-ai's `OpenAi.ApiVersion.V1` under this package's own name.
export const OPENAI_API_VERSION = OpenAi.ApiVersion.V1 as const; |
@@ -1,122 +1,83 @@ | |||||
import fetchPonyfill from 'fetch-ponyfill'; | |||||
import Handlebars from 'handlebars'; | |||||
import { resolve } from 'path'; | |||||
import { readFile } from 'fs/promises'; | |||||
import * as config from './config'; | |||||
import { OpenAiParams } from './common'; | |||||
import { OpenAi, createAiClient } from '@modal-sh/mio-ai'; | |||||
import { EventEmitter } from 'events'; | |||||
import { BaseTranscriptItem } from './transcript'; | |||||
export interface MakeAiCallParams { | |||||
prompts: string[]; | |||||
openAiParams: OpenAiParams; | |||||
} | |||||
export type DataEventCallback = (event: string) => void; | |||||
export class AiCallError extends Error { | |||||
constructor(message: string, public readonly response: Response) { | |||||
super(message); | |||||
this.name = 'AiCallError'; | |||||
} | |||||
} | |||||
export type ErrorEventCallback = (event: Error) => void; | |||||
const makeAiCall = async (params: MakeAiCallParams): Promise<string> => { | |||||
const { | |||||
prompts, | |||||
openAiParams: { | |||||
apiKey, | |||||
organizationId, | |||||
model = 'gpt-3.5-turbo', | |||||
temperature = 0.6, | |||||
}, | |||||
} = params; | |||||
/**
 * Contract of a summarizer: an event emitter that can normalize transcript
 * items into text and summarize a transcript, reporting through events.
 *
 * `T` is the transcript item type accepted by `normalize`.
 */
export interface SummarizerEventEmitter<T = unknown> extends NodeJS.EventEmitter { | |||||
/** Turns transcript items into a single string, delivered asynchronously. */
normalize(transcriptItems: T[]): Promise<string>; | |||||
/** Starts summarizing; returns nothing, so results are observed via `on(...)`. */
summarize(transcript: string): void; | |||||
/** Subscribes to 'data' events, whose payload is a string. */
on(eventType: 'data', callback: DataEventCallback): this; | |||||
/** Subscribes to 'error' events, whose payload is an `Error`. */
on(eventType: 'error', callback: ErrorEventCallback): this; | |||||
/** Subscribes to the payload-less 'end' event. */
on(eventType: 'end', callback: () => void): this; | |||||
} | |||||
const headers: Record<string, string> = { | |||||
'Content-Type': 'application/json', | |||||
Accept: 'application/json', | |||||
Authorization: `Bearer ${apiKey}`, | |||||
}; | |||||
export class SummarizerEventEmitterImpl<T extends BaseTranscriptItem> extends EventEmitter { | |||||
private readonly openAiClient: OpenAi.PlatformEventEmitter; | |||||
if (organizationId) { | |||||
headers['OpenAI-Organization'] = organizationId; | |||||
/**
 * Creates the summarizer and its OpenAI client.
 *
 * @param params - OpenAI configuration, passed to `createAiClient` as the
 * `platformConfiguration` for the `OpenAi.PLATFORM_ID` platform.
 */
constructor(params: OpenAi.Configuration) { | |||||
super(); | |||||
// One client is created here and reused by every normalize/summarize call.
this.openAiClient = createAiClient({ | |||||
platform: OpenAi.PLATFORM_ID, | |||||
platformConfiguration: params, | |||||
}); | |||||
} | } | ||||
const { fetch } = fetchPonyfill(); | |||||
const response = await fetch( | |||||
new URL('/v1/chat/completions', 'https://api.openai.com'), | |||||
{ | |||||
method: 'POST', | |||||
headers, | |||||
body: JSON.stringify({ | |||||
model, | |||||
temperature, | |||||
messages: [ | |||||
{ | |||||
role: 'user', | |||||
content: prompts[Math.floor(Math.random() * prompts.length)].trim(), | |||||
}, | |||||
], | |||||
}), | |||||
}, | |||||
); | |||||
if (!response.ok) { | |||||
const { error } = await response.json(); | |||||
throw new AiCallError(`OpenAI API call failed with status ${response.status}: ${error.message}`, response); | |||||
/**
 * Normalizes a raw transcript by asking the OpenAI edit endpoint to fix its
 * punctuation and capitalization.
 *
 * @param transcript - Items whose `text` fields are joined with single spaces
 * to form the edit input.
 * @returns A promise that resolves with the text of the first choice carried
 * by the client's 'data' event. It rejects with whatever the client emits on
 * 'error', or with an `Error` when the 'data' event has no text for the
 * first choice.
 */
normalize(transcript: T[]) {
  return new Promise<string>((resolve, reject) => {
    const onData = (data: OpenAi.CreateEditDataEvent) => {
      // `once` only detaches the listener that fired; drop its counterpart so
      // it does not pile up on the client that is shared across calls.
      this.openAiClient.off('error', onError);
      const normalizedTranscript = data.choices[0]?.text;
      if (typeof normalizedTranscript !== 'string') {
        // Reject instead of throwing inside the listener, where nobody could catch it.
        reject(new Error('OpenAI edit response did not contain any text.'));
        return;
      }
      resolve(normalizedTranscript);
    };
    const onError = (error: unknown) => {
      this.openAiClient.off('data', onData);
      reject(error);
    };

    this.openAiClient.once<OpenAi.CreateEditDataEvent>('data', onData);
    this.openAiClient.once('error', onError);
    this.openAiClient.createEdit({
      input: transcript.map((item) => item.text).join(' '),
      instruction: 'Put proper punctuation and correct capitalization',
      model: OpenAi.EditModel.TEXT_DAVINCI_EDIT_001,
    });
  });
}
const { choices } = await response.json(); | |||||
// should we use all the response choices? | |||||
return choices[0].message.content; | |||||
}; | |||||
const compilePrompts = async (filename: string, params: Record<string, unknown>): Promise<string[]> => { | |||||
const rawPromptText = await readFile(resolve(config.openAi.promptsDir, filename), 'utf-8'); | |||||
const fill = Handlebars.compile(rawPromptText, { noEscape: true }); | |||||
const filledText = fill(params); | |||||
return filledText.split('---').map((s) => s.trim()); | |||||
}; | |||||
export interface NormalizeTranscriptTextParams { | |||||
rawTranscriptText: string, | |||||
openAiParams: OpenAiParams, | |||||
} | |||||
export const normalizeTranscriptText = async (params: NormalizeTranscriptTextParams) => { | |||||
const { | |||||
rawTranscriptText, | |||||
openAiParams, | |||||
} = params; | |||||
const prompts = await compilePrompts( | |||||
'normalize-transcript-text.hbs', | |||||
{ | |||||
transcript: rawTranscriptText, | |||||
}, | |||||
); | |||||
return makeAiCall({ | |||||
prompts, | |||||
openAiParams, | |||||
}); | |||||
}; | |||||
export interface SummarizeTranscriptParams { | |||||
normalizedTranscript: string, | |||||
openAiParams: OpenAiParams, | |||||
/**
 * Requests a chat-completion summary of the transcript and re-emits the
 * client's events on this emitter.
 *
 * Emits 'data' with each string `delta.content` of the first choice, 'error'
 * with the client's error, and 'end' when the client ends.
 *
 * @param normalizedTranscript - Transcript text embedded in the user prompt.
 */
summarize(normalizedTranscript: string) {
  const onData = (data: OpenAi.ChatCompletionChunkDataEvent) => {
    const theContent = data.choices[0].delta.content;
    // Chunks whose delta carries no string content are not forwarded.
    if (typeof theContent !== 'string') {
      return;
    }
    this.emit('data', theContent);
  };
  const onError = (error: unknown) => {
    this.openAiClient.off('data', onData);
    this.emit('error', error);
    // NOTE(review): the 'end' listener is deliberately left attached, as
    // before, so an 'end' following the error is still forwarded — confirm
    // whether the client always emits 'end' after 'error'.
  };
  const onEnd = () => {
    this.openAiClient.off('data', onData);
    // Without this, every successful call would leave its 'error' listener on
    // the shared client and a later failure would be re-emitted once per call.
    this.openAiClient.off('error', onError);
    this.emit('end');
  };

  this.openAiClient.on<OpenAi.ChatCompletionChunkDataEvent>('data', onData);
  this.openAiClient.once('error', onError);
  this.openAiClient.once('end', onEnd);
  this.openAiClient.createChatCompletion({
    model: OpenAi.ChatCompletionModel.GPT_3_5_TURBO,
    messages: [
      {
        role: OpenAi.MessageRole.SYSTEM,
        content: 'You are working on video transcripts.',
      },
      {
        role: OpenAi.MessageRole.USER,
        content: `Summarize the following transcript:\n\n${normalizedTranscript}`,
      },
    ],
  });
}
} | } | ||||
export const summarizeTranscript = async (params: SummarizeTranscriptParams) => { | |||||
const { | |||||
normalizedTranscript, | |||||
openAiParams, | |||||
} = params; | |||||
const prompts = await compilePrompts( | |||||
'summarize-transcript.hbs', | |||||
{ | |||||
transcript: normalizedTranscript, | |||||
}, | |||||
); | |||||
return makeAiCall({ | |||||
prompts, | |||||
openAiParams, | |||||
}); | |||||
}; |
@@ -0,0 +1,3 @@ | |||||
/**
 * Minimal shape shared by transcript items of every video type.
 * `SummarizerEventEmitterImpl.normalize` only reads `text`.
 */
export interface BaseTranscriptItem { | |||||
/** Text of this transcript item. */
text: string; | |||||
} |
@@ -0,0 +1 @@ | |||||
// Identifier of the YouTube video type; createTranscriptFetcher compares the
// requested `type` against this value to select this module.
export const VIDEO_TYPE = 'youtube' as const; |
@@ -0,0 +1,54 @@ | |||||
/* eslint-disable no-bitwise */ | |||||
// The 62 alphanumeric base64 symbols. The previous literal was missing `N`
// and contained a stray `j`, so two indices mapped to the same character.
const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789' as const;

// Base64 alphabet variants; index 64, where present, is the padding character.
const jda = [
  `${alphabet}+/=`,
  `${alphabet}+/`,
  `${alphabet}-_=`,
  `${alphabet}-_.`,
  `${alphabet}-_`,
] as const;

export type Nonce = string;

/**
 * Generates a nonce by base64-encoding the decimal string of `Math.random()`
 * with the `-_` alphabet and `.` as padding (`jda[3]`).
 *
 * @returns The encoded string; its length is always a multiple of four.
 */
export const generateNonce = (): Nonce => {
  const rnd = Math.random().toString();
  const symbols = jda[3];

  // The original read `rnd[i].charCodeAt(i)`, which is NaN for every i > 0 and
  // collapsed the nonce to a near-constant value.
  // NOTE(review): the final character of `rnd` is skipped, as in the original
  // loop bound; this looks unintentional — confirm before changing.
  const bytes: number[] = [];
  for (let i = 0; i < rnd.length - 1; i += 1) {
    bytes.push(rnd.charCodeAt(i));
  }

  let encoded = '';
  for (let offset = 0; offset < bytes.length; offset += 3) {
    const hasSecond = offset + 1 < bytes.length;
    const hasThird = offset + 2 < bytes.length;
    const first = bytes[offset];
    const second = hasSecond ? bytes[offset + 1] : 0;
    const third = hasThird ? bytes[offset + 2] : 0;

    const index0 = first >> 2;
    const index1 = ((first & 3) << 4) | (second >> 4);
    let index2 = ((second & 15) << 2) | (third >> 6);
    let index3 = third & 63;

    if (!hasThird) {
      index3 = 64;
      // The original tested `!q` right after `q = 64`, which can never be
      // true, so a group with a single byte was never padded twice.
      if (!hasSecond) {
        index2 = 64;
      }
    }

    encoded += symbols[index0] + symbols[index1] + symbols[index2] + symbols[index3];
  }
  return encoded;
};
@@ -1,90 +1,3 @@ | |||||
import { EventEmitter } from 'events'; | |||||
import { CreateBaseSummarizerParams, SummarizerEventEmitter, SummarizerProcessParams } from '../../common'; | |||||
import { | |||||
retrieveVideoId, | |||||
getVideoPage, | |||||
extractDataFromPage, | |||||
fetchTranscriptItems, | |||||
} from './transcript'; | |||||
import { normalizeTranscriptText, summarizeTranscript } from '../../summarizer'; | |||||
export type CreateYouTubeSummarizerParams = CreateBaseSummarizerParams | |||||
export class YouTubeSummarizerEventEmitter extends EventEmitter implements SummarizerEventEmitter { | |||||
constructor(private readonly params: CreateYouTubeSummarizerParams) { | |||||
super(); | |||||
} | |||||
process(params: SummarizerProcessParams) { | |||||
const { url, ...config } = params; | |||||
const { openAiParams } = this.params; | |||||
const identifier = retrieveVideoId(url); | |||||
this.emit('process', { | |||||
processType: 'extract-data', | |||||
phase: 'download-page', | |||||
}); | |||||
getVideoPage(identifier) | |||||
.then((videoPageBody) => { | |||||
const pageData = extractDataFromPage(videoPageBody); | |||||
this.emit('process', { | |||||
processType: 'extract-data', | |||||
phase: 'success', | |||||
}); | |||||
this.emit('process', { | |||||
processType: 'fetch-transcript', | |||||
phase: 'start', | |||||
}); | |||||
return fetchTranscriptItems(pageData, config); | |||||
}) | |||||
.then((transcript) => { | |||||
this.emit('process', { | |||||
processType: 'fetch-transcript', | |||||
phase: 'success', | |||||
content: JSON.stringify(transcript), | |||||
contentType: 'application/json', | |||||
}); | |||||
this.emit('process', { | |||||
processType: 'normalize-transcript', | |||||
phase: 'start', | |||||
}); | |||||
return normalizeTranscriptText({ | |||||
rawTranscriptText: transcript.map((item) => item.text).join(' '), | |||||
openAiParams, | |||||
}); | |||||
}) | |||||
.then((normalizedTranscript) => { | |||||
this.emit('process', { | |||||
processType: 'normalize-transcript', | |||||
phase: 'success', | |||||
content: normalizedTranscript, | |||||
contentType: 'text/plain', | |||||
}); | |||||
this.emit('process', { | |||||
processType: 'summarize-transcript', | |||||
phase: 'start', | |||||
}); | |||||
return summarizeTranscript({ normalizedTranscript, openAiParams }); | |||||
}) | |||||
.then((summary) => { | |||||
this.emit('process', { | |||||
processType: 'summarize-transcript', | |||||
phase: 'success', | |||||
content: summary, | |||||
contentType: 'text/plain', | |||||
}); | |||||
this.emit('end'); | |||||
}) | |||||
.catch((error) => { | |||||
this.emit('error', error); | |||||
this.emit('end'); | |||||
}); | |||||
} | |||||
} | |||||
export * from './common'; | |||||
export * from './transcript'; | |||||
export * from './errors'; |
@@ -4,40 +4,34 @@ | |||||
import fetchPonyfill from 'fetch-ponyfill'; | import fetchPonyfill from 'fetch-ponyfill'; | ||||
import { | import { | ||||
InvalidVideoIdError, | |||||
CannotRetrieveVideoPageError, | CannotRetrieveVideoPageError, | ||||
FetchTranscriptRequestFailureError, | FetchTranscriptRequestFailureError, | ||||
InnerTubeApiKeyMissingError, | InnerTubeApiKeyMissingError, | ||||
InvalidTranscriptActionsError, | InvalidTranscriptActionsError, | ||||
InvalidTranscriptResponseContextError, | InvalidTranscriptResponseContextError, | ||||
} from './errors'; | } from './errors'; | ||||
import { BaseTranscriptItem } from '../../transcript'; | |||||
import { generateNonce } from './crypto'; | |||||
import { retrieveVideoId } from './url'; | |||||
import { VIDEO_TYPE } from './common'; | |||||
const RE_YOUTUBE = /^.*(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*/im; | |||||
/** Parameters that select this module's transcript fetcher. */
export interface CreateTranscriptFetcherParams { | |||||
/** Must be this module's `VIDEO_TYPE` literal. */
type: typeof VIDEO_TYPE; | |||||
} | |||||
export interface TranscriptConfig { | |||||
interface TranscriptConfig { | |||||
language?: string; | language?: string; | ||||
country?: string; | country?: string; | ||||
} | } | ||||
export interface TranscriptResponse { | |||||
text: string; | |||||
export interface TranscriptItem extends BaseTranscriptItem { | |||||
duration: number; | duration: number; | ||||
offset: number; | offset: number; | ||||
} | } | ||||
const { fetch: f } = fetchPonyfill(); | const { fetch: f } = fetchPonyfill(); | ||||
export const retrieveVideoId = (videoId: string): string => { | |||||
if (videoId.length === 11) { | |||||
return videoId; | |||||
} | |||||
const matchId = videoId.match(RE_YOUTUBE); | |||||
if (matchId && matchId.length) { | |||||
return matchId[1]; | |||||
} | |||||
throw new InvalidVideoIdError('Impossible to retrieve Youtube video ID.'); | |||||
}; | |||||
export const getVideoPage = async (videoId: string): Promise<string> => { | |||||
const getVideoPage = async (videoId: string): Promise<string> => { | |||||
const identifier = retrieveVideoId(videoId); | const identifier = retrieveVideoId(videoId); | ||||
const videoUrl = new URL('/watch', 'https://www.youtube.com'); | const videoUrl = new URL('/watch', 'https://www.youtube.com'); | ||||
const videoUrlParams = new URLSearchParams({ | const videoUrlParams = new URLSearchParams({ | ||||
@@ -45,63 +39,13 @@ export const getVideoPage = async (videoId: string): Promise<string> => { | |||||
}); | }); | ||||
videoUrl.search = videoUrlParams.toString(); | videoUrl.search = videoUrlParams.toString(); | ||||
const videoPageResponse = await f(videoUrl.toString()); | const videoPageResponse = await f(videoUrl.toString()); | ||||
if (!videoPageResponse.ok) { | |||||
throw new CannotRetrieveVideoPageError('Unable to get video page.'); | |||||
} | |||||
return videoPageResponse.text(); | |||||
}; | |||||
const generateNonce = () => { | |||||
const rnd = Math.random().toString(); | |||||
const alphabet = 'ABCDEFGHIJKLMOPQRSTUVWXYZabcdefghjijklmnopqrstuvwxyz0123456789'; | |||||
const jda = [ | |||||
`${alphabet}+/=`, | |||||
`${alphabet}+/`, | |||||
`${alphabet}-_=`, | |||||
`${alphabet}-_.`, | |||||
`${alphabet}-_`, | |||||
]; | |||||
const b = jda[3]; | |||||
const a = []; | |||||
for (let i = 0; i < rnd.length - 1; i++) { | |||||
a.push(rnd[i].charCodeAt(i)); | |||||
} | |||||
let c = ''; | |||||
let d = 0; | |||||
let m; let n; let q; let r; let f; let | |||||
g; | |||||
while (d < a.length) { | |||||
f = a[d]; | |||||
g = d + 1 < a.length; | |||||
if (g) { | |||||
m = a[d + 1]; | |||||
} else { | |||||
m = 0; | |||||
} | |||||
n = d + 2 < a.length; | |||||
if (n) { | |||||
q = a[d + 2]; | |||||
} else { | |||||
q = 0; | |||||
} | |||||
r = f >> 2; | |||||
f = ((f & 3) << 4) | (m >> 4); | |||||
m = ((m & 15) << 2) | (q >> 6); | |||||
q &= 63; | |||||
if (!n) { | |||||
q = 64; | |||||
if (!q) { | |||||
m = 64; | |||||
} | |||||
} | |||||
c += b[r] + b[f] + b[m] + b[q]; | |||||
d += 3; | |||||
if (videoPageResponse.ok) { | |||||
return videoPageResponse.text(); | |||||
} | } | ||||
return c; | |||||
throw new CannotRetrieveVideoPageError('Unable to get video page.'); | |||||
}; | }; | ||||
const extractInnterTubeApiKeyFromPage = (videoPageBody: string): string => videoPageBody | |||||
const extractInnerTubeApiKeyFromPage = (videoPageBody: string): string => videoPageBody | |||||
.split('"INNERTUBE_API_KEY":"')[1] | .split('"INNERTUBE_API_KEY":"')[1] | ||||
.split('"')[0]; | .split('"')[0]; | ||||
@@ -123,7 +67,24 @@ interface VideoPageData { | |||||
clickTrackingParams?: string; | clickTrackingParams?: string; | ||||
} | } | ||||
export interface Cue { | |||||
/**
 * Shape that the JSON body of the transcript request is cast to in
 * `fetchTranscriptItems`. Only the parts this module reads are described.
 */
interface TranscriptResponse { | |||||
// fetchTranscriptItems throws InvalidTranscriptResponseContextError when this is falsy.
responseContext?: unknown, | |||||
// Each action nests the cue groups under
// updateEngagementPanelAction.content.transcriptRenderer.body.transcriptBodyRenderer.
actions?: { | |||||
updateEngagementPanelAction: { | |||||
content: { | |||||
transcriptRenderer: { | |||||
body: { | |||||
transcriptBodyRenderer: { | |||||
cueGroups: Cue[], | |||||
} | |||||
} | |||||
} | |||||
} | |||||
}, | |||||
}[]; | |||||
} | |||||
interface Cue { | |||||
transcriptCueGroupRenderer: { | transcriptCueGroupRenderer: { | ||||
cues: { | cues: { | ||||
transcriptCueRenderer: { | transcriptCueRenderer: { | ||||
@@ -137,8 +98,8 @@ export interface Cue { | |||||
}, | }, | ||||
} | } | ||||
export const extractDataFromPage = (page: string): VideoPageData => ({ | |||||
innerTubeApiKey: extractInnterTubeApiKeyFromPage(page), | |||||
const extractDataFromPage = (page: string): VideoPageData => ({ | |||||
innerTubeApiKey: extractInnerTubeApiKeyFromPage(page), | |||||
serializedShareEntity: extractSerializedShareEntityFromPage(page), | serializedShareEntity: extractSerializedShareEntityFromPage(page), | ||||
visitorData: extractVisitorDataFromPage(page), | visitorData: extractVisitorDataFromPage(page), | ||||
sessionId: extractSessionIdFromPage(page), | sessionId: extractSessionIdFromPage(page), | ||||
@@ -191,7 +152,7 @@ const generateGetTranscriptRequestBody = ( | |||||
}; | }; | ||||
}; | }; | ||||
export const fetchTranscriptItems = async (pageData: VideoPageData, config?: TranscriptConfig) => { | |||||
const fetchTranscriptItems = async (pageData: VideoPageData, config?: TranscriptConfig) => { | |||||
const { innerTubeApiKey } = pageData; | const { innerTubeApiKey } = pageData; | ||||
if (!(innerTubeApiKey && innerTubeApiKey.length > 0)) { | if (!(innerTubeApiKey && innerTubeApiKey.length > 0)) { | ||||
throw new InnerTubeApiKeyMissingError('InnerTube API key not found on video page.'); | throw new InnerTubeApiKeyMissingError('InnerTube API key not found on video page.'); | ||||
@@ -214,7 +175,7 @@ export const fetchTranscriptItems = async (pageData: VideoPageData, config?: Tra | |||||
throw new FetchTranscriptRequestFailureError(`Fetching transcript failed with status ${transcriptResponse.status}.`); | throw new FetchTranscriptRequestFailureError(`Fetching transcript failed with status ${transcriptResponse.status}.`); | ||||
} | } | ||||
const transcriptBody = await transcriptResponse.json(); | |||||
const transcriptBody = await transcriptResponse.json() as TranscriptResponse; | |||||
if (!transcriptBody.responseContext) { | if (!transcriptBody.responseContext) { | ||||
throw new InvalidTranscriptResponseContextError('No responseContext found on get transcript response.'); | throw new InvalidTranscriptResponseContextError('No responseContext found on get transcript response.'); | ||||
} | } | ||||
@@ -244,5 +205,19 @@ export const fetchTranscriptItems = async (pageData: VideoPageData, config?: Tra | |||||
.startOffsetMs, | .startOffsetMs, | ||||
10, | 10, | ||||
), | ), | ||||
})) as TranscriptResponse[]; | |||||
})) as TranscriptItem[]; | |||||
}; | |||||
/** Input of `getRawTranscript`. */
export interface SummarizerProcessParams { | |||||
/** Video reference handed to `retrieveVideoId`. */
url: string; | |||||
// `language` and `country` are forwarded, together with any other remaining
// fields, as the config argument of fetchTranscriptItems.
language?: string; | |||||
country?: string; | |||||
} | |||||
/**
 * Fetches the transcript items of the video referenced by `params.url`.
 *
 * Resolves the video ID, downloads the video page, extracts the page data and
 * requests the transcript, forwarding every field except `url` as the
 * transcript config.
 *
 * @param params - The video reference plus optional transcript config fields.
 * @returns Whatever `fetchTranscriptItems` resolves with.
 */
export const getRawTranscript = async (params: SummarizerProcessParams) => {
  const { url, ...transcriptConfig } = params;
  const videoId = retrieveVideoId(url);
  const pageHtml = await getVideoPage(videoId);
  return fetchTranscriptItems(extractDataFromPage(pageHtml), transcriptConfig);
};
@@ -0,0 +1,19 @@ | |||||
import { InvalidVideoIdError } from './errors'; | |||||
// Inputs of exactly this length are treated as bare video IDs.
const STANDARD_YOUTUBE_VIDEO_ID_LENGTH = 11 as const;

/**
 * Captures the video ID (group 1) that follows `youtu.be/`, `v/`, `vi/`,
 * `u/<char>/`, `embed/`, or a `v=` / `vi=` query parameter.
 *
 * The query alternatives must read `vi?=`. The previous `v?:i?=` required a
 * literal colon, so ordinary `watch?v=<id>` URLs never matched.
 */
export const RE_YOUTUBE = /^.*(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/)|(?:(?:watch)?\?vi?=|&vi?=))([^#&?]*).*/im;

/**
 * Extracts a YouTube video ID from a bare ID or a YouTube URL.
 *
 * @param videoId - Either an 11-character video ID or a URL containing one.
 * @returns The video ID.
 * @throws InvalidVideoIdError when the input is not a string, or when no
 * non-empty ID can be extracted from it.
 */
export const retrieveVideoId = (videoId: string): string => {
  // Guards JavaScript callers that bypass the type annotation.
  if (typeof (videoId as unknown) !== 'string') {
    throw new InvalidVideoIdError('The video ID must be a string.');
  }

  if (videoId.length === STANDARD_YOUTUBE_VIDEO_ID_LENGTH) {
    return videoId;
  }

  // An empty capture (e.g. "https://youtu.be/") is not a usable ID either.
  const extractedId = RE_YOUTUBE.exec(videoId)?.[1];
  if (extractedId) {
    return extractedId;
  }

  throw new InvalidVideoIdError('Impossible to retrieve Youtube video ID.');
};
@@ -52,7 +52,7 @@ describe('blah', () => { | |||||
done(); | done(); | ||||
}); | }); | ||||
summarizer.process({ | |||||
summarizer.summarize({ | |||||
url: 'https://www.youtube.com/watch?v=WeNgDxtBiyw', | url: 'https://www.youtube.com/watch?v=WeNgDxtBiyw', | ||||
}); | }); | ||||
}), { timeout: 180000 }); | }), { timeout: 180000 }); | ||||
@@ -233,6 +233,11 @@ | |||||
"@babel/helper-validator-identifier" "^7.19.1" | "@babel/helper-validator-identifier" "^7.19.1" | ||||
to-fast-properties "^2.0.0" | to-fast-properties "^2.0.0" | ||||
"@dqbd/tiktoken@^1.0.6": | |||||
version "1.0.6" | |||||
resolved "https://registry.yarnpkg.com/@dqbd/tiktoken/-/tiktoken-1.0.6.tgz#96bfd0a4909726c61551a8c783493f01841bd163" | |||||
integrity sha512-umSdeZTy/SbPPKVuZKV/XKyFPmXSN145CcM3iHjBbmhlohBJg7vaDp4cPCW+xNlWL6L2U1sp7T2BD+di2sUKdA== | |||||
"@esbuild/android-arm64@0.17.16": | "@esbuild/android-arm64@0.17.16": | ||||
version "0.17.16" | version "0.17.16" | ||||
resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.17.16.tgz#7b18cab5f4d93e878306196eed26b6d960c12576" | resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.17.16.tgz#7b18cab5f4d93e878306196eed26b6d960c12576" | ||||
@@ -436,6 +441,10 @@ | |||||
resolved "https://registry.yarnpkg.com/@mdn/browser-compat-data/-/browser-compat-data-5.2.49.tgz#b4322b2610173bf71185ab394923d49f467f8f97" | resolved "https://registry.yarnpkg.com/@mdn/browser-compat-data/-/browser-compat-data-5.2.49.tgz#b4322b2610173bf71185ab394923d49f467f8f97" | ||||
integrity sha512-tXJUP9EFcfeTcn3hpn616qtcbaLMrhqfgsljRnIv/qYckL8ywLodk7Cj3oJlZed3zWLZLnE9LHHsfpO8w4yJuw== | integrity sha512-tXJUP9EFcfeTcn3hpn616qtcbaLMrhqfgsljRnIv/qYckL8ywLodk7Cj3oJlZed3zWLZLnE9LHHsfpO8w4yJuw== | ||||
"@modal-sh/mio-ai@link:../../../openai-utils": | |||||
version "0.0.0" | |||||
uid "" | |||||
"@next/eslint-plugin-next@^13.2.4": | "@next/eslint-plugin-next@^13.2.4": | ||||
version "13.3.0" | version "13.3.0" | ||||
resolved "https://registry.yarnpkg.com/@next/eslint-plugin-next/-/eslint-plugin-next-13.3.0.tgz#3a4742b0817575cc0dd4d152cb10363584c215ac" | resolved "https://registry.yarnpkg.com/@next/eslint-plugin-next/-/eslint-plugin-next-13.3.0.tgz#3a4742b0817575cc0dd4d152cb10363584c215ac" | ||||