// Based on https://github.com/Kakulukian/youtube-transcript
//
// Refactored to make it more maintainable.
import fetchPonyfill from 'fetch-ponyfill';
import {
  InvalidVideoIdError,
  CannotRetrieveVideoPageError,
  FetchTranscriptRequestFailureError,
  InnerTubeApiKeyMissingError,
  InvalidTranscriptActionsError,
  InvalidTranscriptResponseContextError,
} from './errors';

/** Matches the supported YouTube URL shapes; capture group 1 holds the video ID. */
const RE_YOUTUBE = /^.*(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*/im;

/** Optional locale hints sent as `hl` / `gl` in the transcript request. */
export interface TranscriptConfig {
  language?: string;
  country?: string;
}

/** One transcript entry as returned by {@link fetchTranscriptItems}. */
export interface TranscriptResponse {
  text: string;
  /** Parsed from the cue's `durationMs` field. */
  duration: number;
  /** Parsed from the cue's `startOffsetMs` field. */
  offset: number;
}

const { fetch: f } = fetchPonyfill();

/**
 * Resolves a video ID from either a bare ID or a YouTube URL.
 *
 * Any 11-character input is returned unchanged; otherwise the ID is taken
 * from capture group 1 of `RE_YOUTUBE`.
 *
 * @param videoId - A video ID or a URL containing one.
 * @returns The extracted video ID.
 * @throws InvalidVideoIdError when no non-empty ID can be extracted.
 */
export const retrieveVideoId = (videoId: string): string => {
  if (videoId.length === 11) {
    return videoId;
  }
  // The capture group may legally match an empty string (e.g. "https://youtu.be/"),
  // so an empty capture is treated the same as no match.
  const extractedId = videoId.match(RE_YOUTUBE)?.[1];
  if (extractedId) {
    return extractedId;
  }
  throw new InvalidVideoIdError('Impossible to retrieve Youtube video ID.');
};

/**
 * Downloads the HTML of the watch page for the given video.
 *
 * @param videoId - A video ID or URL, resolved through {@link retrieveVideoId}.
 * @returns The response body of `https://www.youtube.com/watch?v=<id>` as text.
 * @throws InvalidVideoIdError when the ID cannot be resolved.
 * @throws CannotRetrieveVideoPageError when the response status is not OK.
 */
export const getVideoPage = async (videoId: string): Promise<string> => {
  const identifier = retrieveVideoId(videoId);
  const videoUrl = new URL('/watch', 'https://www.youtube.com');
  videoUrl.search = new URLSearchParams({ v: identifier }).toString();

  const videoPageResponse = await f(videoUrl.toString());
  if (!videoPageResponse.ok) {
    throw new CannotRetrieveVideoPageError('Unable to get video page.');
  }
  return videoPageResponse.text();
};

/**
 * Builds a random nonce for the `clientScreenNonce` request field.
 *
 * The decimal string form of `Math.random()` is encoded three bytes at a time
 * into four characters of a web-safe base64 alphabet, with `.` used as padding.
 */
const generateNonce = (): string => {
  // Indices 0-63 are the encoding alphabet; index 64 is the padding character.
  const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.';
  const padIndex = 64;
  const rnd = Math.random().toString();

  let nonce = '';
  for (let i = 0; i < rnd.length; i += 3) {
    const hasByte2 = i + 1 < rnd.length;
    const hasByte3 = i + 2 < rnd.length;
    const byte1 = rnd.charCodeAt(i);
    const byte2 = hasByte2 ? rnd.charCodeAt(i + 1) : 0;
    const byte3 = hasByte3 ? rnd.charCodeAt(i + 2) : 0;

    const out1 = byte1 >> 2;
    const out2 = ((byte1 & 3) << 4) | (byte2 >> 4);
    let out3 = ((byte2 & 15) << 2) | (byte3 >> 6);
    let out4 = byte3 & 63;
    // Pad the trailing group when fewer than three input bytes remain.
    if (!hasByte3) {
      out4 = padIndex;
      if (!hasByte2) {
        out3 = padIndex;
      }
    }

    nonce += alphabet.charAt(out1) + alphabet.charAt(out2)
      + alphabet.charAt(out3) + alphabet.charAt(out4);
  }
  return nonce;
};

/**
 * Returns the text between `"<key>":"` and the next double quote in `page`,
 * or `undefined` when the key does not occur.
 */
const extractQuotedValue = (page: string, key: string): string | undefined =>
  page.split(`"${key}":"`)[1]?.split('"')[0];

// Returns undefined (instead of throwing) when the key is absent, so that
// fetchTranscriptItems can report the missing key with its dedicated error.
const extractInnterTubeApiKeyFromPage = (videoPageBody: string): string | undefined =>
  extractQuotedValue(videoPageBody, 'INNERTUBE_API_KEY');

const extractSerializedShareEntityFromPage = (page: string): string | undefined =>
  extractQuotedValue(page, 'serializedShareEntity');

const extractVisitorDataFromPage = (page: string): string | undefined =>
  extractQuotedValue(page, 'VISITOR_DATA');

const extractSessionIdFromPage = (page: string): string | undefined =>
  extractQuotedValue(page, 'sessionId');

const extractClickTrackingParamsFromPage = (page: string): string | undefined =>
  extractQuotedValue(page, 'clickTrackingParams');

/** Values scraped from the watch page that the transcript request needs. */
interface VideoPageData {
  innerTubeApiKey?: string;
  serializedShareEntity?: string;
  visitorData?: string;
  sessionId?: string;
  clickTrackingParams?: string;
}

/**
 * Scrapes every field of {@link VideoPageData} out of the watch page HTML.
 *
 * @param page - The watch page body, e.g. the result of {@link getVideoPage}.
 * @returns The extracted values; a field is `undefined` when its key is not present.
 */
export const extractDataFromPage = (page: string): VideoPageData => ({
  innerTubeApiKey: extractInnterTubeApiKeyFromPage(page),
  serializedShareEntity: extractSerializedShareEntityFromPage(page),
  visitorData: extractVisitorDataFromPage(page),
  sessionId: extractSessionIdFromPage(page),
  clickTrackingParams: extractClickTrackingParamsFromPage(page),
});

/**
 * Builds the JSON body for the `get_transcript` request.
 *
 * @param p - Page data; `serializedShareEntity` becomes `params`, the remaining
 *   fields are placed into the request context.
 * @param config - Optional locale; defaults to `hl: 'en'` and `gl: 'PH'`.
 */
const generateGetTranscriptRequestBody = (
  p: Partial<VideoPageData>,
  config?: TranscriptConfig,
) => {
  const {
    serializedShareEntity,
    visitorData,
    sessionId,
    clickTrackingParams,
  } = p;

  return {
    context: {
      client: {
        hl: config?.language ?? 'en',
        gl: config?.country ?? 'PH',
        visitorData,
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)',
        clientName: 'WEB',
        clientVersion: '2.20200925.01.00',
        osName: 'Macintosh',
        osVersion: '10_15_4',
        browserName: 'Chrome',
        // Kept in sync with the Chrome version advertised in userAgent above.
        browserVersion: '85.0.4183.83',
        screenWidthPoints: 1440,
        screenHeightPoints: 770,
        screenPixelDensity: 2,
        utcOffsetMinutes: 120,
        userInterfaceTheme: 'USER_INTERFACE_THEME_LIGHT',
        connectionType: 'CONN_CELLULAR_3G',
      },
      request: {
        sessionId,
        internalExperimentFlags: [],
        consistencyTokenJars: [],
      },
      user: {},
      clientScreenNonce: generateNonce(),
      clickTracking: {
        clickTrackingParams,
      },
    },
    params: serializedShareEntity,
  };
};

/** Shape of one entry of `cueGroups` as read by fetchTranscriptItems. */
interface TranscriptCueGroup {
  transcriptCueGroupRenderer: {
    cues: {
      transcriptCueRenderer: {
        cue: { simpleText: string };
        durationMs: string;
        startOffsetMs: string;
      };
    }[];
  };
}

/**
 * Requests the transcript for a video and flattens it into a list of entries.
 *
 * POSTs to `https://www.youtube.com/youtubei/v1/get_transcript` with the
 * InnerTube API key as the `key` query parameter, then maps the first cue of
 * every cue group in the response to a {@link TranscriptResponse}.
 *
 * @param pageData - Values scraped from the watch page (see {@link extractDataFromPage}).
 * @param config - Optional language / country for the request context.
 * @returns One entry per cue group, in response order.
 * @throws InnerTubeApiKeyMissingError when `pageData.innerTubeApiKey` is missing or empty.
 * @throws FetchTranscriptRequestFailureError when the response status is not OK.
 * @throws InvalidTranscriptResponseContextError when the body has no `responseContext`.
 * @throws InvalidTranscriptActionsError when the body has no `actions`, or the first
 *   action does not contain a `cueGroups` array.
 */
export const fetchTranscriptItems = async (
  pageData: VideoPageData,
  config?: TranscriptConfig,
): Promise<TranscriptResponse[]> => {
  const { innerTubeApiKey } = pageData;
  if (!(innerTubeApiKey && innerTubeApiKey.length > 0)) {
    throw new InnerTubeApiKeyMissingError('InnerTube API key not found on video page.');
  }

  const getTranscriptUrl = new URL('/youtubei/v1/get_transcript', 'https://www.youtube.com');
  getTranscriptUrl.search = new URLSearchParams({ key: innerTubeApiKey }).toString();

  const transcriptResponse = await f(getTranscriptUrl.toString(), {
    method: 'POST',
    headers: {
      Accept: 'application/json',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(generateGetTranscriptRequestBody(pageData, config)),
  });
  if (!transcriptResponse.ok) {
    throw new FetchTranscriptRequestFailureError(`Fetching transcript failed with status ${transcriptResponse.status}.`);
  }

  const transcriptBody = await transcriptResponse.json();
  if (!transcriptBody.responseContext) {
    throw new InvalidTranscriptResponseContextError('No responseContext found on get transcript response.');
  }
  if (!transcriptBody.actions) {
    throw new InvalidTranscriptActionsError('No actions found on get transcript response.');
  }

  // Walk the nested renderer structure defensively: an empty `actions` array or
  // an unexpected shape yields `undefined` here rather than a raw TypeError.
  const cueGroups: unknown = transcriptBody.actions[0]
    ?.updateEngagementPanelAction
    ?.content
    ?.transcriptRenderer
    ?.body
    ?.transcriptBodyRenderer
    ?.cueGroups;
  if (!Array.isArray(cueGroups)) {
    throw new InvalidTranscriptActionsError('No transcript cue groups found on get transcript response.');
  }

  return cueGroups.map((group: TranscriptCueGroup): TranscriptResponse => {
    // Only the first cue of each group is used.
    const renderer = group.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer;
    return {
      text: renderer.cue.simpleText,
      duration: parseInt(renderer.durationMs, 10),
      offset: parseInt(renderer.startOffsetMs, 10),
    };
  });
};