Get transcript summaries of Web videos.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

224 lines
6.4 KiB

  1. // based from
  2. //
  3. // we refactored it to make it more maintainable
  4. import fetchPonyfill from 'fetch-ponyfill';
  5. import {
  6. CannotRetrieveVideoPageError,
  7. FetchTranscriptRequestFailureError,
  8. InnerTubeApiKeyMissingError,
  9. InvalidTranscriptActionsError,
  10. InvalidTranscriptResponseContextError,
  11. } from './errors';
  12. import { BaseTranscriptItem } from '../../transcript';
  13. import { generateNonce } from './crypto';
  14. import { retrieveVideoId } from './url';
  15. import { VIDEO_TYPE } from './common';
  16. export interface CreateTranscriptFetcherParams {
  17. type: typeof VIDEO_TYPE;
  18. }
  19. interface TranscriptConfig {
  20. language?: string;
  21. country?: string;
  22. }
  23. export interface TranscriptItem extends BaseTranscriptItem {
  24. duration: number;
  25. offset: number;
  26. }
  27. const { fetch: f } = fetchPonyfill();
  28. const getVideoPage = async (videoId: string): Promise<string> => {
  29. const identifier = retrieveVideoId(videoId);
  30. const videoUrl = new URL('/watch', '');
  31. const videoUrlParams = new URLSearchParams({
  32. v: identifier,
  33. });
  34. = videoUrlParams.toString();
  35. const videoPageResponse = await f(videoUrl.toString());
  36. if (videoPageResponse.ok) {
  37. return videoPageResponse.text();
  38. }
  39. throw new CannotRetrieveVideoPageError('Unable to get video page.');
  40. };
  41. const extractInnerTubeApiKeyFromPage = (videoPageBody: string): string => videoPageBody
  42. .split('"INNERTUBE_API_KEY":"')[1]
  43. .split('"')[0];
  44. const extractSerializedShareEntityFromPage = (page: string) => page.split('"serializedShareEntity":"')[1]?.split('"')[0];
  45. const extractVisitorDataFromPage = (page: string) => page.split('"VISITOR_DATA":"')[1]?.split('"')[0];
  46. const extractSessionIdFromPage = (page: string) => page.split('"sessionId":"')[1]?.split('"')[0];
  47. const extractClickTrackingParamsFromPage = (page: string) => page
  48. ?.split('"clickTrackingParams":"')[1]
  49. ?.split('"')[0];
  50. interface VideoPageData {
  51. innerTubeApiKey?: string;
  52. serializedShareEntity?: string;
  53. visitorData?: string;
  54. sessionId?: string;
  55. clickTrackingParams?: string;
  56. }
  57. interface TranscriptResponse {
  58. responseContext?: unknown,
  59. actions?: {
  60. updateEngagementPanelAction: {
  61. content: {
  62. transcriptRenderer: {
  63. body: {
  64. transcriptBodyRenderer: {
  65. cueGroups: Cue[],
  66. }
  67. }
  68. }
  69. }
  70. },
  71. }[];
  72. }
  73. interface Cue {
  74. transcriptCueGroupRenderer: {
  75. cues: {
  76. transcriptCueRenderer: {
  77. cue: {
  78. simpleText: string;
  79. },
  80. durationMs: string;
  81. startOffsetMs: string;
  82. }
  83. }[],
  84. },
  85. }
  86. const extractDataFromPage = (page: string): VideoPageData => ({
  87. innerTubeApiKey: extractInnerTubeApiKeyFromPage(page),
  88. serializedShareEntity: extractSerializedShareEntityFromPage(page),
  89. visitorData: extractVisitorDataFromPage(page),
  90. sessionId: extractSessionIdFromPage(page),
  91. clickTrackingParams: extractClickTrackingParamsFromPage(page),
  92. });
  93. const generateGetTranscriptRequestBody = (
  94. p: Partial<VideoPageData>,
  95. config?: TranscriptConfig,
  96. ) => {
  97. const {
  98. serializedShareEntity,
  99. visitorData,
  100. sessionId,
  101. clickTrackingParams,
  102. } = p;
  103. return {
  104. context: {
  105. client: {
  106. hl: config?.language ?? 'en',
  107. gl: config?.country ?? 'PH',
  108. visitorData,
  109. userAgent:
  110. 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)',
  111. clientName: 'WEB',
  112. clientVersion: '2.20200925.01.00',
  113. osName: 'Macintosh',
  114. osVersion: '10_15_4',
  115. browserName: 'Chrome',
  116. browserVersion: '85.0f.4183.83',
  117. screenWidthPoints: 1440,
  118. screenHeightPoints: 770,
  119. screenPixelDensity: 2,
  120. utcOffsetMinutes: 120,
  121. userInterfaceTheme: 'USER_INTERFACE_THEME_LIGHT',
  122. connectionType: 'CONN_CELLULAR_3G',
  123. },
  124. request: {
  125. sessionId,
  126. internalExperimentFlags: [],
  127. consistencyTokenJars: [],
  128. },
  129. user: {},
  130. clientScreenNonce: generateNonce(),
  131. clickTracking: {
  132. clickTrackingParams,
  133. },
  134. },
  135. params: serializedShareEntity,
  136. };
  137. };
  138. const fetchTranscriptItems = async (pageData: VideoPageData, config?: TranscriptConfig) => {
  139. const { innerTubeApiKey } = pageData;
  140. if (!(innerTubeApiKey && innerTubeApiKey.length > 0)) {
  141. throw new InnerTubeApiKeyMissingError('InnerTube API key not found on video page.');
  142. }
  143. const getTranscriptUrl = new URL('/youtubei/v1/get_transcript', '');
  144. const getTranscriptParams = new URLSearchParams({
  145. key: innerTubeApiKey,
  146. });
  147. = getTranscriptParams.toString();
  148. const transcriptResponse = await f(getTranscriptUrl.toString(), {
  149. method: 'POST',
  150. headers: {
  151. Accept: 'application/json',
  152. 'Content-Type': 'application/json',
  153. },
  154. body: JSON.stringify(generateGetTranscriptRequestBody(pageData, config)),
  155. });
  156. if (!transcriptResponse.ok) {
  157. throw new FetchTranscriptRequestFailureError(`Fetching transcript failed with status ${transcriptResponse.status}.`);
  158. }
  159. const transcriptBody = await transcriptResponse.json() as TranscriptResponse;
  160. if (!transcriptBody.responseContext) {
  161. throw new InvalidTranscriptResponseContextError('No responseContext found on get transcript response.');
  162. }
  163. if (!transcriptBody.actions) {
  164. throw new InvalidTranscriptActionsError('No actions found on get transcript response.');
  165. }
  166. const { cueGroups: transcripts } = transcriptBody
  167. .actions[0]
  168. .updateEngagementPanelAction
  169. .content
  170. .transcriptRenderer
  171. .body
  172. .transcriptBodyRenderer;
  173. return Cue) => ({
  174. text: cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer
  175. .cue.simpleText,
  176. duration: parseInt(
  177. cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer
  178. .durationMs,
  179. 10,
  180. ),
  181. offset: parseInt(
  182. cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer
  183. .startOffsetMs,
  184. 10,
  185. ),
  186. })) as TranscriptItem[];
  187. };
  188. export interface SummarizerProcessParams {
  189. url: string;
  190. language?: string;
  191. country?: string;
  192. }
  193. export const getRawTranscript = async (params: SummarizerProcessParams) => {
  194. const { url, ...config } = params;
  195. const identifier = retrieveVideoId(url);
  196. const videoPageBody = await getVideoPage(identifier);
  197. const pageData = extractDataFromPage(videoPageBody);
  198. return fetchTranscriptItems(pageData, config);
  199. };