import {
  encoding_for_model as encodingForModel,
  TiktokenModel,
} from '@dqbd/tiktoken';

import { MessageObject, MessageRole } from './chat';
import { ChatCompletionModel } from './models';

// ChatML special tokens used by the OpenAI chat completion models.
const START_TOKEN = '<|im_start|>' as const;
const END_TOKEN = '<|im_end|>' as const;
const SEPARATOR_TOKEN = '<|im_sep|>' as const;

// Serializes a normalized message array into the ChatML-style string for the
// given model family, ending with the tokens that prime the assistant's turn.
const generateChatTokenString = (
  normalizedMessageArray: MessageObject[],
  model: TiktokenModel,
) => {
  switch (model) {
    case ChatCompletionModel.GPT_3_5_TURBO: {
      // gpt-3.5-turbo separates the role from the content with a newline.
      const tokens = normalizedMessageArray
        .map((m) => `${START_TOKEN}${m.role}\n${m.content}${END_TOKEN}`)
        .join('\n');
      return `${tokens}\n${START_TOKEN}${MessageRole.ASSISTANT}\n`;
    }
    case ChatCompletionModel.GPT_4:
    case ChatCompletionModel.GPT_4_32K: {
      // gpt-4 separates the role from the content with the <|im_sep|> token.
      const tokens = normalizedMessageArray
        .map((m) => `${START_TOKEN}${m.role}${SEPARATOR_TOKEN}${m.content}${END_TOKEN}`)
        .join('');
      return `${tokens}${START_TOKEN}${MessageRole.ASSISTANT}${SEPARATOR_TOKEN}`;
    }
    default:
      break;
  }

  throw new Error('Invalid model.');
};

export const getTokens = (chatTokens: string, model: TiktokenModel) => {
  // Chat completion models need the ChatML special tokens registered with
  // their explicit token ids; other models use the stock encoding.
  const enc = Object.values(ChatCompletionModel).includes(model as unknown as ChatCompletionModel)
    ? encodingForModel(model, {
      [START_TOKEN]: 100264,
      [END_TOKEN]: 100265,
      [SEPARATOR_TOKEN]: 100266,
    })
    : encodingForModel(model);

  // 'all' allows every registered special token to be encoded as a token
  // rather than rejected as disallowed text.
  const theTokens = enc.encode(chatTokens, 'all');
  enc.free();
  return theTokens;
};

export const getPromptTokens = (
  normalizedMessageArray: MessageObject[],
  model: TiktokenModel,
) => {
  const chatTokens = generateChatTokenString(normalizedMessageArray, model);
  return getTokens(chatTokens, model);
};

export interface PromptUsage {
  prompt_tokens: number;
  total_tokens: number;
}

export interface CompletionUsage extends PromptUsage {
  completion_tokens: number;
}

// Wraps a usage payload; U defaults to PromptUsage but can be narrowed to
// CompletionUsage when completion tokens are known.
export interface UsageMetadata<U extends PromptUsage = PromptUsage> {
  usage: U;
}
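
// --- Usage sketch (illustrative only, not exported by this module) ---
// A minimal example of counting prompt tokens for gpt-3.5-turbo. It assumes
// MessageRole also exposes SYSTEM and USER members and that MessageObject is
// shaped as { role, content }; neither is shown in this file, so treat the
// snippet as a sketch under those assumptions.
//
// import { MessageObject, MessageRole } from './chat';
// import { ChatCompletionModel } from './models';
//
// const messages: MessageObject[] = [
//   { role: MessageRole.SYSTEM, content: 'You are a helpful assistant.' },
//   { role: MessageRole.USER, content: 'Hello!' },
// ];
//
// // getPromptTokens serializes the messages into the ChatML string built by
// // generateChatTokenString and returns the encoded token ids; the array
// // length is the prompt token count.
// const promptTokens = getPromptTokens(
//   messages,
//   ChatCompletionModel.GPT_3_5_TURBO as unknown as TiktokenModel,
// );
// const metadata: UsageMetadata = {
//   usage: {
//     prompt_tokens: promptTokens.length,
//     total_tokens: promptTokens.length,
//   },
// };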