Gets the name of a number, even if it's stupidly big. Supersedes TheoryOfNekomata/number-name.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

503 line
13 KiB

  1. import {
  2. bigIntMax, bigIntMin,
  3. Group,
  4. GROUP_DIGITS_INDEX,
  5. GROUP_PLACE_INDEX,
  6. InvalidTokenError,
  7. } from '../../../common';
  8. import {
  9. CENTILLIONS_PREFIXES,
  10. DECILLIONS_PREFIXES,
  11. DECIMAL_POINT,
  12. EMPTY_GROUP_DIGITS,
  13. EMPTY_PLACE,
  14. EXPONENT_DELIMITER,
  15. HUNDRED,
  16. ILLION_SUFFIX,
  17. MILLIA_PREFIX,
  18. MILLIONS_PREFIXES,
  19. MILLIONS_SPECIAL_PREFIXES, NEGATIVE,
  20. NEGATIVE_SYMBOL,
  21. ONES,
  22. OnesName,
  23. POSITIVE_SYMBOL,
  24. SHORT_MILLIA_DELIMITER,
  25. SHORT_MILLIA_ILLION_DELIMITER,
  26. T_AFFIX,
  27. TEN_PLUS_ONES,
  28. TenPlusOnesName,
  29. TENS,
  30. TENS_ONES_SEPARATOR,
  31. TensName,
  32. THOUSAND,
  33. } from '../../en/common';
  /**
   * Sentinel token (the empty string) that the group parser appends after the
   * real tokens so it gets one last step to flush the token it is holding.
   */
  const FINAL_TOKEN: '' = '';
  /**
   * Breaks a number name into lowercase tokens.
   *
   * The input is lowercased, trimmed and whitespace-normalized. Two kinds of
   * multi-part group names are then glued into a single token before splitting:
   * a THOUSAND followed by an "-illion" word, and a short-form millia prefix
   * followed by its delimiter. The tens/ones separator is finally treated as a
   * space, so the glue steps must run before it.
   * @param value - The string to tokenize.
   * @see {NumberNameSystem.mergeTokens}
   * @returns string[] The non-empty tokens, in order.
   */
  export const tokenize = (value: string) => {
    const thousandIllionPattern = new RegExp(`${THOUSAND}\\s+(.+?${ILLION_SUFFIX})`, 'g');
    const shortMilliaPattern = new RegExp(
      `${MILLIA_PREFIX}\\${SHORT_MILLIA_DELIMITER}(\\d+)${SHORT_MILLIA_ILLION_DELIMITER}`,
      'g',
    );
    const tensOnesSeparatorPattern = new RegExp(`${TENS_ONES_SEPARATOR}`, 'g');

    const normalized = value
      .toLowerCase()
      .trim()
      .replace(/\n+/gs, ' ')
      .replace(/\s+/g, ' ');

    // "thousand <x>illion" becomes one token: "thousand<x>illion".
    const withJoinedThousands = normalized.replace(
      thousandIllionPattern,
      (_match, illion: string) => `${THOUSAND}${illion}`,
    );

    // Drop the delimiter that follows a short-form millia count.
    const withJoinedMillias = withJoinedThousands.replace(
      shortMilliaPattern,
      (_match, milliaCount: string) => `${MILLIA_PREFIX}${SHORT_MILLIA_DELIMITER}${milliaCount}`,
    );

    const pieces = withJoinedMillias
      .replace(tensOnesSeparatorPattern, ' ')
      .split(' ');
    return pieces.filter((piece) => piece.length > 0);
  };
  /**
   * Working state threaded through successive `doParseGroupName` calls.
   */
  interface DoParseState {
    /** The part of the group name that has not been consumed yet. */
    groupNameCurrent: string;
    /** Accumulated prefix values, indexed by millia level (index 0 is the base level). */
    millias: number[];
    /** The millia level that the most recent millia prefix moved the value to. */
    milliaIndex: number;
    /** Set to `true` once the whole group name has been consumed. */
    done: boolean;
  }
  /**
   * Performs one parsing step on a group name base (the name with its "-illion"
   * suffix already removed), consuming a single leading affix.
   *
   * Each call either finishes (nothing left, or only the -t- affix left),
   * consumes a hundreds/tens/units prefix and adds its value to millia level 0,
   * or consumes a millia prefix and moves the current value to a higher level.
   * @param result - The current state of the parser.
   * @returns DoParseState The next state of the parser.
   * @throws InvalidTokenError When the remaining text starts with no known affix,
   * or a short-form millia prefix has no count.
   */
  const doParseGroupName = (result: DoParseState): DoParseState => {
    const remaining = result.groupNameCurrent;

    // The -t- affix only attaches a prefix to the -illion suffix, so a lone
    // "t" (or nothing at all) means everything has been consumed.
    if (remaining.length < 1 || remaining === T_AFFIX) {
      return {
        ...result,
        // Replace any gaps left between millia levels with zeros.
        millias: Array.from(
          { length: result.millias.length },
          (_unused, i) => result.millias[i] ?? 0,
        ),
        done: true,
      };
    }

    // Prefix tables in the order they are tried, paired with the weight that
    // the matched index carries.
    const prefixTables: [readonly string[], number][] = [
      [CENTILLIONS_PREFIXES, 100],
      [DECILLIONS_PREFIXES, 10],
      [MILLIONS_PREFIXES, 1],
    ];
    for (const [prefixes, weight] of prefixTables) {
      const matchedIndex = prefixes.findIndex((p) => (
        p.length > 0 && remaining.startsWith(p)
      ));
      if (matchedIndex > -1) {
        return {
          milliaIndex: 0,
          millias: result.millias.map((m, i) => (
            i === 0 ? m + (matchedIndex * weight) : m
          )),
          groupNameCurrent: remaining.slice(prefixes[matchedIndex].length),
          done: false,
        };
      }
    }

    if (!remaining.startsWith(MILLIA_PREFIX)) {
      throw new InvalidTokenError(remaining);
    }

    // A plain millia prefix steps up one level; the short form names the target
    // level explicitly with a number after the delimiter.
    let consumedPrefix: string = MILLIA_PREFIX;
    let targetLevel = result.milliaIndex + 1;
    if (remaining.startsWith(`${MILLIA_PREFIX}${SHORT_MILLIA_DELIMITER}`)) {
      const shortForm = new RegExp(`^${MILLIA_PREFIX}\\${SHORT_MILLIA_DELIMITER}(\\d+)`)
        .exec(remaining);
      if (shortForm === null) {
        throw new InvalidTokenError(remaining);
      }
      [consumedPrefix] = shortForm;
      targetLevel = Number(shortForm[1]);
    }

    // Move the value from the current level to the target level; a bare millia
    // with no preceding value counts as 1.
    const sourceLevel = result.milliaIndex;
    const shifted = [...result.millias];
    shifted[targetLevel] = shifted[sourceLevel] || 1;
    shifted[sourceLevel] = 0;

    return {
      milliaIndex: targetLevel,
      millias: shifted,
      groupNameCurrent: remaining.slice(consumedPrefix.length),
      done: false,
    };
  };
  /**
   * Resolves a group name (e.g. "thousand", "million", "duodecillion") to its place.
   *
   * A bare THOUSAND is place 1. Every other name maps to twice its "-illion"
   * index, plus one when the name itself starts with THOUSAND.
   * @param groupName - The group name.
   * @returns bigint The place of the group name.
   */
  const getGroupPlaceFromGroupName = (groupName: string) => {
    if (groupName === THOUSAND) {
      return BigInt(1);
    }

    const thousandOffset = groupName.startsWith(THOUSAND) ? BigInt(1) : BigInt(0);
    const groupNameBase = groupName.replace(ILLION_SUFFIX, '').replace(THOUSAND, '');

    // Names whose base is listed verbatim need no affix parsing.
    const specialMillions = MILLIONS_SPECIAL_PREFIXES.findIndex((p) => groupNameBase === p);
    if (specialMillions > -1) {
      return BigInt(specialMillions * 2) + thousandOffset;
    }

    let state: DoParseState = {
      groupNameCurrent: groupNameBase,
      millias: [0],
      milliaIndex: 0,
      done: false,
    };
    while (!state.done) {
      state = doParseGroupName(state);
    }

    // Each millia level contributes three decimal digits, highest level first.
    const illionIndexDigits = [...state.millias]
      .reverse()
      .map((level) => String(level).padStart(3, '0'))
      .join('');

    return BigInt(illionIndexDigits) * BigInt(2) + thousandOffset;
  };
  /**
   * Modes of the group parser; each records which kind of token was handled last.
   */
  enum ParseGroupsMode {
    /** Nothing has been parsed yet. */
    INITIAL = 'initial',
    /** The last handled token was a ones name. */
    ONES_MODE = 'ones',
    /** The last handled token was a tens name. */
    TENS_MODE = 'tens',
    /** The last handled token was a ten-plus-ones name. */
    TEN_PLUS_ONES_MODE = 'tenPlusOnes',
    /** The last handled token was "hundred". */
    HUNDRED_MODE = 'hundred',
    /** The last handled token was "thousand" or an "-illion" name. */
    THOUSAND_MODE = 'thousand',
    /** The final token has been processed. */
    DONE = 'done',
  }
  /**
   * Accumulator carried from token to token while parsing groups.
   */
  interface ParserState {
    /** The most recently recorded token, if any. */
    lastToken?: string;
    /** Groups built so far, in the order they were encountered. */
    groups: Group[];
    /** What kind of token the parser handled last. */
    mode: ParseGroupsMode;
    /** Whether the parsed number is negative. */
    negative: boolean;
  }
  /**
   * Handles a "thousand" or "-illion" token: flushes a pending ones or tens
   * digit into the current group, then stamps that group with the token's place.
   * @param acc - The parser state so far.
   * @param token - The group name token.
   * @returns ParserState The state after the group has been closed.
   */
  const parseThousand = (acc: ParserState, token: string): ParserState => {
    const lastGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];
    const digitsBefore = lastGroup[GROUP_DIGITS_INDEX];

    switch (acc.mode) {
      case ParseGroupsMode.ONES_MODE: {
        const onesDigit = ONES.findIndex((o) => o === acc.lastToken);
        if (onesDigit > -1) {
          lastGroup[GROUP_DIGITS_INDEX] = `${digitsBefore.slice(0, 2)}${onesDigit}`;
        }
        break;
      }
      case ParseGroupsMode.TENS_MODE: {
        const tensDigit = TENS.findIndex((t) => t === acc.lastToken);
        if (tensDigit > -1) {
          lastGroup[GROUP_DIGITS_INDEX] = (
            `${digitsBefore.slice(0, 1)}${tensDigit}${digitsBefore.slice(2)}`
          );
        }
        break;
      }
      default:
        break;
    }

    // The group name decides which place these digits occupy.
    lastGroup[GROUP_PLACE_INDEX] = getGroupPlaceFromGroupName(token);

    const earlierGroups = acc.groups.slice(0, -1);
    return {
      ...acc,
      groups: [...earlierGroups, lastGroup],
      lastToken: token,
      mode: ParseGroupsMode.THOUSAND_MODE,
    };
  };
  /**
   * Handles a "hundred" token that follows a ones name by writing that ones
   * value into the hundreds digit of the current group.
   *
   * If the last recorded token is not a ones name (an unrecognised token can
   * overwrite `lastToken` without leaving ONES_MODE), the digits are left
   * untouched instead of having "-1" spliced into them.
   * @param acc - The parser state so far.
   * @returns ParserState The state, now in HUNDRED_MODE.
   */
  const parseHundred = (acc: ParserState): ParserState => {
    const lastGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];
    const hundreds = ONES.findIndex((o) => o === acc.lastToken);
    // findIndex yields -1 when lastToken is not a ones name; never write that
    // into the digit string.
    if (hundreds > -1) {
      lastGroup[GROUP_DIGITS_INDEX] = `${hundreds}${lastGroup[GROUP_DIGITS_INDEX].slice(1)}`;
    }
    return {
      ...acc,
      groups: [...acc.groups.slice(0, -1), lastGroup],
      mode: ParseGroupsMode.HUNDRED_MODE,
    };
  };
  /**
   * Handles the final (empty) token by finishing the group that is still open.
   *
   * When the name ends on a ones, tens, ten-plus-ones or "hundred" token, the
   * last group has no group name after it, so it is the units group: any
   * pending ones/tens digit is written and the place is set to 0. The place
   * must be reset for ten-plus-ones as well, because a group opened right
   * after a group name starts with a provisional place of "previous place - 1"
   * (e.g. the "eleven" in "one million eleven" would otherwise stay in the
   * thousands place).
   *
   * In any other mode (nothing parsed, or the name ended on a group name) the
   * state is returned unchanged.
   * @param acc - The parser state so far.
   * @returns ParserState The finished state.
   */
  const parseFinal = (acc: ParserState): ParserState => {
    const endsOnUnitsGroup = (
      acc.mode === ParseGroupsMode.ONES_MODE
      || acc.mode === ParseGroupsMode.TENS_MODE
      || acc.mode === ParseGroupsMode.TEN_PLUS_ONES_MODE
      || acc.mode === ParseGroupsMode.HUNDRED_MODE
    );
    if (!endsOnUnitsGroup) {
      return acc;
    }

    const lastGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];
    const digits = lastGroup[GROUP_DIGITS_INDEX];

    if (acc.mode === ParseGroupsMode.ONES_MODE) {
      const ones = ONES.findIndex((o) => o === acc.lastToken);
      if (ones > -1) {
        lastGroup[GROUP_DIGITS_INDEX] = `${digits.slice(0, 2)}${ones}`;
      }
    } else if (acc.mode === ParseGroupsMode.TENS_MODE) {
      const tens = TENS.findIndex((t) => t === acc.lastToken);
      if (tens > -1) {
        lastGroup[GROUP_DIGITS_INDEX] = `${digits.slice(0, 1)}${tens}${digits.slice(2)}`;
      }
    }
    // Ten-plus-ones and hundred digits were already written when their tokens
    // were handled; only the place still needs fixing.

    // A trailing group without a group name is always the smallest one.
    lastGroup[GROUP_PLACE_INDEX] = BigInt(0);

    return {
      ...acc,
      groups: [...acc.groups.slice(0, -1), lastGroup],
      mode: ParseGroupsMode.DONE,
    };
  };
  /**
   * Handles a ones name. The digit itself is not written yet; the token is only
   * remembered so that the next token can decide where it belongs. If the
   * previous token closed a group, a fresh empty group is opened first.
   * @param acc - The parser state so far.
   * @param token - The ones name token.
   * @returns ParserState The state, now in ONES_MODE.
   */
  const parseOnes = (acc: ParserState, token: string): ParserState => ({
    ...acc,
    groups: acc.mode === ParseGroupsMode.THOUSAND_MODE
      ? [...acc.groups, [...EMPTY_PLACE]]
      : acc.groups,
    lastToken: token,
    mode: ParseGroupsMode.ONES_MODE,
  });
  /**
   * Handles a ten-plus-ones name by writing a tens digit of 1 and the matching
   * ones digit. Right after a group name this opens a new group placed one
   * below the previous group; otherwise the current group is updated.
   * @param acc - The parser state so far.
   * @param token - The ten-plus-ones name token.
   * @returns ParserState The state, now in TEN_PLUS_ONES_MODE.
   */
  const parseTenPlusOnes = (acc: ParserState, token: string): ParserState => {
    const onesDigit = TEN_PLUS_ONES.findIndex((t) => t === token);
    const previousGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];

    let groups: Group[];
    if (acc.mode === ParseGroupsMode.THOUSAND_MODE) {
      const provisionalPlace = previousGroup[GROUP_PLACE_INDEX] - BigInt(1);
      groups = [...acc.groups, [`01${onesDigit}`, provisionalPlace]];
    } else {
      const hundredsDigit = previousGroup[GROUP_DIGITS_INDEX].slice(0, 1);
      previousGroup[GROUP_DIGITS_INDEX] = `${hundredsDigit}1${onesDigit}`;
      groups = [...acc.groups.slice(0, -1), previousGroup];
    }

    return {
      ...acc,
      lastToken: token,
      mode: ParseGroupsMode.TEN_PLUS_ONES_MODE,
      groups,
    };
  };
  /**
   * Handles a tens name by writing the tens digit. Right after a group name
   * this opens a new group placed one below the previous group; otherwise the
   * tens digit of the current group is replaced.
   * @param acc - The parser state so far.
   * @param token - The tens name token.
   * @returns ParserState The state, now in TENS_MODE.
   */
  const parseTens = (acc: ParserState, token: string): ParserState => {
    const tensDigit = TENS.findIndex((t) => t === token);
    const previousGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];
    const advanced: ParserState = {
      ...acc,
      lastToken: token,
      mode: ParseGroupsMode.TENS_MODE,
    };

    if (acc.mode === ParseGroupsMode.THOUSAND_MODE) {
      const provisionalPlace = previousGroup[GROUP_PLACE_INDEX] - BigInt(1);
      return {
        ...advanced,
        groups: [...acc.groups, [`0${tensDigit}0`, provisionalPlace]],
      };
    }

    const digits = previousGroup[GROUP_DIGITS_INDEX];
    previousGroup[GROUP_DIGITS_INDEX] = (
      `${digits.slice(0, 1)}${tensDigit}${digits.slice(2)}`
    );
    return {
      ...advanced,
      groups: [...acc.groups.slice(0, -1), previousGroup],
    };
  };
  /**
   * Turns a token list into digit groups plus a sign.
   *
   * Tokens are processed left to right. Group names, "hundred" (only directly
   * after a ones name), ones, ten-plus-ones and tens names each have a handler;
   * every NEGATIVE token flips the sign; any other token is only remembered as
   * the last token.
   * @param tokens - The tokens to parse groups from.
   * @see {NumberNameSystem.stringifyGroups}
   * @returns The parsed groups and whether the number is negative.
   */
  export const parseGroups = (tokens: string[]) => {
    const step = (state: ParserState, token: string): ParserState => {
      if (token === THOUSAND || token.endsWith(ILLION_SUFFIX)) {
        return parseThousand(state, token);
      }
      if (token === HUNDRED && state.mode === ParseGroupsMode.ONES_MODE) {
        return parseHundred(state);
      }
      if (token === FINAL_TOKEN) {
        return parseFinal(state);
      }
      if (ONES.includes(token as OnesName)) {
        return parseOnes(state, token);
      }
      if (TEN_PLUS_ONES.includes(token as TenPlusOnesName)) {
        return parseTenPlusOnes(state, token);
      }
      if (TENS.includes(token as TensName)) {
        return parseTens(state, token);
      }
      if (token === NEGATIVE) {
        return { ...state, negative: !state.negative };
      }
      return { ...state, lastToken: token };
    };

    let state: ParserState = {
      lastToken: undefined,
      groups: [],
      mode: ParseGroupsMode.INITIAL,
      negative: false,
    };
    // The trailing empty token gives the parser a chance to flush whatever the
    // last real token left pending.
    for (const token of [...tokens, FINAL_TOKEN]) {
      state = step(state, token);
    }

    return { groups: state.groups, negative: state.negative };
  };
  /**
   * Renders groups as a number string in exponential form.
   *
   * Groups are laid out from the highest place down to the lowest, with empty
   * digits filling any place that has no group. The exponent is derived from
   * the highest group's place and the leading zeros in that group's digits.
   * @param groups - The groups to combine.
   * @param negative - Whether the number is negative.
   * @see {NumberNameSystem.splitIntoGroups}
   * @returns string The combined groups in exponential form, or an empty string
   * when there are no groups.
   */
  export const combineGroups = (groups: Group[], negative: boolean) => {
    if (groups.length < 1) {
      return '';
    }

    const places = groups.map((g) => g[GROUP_PLACE_INDEX]);
    const maxPlace = bigIntMax(...places) as bigint;
    const minPlace = bigIntMin(...places) as bigint;
    const firstGroup = groups.find((g) => g[GROUP_PLACE_INDEX] === maxPlace) ?? [...EMPTY_PLACE];

    // Walk every place from highest to lowest, concatenating digits.
    let allDigits = '';
    for (let place = maxPlace; place >= minPlace; place = BigInt(place) - BigInt(1)) {
      const [groupDigits] = (
        groups.find((g) => g[GROUP_PLACE_INDEX] === place) ?? [EMPTY_GROUP_DIGITS, place]
      );
      allDigits = `${allDigits}${groupDigits}`;
    }
    const digits = allDigits.replace(/^0+/, '') || '0';

    // Leading zeros in the highest group lower the exponent accordingly.
    const firstGroupDigits = firstGroup[0];
    const leadingZeroCount = firstGroupDigits.length - firstGroupDigits.replace(/^0+/, '').length;
    const exponentValue = BigInt(
      (BigInt(firstGroup[GROUP_PLACE_INDEX]) * BigInt(3)) + (BigInt(2) - BigInt(leadingZeroCount)),
    );

    let exponent: string;
    if (exponentValue < 0) {
      exponent = `${NEGATIVE_SYMBOL}${-exponentValue}`;
    } else {
      exponent = `${POSITIVE_SYMBOL}${exponentValue}`;
    }

    const significandInteger = digits.slice(0, 1);
    const significandFraction = digits.slice(1).replace(/0+$/, '');
    const significand = significandFraction.length > 0
      ? `${significandInteger}${DECIMAL_POINT}${significandFraction}`
      : significandInteger;

    return `${negative ? NEGATIVE_SYMBOL : ''}${significand}${EXPONENT_DELIMITER}${exponent}`;
  };