Gets the name of a number, even if it's stupidly big. Supersedes TheoryOfNekomata/number-name.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

493 lines
13 KiB

  1. import {
  2. bigIntMax, bigIntMin,
  3. Group,
  4. GROUP_DIGITS_INDEX,
  5. GROUP_PLACE_INDEX,
  6. InvalidTokenError,
  7. } from '../../../common';
  8. import {
  9. CENTILLIONS_PREFIXES,
  10. DECILLIONS_PREFIXES,
  11. DECIMAL_POINT,
  12. EMPTY_GROUP_DIGITS,
  13. EMPTY_PLACE,
  14. EXPONENT_DELIMITER,
  15. HUNDRED,
  16. ILLION_SUFFIX,
  17. MILLIA_PREFIX,
  18. MILLIONS_PREFIXES,
  19. MILLIONS_SPECIAL_PREFIXES,
  20. NEGATIVE_SYMBOL,
  21. ONES,
  22. OnesName,
  23. POSITIVE_SYMBOL,
  24. SHORT_MILLIA_DELIMITER,
  25. SHORT_MILLIA_ILLION_DELIMITER,
  26. T_AFFIX,
  27. TEN_PLUS_ONES,
  28. TenPlusOnesName,
  29. TENS,
  30. TENS_ONES_SEPARATOR,
  31. TensName,
  32. THOUSAND,
  33. } from '../../en/common';
  /**
   * Sentinel token (the empty string). `parseGroups` appends it after the real tokens and
   * routes it to `parseFinal` so that the last real token gets flushed into its group.
   */
  const FINAL_TOKEN = '' as const;
  /**
   * Breaks a number name into normalized word tokens.
   *
   * The input is lower-cased, trimmed and has its whitespace collapsed. A "thousand" followed
   * by an "-illion" word is glued into a single token, the delimiter between a short millia
   * count and the rest of the group name is dropped, and the tens-ones separator is treated as
   * a space before splitting.
   * @param value - The string to tokenize.
   * @see {NumberNameSystem.mergeTokens}
   * @returns string[] The non-empty tokens, in order of appearance.
   */
  export const tokenize = (value: string) => {
    const thousandIllionPattern = new RegExp(`${THOUSAND}\\s+(.+?${ILLION_SUFFIX})`, 'g');
    const shortMilliaPattern = new RegExp(`${MILLIA_PREFIX}\\${SHORT_MILLIA_DELIMITER}(\\d+)${SHORT_MILLIA_ILLION_DELIMITER}`, 'g');
    const tensOnesSeparatorPattern = new RegExp(`${TENS_ONES_SEPARATOR}`, 'g');

    // Case and whitespace normalization.
    const normalized = value
      .toLowerCase()
      .trim()
      .replace(/\n+/gs, ' ')
      .replace(/\s+/g, ' ');

    // "thousand <x>illion" becomes the single token "thousand<x>illion".
    const thousandsJoined = normalized.replace(
      thousandIllionPattern,
      (_match, illion: string) => `${THOUSAND}${illion}`,
    );

    // Keep the short millia count but drop the delimiter that follows it.
    const milliasJoined = thousandsJoined.replace(
      shortMilliaPattern,
      (_match, milliaCount: string) => `${MILLIA_PREFIX}${SHORT_MILLIA_DELIMITER}${milliaCount}`,
    );

    const words = milliasJoined.replace(tensOnesSeparatorPattern, ' ').split(' ');
    return words.filter((word) => word.length > 0);
  };
  /**
   * State threaded through successive `doParseGroupName` calls while a group name is consumed
   * prefix by prefix.
   */
  interface DoParseState {
    /** The part of the group name that has not been consumed yet. */
    groupNameCurrent: string;
    /**
     * Values accumulated per millia level. Index 0 receives the values of matched prefixes;
     * a millia prefix moves the current value to a higher index.
     */
    millias: number[];
    /** Index in `millias` that the next plain millia prefix shifts the value away from. */
    milliaIndex: number;
    /** Set to `true` once the whole group name has been consumed. */
    done: boolean;
  }
  /**
   * Consumes one affix from the front of a group name (e.g. the base of "million",
   * "duodecillion", etc.) and folds its value into the parser state.
   *
   * Call repeatedly until the returned state has `done` set to `true`.
   * @param result - The current state of the parser.
   * @returns DoParseState The next state of the parser.
   * @throws InvalidTokenError When the remaining name starts with no known prefix, or a short
   * millia delimiter is not followed by digits.
   */
  const doParseGroupName = (result: DoParseState): DoParseState => {
    const { groupNameCurrent: remaining, millias, milliaIndex } = result;

    // Nothing left, or only the -t- affix that joins the prefix to the -illion suffix:
    // finish up, replacing any gaps left in the millias with zeros.
    if (remaining.length < 1 || remaining === T_AFFIX) {
      return {
        ...result,
        millias: Array.from({ length: millias.length }, (_, i) => millias[i] ?? 0),
        done: true,
      };
    }

    // Prefix families, tried in this order, paired with the weight of one index step.
    const prefixTables: ReadonlyArray<readonly [readonly string[], number]> = [
      [CENTILLIONS_PREFIXES, 100],
      [DECILLIONS_PREFIXES, 10],
      [MILLIONS_PREFIXES, 1],
    ];

    for (const [prefixes, weight] of prefixTables) {
      const matchedIndex = prefixes.findIndex((prefix) => (
        prefix.length > 0 && remaining.startsWith(prefix)
      ));
      if (matchedIndex > -1) {
        // The prefix value is always added to the lowest millia level.
        return {
          milliaIndex: 0,
          millias: millias.map((m, i) => (i === 0 ? m + (matchedIndex * weight) : m)),
          groupNameCurrent: remaining.slice(prefixes[matchedIndex].length),
          done: false,
        };
      }
    }

    if (!remaining.startsWith(MILLIA_PREFIX)) {
      throw new InvalidTokenError(remaining);
    }

    // Plain millia: go one level up. Short millia: jump straight to the stated level.
    let consumedPrefix: string = MILLIA_PREFIX;
    let targetIndex = milliaIndex + 1;
    if (remaining.startsWith(`${MILLIA_PREFIX}${SHORT_MILLIA_DELIMITER}`)) {
      const shortMillia = new RegExp(`^${MILLIA_PREFIX}\\${SHORT_MILLIA_DELIMITER}(\\d+)`)
        .exec(remaining);
      if (shortMillia === null) {
        throw new InvalidTokenError(remaining);
      }
      [consumedPrefix] = shortMillia;
      targetIndex = Number(shortMillia[1]);
    }

    // Move the current value to the target level; a bare millia counts as 1.
    const shiftedMillias = [...millias];
    shiftedMillias[targetIndex] = shiftedMillias[milliaIndex] || 1;
    shiftedMillias[milliaIndex] = 0;
    return {
      milliaIndex: targetIndex,
      millias: shiftedMillias,
      groupNameCurrent: remaining.slice(consumedPrefix.length),
      done: false,
    };
  };
  /**
   * Resolves a group name (e.g. "thousand", "million", "duodecillion", etc.) to its place.
   *
   * Each -illion step is worth two places; a name that starts with "thousand" sits one place
   * above its -illion.
   * @param groupName - The group name.
   * @returns bigint The place of the group name.
   */
  const getGroupPlaceFromGroupName = (groupName: string) => {
    if (groupName === THOUSAND) {
      return BigInt(1);
    }

    const thousandOffset = groupName.startsWith(THOUSAND) ? BigInt(1) : BigInt(0);
    const baseName = groupName.replace(ILLION_SUFFIX, '').replace(THOUSAND, '');

    // Names whose base is one of the special prefixes are resolved by table position.
    const specialIndex = MILLIONS_SPECIAL_PREFIXES.findIndex((prefix) => prefix === baseName);
    if (specialIndex > -1) {
      return BigInt(specialIndex * 2) + thousandOffset;
    }

    // Everything else is taken apart affix by affix.
    let state: DoParseState = {
      groupNameCurrent: baseName,
      millias: [0],
      milliaIndex: 0,
      done: false,
    };
    while (!state.done) {
      state = doParseGroupName(state);
    }

    // Each millia level contributes three digits, highest level first.
    const paddedMillias = state.millias.map((millia) => millia.toString().padStart(3, '0'));
    paddedMillias.reverse();
    const illionIndex = BigInt(paddedMillias.join(''));
    return illionIndex * BigInt(2) + thousandOffset;
  };
  /**
   * Mode of the group parser: records which kind of token was handled last, so the next
   * handler knows whether to extend the current group or start a new one.
   */
  enum ParseGroupsMode {
    /**
     * Initial mode; no token has been handled yet.
     */
    INITIAL = 'initial',
    /**
     * Has parsed a ones name.
     */
    ONES_MODE = 'ones',
    /**
     * Has parsed a tens name.
     */
    TENS_MODE = 'tens',
    /**
     * Has parsed a ten-plus-ones name.
     */
    TEN_PLUS_ONES_MODE = 'tenPlusOnes',
    /**
     * Has parsed a "hundred" token.
     */
    HUNDRED_MODE = 'hundred',
    /**
     * Has parsed a "thousand" or any "-illion" token.
     */
    THOUSAND_MODE = 'thousand',
    /**
     * Done parsing; set by the final-token handler.
     */
    DONE = 'done',
  }
  /**
   * State of the group parser, carried from token to token.
   */
  interface ParserState {
    /** The token most recently recorded by a handler; `undefined` before the first one. */
    lastToken?: string;
    /** The groups built so far, in the order they were encountered. */
    groups: Group[];
    /** What kind of token was handled last. */
    mode: ParseGroupsMode;
  }
  /**
   * Handles a "thousand" or "-illion" token: flushes a pending ones or tens name into the
   * current group and stamps the group with the place the token names.
   * @param acc - The current state of the parser.
   * @param token - The group name token.
   * @returns ParserState The next state, in thousand mode.
   */
  const parseThousand = (acc: ParserState, token: string): ParserState => {
    // Work on the current group, or on a fresh empty one when there is none yet.
    const currentGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];
    const currentDigits = currentGroup[GROUP_DIGITS_INDEX];

    switch (acc.mode) {
      case ParseGroupsMode.ONES_MODE: {
        const onesDigit = ONES.findIndex((name) => name === acc.lastToken);
        if (onesDigit > -1) {
          currentGroup[GROUP_DIGITS_INDEX] = `${currentDigits.slice(0, 2)}${onesDigit}`;
        }
        break;
      }
      case ParseGroupsMode.TENS_MODE: {
        const tensDigit = TENS.findIndex((name) => name === acc.lastToken);
        if (tensDigit > -1) {
          currentGroup[GROUP_DIGITS_INDEX] = (
            `${currentDigits.slice(0, 1)}${tensDigit}${currentDigits.slice(2)}`
          );
        }
        break;
      }
      default:
        break;
    }

    // Put the digits in the right place.
    currentGroup[GROUP_PLACE_INDEX] = getGroupPlaceFromGroupName(token);

    const earlierGroups = acc.groups.slice(0, -1);
    return {
      ...acc,
      groups: [...earlierGroups, currentGroup],
      lastToken: token,
      mode: ParseGroupsMode.THOUSAND_MODE,
    };
  };
  /**
   * Handles a "hundred" token: the ones name recorded just before it becomes the hundreds
   * digit of the current group.
   * @param acc - The current state of the parser; its last token is looked up in the ones
   * names.
   * @returns ParserState The next state, in hundred mode.
   */
  const parseHundred = (acc: ParserState): ParserState => {
    const currentGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];
    const hundredsDigit = ONES.findIndex((name) => name === acc.lastToken);
    const lowerDigits = currentGroup[GROUP_DIGITS_INDEX].slice(1);
    currentGroup[GROUP_DIGITS_INDEX] = `${hundredsDigit}${lowerDigits}`;

    const earlierGroups = acc.groups.slice(0, -1);
    return {
      ...acc,
      groups: [...earlierGroups, currentGroup],
      mode: ParseGroupsMode.HUNDRED_MODE,
    };
  };
  /**
   * Handles the final (empty) token: flushes a pending ones or tens name into the last group
   * and pins that group to place 0.
   *
   * A trailing group that was never followed by a "thousand" or "-illion" token is assumed to
   * be the smallest one. This applies to every mode that leaves such a group open (ones, tens,
   * ten-plus-ones, hundred). In particular, a ten-plus-ones or tens group created right after
   * an "-illion" was provisionally placed one below it and must be moved down to place 0.
   * @param acc - The current state of the parser.
   * @returns ParserState The finished state, or `acc` unchanged when there is no open trailing
   * group (initial, thousand and done modes).
   */
  const parseFinal = (acc: ParserState): ParserState => {
    const hasOpenTrailingGroup = (
      acc.mode === ParseGroupsMode.ONES_MODE
      || acc.mode === ParseGroupsMode.TENS_MODE
      || acc.mode === ParseGroupsMode.TEN_PLUS_ONES_MODE
      || acc.mode === ParseGroupsMode.HUNDRED_MODE
    );
    if (!hasOpenTrailingGroup) {
      return acc;
    }

    const lastGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];
    const lastDigits = lastGroup[GROUP_DIGITS_INDEX];

    if (acc.mode === ParseGroupsMode.ONES_MODE) {
      // The ones name is only recorded as the last token; write its digit now.
      const ones = ONES.findIndex((o) => o === acc.lastToken);
      if (ones > -1) {
        lastGroup[GROUP_DIGITS_INDEX] = `${lastDigits.slice(0, 2)}${ones}`;
      }
    } else if (acc.mode === ParseGroupsMode.TENS_MODE) {
      const tens = TENS.findIndex((t) => t === acc.lastToken);
      if (tens > -1) {
        lastGroup[GROUP_DIGITS_INDEX] = `${lastDigits.slice(0, 1)}${tens}${lastDigits.slice(2)}`;
      }
    }

    // We assume last token without parsed place will always be the smallest
    lastGroup[GROUP_PLACE_INDEX] = BigInt(0);
    return {
      ...acc,
      groups: [...acc.groups.slice(0, -1), lastGroup],
      mode: ParseGroupsMode.DONE,
    };
  };
  /**
   * Handles a ones name. The digit itself is written later, by whichever handler sees the next
   * token; here the name is only recorded. Right after a "thousand" or "-illion" token a new
   * empty group is opened for it.
   * @param acc - The current state of the parser.
   * @param token - The ones name.
   * @returns ParserState The next state, in ones mode.
   */
  const parseOnes = (acc: ParserState, token: string): ParserState => {
    // Create next empty place
    const groups: Group[] = acc.mode === ParseGroupsMode.THOUSAND_MODE
      ? [...acc.groups, [...EMPTY_PLACE]]
      : acc.groups;

    return {
      ...acc,
      lastToken: token,
      mode: ParseGroupsMode.ONES_MODE,
      groups,
    };
  };
  /**
   * Handles a ten-plus-ones name ("ten" through "nineteen"), which fills both the tens and the
   * ones digit at once. Right after a "thousand" or "-illion" token a new group is opened one
   * place below the previous group; otherwise the current group is updated.
   * @param acc - The current state of the parser.
   * @param token - The ten-plus-ones name.
   * @returns ParserState The next state, in ten-plus-ones mode.
   */
  const parseTenPlusOnes = (acc: ParserState, token: string): ParserState => {
    const onesDigit = TEN_PLUS_ONES.findIndex((name) => name === token);
    const previousGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];

    let groups: Group[];
    if (acc.mode === ParseGroupsMode.THOUSAND_MODE) {
      const nextPlace = previousGroup[GROUP_PLACE_INDEX] - BigInt(1);
      groups = [...acc.groups, [`01${onesDigit}`, nextPlace]];
    } else {
      // Keep the hundreds digit, overwrite the other two.
      const hundredsDigit = previousGroup[GROUP_DIGITS_INDEX].slice(0, 1);
      previousGroup[GROUP_DIGITS_INDEX] = `${hundredsDigit}1${onesDigit}`;
      groups = [...acc.groups.slice(0, -1), previousGroup];
    }

    return {
      ...acc,
      lastToken: token,
      mode: ParseGroupsMode.TEN_PLUS_ONES_MODE,
      groups,
    };
  };
  /**
   * Handles a tens name ("twenty", "thirty", etc.), which fills the tens digit. Right after a
   * "thousand" or "-illion" token a new group is opened one place below the previous group;
   * otherwise the current group is updated.
   * @param acc - The current state of the parser.
   * @param token - The tens name.
   * @returns ParserState The next state, in tens mode.
   */
  const parseTens = (acc: ParserState, token: string): ParserState => {
    const tensDigit = TENS.findIndex((name) => name === token);
    const previousGroup = acc.groups.at(-1) ?? [...EMPTY_PLACE];

    let groups: Group[];
    if (acc.mode === ParseGroupsMode.THOUSAND_MODE) {
      const nextPlace = previousGroup[GROUP_PLACE_INDEX] - BigInt(1);
      groups = [...acc.groups, [`0${tensDigit}0`, nextPlace]];
    } else {
      // Replace only the middle digit.
      const previousDigits = previousGroup[GROUP_DIGITS_INDEX];
      previousGroup[GROUP_DIGITS_INDEX] = (
        `${previousDigits.slice(0, 1)}${tensDigit}${previousDigits.slice(2)}`
      );
      groups = [...acc.groups.slice(0, -1), previousGroup];
    }

    return {
      ...acc,
      lastToken: token,
      mode: ParseGroupsMode.TENS_MODE,
      groups,
    };
  };
  /**
   * Turns a token list into digit groups by running each token through the matching handler.
   *
   * Tokens that match no handler leave the groups and the mode untouched and are only
   * recorded as the last token.
   * @param tokens - The tokens to parse groups from.
   * @see {NumberNameSystem.stringifyGroups}
   * @returns Group[] The parsed groups.
   */
  export const parseGroups = (tokens: string[]) => {
    let state: ParserState = {
      lastToken: undefined,
      groups: [],
      mode: ParseGroupsMode.INITIAL,
    };

    // We add a final token which is an empty string to parse whatever the last non-empty token is.
    for (const token of [...tokens, FINAL_TOKEN]) {
      if (token === THOUSAND || token.endsWith(ILLION_SUFFIX)) {
        state = parseThousand(state, token);
      } else if (token === HUNDRED && state.mode === ParseGroupsMode.ONES_MODE) {
        state = parseHundred(state);
      } else if (token === FINAL_TOKEN) {
        state = parseFinal(state);
      } else if (ONES.includes(token as OnesName)) {
        state = parseOnes(state, token);
      } else if (TEN_PLUS_ONES.includes(token as TenPlusOnesName)) {
        state = parseTenPlusOnes(state, token);
      } else if (TENS.includes(token as TensName)) {
        state = parseTens(state, token);
      } else {
        state = { ...state, lastToken: token };
      }
    }

    return state.groups;
  };
  /**
   * Combines groups into a string in exponential form.
   *
   * Every place between the highest and the lowest group is written out, with missing places
   * filled by empty group digits. When several groups share a place, the first one wins.
   *
   * The exponent is computed from the position of the first non-zero digit of the whole digit
   * sequence, so a highest group that contains only zeros does not inflate it. A value of zero
   * is rendered with a zero exponent.
   *
   * Note that the digit sequence spans the full range of places, so a very large distance
   * between the highest and lowest place produces a correspondingly long string.
   * @param groups - The groups to combine. Group digits are assumed to be three characters.
   * @see {NumberNameSystem.splitIntoGroups}
   * @returns string The combined groups in exponential form, or an empty string when there are
   * no groups.
   */
  export const combineGroups = (groups: Group[]) => {
    if (groups.length < 1) {
      return '';
    }

    const places = groups.map((g) => g[GROUP_PLACE_INDEX]);
    const maxPlace = bigIntMax(...places) as bigint;
    const minPlace = bigIntMin(...places) as bigint;

    // Index the groups by place once instead of searching the list for every place.
    const groupsByPlace = new Map<bigint, Group>();
    groups.forEach((g) => {
      const place = g[GROUP_PLACE_INDEX];
      if (!groupsByPlace.has(place)) {
        groupsByPlace.set(place, g);
      }
    });

    let allDigits = '';
    for (let place = maxPlace; place >= minPlace; place -= BigInt(1)) {
      allDigits += groupsByPlace.get(place)?.[GROUP_DIGITS_INDEX] ?? EMPTY_GROUP_DIGITS;
    }

    const digits = allDigits.replace(/^0+/, '');
    if (digits.length < 1) {
      // Nothing but zeros.
      return `0${EXPONENT_DELIMITER}${POSITIVE_SYMBOL}0`;
    }

    // The first character of the sequence is the hundreds digit of the highest place.
    const leadingZeroCount = allDigits.length - digits.length;
    const exponentValue = (maxPlace * BigInt(3)) + BigInt(2) - BigInt(leadingZeroCount);
    const isExponentNegative = exponentValue < BigInt(0);
    const exponentValueAbs = isExponentNegative ? -exponentValue : exponentValue;
    const exponentSign = isExponentNegative ? NEGATIVE_SYMBOL : POSITIVE_SYMBOL;
    const exponent = `${exponentSign}${exponentValueAbs}`;

    const significandInteger = digits.slice(0, 1);
    const significandFraction = digits.slice(1).replace(/0+$/, '');
    if (significandFraction.length > 0) {
      return `${significandInteger}${DECIMAL_POINT}${significandFraction}${EXPONENT_DELIMITER}${exponent}`;
    }
    return `${significandInteger}${EXPONENT_DELIMITER}${exponent}`;
  };