import { String } from "runtypes";

// https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Identifiers-1
// > Identifiers consist of a sequence of letters, digits, the period (‘.’) and
// > the underscore. They must not start with a digit or an underscore, or with
// > a period followed by a digit.
// Note: technically the definition of "letter" is locale-dependent, but we
// limit it to ASCII letters for simplicity.

// Whole-string match for the characters allowed anywhere in an identifier:
// ASCII letters, digits, the period and the underscore. The class is spelled
// out instead of using `\w` (which already contains `_`, making a separate `_`
// entry redundant) so that it reads exactly like the rule quoted above.
// Restrictions on how a name may *start* are enforced separately by the
// validator; this pattern only rejects empty names and foreign characters.
const ID_CHARS_REGEX = /^[A-Za-z0-9._]+$/;

// Words that R reserves and that therefore can never be used as identifiers.
// Source: https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Reserved-words-1
// Lookups are exact and case-sensitive (`TRUE` is listed, `true` is not).
const KEYWORDS = new Set(
  [
    // Control flow and function definition.
    ["if", "else", "repeat", "while", "function", "for", "in", "next", "break"],
    // Logical, null and special numeric constants.
    ["TRUE", "FALSE", "NULL", "Inf", "NaN"],
    // Missing-value constants: the generic one plus the typed variants.
    ["NA", "NA_integer_", "NA_real_", "NA_complex_", "NA_character_"],
  ].flat(),
);
// The reserved-words list
// (https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Reserved-words-1)
// also includes `... ..1 ..2 etc.`. Those names form an open-ended family, so
// they are matched by pattern rather than listed: two leading periods followed
// by either a third period or one or more digits, with nothing else before or
// after.
const RESERVED_REGEX = /^\.\.(\.|\d+)$/;

/**
 * Branded string runtype for names that are syntactically valid R identifiers.
 *
 * The constraint callback returns `true` when the name is acceptable, or a
 * string describing the first rule the name breaks. The rules are tried in a
 * fixed order, so a name that violates several of them reports only the
 * earliest one.
 */
export const RIdentifier = String.withBrand("RIdentifier").withConstraint(
  (name) => {
    // Exact reserved words such as `if`, `TRUE` or `NA_real_`.
    if (KEYWORDS.has(name)) {
      return `'${name}' is a reserved word in R`;
    }

    // Empty names and names containing any character outside the allowed set.
    if (!ID_CHARS_REGEX.test(name)) {
      return "R identifiers must consist of letters, digits, periods, and underscores.";
    }

    // `...` and the `..N` family. This must run before the start-of-name
    // checks below; otherwise these names would pass validation.
    if (RESERVED_REGEX.test(name)) {
      return "'...', '..1', '..2', etc. are reserved words in R";
    }

    // Start-of-name restrictions.
    if (/^\d/.test(name)) {
      return "R identifiers must not start with a digit";
    }
    if (name.startsWith("_")) {
      return "R identifiers must not start with an underscore";
    }
    if (/^\.\d/.test(name)) {
      return "R identifiers must not start with a period followed by a digit";
    }

    return true;
  },
  { name: "RIdentifier" },
);
