import { String } from "runtypes";

import { PYTHON_ID_CONTINUE, PYTHON_ID_START } from "./generated";

// Identifier grammar reference:
// https://docs.python.org/3.10/reference/lexical_analysis.html#identifiers
//
// Validation runs in two passes so that each kind of failure can be reported
// with its own message: the whole name is matched against `id_continue+`
// first, and only afterwards is its first character checked against
// `id_start`.

// Compiles `pattern` with the `u` flag, so both patterns below are matched
// per Unicode code point rather than per UTF-16 code unit.
const unicodeRegex = (pattern) => new RegExp(pattern, "u");

// Matches a name made up exclusively of `id_continue` characters.
const ID_CHARS_REGEX = unicodeRegex(`^${PYTHON_ID_CONTINUE}+$`);

// Turns the first `[` of the `id_start` pattern into `[^`.
// NOTE(review): assumes PYTHON_ID_START is a single bracketed character class,
// so this yields its complement — confirm against ./generated.
const BAD_START_CLASS = PYTHON_ID_START.replace("[", "[^");

// Captures the leading character when it is matched by the negated class, so
// the caller can quote it in the error message.
const BAD_START_REGEX = unicodeRegex(`^(${BAD_START_CLASS})`);

// Reserved words that can never be used as an identifier. Mirrors
// `keyword.kwlist` in Python 3.10:
// https://docs.python.org/3.10/reference/lexical_analysis.html#keywords
//
// The words are kept in one whitespace-separated listing (grouped by first
// letter) and split into individual set members.
const KEYWORDS = new Set(
  `
  False None True
  and as assert async await
  break
  class continue
  def del
  elif else except
  finally for from
  global
  if import in is
  lambda
  nonlocal not
  or
  pass
  raise return
  try
  while with
  yield
  `
    .trim()
    .split(/\s+/u),
);

// Branded string runtype for a name that is usable as a Python identifier.
//
// The constraint callback receives the candidate string and returns `true`
// when it is acceptable, or a human-readable message describing the first
// problem found. Checks run in this order:
//   1. the name is not empty,
//   2. the name is not a reserved keyword,
//   3. every character is allowed inside an identifier,
//   4. the first character is allowed at the start of an identifier.
// All checks operate on the NFKC-normalized name, while messages quote the
// name exactly as it was supplied.
export const PythonIdentifier = String.withBrand(
  "PythonIdentifier",
).withConstraint(
  (name) => {
    // https://docs.python.org/3.10/reference/lexical_analysis.html#identifiers
    // > All identifiers are converted into the normal form NFKC while parsing;
    // > comparison of identifiers is based on NFKC.
    const normalized = name.normalize("NFKC");

    // Without this guard an empty name fails the `id_continue+` check below
    // and is reported as containing invalid characters, which is misleading
    // for a string that has no characters at all.
    if (normalized.length === 0) {
      return "Python identifier cannot be empty";
    }

    if (KEYWORDS.has(normalized)) {
      return `'${name}' is a reserved Python keyword`;
    }

    if (!ID_CHARS_REGEX.test(normalized)) {
      return `'${name}' contains invalid characters for a Python identifier`;
    }

    // Every character is a valid continuation character at this point, so the
    // only remaining failure is a first character that cannot start a name
    // (the captured group holds that offending character).
    const badStart = BAD_START_REGEX.exec(normalized);
    if (badStart) {
      return `'${name}' cannot start with character ${badStart[1]}`;
    }

    return true;
  },
  { name: "PythonIdentifier" },
);
