import { RewriteSection } from "Article/ArticleRewriteApi";

/**
 * Start and end offsets of a selection, expressed as indices into a
 * section's body text.
 */
export type SelectionIndices = [number, number];

// Matches the tag names of HTML heading elements. HTML only defines
// H1 through H6, so "H0" is deliberately not treated as a heading.
const HEADINGS_TAG_PATTERN = /^H[1-6]$/;
/** "Start of text" control character, used to mark where a selection begins. */
export const STX = "\u0002";
/** "End of text" control character, used to mark where a selection ends. */
export const ETX = "\u0003";
// Matches every selection marker (STX or ETX) in a string.
// ESLint reasons that a regex containing an invisible character
// is probably a mistake, but this is one of those rare situations where
// it is not a mistake.
// eslint-disable-next-line no-control-regex
const CONTROL_CHARS_PATTERN = /[\u0002\u0003]/g;

/**
 * Parse a given article into a list of sections, including selection
 * indices for any sections with selection markers (STX, ETX).
 *
 * Every top-level heading element starts a new section whose title is the
 * heading's text. All other top-level nodes are converted to plain text and
 * appended to the body of the current section. Content that appears before
 * the first heading becomes a section without a title. A heading at the very
 * end of the article with nothing after it produces no section.
 *
 * @param articleHtml - HTML markup of the article. Selected text, if any, is
 *   expected to be wrapped in a single STX ... ETX marker pair.
 * @returns The sections in document order. `body` never contains selection
 *   markers. `selection` is `[start, end]` measured in the marker-free body
 *   (start inclusive, end exclusive), or `undefined` when the section does
 *   not contain an STX followed by an ETX.
 */
export function parseArticleSections(
  articleHtml: string
): readonly RewriteSection[] {
  // Parse the article, build up the section title and text
  const parser = new DOMParser();
  const dom = parser.parseFromString(articleHtml, "text/html");
  const documentBody = dom.getElementsByTagName("BODY")[0];

  if (!documentBody) {
    return [];
  }

  const { childNodes } = documentBody;

  // Stores all sections for the article
  const sections: RewriteSection[] = [];

  // As we loop through the elements in the article,
  // we use these to track the current section
  let sectionTitle = "";
  let sectionBody = "";

  // Function to add current section to sections array
  const addSection = () => {
    const normalizedBody = normalizeTextSpacing(sectionBody);
    const selectionStart = normalizedBody.indexOf(STX);
    const etxIndex = normalizedBody.indexOf(ETX);

    // Only a well-formed pair (STX strictly before ETX) yields a selection.
    // When either marker is missing, or they appear in reverse order, the
    // comparison fails and the section gets no selection.
    const selection: SelectionIndices | undefined =
      selectionStart !== -1 && selectionStart < etxIndex
        ? [
            selectionStart,

            // -1 because the STX char in front of it is removed below
            etxIndex - 1,
          ]
        : undefined;

    sections.push({
      // Do not include an empty section title
      title: sectionTitle || undefined,
      body: removeSelectionMarkers(normalizedBody),
      selection,
    });
  };

  for (let i = 0; i < childNodes.length; i++) {
    const child = childNodes[i];

    if (!child) {
      continue;
    }
    const { textContent } = child;

    if (
      child instanceof HTMLElement &&
      HEADINGS_TAG_PATTERN.test(child.tagName)
    ) {
      // We have encountered a new section, so push the previous section,
      // if it exists, onto the sections array. Text that precedes the first
      // heading has no title but must still be kept, so a non-blank body
      // alone is enough to count as an existing section.
      if (sectionTitle || sectionBody.trim()) {
        addSection();
      }

      // Now reset the current section
      sectionTitle = textContent || "";
      sectionBody = "";
    } else if (textContent) {
      sectionBody += elementToText(child);
    }
  }

  // If there is remaining text, add it to the sections array
  if (sectionBody) {
    addSection();
  }

  return sections;
}

/**
 * Trims all leading and trailing whitespace from string.
 * Within the string, combinations of non-newline whitespace with newline
 * are replaced with just newlines and every run of more than 2 newlines
 * is collapsed to 2 newlines.
 *
 * @param text - The text to normalize.
 * @returns The normalized text.
 */
function normalizeTextSpacing(text: string): string {
  return (
    text
      // Remove leading and trailing whitespace
      .trim()

      // This changes all combinations of non-newline whitespace with newline
      // into just a newline. For example:
      // "\r\n" -> "\n"
      // " \n" -> "\n"
      // "\t\n" -> "\n"
      // However "\n\n" would remain unchanged.
      .replace(/[^\S\n]*\n[^\S\n]*/g, "\n")

      // Collapses every run of more than 2 newline characters into just
      // 2 newlines. The `g` flag matters here: without it only the first
      // such run in the text would be collapsed.
      .replace(/\n{3,}/g, "\n\n")
  );
}

/**
 * Strips every selection marker (the STX and ETX control characters)
 * from the given text.
 *
 * @param text - Text that may contain selection markers.
 * @returns A copy of the text with all markers removed.
 */
export function removeSelectionMarkers(text: string) {
  const withoutMarkers = text.replace(CONTROL_CHARS_PATTERN, "");
  return withoutMarkers;
}

/**
 * Converts a single DOM node into plain text.
 *
 * - Text nodes contribute their text as-is.
 * - `<p>` becomes its text followed by a blank line.
 * - `<ul>` / `<ol>` render each `<li>` child on its own line, prefixed with
 *   "- " or with a running "1. ", "2. ", ... counter respectively. Any other
 *   child of the list contributes its raw text content.
 * - Any other HTML element becomes its text with `<br>` turned into newlines.
 * - Every other kind of node yields an empty string.
 */
function elementToText(node: Node): string {
  if (node instanceof Text) {
    return `${node.textContent}`;
  }

  if (!(node instanceof HTMLElement)) {
    return "";
  }

  // Shared renderer for both list kinds; `marker` builds the prefix for the
  // n-th list item (1-based, counting only LI children).
  const listToText = (marker: (position: number) => string): string => {
    let rendered = "";
    let position = 0;

    eachChildNode(node, (item) => {
      if (item instanceof HTMLElement && item.tagName === "LI") {
        position += 1;
        rendered += `${marker(position)}${br2nl(item)}\n`;
      } else {
        rendered += item.textContent;
      }
    });

    return rendered;
  };

  switch (node.tagName) {
    case "P":
      return `${br2nl(node)}\n\n`;
    case "UL":
      return listToText(() => "- ");
    case "OL":
      return listToText((position) => `${position}. `);
    default:
      return `${br2nl(node)}`;
  }
}

/**
 * Returns the text of `element` with every `<br>` rendered as a newline.
 *
 * The whole subtree is walked, so line breaks nested inside inline
 * formatting (e.g. `<p><b>one<br>two</b></p>`) are converted as well.
 * Only text nodes contribute characters; other non-element nodes such as
 * comments are skipped so their contents never end up in the result.
 *
 * @param element - The element whose text should be extracted.
 * @returns The element's text with `<br>` elements replaced by "\n".
 */
function br2nl(element: Element): string {
  let text = "";

  eachChildNode(element, (child) => {
    if (child instanceof HTMLElement && child.tagName === "BR") {
      text += "\n";
    } else if (child instanceof Element) {
      // Recurse instead of reading textContent, which would silently drop
      // any <br> nested inside this child.
      text += br2nl(child);
    } else if (child instanceof Text) {
      text += child.data;
    }
  });

  return text;
}

/**
 * Calls `func` once for each child node of `element`, in document order.
 *
 * The callback receives the child and its position in `childNodes`.
 * Positions that hold no node are skipped.
 */
function eachChildNode(
  element: Node,
  func: (child: Node, index: number) => void
) {
  let position = 0;

  while (position < element.childNodes.length) {
    const current = element.childNodes[position];
    if (current) {
      func(current, position);
    }
    position += 1;
  }
}

/**
 * Renders an article's HTML as one plain-text string.
 *
 * Top-level headings are emitted as their trimmed text, followed by a blank
 * line for H1-H3 and by a single newline for any other heading. Every other
 * top-level node that contains non-whitespace text is converted to text and
 * appended with its leading whitespace removed.
 *
 * @param articleHtml - HTML markup of the article.
 * @returns The article as plain text, or "" when the parsed document has no
 *   body element.
 */
export function formatArticleAsText(articleHtml: string): string {
  const parsed = new DOMParser().parseFromString(articleHtml, "text/html");
  const body = parsed.getElementsByTagName("BODY")[0];

  if (!body) {
    return "";
  }

  const pieces: string[] = [];

  for (const node of Array.from(body.childNodes)) {
    const content = node.textContent;

    if (node instanceof HTMLElement && HEADINGS_TAG_PATTERN.test(node.tagName)) {
      // Major headings get a blank line after them, minor ones a line break
      const separator = /^H[1-3]$/.test(node.tagName) ? "\n\n" : "\n";
      pieces.push(`${(content || "").trim()}${separator}`);
      continue;
    }

    // Skip nodes that are empty or consist of whitespace only
    if (!content || /^\s+$/.test(content)) {
      continue;
    }

    pieces.push(elementToText(node).trimStart());
  }

  return pieces.join("");
}
