/*
 * Helper functions for validating Mailchimp embedded form HTML
 */
import cheerio from 'cheerio'

/*
 * The isASingletonTag function checks whether the provided tag needs no closing tag.
 *
 * A tag qualifies when it is written in self-closing form (it ends with "/>") or when
 * its name is one of the listed HTML void elements (img, input, br, hr, ...).
 * The tag name is extracted from the start of the tag and compared case-insensitively,
 * so it is recognised both with attributes ('<input type="email">') and without ('<br>').
 *
 * tag: the raw tag text, including the angle brackets.
 * Returns true when the tag is a singleton, false otherwise (closing tags and
 * text that does not start with "<name" are never singletons).
 */
const isASingletonTag = (tag: string): boolean => {
  const singletonTags = [
    'img',
    'input',
    'link',
    'meta',
    'area',
    'base',
    'br',
    'col',
    'command',
    'embed',
    'hr',
    'keygen',
    'param',
    'source',
    'track',
    'wbr',
  ]
  if (tag.endsWith('/>')) {
    return true
  }

  // Capture only the name, so a tag without attributes ("<img>") is not
  // mistaken for a different name because of its trailing ">".
  const nameMatch = /^<([\w-]+)/.exec(tag)
  const tagName = nameMatch ? nameMatch[1] : undefined
  if (tagName === undefined) {
    return false
  }
  return singletonTags.includes(tagName.toLowerCase())
}

/*
 * The isHtmlStructureComplete function uses a regular expression to find all HTML tags in the provided HTML.
 * It then iterates through the tags, maintaining a stack of the names of the currently open tags.
 * Singleton tags (as decided by isASingletonTag) are skipped. When a closing tag is encountered,
 * the top of the stack is popped and its name must be exactly equal to the closing tag's name
 * (compared case-insensitively), so "</divider>" does not close "<div>".
 *
 * html: the markup to check.
 * Returns false when no tags are found, when a closing tag does not match the most recent
 * open tag, or when tags are still open at the end; returns true otherwise.
 */

const isHtmlStructureComplete = (html: string): boolean => {
  const tagStack: string[] = []

  const regexTag = /<\/?[\w-]+( [^>]*?)?>/g
  // Extracts the bare tag name from either an opening or a closing tag.
  const regexName = /^<\/?([\w-]+)/
  const matches = html.match(regexTag)

  if (!matches) {
    // If there are no tags found, the structure is incomplete
    return false
  }

  for (const tag of matches) {
    const nameMatch = regexName.exec(tag)
    // regexTag guarantees a name is present; the fallback only satisfies the type checker.
    const tagName = nameMatch && nameMatch[1] ? nameMatch[1].toLowerCase() : ''

    if (tag.startsWith('</')) {
      // A closing tag must match the most recently opened tag exactly.
      // pop() yields undefined on an empty stack, which never equals a name.
      if (tagStack.pop() !== tagName) {
        return false
      }
    } else if (!isASingletonTag(tag)) {
      // Only tags that expect a closing counterpart are tracked.
      tagStack.push(tagName)
    }
  }

  // If there are still tags in the stack, the structure is incomplete
  return tagStack.length === 0
}

/*
 * Verifies that the markup contains everything the embedded subscribe form needs:
 *   - a <form> element with id "mc-embedded-subscribe-form",
 *   - a non-blank "action" attribute on that form,
 *   - at least one <input type="email">,
 *   - at least one <input type="submit">.
 *
 * html: the markup to inspect (parsed with cheerio).
 * Returns true only when every one of the checks above passes.
 */
const isHtmlStructureCorrect = (html: string): boolean => {
  const $ = cheerio.load(html)

  // The subscribe form itself
  const subscribeForm = $('form#mc-embedded-subscribe-form')
  const hasForm = subscribeForm.length > 0

  // Its action attribute must be present and not just whitespace
  const action = subscribeForm.attr('action')
  const hasAction = typeof action === 'string' && action.trim() !== ''

  // Required inputs, looked up anywhere in the document
  const hasEmailInput = $('input[type="email"]').length > 0
  const hasSubmitInput = $('input[type="submit"]').length > 0

  return hasForm && hasAction && hasEmailInput && hasSubmitInput
}

/*
 * Extracts the value of the "action" attribute from the first <form> tag that has one.
 *
 * The attribute must be a standalone "action" (preceded by whitespace), so look-alike
 * attributes such as "data-action" are ignored, and the element name must be exactly
 * "form". Both double- and single-quoted values are accepted and matching is
 * case-insensitive.
 *
 * html: the markup to search.
 * Returns the attribute value with surrounding whitespace trimmed (HTML entities are
 * left undecoded), or null when no matching form/action is found.
 */
const extractFormAction = (html: string): string | null => {
  const formRegex =
    /<form\b[^>]*?\saction\s*=\s*(?:"([^"]+)"|'([^']+)')[^>]*>/i
  const matches = formRegex.exec(html)

  if (!matches) {
    return null
  }

  // Exactly one of the two capture groups is set, depending on the quote style.
  const value = matches[1] !== undefined ? matches[1] : matches[2]
  return value !== undefined ? value.trim() : null
}

/*
 * Performs all the validation checks on the HTML:
 *   1. the tag structure is complete (isHtmlStructureComplete),
 *   2. the required form elements are present (isHtmlStructureCorrect),
 *   3. the form action is an absolute URL whose host is list-manage.com
 *      or one of its subdomains.
 *
 * html: the embed markup to validate.
 * Returns true when every check passes. An action that is missing, or that cannot be
 * parsed as an absolute URL, makes the function return false rather than throw.
 */

export const validateHTML = (html: string): boolean => {
  /* Check HTML structure is complete */
  if (!isHtmlStructureComplete(html)) {
    return false
  }

  /* Check HTML structure is correct */
  if (!isHtmlStructureCorrect(html)) {
    return false
  }

  /* Check Mailchimp list-manage domain is in form action attribute */
  const formAction = extractFormAction(html)
  if (!formAction) {
    return false
  }

  /* Decode the URL because it contains HTML entities such as &amp; */
  const parser = new DOMParser()
  const decodedUrl = parser.parseFromString(formAction, 'text/html').body
    .textContent
  if (!decodedUrl) {
    return false
  }

  let hostname: string
  try {
    hostname = new URL(decodedUrl).hostname
  } catch {
    // new URL throws on relative or malformed input; treat that as invalid
    return false
  }

  // Require an exact match or a dot-delimited subdomain so that look-alike
  // hosts such as "evil-list-manage.com" are rejected.
  const mailchimpDomain = 'list-manage.com'
  return (
    hostname === mailchimpDomain || hostname.endsWith(`.${mailchimpDomain}`)
  )
}
