import { fromMarkdown } from 'mdast-util-from-markdown'
import { NodeHtmlMarkdown } from 'node-html-markdown'
import { DraftFragment } from './types'
import { toMarkdown } from 'mdast-util-to-markdown'
import { micromark } from 'micromark'
import { encode } from 'gpt-tokenizer'

/**
 * Renders a Markdown string to HTML.
 *
 * Thin wrapper: the input is handed to `micromark` and its result is returned
 * unchanged.
 *
 * @param md - Markdown source text.
 * @returns The value `micromark` produces for `md`.
 */
export const toHTML = (md: string) => micromark(md)

/**
 * Tells whether a piece of extracted text is worth keeping.
 *
 * Content is rejected when it is missing, empty, consists solely of
 * whitespace, or is the literal (still-encoded) `&nbsp;` entity.
 *
 * @param content - Text to inspect; may be `undefined`.
 * @returns `true` only when `content` holds something other than blank space.
 */
const isValidContent = (content?: string): boolean => {
  if (!content) return false

  // trim() also strips U+00A0 (no-break space). That matters because a Markdown
  // parser normally hands `&nbsp;` over already decoded to that character, so
  // comparing against the entity text alone would let NBSP-only content through.
  const trimmed = content.trim()
  return trimmed.length > 0 && trimmed !== '&nbsp;'
}

/**
 * Collects the plain text carried by an element's descendants.
 *
 * The element's children are walked depth-first and every `value` found is
 * concatenated in document order. Walking the whole subtree (rather than only
 * the direct children) keeps text that sits inside nested nodes: a child that
 * has `children` but no `value` of its own would otherwise contribute nothing,
 * making e.g. a link whose label is wrapped in another node look empty.
 *
 * @param element - Node whose descendant text should be gathered. The
 *   element's own `value` is not included.
 * @returns The concatenated text, or `''` when there are no children or the
 *   text does not pass `isValidContent`.
 */
const takeValue = (element: Elem): string => {
  // Text of a single node: its own value followed by that of its subtree.
  const collect = (node: Elem): string =>
    (node.value ?? '') + (node.children ?? []).map(collect).join('')

  const content = (element.children ?? []).map(collect).join('')
  return isValidContent(content) ? content : ''
}

/**
 * Minimal structural view of a syntax-tree node, limited to the fields this
 * module reads or writes.
 */
type Elem = {
  /** Node kind; this module checks for `'link'` and `'heading'`. */
  type: string
  /** Level of the node; compared against a depth limit for `'heading'` nodes. */
  depth?: number
  /** Literal text carried by the node, when it has any. */
  value?: string
  /** Nested nodes; set to `null` when filtering leaves none. */
  children?: Array<Elem> | null
}

/**
 * Prunes links without usable text from an element tree.
 *
 * The tree is modified in place: every `children` array that is visited gets
 * replaced by the list of surviving children, or by `null` when none survive.
 *
 * @param element - Root of the subtree to clean up.
 * @returns The same `element` object, or `null` when `element` itself is a
 *   link for which `takeValue` yields no text.
 */
const filterElement = (element: Elem): Elem | null => {
  // A link whose text comes back empty is discarded outright.
  if (element.type === 'link' && !takeValue(element)) {
    return null
  }

  const { children } = element
  if (!children) {
    return element
  }

  // Recurse into each child and keep only the ones that were not discarded.
  const kept: Elem[] = []
  for (const child of children) {
    const survivor = filterElement(child)
    if (survivor !== null) {
      kept.push(survivor)
    }
  }
  element.children = kept.length > 0 ? kept : null

  return element
}

/**
 * Checks whether an element is a heading at or above a given level.
 *
 * @param element - Node to test.
 * @param depth - Deepest heading level that still counts.
 * @returns `true` when `element.type` is `'heading'` and its `depth` is set,
 *   non-zero and not greater than `depth`; `false` otherwise.
 */
const isHeading = (element: Elem, depth: number): boolean => {
  const level = element.depth

  // Non-headings and headings without a usable level never qualify.
  if (element.type !== 'heading' || !level) {
    return false
  }

  return level <= depth
}


// Size budget for a single fragment: output that countWords measures at this
// value or above is broken down further by breakdownFragment.
const MAX_TOKENS = 2000
/**
 * Measures the size of a text for comparison against `MAX_TOKENS`.
 *
 * Despite the name, the figure returned is the number of entries that
 * `encode` (from gpt-tokenizer) yields for `text`, i.e. a token count.
 * NOTE(review): an earlier comment described this as splitting "by words in
 * ASCII, characters in Unicode" — treat that as a rough intuition only.
 *
 * @param text - Text to measure.
 * @returns Length of the encoded sequence.
 */
const countWords = (text: string): number => encode(text).length

/**
 * Groups a flat list of nodes into heading-delimited fragments.
 *
 * Each node that `isHeading` accepts for `depth` opens a new fragment whose
 * `input` is the heading text. Every other node is cleaned with
 * `filterElement`, serialized back to Markdown and appended (preceded by a
 * newline) to the `output` of the most recent fragment.
 *
 * @param elements - Nodes to group, in document order.
 * @param depth - Deepest heading level that starts a new fragment.
 * @returns The fragments in order. The first one always has an empty `input`
 *   and holds whatever precedes the first qualifying heading; fragments with
 *   empty `output` are NOT removed here.
 */
const processElements = (elements: Elem[], depth: number): DraftFragment[] => {
  // Seed with an untitled fragment so leading content has somewhere to go.
  const fragments: DraftFragment[] = [{ input: '', output: '', isChecked: true }]

  for (const element of elements) {
    if (isHeading(element, depth)) {
      fragments.push({ input: takeValue(element), output: '', isChecked: true })
      continue
    }

    // Serialize the filtered node rather than relying on filterElement having
    // mutated `element`; a node that was filtered out entirely is skipped.
    const filtered = filterElement(element)
    if (filtered === null) continue

    // @ts-ignore Elem is a loose structural view of the node type toMarkdown expects
    const value = toMarkdown(filtered, { bullet: '-' })
    const lastSection = fragments[fragments.length - 1]
    if (lastSection) {
      lastSection.output += '\n' + value
    }
  }

  // Empty fragments are kept on purpose; callers decide what to drop.
  return fragments
}

/**
 * Splits a fragment into smaller ones when its output is too large.
 *
 * A fragment whose output measures below `MAX_TOKENS` is returned untouched.
 * Otherwise it is rebuilt as a Markdown document (its `input` as a level-1
 * heading followed by its `output`), re-parsed, and regrouped with
 * `processElements` using `depth` as the heading limit. Any resulting piece
 * that is still too large is split again with `depth + 1`, as long as `depth`
 * is below 6.
 *
 * @param fragment - Fragment to examine.
 * @param depth - Heading level limit to use for this round of splitting.
 * @returns One or more fragments, in document order.
 */
const breakdownFragment = (fragment: DraftFragment, depth: number): DraftFragment[] => {
  // Small enough already: nothing to do.
  if (countWords(fragment.output) < MAX_TOKENS) {
    return [fragment]
  }

  // Re-parse the fragment under its own title and regroup by heading.
  const reparsed = fromMarkdown(`# ${fragment.input}\n${fragment.output}`)
  const pieces = processElements(reparsed.children, depth)

  // Pieces that remain oversized are split one heading level deeper.
  return pieces.reduce<DraftFragment[]>((collected, piece) => {
    const stillTooLong = countWords(piece.output) >= MAX_TOKENS
    if (stillTooLong && depth < 6) {
      collected.push(...breakdownFragment(piece, depth + 1))
    } else {
      collected.push(piece)
    }
    return collected
  }, [])
}

/**
 * Converts a Markdown document into a list of fragments.
 *
 * The document is parsed and grouped by headings up to level 2; each group is
 * then passed to `breakdownFragment` (starting at depth 3) so that oversized
 * groups get split further. Fragments with an empty `output` are dropped from
 * the final list.
 *
 * @param md - Markdown source text.
 * @returns The non-empty fragments, in document order.
 */
export const mdToFragments = (md: string): DraftFragment[] => {
  // First pass: split at headings up to `##`.
  const sections = processElements(fromMarkdown(md).children, 2)

  // Second pass: break down any section that is too long.
  const fragments: DraftFragment[] = []
  sections.forEach((section) => {
    fragments.push(...breakdownFragment(section, 3))
  })

  return fragments.filter((fragment) => fragment.output.length > 0)
}

/**
 * Converts an HTML string into fragments.
 *
 * The HTML is first translated to Markdown with `NodeHtmlMarkdown.translate`,
 * and the result is handed to `mdToFragments`.
 *
 * @param html - HTML source text.
 * @returns Whatever `mdToFragments` returns for the translated Markdown.
 */
export const htmlToStructure = (html: string) =>
  mdToFragments(NodeHtmlMarkdown.translate(html))

