import { uniq } from 'lodash-es'
import { AnswerWithRelations, DataRoomDocument } from 'silta-ai-backend'

/** A single bracketed reference as found in the text, before enrichment. */
interface RegexMatch {
    /** The full matched text, including the surrounding square brackets. */
    originalText: string
    /** The part of the reference before the optional comma. */
    documentId: string
    /** The part after the comma; undefined when the reference has no comma. */
    sectionDescription?: string
}

/** One distinct source referenced by an answer. */
export interface Reference {
    /** 1-based position among the distinct document ids, by first appearance. */
    index: number
    /** The id exactly as used to look up the source material. */
    documentId: string
    /** Set when the source material is backed by a data room document. */
    document?: DataRoomDocument
    /** Set when the source material is a web reference. */
    url?: string
    /** Source material title, or 'Invalid reference' when no material matched. */
    title: string
}

/** An occurrence of a reference in the text, linked to its `Reference`. */
export interface ReferenceMatch extends RegexMatch {
    /** The reference that shares this match's `documentId`. */
    reference: Reference
}

/**
 * Extracts inline source references from an answer's text.
 *
 * References follow a particular syntax:
 *
 * "Here is some fact with a reference [some-document-id, Section 5]."
 *
 * The document id can refer to a DataRoomDocument or it can be a URL. Both
 * `answer.content` and `answer.outcomeDetails` are scanned. Every distinct
 * document id yields one `Reference`, numbered from 1 in order of first
 * appearance. Ids with no matching entry in `answer.sourceMaterials` are kept
 * and titled 'Invalid reference'.
 *
 * @param answer - Answer whose text and source materials are inspected.
 * @returns The distinct references, plus one match per occurrence in the
 * text, each linked to its reference. Both lists are empty when the answer
 * has no content.
 */
export function extractReferences(answer: AnswerWithRelations): {
    references: Reference[]
    matches: ReferenceMatch[]
} {
    // NOTE(review): references that appear only in `outcomeDetails` are
    // dropped when `content` is empty — confirm this is intended.
    if (!answer.content) {
        return {
            references: [],
            matches: [],
        }
    }

    // Fall back to '' so a missing `outcomeDetails` is not interpolated as
    // the literal text "null" or "undefined".
    const regexMatches = getRegexMatches(
        `${answer.content}\n\n${answer.outcomeDetails ?? ''}`
    )
    const uniqueDocumentIds = uniq(
        regexMatches.map((match) => match.documentId)
    )

    // Enrich document references
    const references = uniqueDocumentIds.map((documentId, i): Reference => {
        const index = i + 1
        // The id in the text may be either a document id or a URL.
        const sourceMaterial = answer.sourceMaterials.find(
            (material) =>
                material.documentId === documentId ||
                material.url === documentId
        )

        // DataRoomDocument reference
        if (sourceMaterial?.dataRoomDocument) {
            return {
                index,
                documentId,
                title: sourceMaterial.title,
                document: sourceMaterial.dataRoomDocument,
            }
        }

        // Web reference
        if (sourceMaterial?.url) {
            return {
                index,
                documentId,
                title: sourceMaterial.title,
                url: sourceMaterial.url,
            }
        }

        // Invalid reference
        return {
            index,
            documentId,
            title: 'Invalid reference',
        }
    })

    // Index references by id so each match is resolved with a single lookup
    // instead of a linear scan over `references`.
    const referenceByDocumentId = new Map(
        references.map((reference): [string, Reference] => [
            reference.documentId,
            reference,
        ])
    )

    // Enrich matches
    const matches = regexMatches.map((regexMatch): ReferenceMatch => {
        const reference = referenceByDocumentId.get(regexMatch.documentId)

        if (!reference) {
            // Cannot happen: every id in `regexMatches` has a reference built
            // above. Fail loudly rather than return a match without one.
            throw new Error(
                `No reference was built for document id "${regexMatch.documentId}"`
            )
        }

        return {
            documentId: regexMatch.documentId,
            originalText: regexMatch.originalText,
            sectionDescription: regexMatch.sectionDescription,
            reference,
        }
    })

    return {
        references,
        matches,
    }
}

/**
 * Finds every bracketed reference of the form `[documentId]` or
 * `[documentId, sectionDescription]` in `content`.
 *
 * Whitespace around the document id and the section description is trimmed,
 * so `[ some-id , Section 5 ]` yields the id `some-id`. Brackets whose id is
 * empty after trimming (e.g. `[ ]`) are not references and are skipped.
 *
 * @param content - Text to scan.
 * @returns One entry per reference, in the order they appear in `content`.
 * `originalText` is the untouched matched text, brackets included.
 */
function getRegexMatches(content: string): RegexMatch[] {
    // matches [documentId, optional sectionDescription]
    const referenceMatcherRegex = /\[([^,]+?)(?:,\s*(.*?))?\]/g

    const regexMatches: RegexMatch[] = []

    for (const match of content.matchAll(referenceMatcherRegex)) {
        const [originalText, rawDocumentId, rawSectionDescription] = match

        // Ids are later compared with strict equality, so padding inside the
        // brackets must not become part of the id.
        const documentId = rawDocumentId?.trim()

        if (!documentId) {
            continue
        }

        regexMatches.push({
            originalText,
            documentId,
            // The capture group is undefined when the reference has no comma.
            sectionDescription: rawSectionDescription?.trim(),
        })
    }

    return regexMatches
}
