import {
    extractLines,
    isAminoAcidSequence, isDnaSequence, isFasta, isRnaSequence, isSmilesLine, isTargetSequence,
} from 'BMapsSrc/util/mol_format_utils';
import _ from 'lodash';

// Constants

/**
 * Folding programs known to this module.
 *
 * Fields present on the enabled entries:
 *  - foldingProgramId / label: identifier and display name of the program.
 *  - extractFoldingArgs: function that converts submitted form values into the request
 *    arguments for that program. The functions are declared further down in this file;
 *    referencing them here works because function declarations are hoisted.
 *  - useLigands, allowProteinPrep, useCPU: boolean flags.
 *    NOTE(review): these are consumed outside this file — confirm their exact meaning there.
 *  - appSpecificLabel: presumably the label for the option that sends the user's text to
 *    the program unchanged (see `useApplicationSpecific` in the extractFoldingArgs functions).
 *  - documentationUrl: link to the upstream project.
 *
 * Entries marked `disabled: true` carry only an id and a label.
 */
export const foldingPrograms = [
    {
        foldingProgramId: 'Boltz-1',
        label: 'Boltz-1',
        extractFoldingArgs: extractFoldingArgsBoltz,
        useLigands: true,
        allowProteinPrep: true,
        useCPU: false,
        appSpecificLabel: 'Use Exact FASTA / YAML for Boltz-1',
        documentationUrl: 'https://github.com/jwohlwend/boltz',
    },
    {
        // NOTE(review): unlike Boltz-1 there is no allowProteinPrep flag here, although
        // extractFoldingArgsChai still forwards use_protein_prep — confirm this is intentional.
        foldingProgramId: 'Chai-1',
        label: 'Chai-1',
        extractFoldingArgs: extractFoldingArgsChai,
        useLigands: true,
        useCPU: false,
        appSpecificLabel: 'Use Exact FASTA for Chai-1',
        documentationUrl: 'https://github.com/chaidiscovery/chai-lab',
    },
    {
        foldingProgramId: 'RosettaFold',
        label: 'RoseTTAFold All-Atom',
        disabled: true,
    },
    {
        foldingProgramId: 'Alphafold',
        label: 'AlphaFold 3',
        disabled: true,
    },
];

/**
 * Input categories for folding data, keyed by type id.
 *
 * Each entry repeats its key as `id` and provides display strings (`label`, `unit`,
 * `unitPlural`, `title`, `placeholder`) plus a `filetypes` list.
 * The keys 'protein', 'dna', 'rna' and 'ligand' are the same strings returned by
 * guessSequenceTypeOne and accepted by checkSequenceType; 'mixed' is the value
 * guessSequenceTypeMulti returns when the detected types differ.
 */
export const foldingSequenceTypes = {
    protein: {
        id: 'protein',
        label: 'Protein Sequence',
        unit: 'Protein Sequence',
        unitPlural: 'Protein Sequences',
        title: 'Add each input line as a protein amino acid sequence',
        placeholder: 'protein sequences',
        // NOTE(review): 'fasta' has no leading dot while the ligand entry uses '.smi' —
        // confirm which form the consumer of `filetypes` expects.
        filetypes: ['fasta'],
    },
    dna: {
        id: 'dna',
        label: 'DNA Sequence',
        unit: 'DNA Sequence',
        unitPlural: 'DNA Sequences',
        title: 'Add each input line as a DNA sequence',
        placeholder: 'DNA sequences',
        filetypes: ['fasta'],
    },
    rna: {
        id: 'rna',
        label: 'RNA Sequence',
        unit: 'RNA Sequence',
        unitPlural: 'RNA Sequences',
        title: 'Add each input line as an RNA sequence',
        placeholder: 'RNA sequences',
        filetypes: ['fasta'],
    },
    ligand: {
        id: 'ligand',
        label: 'Ligand (Smiles)',
        unit: 'Ligand',
        unitPlural: 'Ligands',
        title: 'Add each input line as a ligand by smiles string',
        placeholder: 'ligand smiles',
        filetypes: ['.smi'],
    },
    // Disable CCD codes for managed input.
    // For Boltz, can use the app-specific FASTA / yaml
    // (The format* helpers in this file still contain a 'ccd' branch.)
    // ccd: {
    //     id: 'ccd',
    //     label: 'Ligand (CCD code)',
    //     title: 'Add ligand(s) by CCD code to folding input',
    // },
    mixed: {
        id: 'mixed',
        label: 'Mixed (Detect Types from Text)',
        unit: 'Component',
        unitPlural: 'Components',
        title: 'Add detected data types to folding input',
        placeholder: 'sequences and/or ligand smiles',
        filetypes: ['fasta', '.smi'],
    },
};

/**
 * Built-in example inputs, one per enabled folding program.
 *
 * Each example names the program it targets (`foldingProgramId`, matching an id in
 * foldingPrograms) and lists its components as `{ type, data, count }`, where `type`
 * is a key of foldingSequenceTypes, `data` is the sequence or SMILES text, and `count`
 * is the number of copies (see fullSequenceList).
 *
 * Not exported directly: getExampleData hands out deep copies of this array.
 */
const exampleData = [
    {
        name: 'Boltz-1 Sample',
        foldingProgramId: 'Boltz-1',
        sequences: [
            {
                type: 'protein',
                data: 'GMTEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEK',
                count: 1,
            },
            {
                type: 'protein',
                data: 'VSSVPTKLEVVAATPTSLLISWDASSSSVSYYRITYGETGGNSPVQEFTVPGYYSTATISGLKPGVDYTITVYAVWQGVWRYVSPISINYRT',
                count: 1,
            },
            {
                type: 'ligand',
                data: 'c1nc2c(n1[C@H]3[C@@H]([C@@H]([C@H](O3)CO[P@](=O)(O)OP(=O)(O)O)O)O)N=C(NC2=O)N',
                count: 1,
            },
            {
                type: 'ligand',
                data: 'CC(=O)N1[C@@H]2CC[C@H]1CN(C2)c3c4cnc(c(c4nc(n3)OC[C@@]56CCCN5C[C@@H](C6)F)F)c7cc(cc8c7c(c(cc8)F)C#C)O',
                count: 1,
            },
        ],
    },
    {
        name: 'Chai-1 Sample',
        foldingProgramId: 'Chai-1',
        sequences: [
            {
                type: 'protein',
                data: 'AGSHSMRYFSTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASPRGEPRAPWVEQEGPEYWDRETQKYKRQAQTDRVSLRNLRGYYNQSEAGSHTLQWMFGCDLGPDGRLLRGYDQSAYDGKDYIALNEDLRSWTAADTAAQITQRKWEAAREAEQRRAYLEGTCVEWLRRYLENGKETLQRAEHPKTHVTHHPVSDHEATLRCWALGFYPAEITLTWQWDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPLTLRWEP',
                count: 1,
            },
            {
                type: 'protein',
                data: 'AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPSDIEVDLLKNGERIEKVEHSDLSFSKDWSFYLLYYTEFTPTEKDEYACRVNHVTLSQPKIVKWDRDM',
                count: 1,
            },
            {
                type: 'protein',
                data: 'GAAL',
                count: 1,
            },

            {
                type: 'ligand',
                data: 'CCCCCCCCCCCCCC(=O)O',
                count: 1,
            },
        ],
    },
];

/**
 * Return a deep copy of the built-in example inputs, so callers can modify the result
 * without touching the shared module-level data.
 */
export const getExampleData = () => {
    const snapshot = _.cloneDeep(exampleData);
    return snapshot;
};

// Processing folding input

/**
 * Split free-form text into individual sequence / ligand entries, tolerating the
 * different ways users paste such data.
 *
 * - By default every line is one entry.
 * - A line starting with `>` (FASTA header) switches to FASTA mode, in which the
 *   following lines are concatenated into one entry until the next header.
 * - FASTA mode is abandoned when appending the next line would no longer give a valid
 *   target sequence, or when the entry collected so far is not a target sequence at all
 *   (e.g. smiles, which cannot span lines). From then on lines are taken one by one
 *   again until another header appears.
 *
 * See unit test in mol_format_utils.test.js
 * @param {string} input
 * @returns {string[]}
 */
export function extractSequences(input) {
    const collected = [];
    let fastaMode = false;
    let pending = '';

    // Move the entry being assembled (if any) into the result list
    const flushPending = () => {
        if (!pending) return;
        collected.push(pending);
        pending = '';
    };

    // FASTA comment lines (starting with ;) are filtered out by extractLines
    for (const entry of extractLines(input, /^;/)) {
        if (entry.startsWith('>')) {
            // New header: close the previous entry and start joining lines
            flushPending();
            fastaMode = true;
        } else if (!fastaMode) {
            // Plain mode: one entry per line
            collected.push(entry);
        } else if (!pending) {
            // First data line after a header
            pending = entry;
        } else if (isTargetSequence(pending) && isTargetSequence(`${pending}${entry}`)) {
            // Only sequence types may continue over several lines, never ligands
            pending += entry;
        } else {
            // Either a multi-line ligand or a line that would break the pending
            // sequence: drop back to line-by-line handling
            fastaMode = false;
            flushPending();
            collected.push(entry);
        }
    }
    flushPending();
    return collected;
}

/**
 * Detect the common type of all entries found in a block of text.
 *
 * The text is split with extractSequences and each entry is classified with
 * guessSequenceTypeOne.
 *
 * @param {string} input
 * @returns {string|null} null when no entries were found, the shared type when every
 *   entry got the same classification (this may be '' for unrecognized entries), or
 *   'mixed' when the classifications differ.
 */
export function guessSequenceTypeMulti(input) {
    const distinctTypes = new Set(
        extractSequences(input).map((entry) => guessSequenceTypeOne(entry)),
    );
    if (distinctTypes.size === 0) return null;
    if (distinctTypes.size > 1) return 'mixed';
    const [onlyType] = distinctTypes;
    return onlyType;
}

/**
 * Classify a single entry as 'dna', 'rna', 'protein' or 'ligand'.
 *
 * The input is trimmed first (a missing or empty input is treated as ''), then the
 * detectors are tried in a fixed order and the first one that returns exactly `true`
 * decides the result.
 *
 * @param {string} [input]
 * @returns {string} the detected type id, or '' when no detector accepts the text
 */
export function guessSequenceTypeOne(input) {
    const candidate = input?.trim() || '';
    // Order matters: earlier detectors win when several would accept the same text.
    // Disable CCD codes for managed input.
    // For Boltz, can use the app-specific FASTA / yaml
    const detectors = [
        ['dna', isDnaSequence],
        ['rna', isRnaSequence],
        ['protein', isAminoAcidSequence],
        ['ligand', isSmilesLine],
    ];
    const hit = detectors.find(([, accepts]) => accepts(candidate) === true);
    return hit ? hit[0] : '';
}

/**
 * Check a sequence against the validator that belongs to the given type id.
 *
 * @param {string} sequence text to validate (passed to the validator as-is)
 * @param {string} type one of 'protein', 'dna', 'rna', 'ligand'
 * @returns the validator's result, or false for any other type
 */
export function checkSequenceType(sequence, type) {
    const validators = new Map([
        ['protein', isAminoAcidSequence],
        ['dna', isDnaSequence],
        ['rna', isRnaSequence],
        ['ligand', isSmilesLine],
    ]);
    const validate = validators.get(type);
    return validate ? validate(sequence) : false;
}

// Preparing data for folding programs

/**
 * Build the request arguments for Boltz-1 from the submitted form values.
 *
 * If application-specific input is enabled and actually contains text, that text is
 * passed through unchanged: as `fasta` when isFasta accepts it, otherwise as `yaml`.
 * In every other case the managed sequence list is expanded by count
 * (fullSequenceList) and each item is converted with formatBoltzItemJson.
 *
 * @param {object} formValues
 * @param {object} formValues.foldingInfo provides foldingProgramId and useCPU
 * @param {boolean} [formValues.useApplicationSpecific]
 * @param {string} [formValues.applicationSpecificInput]
 * @param {Array<{type: string, data: string, count: number}>} [formValues.sequences]
 *   only read when the managed list is used
 * @param {*} [formValues.useProteinPrep] forwarded as use_protein_prep
 * @returns {{foldingProgramId: *, boltz_args: object, use_protein_prep: *}}
 */
export function extractFoldingArgsBoltz(formValues) {
    const { foldingProgramId, useCPU } = formValues.foldingInfo;
    const appInput = formValues.applicationSpecificInput;
    // A missing (undefined / null) or whitespace-only value counts as "no input", just
    // like ''. Otherwise it would be sent as `yaml` and the managed sequences ignored.
    const hasAppInput = typeof appInput === 'string' && appInput.trim() !== '';

    const boltzArgs = { use_cpu: useCPU };
    if (formValues.useApplicationSpecific && hasAppInput) {
        // Boltz can use FASTA or YAML formats
        if (isFasta(appInput)) {
            boltzArgs.fasta = appInput;
        } else { // assume valid yaml if not FASTA
            boltzArgs.yaml = appInput;
        }
    } else {
        boltzArgs.json = {
            sequences: fullSequenceList(formValues.sequences)
                .map((seq, seqI) => formatBoltzItemJson(seq, seqI)),
        };
    }
    return {
        foldingProgramId,
        boltz_args: boltzArgs,
        use_protein_prep: formValues.useProteinPrep,
    };
}

/**
 * Convert one sequence entry into the object used in the Boltz JSON `sequences` list.
 *
 * The result has a single key naming the entity kind; its value holds an id built from
 * a type-specific letter plus `index`, and the entry's data under a type-specific key.
 * Entries of any other type are returned as their bare `data`.
 *
 * @param {{type: string, data: string}} sequence
 * @param {number} index position used to build the id
 */
export function formatBoltzItemJson(sequence, index) {
    // type -> [entity key, id prefix, key that carries the data]
    const layouts = new Map([
        ['protein', ['protein', 'P', 'sequence']],
        ['ligand', ['ligand', 'L', 'smiles']],
        ['ccd', ['ligand', 'L', 'ccd']],
        ['rna', ['rna', 'R', 'sequence']],
        ['dna', ['dna', 'D', 'sequence']],
    ]);
    const layout = layouts.get(sequence.type);
    if (!layout) return sequence.data;

    const [entityKey, idPrefix, dataKey] = layout;
    return {
        [entityKey]: { id: `${idPrefix}${index}`, [dataKey]: sequence.data },
    };
}

/**
 * Render one sequence entry as a two-line Boltz FASTA record:
 * a `>` header made of an id (type letter + index) and an entity tag, then the data.
 * Entries of any other type are returned as their bare `data`.
 *
 * @param {{type: string, data: string}} sequence
 * @param {number} index position used to build the id
 * @returns {string}
 */
export function formatBoltzItemFasta(sequence, index) { // eslint-disable-line no-unused-vars
    const kind = sequence.type;
    if (kind === 'protein') return `>P${index}|protein\n${sequence.data}`;
    if (kind === 'ligand') return `>L${index}|smiles\n${sequence.data}`;
    if (kind === 'ccd') return `>L${index}|ccd\n${sequence.data}`;
    if (kind === 'rna') return `>R${index}|rna\n${sequence.data}`;
    if (kind === 'dna') return `>D${index}|dna\n${sequence.data}`;
    return sequence.data;
}

/**
 * Build the request arguments for Chai-1 from the submitted form values.
 *
 * If application-specific input is enabled and actually contains text, that text is
 * sent unchanged as the FASTA payload. Otherwise the managed sequence list is expanded
 * by count (fullSequenceList), each item is rendered with formatChaiItemFasta, and the
 * records are joined with newlines.
 *
 * @param {object} formValues
 * @param {object} formValues.foldingInfo provides foldingProgramId and useCPU
 * @param {boolean} [formValues.useApplicationSpecific]
 * @param {string} [formValues.applicationSpecificInput]
 * @param {Array<{type: string, data: string, count: number}>} [formValues.sequences]
 *   only read when the managed list is used
 * @param {*} [formValues.useProteinPrep] forwarded as use_protein_prep
 * @returns {{foldingProgramId: *, chai_args: object, use_protein_prep: *}}
 */
export function extractFoldingArgsChai(formValues) {
    const { foldingProgramId, useCPU } = formValues.foldingInfo;
    const appInput = formValues.applicationSpecificInput;
    // A missing (undefined / null) or whitespace-only value counts as "no input", just
    // like ''. Otherwise `fasta` would end up undefined or blank.
    const hasAppInput = typeof appInput === 'string' && appInput.trim() !== '';

    const chaiArgs = { use_cpu: useCPU };
    if (formValues.useApplicationSpecific && hasAppInput) {
        chaiArgs.fasta = appInput;
    } else {
        chaiArgs.fasta = fullSequenceList(formValues.sequences)
            .map((seq, seqI) => formatChaiItemFasta(seq, seqI))
            .join('\n');
    }

    return {
        foldingProgramId,
        chai_args: chaiArgs,
        use_protein_prep: formValues.useProteinPrep,
    };
}

/**
 * Render one sequence entry as a two-line Chai FASTA record:
 * a `>` header of the form `<entity>|name=<letter><index>`, then the data.
 * Entries of any other type are returned as their bare `data`.
 *
 * @param {{type: string, data: string}} sequence
 * @param {number} index position used to build the record name
 * @returns {string}
 */
export function formatChaiItemFasta(sequence, index) {
    // type -> function producing the header line for a given index
    const headerBuilders = new Map([
        ['protein', (i) => `>protein|name=P${i}`],
        ['ligand', (i) => `>ligand|name=L${i}`],
        ['ccd', (i) => `>glycan|name=C${i}`],
        ['rna', (i) => `>rna|name=R${i}`],
        ['dna', (i) => `>dna|name=D${i}`],
    ]);
    const buildHeader = headerBuilders.get(sequence.type);
    return buildHeader
        ? `${buildHeader(index)}\n${sequence.data}`
        : sequence.data;
}

/**
 * Expand a list of sequence entries so that each entry appears `count` times in a row.
 * The copies are references to the same entry object, not clones.
 *
 * How `count` is interpreted:
 *  - a non-negative integer is used as-is (0 drops the entry);
 *  - a numeric string such as '3' is parsed first. Passing it straight to the
 *    single-argument Array constructor would create a one-element array, silently
 *    giving one copy;
 *  - negative values are clamped to 0 and fractions are rounded down instead of
 *    raising a RangeError;
 *  - anything else (missing, null, non-numeric, non-finite) gives one copy.
 *
 * @param {Array<{
 *  type: string,
 *  data: string,
 *  count: (number|string)
 * }>} sequences
 * @returns {Array<{type: string, data: string, count: (number|string)}>}
 */
export function fullSequenceList(sequences) {
    return sequences.flatMap((seq) => {
        const rawCount = typeof seq.count === 'string' && seq.count.trim() !== ''
            ? Number(seq.count)
            : seq.count;
        const copies = typeof rawCount === 'number' && Number.isFinite(rawCount)
            ? Math.max(0, Math.floor(rawCount))
            : 1;
        return Array(copies).fill(seq);
    });
}
