/*
 * Decompiled with CFR 0.152.
 */
package edu.msu.cme.rdp.alignment.errorcheck;

import edu.msu.cme.rdp.alignment.AlignmentMode;
import edu.msu.cme.rdp.alignment.pairwise.PairwiseAligner;
import edu.msu.cme.rdp.alignment.pairwise.PairwiseAlignment;
import edu.msu.cme.rdp.alignment.pairwise.ScoringMatrix;
import edu.msu.cme.rdp.alignment.pairwise.rna.DistanceModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.IdentityDistanceModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.OverlapCheckFailedException;
import edu.msu.cme.rdp.readseq.SequenceType;
import edu.msu.cme.rdp.readseq.readers.Sequence;
import edu.msu.cme.rdp.readseq.readers.SequenceReader;
import edu.msu.cme.rdp.readseq.utils.SeqUtils;
import edu.msu.cme.rdp.readseq.utils.kmermatch.KmerMatchCore;
import edu.msu.cme.rdp.readseq.utils.kmermatch.NuclSeqMatch;
import edu.msu.cme.rdp.readseq.utils.kmermatch.ProteinSeqMatch;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;

/**
 * Flags query sequences that appear to be partial (truncated at either end).
 *
 * <p>Each query sequence is pairwise-aligned against the reference sequences
 * returned by the k-mer matcher for that query. The best-scoring alignment is
 * kept, and the query is marked partial when the aligned query string starts
 * or ends with at least the configured number of consecutive gap characters.
 * Queries that are not marked partial are written to the sequence output
 * stream in FASTA form.
 */
public class RmPartialSeqs {
    /** Character that represents a gap in an aligned sequence string. */
    private static final char GAP_CHAR = '-';
    /** Number of closest references used by {@link #main} when {@code --knn} is not given. */
    private static final int DEFAULT_KNN = 10;
    /** Gap-run cutoff used by {@link #main} when {@code --min_gaps} is not given. */
    private static final int DEFAULT_MIN_GAPS = 50;

    private static final Options options = new Options();
    private static final DistanceModel dist = new IdentityDistanceModel();

    private final ScoringMatrix scoringMatrix;
    /** Type guessed from the first reference sequence; {@code null} if the reference file had no sequences. */
    private final SequenceType seqType;
    private final AlignmentMode mode;
    /** Reference sequences keyed by sequence name. */
    private final Map<String, Sequence> refSeqMap = new HashMap<String, Sequence>();
    /** Query sequences, in file order. */
    private final List<Sequence> seqList = new ArrayList<Sequence>();
    private final KmerMatchCore sabCalculator;
    private final int knn;
    private final int min_begin_gaps;
    private final int min_end_gaps;

    /**
     * Loads the reference and query sequences and prepares the scoring matrix
     * and k-mer matcher that fit the reference sequence type.
     *
     * @param trainseqFile path of the reference (full-length) sequence file
     * @param testFile     path of the query sequence file
     * @param mode         alignment mode passed to the pairwise aligner
     * @param knn          number of top k-mer matches to align each query against
     * @param min_gaps     minimum leading or trailing gap run that marks a query as partial
     * @throws IOException if a sequence file cannot be read
     * @throws OverlapCheckFailedException declared for compatibility with existing callers
     */
    public RmPartialSeqs(String trainseqFile, String testFile, AlignmentMode mode, int knn, int min_gaps) throws IOException, OverlapCheckFailedException {
        this.mode = mode;
        this.knn = knn;
        this.min_begin_gaps = min_gaps;
        this.min_end_gaps = min_gaps;

        SequenceType guessedType = null;
        SequenceReader parser = new SequenceReader(new File(trainseqFile));
        try {
            Sequence seq;
            while ((seq = parser.readNextSequence()) != null) {
                // The type is guessed once, from the first reference sequence only.
                if (guessedType == null) {
                    guessedType = SeqUtils.guessSequenceType(seq);
                }
                this.refSeqMap.put(seq.getSeqName(), seq);
            }
        } finally {
            // Release the reader even when reading fails part-way through.
            parser.close();
        }
        // Store the guess in the field; a shadowing local previously left the field unset.
        this.seqType = guessedType;

        parser = new SequenceReader(new File(testFile));
        try {
            Sequence seq;
            while ((seq = parser.readNextSequence()) != null) {
                this.seqList.add(seq);
            }
        } finally {
            parser.close();
        }

        // Anything not recognised as nucleotide (including an empty reference
        // file, where no type was guessed) uses the protein configuration.
        if (guessedType == SequenceType.Nucleotide) {
            this.scoringMatrix = ScoringMatrix.getDefaultNuclMatrix();
            this.sabCalculator = new NuclSeqMatch(trainseqFile);
        } else {
            this.scoringMatrix = ScoringMatrix.getDefaultProteinMatrix();
            this.sabCalculator = new ProteinSeqMatch(trainseqFile);
        }
    }

    /**
     * Checks every query sequence and separates partial from non-partial ones.
     *
     * <p>Non-partial sequences are written to {@code seqOutstream}. When
     * {@code alignOutstream} is not {@code null}, a summary line and the two
     * aligned strings of the best alignment are written for every query.
     * Both streams are closed before this method returns, including when it
     * exits with an exception.
     *
     * @param seqOutstream   destination for sequences that passed the check; closed on return
     * @param alignOutstream optional destination for alignment details, may be {@code null}; closed on return
     * @return the query sequences that were marked as partial
     * @throws IllegalStateException if a query has no k-mer match, or a match names a
     *         reference sequence that was not loaded
     * @throws OverlapCheckFailedException declared for the distance computation
     * @throws IOException declared for the k-mer match lookup
     */
    public HashSet<Sequence> checkPartial(PrintStream seqOutstream, PrintStream alignOutstream) throws OverlapCheckFailedException, IOException {
        HashSet<Sequence> partialSeqs = new HashSet<Sequence>();
        try {
            for (Sequence seqx : this.seqList) {
                // Loop-invariant: convert the query once instead of once per candidate.
                String query = seqx.getSeqString().replace('U', 'T');

                PairwiseAlignment bestResult = null;
                int bestScore = Integer.MIN_VALUE;
                Sequence bestSeqy = null;
                List<KmerMatchCore.BestMatch> matchResults = this.sabCalculator.findTopKMatch(seqx, this.knn);
                for (KmerMatchCore.BestMatch match : matchResults) {
                    String refName = match.getBestMatch().getSeqName();
                    Sequence seqy = this.refSeqMap.get(refName);
                    if (seqy == null) {
                        throw new IllegalStateException("Reference sequence " + refName
                                + " matched by query " + seqx.getSeqName() + " was not loaded");
                    }
                    PairwiseAlignment result = PairwiseAligner.align(query, seqy.getSeqString().replace('U', 'T'), this.scoringMatrix, this.mode);
                    // On equal scores the later candidate wins.
                    if (bestResult == null || result.getScore() >= bestScore) {
                        bestResult = result;
                        bestScore = result.getScore();
                        bestSeqy = seqy;
                    }
                }
                if (bestResult == null) {
                    // Without any candidate there is no alignment to judge the query by.
                    throw new IllegalStateException("No reference match found for query sequence " + seqx.getSeqName());
                }

                String alignedQuery = bestResult.getAlignedSeqi();
                String alignedRef = bestResult.getAlignedSeqj();
                double distance = dist.getDistance(alignedRef.getBytes(), alignedQuery.getBytes(), 0);
                int beginGaps = this.getBeginGapLength(alignedQuery);
                int endGaps = this.getEndGapLength(alignedQuery);
                boolean missingBegin = beginGaps >= this.min_begin_gaps;
                boolean missingEnd = endGaps >= this.min_end_gaps;

                if (missingBegin || missingEnd) {
                    partialSeqs.add(seqx);
                } else {
                    seqOutstream.println(">" + seqx.getSeqName() + "\t" + seqx.getDesc() + "\n" + seqx.getSeqString());
                }
                if (alignOutstream != null) {
                    alignOutstream.println(">\t" + seqx.getSeqName() + "\t" + bestSeqy.getSeqName() + "\t" + String.format("%.3f", distance) + "\tmissingBegin=" + missingBegin + "\tmissingEnd=" + missingEnd + "\tbeginGaps=" + beginGaps + "\tendGaps=" + endGaps);
                    alignOutstream.print(alignedQuery + "\n");
                    alignOutstream.print(alignedRef + "\n");
                }
            }
        } finally {
            seqOutstream.close();
            if (alignOutstream != null) {
                alignOutstream.close();
            }
        }
        return partialSeqs;
    }

    /**
     * Counts the consecutive gap characters at the start of {@code s}.
     *
     * @param s aligned sequence string
     * @return length of the leading gap run; {@code s.length()} if {@code s} is all gaps
     */
    int getBeginGapLength(String s) {
        int length = 0;
        while (length < s.length() && s.charAt(length) == GAP_CHAR) {
            ++length;
        }
        return length;
    }

    /**
     * Counts the consecutive gap characters at the end of {@code s}.
     *
     * @param s aligned sequence string
     * @return length of the trailing gap run; {@code s.length()} if {@code s} is all gaps
     */
    int getEndGapLength(String s) {
        int length = 0;
        // Scan down to and including index 0 so an all-gap string is fully counted.
        for (int i = s.length() - 1; i >= 0 && s.charAt(i) == GAP_CHAR; --i) {
            ++length;
        }
        return length;
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code [options] fulllengthSeqFile queryFile passedSeqOutFile}.
     * On any argument error the message and the option help are printed and
     * the method returns without processing.
     *
     * @param args command-line arguments
     * @throws Exception if loading or checking the sequences fails
     */
    public static void main(String[] args) throws Exception {
        String trainseqFile = null;
        String queryFile = null;
        PrintStream seqOutStream = null;
        PrintStream alignOutStream = null;
        AlignmentMode mode = AlignmentMode.overlap;
        int k = DEFAULT_KNN;
        int min_gaps = DEFAULT_MIN_GAPS;
        try {
            CommandLine line = new PosixParser().parse(options, args);
            if (line.hasOption("alignment-mode")) {
                // Locale.ROOT keeps the lower-casing independent of the user's locale.
                String m = line.getOptionValue("alignment-mode").toLowerCase(Locale.ROOT);
                mode = AlignmentMode.valueOf(m);
            }
            if (line.hasOption("min_gaps")) {
                min_gaps = Integer.parseInt(line.getOptionValue("min_gaps"));
            }
            if (line.hasOption("knn")) {
                k = Integer.parseInt(line.getOptionValue("knn"));
            }
            String[] positional = line.getArgs();
            if (positional.length != 3) {
                throw new IllegalArgumentException("wrong number of arguments");
            }
            trainseqFile = positional[0];
            queryFile = positional[1];
            // Output files are opened only after all arguments were validated,
            // so a usage error does not leave an empty output file behind.
            if (line.hasOption("alignment-out")) {
                alignOutStream = new PrintStream(new File(line.getOptionValue("alignment-out")));
            }
            seqOutStream = new PrintStream(new File(positional[2]));
        }
        catch (Exception e) {
            // The alignment stream may already be open if opening the sequence output failed.
            if (alignOutStream != null) {
                alignOutStream.close();
            }
            System.err.println("Error: " + e.getMessage());
            new HelpFormatter().printHelp(80, " [options] fulllengthSeqFile queryFile passedSeqOutFile\n  sequences can be either protein or nucleotide", "", options, "");
            return;
        }
        try {
            RmPartialSeqs theObj = new RmPartialSeqs(trainseqFile, queryFile, mode, k, min_gaps);
            theObj.checkPartial(seqOutStream, alignOutStream);
        } finally {
            // checkPartial closes the streams itself; this covers a failing constructor.
            seqOutStream.close();
            if (alignOutStream != null) {
                alignOutStream.close();
            }
        }
    }

    static {
        options.addOption("a", "alignment-mode", true, "Alignment mode: overlap, glocal, local or global. default = overlap");
        options.addOption("g", "min_gaps", true, "The minimum number of continuous gaps in the beginning or end of the query alignment. If above the cutoff, the query is marked as partial. default = " + DEFAULT_MIN_GAPS);
        // The help text is built from the constant so it cannot drift from the value main() uses.
        options.addOption("k", "knn", true, "The top k closest targets using a heuristic method. (default = " + DEFAULT_KNN + ")");
        options.addOption("o", "alignment-out", true, "The output file containing the pairwise alignment");
    }
}

