package org.biojavax.bio.seq.io;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.biojava.bio.program.tagvalue.TagValueParser;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.io.ParseException;
import org.biojava.bio.seq.io.SeqIOListener;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.SimpleSymbolList;
import org.biojava.bio.symbol.Symbol;
import org.biojava.utils.ChangeVetoException;
import org.biojavax.Namespace;
import org.biojavax.RichObjectFactory;
import org.biojavax.SimpleNamespace;
import org.biojavax.bio.seq.Position;
import org.biojavax.bio.seq.RichSequence;
import org.biojavax.bio.seq.io.RichSequenceFormat;

/* loaded from: input_file:org/biojavax/bio/seq/io/FastaFormat.class */
/**
 * Reads and writes sequences in FASTA format.
 *
 * <p>A record consists of one header line starting with {@code >} followed by
 * any number of residue lines. The header is split into a name token and an
 * optional description; the name token may additionally follow the
 * {@code gi|<number>|<namespace>|<accession>.<version>|<name>} layout, in which
 * case the individual parts are reported separately to the listener.
 */
public class FastaFormat extends RichSequenceFormat.HeaderlessFormat {
    /** Name under which this format is known. */
    public static final String FASTA_FORMAT = "FASTA";

    /** Header line: {@code >}, optional whitespace, name token (group 1), optional description (group 3). */
    protected static final Pattern hp;

    /**
     * Structured name token: optional {@code gi|number|} (number is group 2),
     * namespace (group 3), accession (group 4), optional version (group 6)
     * and optional name (group 7), separated by {@code |}.
     */
    protected static final Pattern dp;

    /**
     * File names accepted without inspecting the content.
     * NOTE(review): matches any name ending in "fa" or "fas"; no dot is required.
     */
    protected static final Pattern readableFiles;

    /** Matches a line containing at least one of the letters F, L, I, P, Q or E; used to guess protein data. */
    protected static final Pattern aminoAcids;

    /** Controls which parts of the header are emitted by {@link #writeSequence(Sequence, Namespace)}. */
    private FastaHeader header = new FastaHeader();

    /**
     * Decides whether the given file can be read as FASTA.
     *
     * <p>The file is accepted if its name matches {@link #readableFiles};
     * otherwise it is accepted if its first line starts with {@code >}.
     *
     * @param file the file to test
     * @return {@code true} if the file looks like FASTA
     * @throws IOException if the file cannot be opened or read
     */
    @Override // org.biojavax.bio.seq.io.RichSequenceFormat.BasicFormat, org.biojavax.bio.seq.io.RichSequenceFormat
    public boolean canRead(File file) throws IOException {
        if (readableFiles.matcher(file.getName()).matches()) {
            return true;
        }
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String firstLine = reader.readLine();
            return firstLine != null && firstLine.startsWith(">");
        } finally {
            // Close even when readLine() fails so the file handle is not leaked.
            reader.close();
        }
    }

    /**
     * Guesses the alphabet of a FASTA file from its second line (the first
     * residue line): a line matching {@link #aminoAcids} selects the protein
     * parser, anything else the DNA parser. A file with fewer than two lines
     * yields the DNA parser.
     *
     * @param file the file to inspect
     * @return the protein or DNA tokenization
     * @throws IOException if the file cannot be opened or read
     */
    @Override // org.biojavax.bio.seq.io.RichSequenceFormat.BasicFormat, org.biojavax.bio.seq.io.RichSequenceFormat
    public SymbolTokenization guessSymbolTokenization(File file) throws IOException {
        boolean protein;
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            reader.readLine(); // skip the header line
            protein = looksLikeProtein(reader.readLine());
        } finally {
            // Close even when reading fails so the file handle is not leaked.
            reader.close();
        }
        return protein ? RichSequence.IOTools.getProteinParser() : RichSequence.IOTools.getDNAParser();
    }

    /**
     * Decides whether the stream content looks like FASTA, i.e. whether its
     * first line starts with {@code >}. The stream is marked before reading
     * and reset afterwards.
     *
     * <p>NOTE(review): the mark uses a read limit of 2000 while the temporary
     * reader may read ahead further; confirm {@code reset()} is reliable for
     * the streams passed in.
     *
     * @param bufferedInputStream the stream to test
     * @return {@code true} if the first line starts with {@code >}
     * @throws IOException if reading or resetting the stream fails
     */
    @Override // org.biojavax.bio.seq.io.RichSequenceFormat
    public boolean canRead(BufferedInputStream bufferedInputStream) throws IOException {
        bufferedInputStream.mark(2000);
        BufferedReader reader = new BufferedReader(new InputStreamReader(bufferedInputStream));
        String firstLine = reader.readLine();
        boolean fasta = firstLine != null && firstLine.startsWith(">");
        bufferedInputStream.reset();
        return fasta;
    }

    /**
     * Guesses the alphabet of FASTA data on a stream from its second line,
     * using the same rule as {@link #guessSymbolTokenization(File)}. The
     * stream is marked before reading and reset afterwards. A stream with
     * fewer than two lines yields the DNA parser.
     *
     * @param bufferedInputStream the stream to inspect
     * @return the protein or DNA tokenization
     * @throws IOException if reading or resetting the stream fails
     */
    @Override // org.biojavax.bio.seq.io.RichSequenceFormat
    public SymbolTokenization guessSymbolTokenization(BufferedInputStream bufferedInputStream) throws IOException {
        bufferedInputStream.mark(2000);
        BufferedReader reader = new BufferedReader(new InputStreamReader(bufferedInputStream));
        reader.readLine(); // skip the header line
        boolean protein = looksLikeProtein(reader.readLine());
        bufferedInputStream.reset();
        return protein ? RichSequence.IOTools.getProteinParser() : RichSequence.IOTools.getDNAParser();
    }

    /**
     * Tells whether a residue line matches {@link #aminoAcids}.
     *
     * @param line the line to test; {@code null} (no such line) counts as not protein
     * @return {@code true} if the line is non-null and matches
     */
    private static boolean looksLikeProtein(String line) {
        return line != null && aminoAcids.matcher(line).matches();
    }

    /**
     * Reads one sequence, delegating to
     * {@link #readRichSequence(BufferedReader, SymbolTokenization, RichSeqIOListener, Namespace)}
     * with a {@code null} namespace.
     *
     * @param bufferedReader     source of the FASTA text
     * @param symbolTokenization tokenization used to parse the residues
     * @param seqIOListener      listener receiving the parse events; must be a {@link RichSeqIOListener}
     * @return {@code true} if another record follows
     * @throws IllegalArgumentException if the listener is not a {@link RichSeqIOListener}
     */
    @Override // org.biojava.bio.seq.io.SequenceFormat
    public boolean readSequence(BufferedReader bufferedReader, SymbolTokenization symbolTokenization, SeqIOListener seqIOListener) throws IllegalSymbolException, IOException, ParseException {
        if (!(seqIOListener instanceof RichSeqIOListener)) {
            throw new IllegalArgumentException("Only accepting RichSeqIOListeners today");
        }
        return readRichSequence(bufferedReader, symbolTokenization, (RichSeqIOListener) seqIOListener, null);
    }

    /**
     * Reads one FASTA record and reports it to the listener.
     *
     * <p>Leading empty lines are skipped. The header is handed to
     * {@link #processHeader(String, RichSeqIOListener, Namespace)}; the
     * following lines are trimmed and concatenated until the next line
     * starting with {@code >} or the end of the input. Unless symbols are
     * elided, the residues are parsed and passed to the listener.
     *
     * @param bufferedReader     source of the FASTA text; must support mark/reset
     * @param symbolTokenization tokenization used to parse the residues
     * @param richSeqIOListener  listener receiving the parse events
     * @param namespace          namespace to assign, or {@code null} to derive one from the header
     * @return {@code true} if the record was ended by another header line,
     *         {@code false} if the end of the input was reached
     * @throws IOException    if the input ends before a header is found, the first
     *                        non-empty line is not a header, or reading fails
     * @throws ParseException if the residues cannot be parsed
     */
    @Override // org.biojavax.bio.seq.io.RichSequenceFormat
    public boolean readRichSequence(BufferedReader bufferedReader, SymbolTokenization symbolTokenization, RichSeqIOListener richSeqIOListener, Namespace namespace) throws IllegalSymbolException, IOException, ParseException {
        String line = bufferedReader.readLine();
        if (line == null) {
            throw new IOException("Premature stream end");
        }
        // Skip empty lines in front of the header.
        while (line.length() == 0) {
            line = bufferedReader.readLine();
            if (line == null) {
                throw new IOException("Premature stream end");
            }
        }
        if (!line.startsWith(">")) {
            throw new IOException("Stream does not appear to contain FASTA formatted data: " + line);
        }
        richSeqIOListener.startSequence();
        processHeader(line, richSeqIOListener, namespace);

        StringBuilder residues = new StringBuilder();
        boolean more = true;
        while (more) {
            // Mark before each line so the next record's header can be pushed back.
            // NOTE(review): a following header longer than the 500-char limit may make reset() fail.
            bufferedReader.mark(500);
            line = bufferedReader.readLine();
            if (line == null) {
                more = false;
            } else {
                line = line.trim();
                if (line.length() > 0 && line.charAt(0) == '>') {
                    // Start of the next record: leave it for the next call.
                    bufferedReader.reset();
                    more = false;
                } else {
                    residues.append(line);
                }
            }
        }

        if (!getElideSymbols()) {
            try {
                // Whitespace is replaced by TagValueParser.EMPTY_LINE_EOR (presumably the empty
                // string - confirm); '.', '~' and also a literal '|' are turned into '-'.
                String cleaned = residues.toString().replaceAll("\\s+", TagValueParser.EMPTY_LINE_EOR).replaceAll("[\\.|~]", "-");
                SimpleSymbolList symbols = new SimpleSymbolList(symbolTokenization, cleaned);
                richSeqIOListener.addSymbols(symbolTokenization.getAlphabet(), (Symbol[]) symbols.toList().toArray(new Symbol[0]), 0, symbols.length());
            } catch (Exception e) {
                throw new ParseException(e, ParseException.newMessage(getClass(), TagValueParser.EMPTY_LINE_EOR, TagValueParser.EMPTY_LINE_EOR, "problem parsing symbols", residues.toString()));
            }
        }
        richSeqIOListener.endSequence();
        // A non-null line here is the header of the next record.
        return line != null;
    }

    /**
     * Parses a FASTA header line and reports its parts to the listener.
     *
     * <p>If the name token matches {@link #dp}, accession, version (0 when
     * absent), identifier (when present) and namespace are taken from it and
     * the name defaults to the accession when missing. Otherwise the whole
     * token is used as both accession and name and the default namespace is
     * used. A non-null {@code namespace} argument always takes precedence.
     * The description is reported unless comments are elided.
     *
     * @param str               the header line, including the leading {@code >}
     * @param richSeqIOListener listener receiving the header values
     * @param namespace         namespace to assign, or {@code null} to derive one
     * @throws IOException if the line does not match {@link #hp}
     */
    public void processHeader(String str, RichSeqIOListener richSeqIOListener, Namespace namespace) throws IOException, ParseException {
        Matcher headerMatcher = hp.matcher(str);
        if (!headerMatcher.matches()) {
            throw new IOException("Stream does not appear to contain FASTA formatted data: " + str);
        }
        String name = headerMatcher.group(1);
        String description = headerMatcher.group(3);

        Matcher detailMatcher = dp.matcher(name);
        if (detailMatcher.matches()) {
            String identifier = detailMatcher.group(2);
            String namespaceName = detailMatcher.group(3);
            String accession = detailMatcher.group(4);
            String versionText = detailMatcher.group(6);
            int version = versionText == null ? 0 : Integer.parseInt(versionText);
            name = detailMatcher.group(7);
            if (name == null) {
                name = accession;
            }
            richSeqIOListener.setAccession(accession);
            richSeqIOListener.setVersion(version);
            if (identifier != null) {
                richSeqIOListener.setIdentifier(identifier);
            }
            if (namespace == null) {
                richSeqIOListener.setNamespace((Namespace) RichObjectFactory.getObject(SimpleNamespace.class, new Object[]{namespaceName}));
            } else {
                richSeqIOListener.setNamespace(namespace);
            }
        } else {
            richSeqIOListener.setAccession(name);
            richSeqIOListener.setNamespace(namespace == null ? RichObjectFactory.getDefaultNamespace() : namespace);
        }
        richSeqIOListener.setName(name);
        if (!getElideComments()) {
            richSeqIOListener.setDescription(description);
        }
    }

    /**
     * Writes a sequence using the default namespace.
     *
     * <p>NOTE(review): the supplied stream is only installed when no print
     * stream is set yet; otherwise the previously configured stream is used.
     *
     * @param sequence    the sequence to write
     * @param printStream the stream to install if none is configured
     * @throws IOException if writing fails
     */
    @Override // org.biojava.bio.seq.io.SequenceFormat
    public void writeSequence(Sequence sequence, PrintStream printStream) throws IOException {
        if (getPrintStream() == null) {
            setPrintStream(printStream);
        }
        writeSequence(sequence, RichObjectFactory.getDefaultNamespace());
    }

    /**
     * Writes a sequence using the default namespace after checking the
     * requested format name against {@link #getDefaultFormat()}.
     *
     * @param sequence    the sequence to write
     * @param str         the requested format name
     * @param printStream the stream to install if none is configured
     * @throws IllegalArgumentException if {@code str} is {@code null} or not the default format
     * @throws IOException              if writing fails
     */
    @Override // org.biojava.bio.seq.io.SequenceFormat
    public void writeSequence(Sequence sequence, String str, PrintStream printStream) throws IOException {
        if (getPrintStream() == null) {
            setPrintStream(printStream);
        }
        // Compare from the constant side so a null format is rejected cleanly instead of with an NPE.
        if (!getDefaultFormat().equals(str)) {
            throw new IllegalArgumentException("Unknown format: " + str);
        }
        writeSequence(sequence, RichObjectFactory.getDefaultNamespace());
    }

    /**
     * Writes a sequence as one FASTA record to the configured print stream.
     *
     * <p>The header parts emitted are selected by the current
     * {@link FastaHeader}; the residues follow in lines of
     * {@code getLineWidth()} characters.
     *
     * @param sequence  the sequence to write; enriched first if it is not a {@link RichSequence}
     * @param namespace namespace whose name is written, or {@code null} to use the sequence's own
     * @throws IOException if the sequence cannot be enriched
     */
    @Override // org.biojavax.bio.seq.io.RichSequenceFormat
    public void writeSequence(Sequence sequence, Namespace namespace) throws IOException {
        try {
            RichSequence rich = sequence instanceof RichSequence ? (RichSequence) sequence : RichSequence.Tools.enrich(sequence);

            StringBuilder headerLine = new StringBuilder();
            headerLine.append(">");
            String identifier = rich.getIdentifier();
            if (this.header.isShowIdentifier() && identifier != null && !TagValueParser.EMPTY_LINE_EOR.equals(identifier)) {
                headerLine.append("gi|");
                headerLine.append(identifier);
                headerLine.append("|");
            }
            if (this.header.isShowNamespace()) {
                headerLine.append(namespace == null ? rich.getNamespace().getName() : namespace.getName());
                headerLine.append("|");
            }
            if (this.header.isShowAccession()) {
                headerLine.append(rich.getAccession());
                if (this.header.isShowVersion()) {
                    // Separator between accession and version (Position.IN_RANGE, presumably "." - confirm).
                    headerLine.append(Position.IN_RANGE);
                }
            }
            if (this.header.isShowVersion()) {
                headerLine.append(rich.getVersion());
                headerLine.append("|");
            }
            if (this.header.isShowName()) {
                headerLine.append(rich.getName());
            }
            // A space always separates the name part from the description.
            headerLine.append(" ");
            if (this.header.isShowDescription()) {
                String description = rich.getDescription();
                if (description != null && !TagValueParser.EMPTY_LINE_EOR.equals(description)) {
                    headerLine.append(description.replaceAll("\\n", " "));
                }
            }
            // Drop a trailing separator.
            if (headerLine.charAt(headerLine.length() - 1) == '|') {
                headerLine.deleteCharAt(headerLine.length() - 1);
            }
            getPrintStream().print(headerLine.toString());
            getPrintStream().println();

            // Residues are addressed 1-based; emit them line by line.
            int length = rich.length();
            for (int start = 1; start <= length; start += getLineWidth()) {
                int end = Math.min((start + getLineWidth()) - 1, length);
                getPrintStream().println(rich.subStr(start, end));
            }
        } catch (ChangeVetoException e) {
            IOException wrapped = new IOException("Unable to enrich sequence");
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Returns the name of the default format.
     *
     * @return {@code "FASTA"}
     */
    @Override // org.biojava.bio.seq.io.SequenceFormat
    public String getDefaultFormat() {
        return "FASTA";
    }

    /**
     * Returns the header settings used when writing.
     *
     * @return the current header settings
     */
    public FastaHeader getHeader() {
        return this.header;
    }

    /**
     * Replaces the header settings used when writing.
     *
     * @param fastaHeader the new header settings
     */
    public void setHeader(FastaHeader fastaHeader) {
        this.header = fastaHeader;
    }

    // Registers this format and compiles the shared patterns when the class is loaded.
    static {
        RichSequence.IOTools.registerFormat(FastaFormat.class);
        hp = Pattern.compile(">\\s*(\\S+)(\\s+(.*))?");
        dp = Pattern.compile("^(gi\\|(\\d+)\\|)?(\\w+)\\|(\\w+?)(\\.(\\d+))?\\|(\\w+)?$");
        readableFiles = Pattern.compile(".*(fa|fas)$");
        aminoAcids = Pattern.compile(".*[FLIPQE].*");
    }
}
