package org.musicbrainz.search.index;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.lang.CharEncoding;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.similarities.Similarity;
import org.mozilla.universalchardet.UniversalDetector;
import org.musicbrainz.search.MbDocument;

/* loaded from: input_file:org/musicbrainz/search/index/FreeDBIndex.class */
/**
 * Builds the "freedb" index from a bzip2-compressed tar dump.
 *
 * <p>Each archive entry located in a directory listed in {@link #CATEGORIES} is decoded
 * (the charset is auto-detected, with strict UTF8 and ISO-8859-1 decoders as fallbacks),
 * parsed for its {@code DTITLE}, {@code DISCID}, {@code DYEAR} and {@code TTITLE} lines and
 * turned into one Lucene document.
 *
 * <p>Instances keep mutable per-run statistics and shared {@link CharsetDecoder} objects and
 * perform no synchronization of their own.
 */
public class FreeDBIndex implements Index {
    private static final String INDEX_SUFFIX = "_index";

    /** Charset name (and statistics key) of the first fallback decoder. */
    private static final String UTF8 = "UTF8";

    /** Directory names of the dump whose entries are indexed; all other directories are skipped. */
    protected static String[] CATEGORIES = {"data", "folk", "jazz", "misc", "rock", "country", "blues", "newage", "reggae", "classical", "soundtrack"};

    /** The bzip2-compressed tar archive read by {@link #indexData(IndexWriter)}. */
    protected File dumpFile;

    /** Entries skipped because they were zero-length or could not be read completely. */
    private int emptyCount = 0;

    /** Entries for which no document could be built. */
    private int failedCount = 0;

    /** Detected charset names for which the JVM could not supply a charset. */
    private final Set<String> unknownCharsets = new HashSet<>();

    /** Strict (error-reporting) decoders, keyed by charset name. */
    private final Map<String, CharsetDecoder> decoderMap = new HashMap<>();

    /** Number of documents successfully created per charset name, sorted by name. */
    private final Map<String, Integer> countMap = new TreeMap<>();

    /** Creates an index builder with the UTF8 and ISO-8859-1 fallback decoders registered. */
    public FreeDBIndex() {
        initDecoders();
    }

    /** Registers the two fallback decoders used when charset detection yields nothing usable. */
    private void initDecoders() {
        registerDecoder(UTF8);
        registerDecoder(CharEncoding.ISO_8859_1);
    }

    /**
     * Creates a decoder that reports (rather than replaces) malformed and unmappable input,
     * stores it under {@code charsetName} and starts that charset's document counter at zero.
     *
     * @param charsetName name passed to {@link Charset#forName(String)}
     * @return the newly registered decoder
     * @throws IllegalArgumentException if the name is illegal or the charset is not supported;
     *         nothing is registered in that case
     */
    private CharsetDecoder registerDecoder(String charsetName) {
        CharsetDecoder decoder = Charset.forName(charsetName).newDecoder();
        decoder.onMalformedInput(CodingErrorAction.REPORT);
        decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        this.decoderMap.put(charsetName, decoder);
        this.countMap.put(charsetName, 0);
        return decoder;
    }

    /**
     * Adds a document holding the current time (milliseconds since the epoch) in the
     * {@code LAST_UPDATED} meta field.
     *
     * @param indexWriter writer receiving the meta document
     * @throws IOException if the writer fails to add the document
     */
    @Override // org.musicbrainz.search.index.Index
    public void addMetaInformation(IndexWriter indexWriter) throws IOException {
        MbDocument metaDocument = new MbDocument();
        metaDocument.addNumericField(MetaIndexField.LAST_UPDATED, Long.valueOf(new Date().getTime()));
        indexWriter.addDocument(metaDocument.getLuceneDocument());
    }

    /**
     * @return the dump archive that {@link #indexData(IndexWriter)} reads, or {@code null} if unset
     */
    public File getDumpFile() {
        return this.dumpFile;
    }

    /**
     * @param file the bzip2-compressed tar dump to read in {@link #indexData(IndexWriter)}
     */
    public void setDumpFile(File file) {
        this.dumpFile = file;
    }

    /**
     * @return the analyzer that {@code DatabaseIndex} builds for {@code FreeDBIndexField}
     */
    @Override // org.musicbrainz.search.index.Index
    public Analyzer getAnalyzer() {
        return DatabaseIndex.getAnalyzer(FreeDBIndexField.class);
    }

    /**
     * @return the fixed index name, {@code "freedb"}
     */
    @Override // org.musicbrainz.search.index.Index
    public String getName() {
        return "freedb";
    }

    /**
     * @return the index name followed by {@code "_index"}
     */
    @Override // org.musicbrainz.search.index.Index
    public String getFilename() {
        return getName() + INDEX_SUFFIX;
    }

    /**
     * Reads the dump archive and adds one document per parseable entry found in a directory
     * listed in {@link #CATEGORIES}. Progress and final statistics are printed to standard output.
     *
     * @param indexWriter writer receiving the created documents
     * @throws IOException if the dump cannot be opened or read, or the writer fails
     */
    public void indexData(IndexWriter indexWriter) throws IOException {
        Set<String> categories = new HashSet<>(Arrays.asList(CATEGORIES));
        // The file stream is declared separately so it is closed even when constructing the
        // bzip2/tar wrappers fails.
        try (FileInputStream fileStream = new FileInputStream(this.dumpFile);
             TarArchiveInputStream tarStream = new TarArchiveInputStream(
                     new BZip2CompressorInputStream(new BufferedInputStream(fileStream)))) {
            String category = "";
            boolean indexCategory = false;
            ArchiveEntry archiveEntry;
            while ((archiveEntry = tarStream.getNextEntry()) != null) {
                if (archiveEntry.isDirectory()) {
                    category = archiveEntry.getName().replace("/", "");
                    indexCategory = categories.contains(category);
                    if (indexCategory) {
                        System.out.println("  Indexing category: " + category);
                    } else if (!".".equals(category)) {
                        System.out.println("  Skipping category: " + category);
                    }
                    continue;
                }
                if (!indexCategory) {
                    continue;
                }
                byte[] content = readEntry(tarStream, archiveEntry.getSize());
                if (content == null) {
                    this.emptyCount++;
                    continue;
                }
                Document document = documentFromFreeDBEntry(archiveEntry.getName(), category, content);
                if (document != null) {
                    indexWriter.addDocument(document);
                }
            }
        }
        for (Map.Entry<String, Integer> charsetCount : this.countMap.entrySet()) {
            System.out.println("No of " + charsetCount.getKey() + " entries " + charsetCount.getValue());
        }
        System.out.println("  No of empty entries " + this.emptyCount);
        System.out.println("  No of failed entries " + this.failedCount);
    }

    /**
     * Reads the complete content of the current archive entry.
     *
     * <p>A single {@code read} call may legitimately return fewer bytes than requested, so the
     * stream is read repeatedly until the buffer is full or the entry ends.
     *
     * @param tarStream stream positioned at the start of the entry's data
     * @param size the entry size reported by the archive
     * @return the entry bytes, or {@code null} if the entry is zero-length, too large for a
     *         byte array, or ends before {@code size} bytes were read
     * @throws IOException if reading from the archive fails
     */
    private static byte[] readEntry(TarArchiveInputStream tarStream, long size) throws IOException {
        if (size <= 0 || size > Integer.MAX_VALUE) {
            return null;
        }
        byte[] content = new byte[(int) size];
        int filled = 0;
        while (filled < content.length) {
            int read = tarStream.read(content, filled, content.length - filled);
            if (read < 0) {
                return null;
            }
            filled += read;
        }
        return content;
    }

    /**
     * Runs the universal charset detector over the given bytes.
     *
     * @param bArr raw entry content
     * @return the detected charset name, or {@code null} if the detector reports none
     */
    private String detectCharset(byte[] bArr) {
        UniversalDetector detector = new UniversalDetector(null);
        detector.handleData(bArr, 0, bArr.length);
        detector.dataEnd();
        String charsetName = detector.getDetectedCharset();
        detector.reset();
        return charsetName;
    }

    /**
     * Decodes an entry with the given decoder and builds its document.
     *
     * <p>{@code DTITLE} lines are concatenated and split on {@code " / "} into artist and title;
     * of a {@code DISCID} list only the first id is kept; the track count is the number in the
     * key of the last {@code TTITLE} line plus one. On success the counter for
     * {@code charsetName} is incremented.
     *
     * @param entryName archive entry name, used in error messages only
     * @param category value stored in the {@code CATEGORY} field
     * @param content raw entry bytes
     * @param decoder strict decoder used to read {@code content}
     * @param charsetName key of {@code decoder} in the statistics map
     * @return the document, or {@code null} if the content cannot be decoded with
     *         {@code decoder} or the fields cannot be built
     */
    private Document parseEntryAndCreateDocument(String entryName, String category, byte[] content,
            CharsetDecoder decoder, String charsetName) {
        // The decoder is shared between entries; clear any state left by a previous failed decode.
        decoder.reset();
        StringBuilder discTitle = new StringBuilder();
        String discId = "";
        String year = "";
        String lastTrackLine = "";
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new ByteArrayInputStream(content), decoder))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("DTITLE=")) {
                    discTitle.append(line.substring(7));
                } else if (line.startsWith("DISCID=")) {
                    discId = line.substring(7);
                    int comma = discId.indexOf(',');
                    if (comma >= 0) {
                        // Keep the whole first id: everything before the separator.
                        discId = discId.substring(0, comma);
                    }
                } else if (line.startsWith("DYEAR=")) {
                    year = line.substring(6);
                } else if (line.startsWith("TTITLE")) {
                    lastTrackLine = line;
                }
            }
        } catch (IOException ignored) {
            // With a REPORT-ing decoder this signals content that is not valid in this charset;
            // the caller decides whether to try another decoder.
            return null;
        }
        try {
            String artist = "";
            String title = "";
            String[] artistAndTitle = discTitle.toString().split(" / ");
            if (artistAndTitle.length >= 2) {
                artist = artistAndTitle[0].trim();
                title = artistAndTitle[1].trim();
            }
            int separator = lastTrackLine.indexOf('=');
            String trackKey = separator < 0 ? lastTrackLine : lastTrackLine.substring(0, separator);
            int trackCount = 0;
            if (trackKey.length() >= 7) {
                // Track keys are numbered from zero, so the count is the last number plus one.
                trackCount = Integer.parseInt(trackKey.substring(6)) + 1;
            } else {
                System.err.println("Value of lastTrack cannot be parsed is:" + lastTrackLine);
            }
            MbDocument mbDocument = new MbDocument();
            mbDocument.addField(FreeDBIndexField.ARTIST, artist);
            mbDocument.addField(FreeDBIndexField.TITLE, title);
            mbDocument.addField(FreeDBIndexField.DISCID, discId);
            mbDocument.addField(FreeDBIndexField.CATEGORY, category);
            mbDocument.addField(FreeDBIndexField.YEAR, year);
            mbDocument.addField(FreeDBIndexField.TRACKS, Integer.toString(trackCount));
            this.countMap.put(charsetName, Integer.valueOf(this.countMap.get(charsetName).intValue() + 1));
            return mbDocument.getLuceneDocument();
        } catch (Exception e) {
            System.err.println("  " + entryName + " Unable to determine no of tracks from " + lastTrackLine);
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Returns the strict decoder for a detected charset, creating and registering it on first use.
     *
     * @param charsetName charset name reported by the detector
     * @return the decoder, or {@code null} if the JVM cannot supply this charset; such names are
     *         remembered so the lookup is not repeated
     */
    private CharsetDecoder decoderFor(String charsetName) {
        CharsetDecoder decoder = this.decoderMap.get(charsetName);
        if (decoder != null || this.unknownCharsets.contains(charsetName)) {
            return decoder;
        }
        try {
            return registerDecoder(charsetName);
        } catch (IllegalArgumentException e) {
            // Charset.forName never returns null; it throws IllegalCharsetNameException or
            // UnsupportedCharsetException (both IllegalArgumentException) for unusable names.
            this.unknownCharsets.add(charsetName);
            return null;
        }
    }

    /**
     * Builds the document for one dump entry.
     *
     * <p>If a charset is detected and a decoder is available for it, only that decoder is tried.
     * Otherwise the UTF8 decoder is tried first and the ISO-8859-1 decoder second. Every entry
     * that yields no document is counted as failed.
     *
     * @param entryName archive entry name, used in error messages only
     * @param category name of the category directory the entry belongs to
     * @param content raw entry bytes
     * @return the document, or {@code null} if the entry could not be decoded or parsed
     */
    protected Document documentFromFreeDBEntry(String entryName, String category, byte[] content) {
        String detected = detectCharset(content);
        CharsetDecoder detectedDecoder = detected == null ? null : decoderFor(detected);
        if (detectedDecoder != null) {
            Document document = parseEntryAndCreateDocument(entryName, category, content, detectedDecoder, detected);
            if (document == null) {
                this.failedCount++;
            }
            return document;
        }
        String[] fallbacks = {UTF8, CharEncoding.ISO_8859_1};
        for (String fallback : fallbacks) {
            Document document = parseEntryAndCreateDocument(entryName, category, content,
                    this.decoderMap.get(fallback), fallback);
            if (document != null) {
                return document;
            }
        }
        this.failedCount++;
        return null;
    }

    /**
     * @return always {@code null}; this index supplies no similarity of its own
     */
    @Override // org.musicbrainz.search.index.Index
    public Similarity getSimilarity() {
        return null;
    }
}
