/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hop.pipeline.transforms.fuzzymatch;

import com.wcohen.ss.Jaro;
import com.wcohen.ss.JaroWinkler;
import com.wcohen.ss.NeedlemanWunsch;
import java.util.Iterator;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.RefinedSoundex;
import org.apache.commons.codec.language.Soundex;
import org.apache.commons.lang.StringUtils;
import org.apache.hop.core.Const;
import org.apache.hop.core.IRowSet;
import org.apache.hop.core.exception.HopException;
import org.apache.hop.core.exception.HopTransformException;
import org.apache.hop.core.exception.HopValueException;
import org.apache.hop.core.row.IRowMeta;
import org.apache.hop.core.row.IValueMeta;
import org.apache.hop.core.row.RowDataUtil;
import org.apache.hop.core.row.RowMeta;
import org.apache.hop.core.util.Utils;
import org.apache.hop.core.variables.IVariables;
import org.apache.hop.i18n.BaseMessages;
import org.apache.hop.pipeline.Pipeline;
import org.apache.hop.pipeline.PipelineMeta;
import org.apache.hop.pipeline.transform.BaseTransform;
import org.apache.hop.pipeline.transform.ITransformData;
import org.apache.hop.pipeline.transform.ITransformMeta;
import org.apache.hop.pipeline.transform.TransformMeta;
import org.apache.hop.pipeline.transform.stream.IStream;
import org.apache.hop.pipeline.transforms.fuzzymatch.FuzzyMatchData;
import org.apache.hop.pipeline.transforms.fuzzymatch.FuzzyMatchMeta;
import org.apache.hop.pipeline.transforms.fuzzymatch.LetterPairSimilarity;

public class FuzzyMatch
extends BaseTransform<FuzzyMatchMeta, FuzzyMatchData> {
    private static final Class<?> PKG = FuzzyMatchMeta.class;

    /**
     * Creates the transform instance; every argument is handed unchanged to the
     * {@link BaseTransform} constructor.
     *
     * @param transformMeta the transform definition inside the pipeline
     * @param meta the fuzzy match settings
     * @param data the runtime state holder for this transform copy
     * @param copyNr the copy number of this transform instance
     * @param pipelineMeta the pipeline definition
     * @param pipeline the running pipeline
     */
    public FuzzyMatch(
            TransformMeta transformMeta,
            FuzzyMatchMeta meta,
            FuzzyMatchData data,
            int copyNr,
            PipelineMeta pipelineMeta,
            Pipeline pipeline) {
        super(transformMeta, meta, data, copyNr, pipelineMeta, pipeline);
    }

    /**
     * Reads every row of the info (lookup) stream and stores the configured fields in the cache.
     *
     * <p>When the first row arrives, the lookup row layout is cloned into {@code data.infoMeta},
     * the key field (and, when {@code data.addAdditionalFields} is set, the extra lookup fields)
     * are located in it, and {@code data.infoCache} is built from their value metadata.
     *
     * @return {@code false} when the info stream has no transform attached, {@code true} once the
     *     stream has been read to its end
     * @throws HopException when the key field or an additional field cannot be found in the
     *     lookup stream, or when converting or caching a value fails
     */
    private boolean readLookupValues() throws HopException {
        data.infoStream = meta.getTransformIOMeta().getInfoStreams().get(0);
        if (data.infoStream.getTransformMeta() == null) {
            logError(BaseMessages.getString(PKG, "FuzzyMatch.Log.NoLookupTransformSpecified"));
            return false;
        }

        if (isDetailed()) {
            logDetailed(
                    BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadingFromStream")
                            + data.infoStream.getTransformName()
                            + "]");
        }

        IRowSet lookupRowSet = findInputRowSet(data.infoStream.getTransformName());
        boolean layoutPending = true;

        for (Object[] lookupRow = getRowFrom(lookupRowSet);
                lookupRow != null;
                lookupRow = getRowFrom(lookupRowSet)) {

            // The cache layout can only be derived once the first row (and its metadata) is there.
            if (layoutPending) {
                layoutPending = false;
                prepareLookupCacheLayout(lookupRowSet.getRowMeta());
            }

            if (isRowLevel()) {
                logRowlevel(
                        BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadLookupRow")
                                + lookupRowSet.getRowMeta().getString(lookupRow));
            }

            Object[] cacheEntry = new Object[data.nrCachedFields];

            // Slot 0 holds the key; a null key is stored as an empty string.
            int keyIndex = data.indexOfCachedFields[0];
            if (lookupRow[keyIndex] == null) {
                cacheEntry[0] = "";
            } else {
                cacheEntry[0] = readCachedField(lookupRowSet.getRowMeta(), lookupRow, keyIndex);
            }

            // Remaining slots hold the additional lookup fields, copied without a null check.
            for (int slot = 1; slot < data.nrCachedFields; slot++) {
                cacheEntry[slot] =
                        readCachedField(
                                lookupRowSet.getRowMeta(), lookupRow, data.indexOfCachedFields[slot]);
            }

            if (isDebug()) {
                logDebug(
                        BaseMessages.getString(
                                PKG,
                                "FuzzyMatch.Log.AddingValueToCache",
                                data.infoCache.getString(cacheEntry)));
            }
            addToCache(cacheEntry);
        }
        return true;
    }

    /**
     * Derives the cache layout from the lookup stream metadata: locates the key field and the
     * optional additional fields, switches their value metadata to normal storage and registers
     * them in {@code data.infoCache}.
     */
    private void prepareLookupCacheLayout(IRowMeta lookupStreamMeta) throws HopException {
        data.infoMeta = lookupStreamMeta.clone();

        int keyIndex = data.infoMeta.indexOfValue(resolve(meta.getLookupField()));
        if (keyIndex < 0) {
            throw new HopException(
                    BaseMessages.getString(
                            PKG, "FuzzyMatch.Exception.CouldnotFindLookField", meta.getLookupField()));
        }

        data.infoCache = new RowMeta();
        IValueMeta keyMeta = data.infoMeta.getValueMeta(keyIndex);
        keyMeta.setStorageType(IValueMeta.STORAGE_TYPE_NORMAL);
        data.infoCache.addValueMeta(keyMeta);
        data.indexOfCachedFields[0] = keyIndex;

        if (!data.addAdditionalFields) {
            return;
        }

        int slot = 1;
        for (FuzzyMatchMeta.FMLookupValue extraField : meta.getLookupValues()) {
            int fieldIndex = data.infoMeta.indexOfValue(extraField.getName());
            data.indexOfCachedFields[slot] = fieldIndex;
            if (fieldIndex < 0) {
                throw new HopException(
                        BaseMessages.getString(
                                PKG, "FuzzyMatch.Exception.CouldnotFindLookField", extraField.getName()));
            }
            IValueMeta extraMeta = data.infoMeta.getValueMeta(fieldIndex);
            extraMeta.setStorageType(IValueMeta.STORAGE_TYPE_NORMAL);
            data.infoCache.addValueMeta(extraMeta);
            slot++;
        }
        data.nrCachedFields += meta.getLookupValues().size();
    }

    /**
     * Returns one field of a lookup row, converted to normal storage when the stream metadata
     * reports binary-string storage for that field.
     */
    private Object readCachedField(IRowMeta streamMeta, Object[] lookupRow, int fieldIndex)
            throws HopException {
        IValueMeta streamValueMeta = streamMeta.getValueMeta(fieldIndex);
        if (streamValueMeta.isStorageBinaryString()) {
            return streamValueMeta.convertToNormalStorageType(lookupRow[fieldIndex]);
        }
        return lookupRow[fieldIndex];
    }

    /**
     * Builds the output row for one main-stream row.
     *
     * <p>On the first call the output row layout is created from the input layout plus the fields
     * contributed by {@code meta.getFields(...)}, and the index of the main stream field is
     * resolved. A row whose main field is {@code null} gets an empty block of match fields;
     * otherwise the match fields come from {@link #getFromCache(Object[])}.
     *
     * @param rowMeta layout of {@code row}; its size is passed to {@code RowDataUtil.addRowData}
     * @param row the incoming main-stream row
     * @return the result of {@code RowDataUtil.addRowData} for the row and the match fields
     * @throws HopException when the main stream field does not exist, or (wrapped in a
     *     {@link HopTransformException}) when the cache lookup fails
     */
    private Object[] lookupValues(IRowMeta rowMeta, Object[] row) throws HopException {
        if (first) {
            first = false;

            data.outputRowMeta = getInputRowMeta().clone();
            meta.getFields(
                    data.outputRowMeta,
                    getTransformName(),
                    new IRowMeta[] {data.infoMeta},
                    null,
                    this,
                    metadataProvider);

            data.indexOfMainField = getInputRowMeta().indexOfValue(resolve(meta.getMainStreamField()));
            if (data.indexOfMainField < 0) {
                throw new HopException(
                        BaseMessages.getString(
                                PKG, "FuzzyMatch.Exception.CouldnotFindMainField", meta.getMainStreamField()));
            }
        }

        Object[] matchFields;
        if (row[data.indexOfMainField] != null) {
            try {
                matchFields = getFromCache(row);
            } catch (Exception e) {
                throw new HopTransformException(e);
            }
        } else {
            // Nothing to compare against: emit empty match fields.
            matchFields = RowDataUtil.allocateRowData(data.outputRowMeta.size());
        }

        return RowDataUtil.addRowData(row, rowMeta.size(), matchFields);
    }

    /**
     * Adds one prepared lookup entry to {@code data.look}.
     *
     * @param value the cache entry to store
     * @throws HopException when the JVM reports an {@link OutOfMemoryError} while adding the entry
     */
    private void addToCache(Object[] value) throws HopException {
        try {
            data.look.add(value);
        } catch (OutOfMemoryError heapExhausted) {
            String message =
                    BaseMessages.getString(PKG, "FuzzyMatch.Error.JavaHeap", heapExhausted.toString());
            throw new HopException(message);
        }
    }

    /**
     * Dispatches a main-stream row to the matcher that belongs to the configured algorithm.
     *
     * @param keyRow the main-stream row to match
     * @return the match fields produced by the selected matcher, or {@code null} when the
     *     algorithm is none of the ones handled here
     * @throws HopValueException when a value of the row cannot be converted to a string
     */
    private Object[] getFromCache(Object[] keyRow) throws HopValueException {
        if (isDebug()) {
            logDebug(
                    BaseMessages.getString(
                            PKG, "FuzzyMatch.Log.ReadingMainStreamRow", getInputRowMeta().getString(keyRow)));
        }

        return switch (meta.getAlgorithm()) {
            case LEVENSHTEIN, DAMERAU_LEVENSHTEIN, NEEDLEMAN_WUNSH -> doDistance(keyRow);
            case DOUBLE_METAPHONE, METAPHONE, SOUNDEX, REFINED_SOUNDEX -> doPhonetic(keyRow);
            case JARO, JARO_WINKLER, PAIR_SIMILARITY -> doSimilarity(keyRow);
            default -> null;
        };
    }

    /**
     * Matches the main-stream value against every cached lookup key using an edit-distance
     * algorithm (Damerau-Levenshtein, Needleman-Wunsch, or Levenshtein for anything else).
     *
     * <p>Only cached keys whose distance lies within {@code [data.minimalDistance,
     * data.maximalDistance]} are considered. With {@code meta.isCloserValue()} the key with the
     * smallest distance is kept (the first one wins on ties), optionally followed by the distance
     * and the additional lookup fields. Otherwise all keys in range are concatenated into the
     * first output slot, separated by {@code data.valueSeparator}.
     *
     * @param row the main-stream row
     * @return the match fields; all slots are {@code null} when nothing matched
     * @throws HopValueException when the main field cannot be converted to a string
     */
    private Object[] doDistance(Object[] row) throws HopValueException {
        Object[] rowData = RowDataUtil.allocateRowData(data.outputRowMeta.size());

        String lookupValueString = getInputRowMeta().getString(row, data.indexOfMainField);
        if (lookupValueString == null) {
            // Nothing to compare; behave like a row without a match instead of failing later.
            return rowData;
        }

        // Everything below is invariant for the whole cache scan, so it is computed once
        // instead of once per cached row.
        FuzzyMatchMeta.Algorithm algorithm = meta.getAlgorithm();
        boolean caseSensitive = meta.isCaseSensitive();
        boolean closerValue = meta.isCloserValue();
        String comparableLookup = caseSensitive ? lookupValueString : lookupValueString.toLowerCase();
        NeedlemanWunsch needlemanWunsch =
                algorithm == FuzzyMatchMeta.Algorithm.NEEDLEMAN_WUNSH ? new NeedlemanWunsch() : null;

        // -1 means "no candidate accepted yet".
        long distance = -1L;

        for (Object[] cachedData : data.look) {
            String cacheValue = (String) cachedData[0];
            String comparableCache = caseSensitive ? cacheValue : cacheValue.toLowerCase();

            int cDistance =
                    switch (algorithm) {
                        case DAMERAU_LEVENSHTEIN ->
                                Utils.getDamerauLevenshteinDistance(comparableCache, comparableLookup);
                        case NEEDLEMAN_WUNSH ->
                                Math.abs((int) needlemanWunsch.score(comparableCache, comparableLookup));
                        default -> StringUtils.getLevenshteinDistance(comparableCache, comparableLookup);
                    };

            if (cDistance < data.minimalDistance || cDistance > data.maximalDistance) {
                continue;
            }

            if (closerValue) {
                // Keep only strictly better candidates than the current best.
                if (distance != -1L && cDistance >= distance) {
                    continue;
                }
                distance = cDistance;

                int index = 0;
                rowData[index++] = cacheValue;
                if (data.addValueFieldName) {
                    rowData[index++] = distance;
                }
                if (data.addAdditionalFields) {
                    // Cached slots 1..n hold the additional lookup fields.
                    System.arraycopy(cachedData, 1, rowData, index, meta.getLookupValues().size());
                }
            } else if (rowData[0] == null) {
                rowData[0] = cacheValue;
            } else {
                rowData[0] = String.valueOf(rowData[0]) + data.valueSeparator + cacheValue;
            }
        }
        return rowData;
    }

    /**
     * Matches the main-stream value against every cached lookup key by comparing their phonetic
     * codes, as produced by {@link #getEncodedMF(String, FuzzyMatchMeta.Algorithm)}.
     *
     * <p>Every cached key with the same code overwrites the previous match, so the last matching
     * entry in iteration order ends up in the result. The result holds the matched key,
     * optionally its phonetic code and the additional lookup fields.
     *
     * <p>If the main value has no phonetic code (the encoder returned {@code null}; the
     * commons-codec encoders may do so, e.g. for blank input) nothing can match and the empty
     * result is returned, instead of failing with a {@link NullPointerException}.
     *
     * @param row the main-stream row
     * @return the match fields; all slots are {@code null} when nothing matched
     * @throws HopValueException when the main field cannot be converted to a string
     */
    private Object[] doPhonetic(Object[] row) throws HopValueException {
        Object[] rowData = RowDataUtil.allocateRowData(data.outputRowMeta.size());

        FuzzyMatchMeta.Algorithm algorithm = meta.getAlgorithm();

        // Read the value through the row metadata (as doDistance does) rather than a raw cast,
        // so that values not stored as a plain String are converted instead of failing the cast.
        String lookupValue = getInputRowMeta().getString(row, data.indexOfMainField);
        String lookupValueMF = getEncodedMF(lookupValue, algorithm);
        if (lookupValueMF == null) {
            return rowData;
        }

        for (Object[] cachedData : data.look) {
            String cacheValue = (String) cachedData[0];
            String cacheValueMF = getEncodedMF(cacheValue, algorithm);
            // equals(null) is false, so a cached key without a code simply does not match.
            if (!lookupValueMF.equals(cacheValueMF)) {
                continue;
            }

            int index = 0;
            rowData[index++] = cacheValue;
            if (data.addValueFieldName) {
                rowData[index++] = cacheValueMF;
            }
            if (data.addAdditionalFields) {
                // Cached slots 1..n hold the additional lookup fields.
                System.arraycopy(cachedData, 1, rowData, index, meta.getLookupValues().size());
            }
        }
        return rowData;
    }

    /**
     * Encodes a value with the phonetic encoder that belongs to the given algorithm.
     *
     * @param value the text to encode
     * @param algorithmType the algorithm selecting the encoder
     * @return the encoder's result for Metaphone, Double Metaphone, Soundex and Refined Soundex;
     *     an empty string for any other algorithm
     */
    private String getEncodedMF(String value, FuzzyMatchMeta.Algorithm algorithmType) {
        return switch (algorithmType) {
            case METAPHONE -> new Metaphone().metaphone(value);
            case DOUBLE_METAPHONE -> new DoubleMetaphone().doubleMetaphone(value);
            case SOUNDEX -> new Soundex().encode(value);
            case REFINED_SOUNDEX -> new RefinedSoundex().encode(value);
            default -> "";
        };
    }

    /**
     * Matches the main-stream value against every cached lookup key using a similarity score
     * (Jaro, Jaro-Winkler, or letter-pair similarity for anything else).
     *
     * <p>Only cached keys whose score lies within {@code [data.minimalSimilarity,
     * data.maximalSimilarity]} are considered. With {@code meta.isCloserValue()} the key with the
     * highest score is kept; a key that is equal to the main value is also accepted when its
     * score is exactly zero. The kept key is optionally followed by the score and the additional
     * lookup fields. Otherwise all keys in range are concatenated into the first output slot,
     * separated by {@code data.valueSeparator}.
     *
     * @param row the main-stream row
     * @return the match fields; all slots are {@code null} when nothing matched
     * @throws HopValueException when the main field cannot be converted to a string
     */
    private Object[] doSimilarity(Object[] row) throws HopValueException {
        Object[] rowData = RowDataUtil.allocateRowData(data.outputRowMeta.size());

        // Read the value through the row metadata (as doDistance does) rather than a raw cast,
        // so that values not stored as a plain String are converted instead of failing the cast.
        String mainValue = getInputRowMeta().getString(row, data.indexOfMainField);
        String lookupValueString = mainValue == null ? "" : mainValue;

        // Invariant for the whole cache scan: resolve the algorithm and build its scorer once.
        FuzzyMatchMeta.Algorithm algorithm = meta.getAlgorithm();
        boolean closerValue = meta.isCloserValue();
        Jaro jaro = algorithm == FuzzyMatchMeta.Algorithm.JARO ? new Jaro() : null;
        JaroWinkler jaroWinkler =
                algorithm == FuzzyMatchMeta.Algorithm.JARO_WINKLER ? new JaroWinkler() : null;

        double similarity = 0.0;

        for (Object[] cachedData : data.look) {
            String cacheValue = (String) cachedData[0];

            double cSimilarity =
                    switch (algorithm) {
                        case JARO -> jaro.score(cacheValue, lookupValueString);
                        case JARO_WINKLER -> jaroWinkler.score(cacheValue, lookupValueString);
                        default -> LetterPairSimilarity.getSimiliarity(cacheValue, lookupValueString);
                    };

            boolean inRange =
                    data.minimalSimilarity <= cSimilarity && cSimilarity <= data.maximalSimilarity;
            if (!inRange) {
                continue;
            }

            if (closerValue) {
                boolean better = cSimilarity > similarity;
                boolean identicalWithZeroScore =
                        cSimilarity == 0.0 && cacheValue.equals(lookupValueString);
                if (!better && !identicalWithZeroScore) {
                    continue;
                }
                similarity = cSimilarity;

                int index = 0;
                rowData[index++] = cacheValue;
                if (data.addValueFieldName) {
                    rowData[index++] = similarity;
                }
                if (data.addAdditionalFields) {
                    // Cached slots 1..n hold the additional lookup fields.
                    System.arraycopy(cachedData, 1, rowData, index, meta.getLookupValues().size());
                }
            } else if (rowData[0] == null) {
                rowData[0] = cacheValue;
            } else {
                rowData[0] = String.valueOf(rowData[0]) + data.valueSeparator + cacheValue;
            }
        }
        return rowData;
    }

    /**
     * Processes one main-stream row.
     *
     * <p>On the first call the lookup stream is read into the cache. Each subsequent row is
     * matched against the cache and written to the output.
     *
     * <p>When the lookup fails with a {@link HopException} and error handling is enabled for this
     * transform, the row is sent to the error stream and processing continues with the next row.
     * Without error handling the error is logged and the pipeline is stopped.
     *
     * @return {@code true} while more rows should be processed, {@code false} once the input is
     *     exhausted or the transform stopped because of an error
     * @throws HopException when reading the lookup stream, reading a row or writing a row fails
     */
    @Override
    public boolean processRow() throws HopException {
        if (data.readLookupValues) {
            data.readLookupValues = false;

            if (!readLookupValues()) {
                logError(BaseMessages.getString(PKG, "FuzzyMatch.Log.UnableToReadDataFromLookupStream"));
                setErrors(1L);
                stopAll();
                return false;
            }
            if (isDetailed()) {
                logDetailed(
                        BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadValuesInMemory", data.look.size()));
            }
        }

        Object[] r = getRow();
        if (r == null) {
            // No more input on the main stream.
            if (isDetailed()) {
                logDetailed(
                        BaseMessages.getString(
                                PKG, "FuzzyMatch.Log.StoppedProcessingWithEmpty", getLinesRead()));
            }
            setOutputDone();
            return false;
        }

        try {
            Object[] outputRow = lookupValues(getInputRowMeta(), r);
            if (outputRow == null) {
                setOutputDone();
                return false;
            }

            putRow(data.outputRowMeta, outputRow);

            if (checkFeedback(getLinesRead()) && isBasic()) {
                logBasic(BaseMessages.getString(PKG, "FuzzyMatch.Log.LineNumber") + getLinesRead());
            }
        } catch (HopException e) {
            if (getTransformMeta().isDoingErrorHandling()) {
                // Route the failing row to the error stream and keep the pipeline running;
                // stopping here as well would defeat the purpose of error handling.
                putError(
                        getInputRowMeta(), r, 1L, e.toString(), meta.getMainStreamField(), "FuzzyMatch001");
            } else {
                logError(
                        BaseMessages.getString(PKG, "FuzzyMatch.Log.ErrorInTransformRunning")
                                + e.getMessage());
                setErrors(1L);
                stopAll();
                setOutputDone();
                return false;
            }
        }
        return true;
    }

    /**
     * Validates the settings and prepares the runtime state before rows are processed.
     *
     * <p>Fails when the main stream field, the lookup field or the (resolved) output match field
     * is empty. Otherwise it decides whether a value field and additional lookup fields are
     * produced, sizes {@code data.indexOfCachedFields}, parses the minimal/maximal bounds for the
     * distance and similarity algorithms, and flags the lookup stream to be read on the first
     * processed row.
     *
     * @return {@code true} when initialization succeeded, {@code false} otherwise
     */
    @Override
    public boolean init() {
        if (!super.init()) {
            return false;
        }

        if (StringUtils.isEmpty(meta.getMainStreamField())) {
            logError(BaseMessages.getString(PKG, "FuzzyMatch.Error.MainStreamFieldMissing"));
            return false;
        }
        if (StringUtils.isEmpty(meta.getLookupField())) {
            logError(BaseMessages.getString(PKG, "FuzzyMatch.Error.LookupStreamFieldMissing"));
            return false;
        }
        if (StringUtils.isEmpty(resolve(meta.getOutputMatchField()))) {
            logError(BaseMessages.getString(PKG, "FuzzyMatch.Error.OutputMatchFieldMissing"));
            return false;
        }

        // The value field is only produced when a single closest match is requested.
        data.addValueFieldName =
                StringUtils.isNotEmpty(resolve(meta.getOutputValueField())) && meta.isCloserValue();

        FuzzyMatchMeta.Algorithm algorithm = meta.getAlgorithm();
        boolean phonetic =
                algorithm == FuzzyMatchMeta.Algorithm.DOUBLE_METAPHONE
                        || algorithm == FuzzyMatchMeta.Algorithm.SOUNDEX
                        || algorithm == FuzzyMatchMeta.Algorithm.REFINED_SOUNDEX
                        || algorithm == FuzzyMatchMeta.Algorithm.METAPHONE;

        // Slot 0 is the lookup key; additional fields follow when they are requested.
        int cachedFieldCount = 1;
        if (!meta.getLookupValues().isEmpty() && (meta.isCloserValue() || phonetic)) {
            data.addAdditionalFields = true;
            cachedFieldCount += meta.getLookupValues().size();
        }
        data.indexOfCachedFields = new int[cachedFieldCount];

        switch (algorithm) {
            case LEVENSHTEIN, DAMERAU_LEVENSHTEIN, NEEDLEMAN_WUNSH -> {
                configureDistanceBounds();
                resolveValueSeparator();
            }
            case JARO, JARO_WINKLER, PAIR_SIMILARITY -> {
                configureSimilarityBounds();
                resolveValueSeparator();
            }
            default -> {
                // No bounds or separator to configure for the remaining algorithms.
            }
        }

        data.readLookupValues = true;
        return true;
    }

    /** Parses the minimal (default 0) and maximal (default 5) distance and logs them. */
    private void configureDistanceBounds() {
        data.minimalDistance = Const.toInt(resolve(meta.getMinimalValue()), 0);
        if (isDetailed()) {
            logDetailed(
                    BaseMessages.getString(PKG, "FuzzyMatch.Log.MinimalDistance", data.minimalDistance));
        }
        data.maximalDistance = Const.toInt(resolve(meta.getMaximalValue()), 5);
        if (isDetailed()) {
            logDetailed(
                    BaseMessages.getString(PKG, "FuzzyMatch.Log.MaximalDistance", data.maximalDistance));
        }
    }

    /** Parses the minimal (default 0.0) and maximal (default 1.0) similarity and logs them. */
    private void configureSimilarityBounds() {
        data.minimalSimilarity = Const.toDouble(resolve(meta.getMinimalValue()), 0.0);
        if (isDetailed()) {
            logDetailed(
                    BaseMessages.getString(
                            PKG, "FuzzyMatch.Log.MinimalSimilarity", data.minimalSimilarity));
        }
        data.maximalSimilarity = Const.toDouble(resolve(meta.getMaximalValue()), 1.0);
        if (isDetailed()) {
            logDetailed(
                    BaseMessages.getString(
                            PKG, "FuzzyMatch.Log.MaximalSimilarity", data.maximalSimilarity));
        }
    }

    /** Resolves the separator used to join multiple matches; skipped for closest-value mode. */
    private void resolveValueSeparator() {
        if (meta.isCloserValue()) {
            return;
        }
        data.valueSeparator = resolve(meta.getSeparator());
        if (isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.Separator", data.valueSeparator));
        }
    }

    /** Empties the lookup cache held in {@code data.look}, then runs the base class cleanup. */
    @Override
    public void dispose() {
        data.look.clear();
        super.dispose();
    }
}

