/*
 * Decompiled with CFR 0.152.
 */
package org.exist.xquery.modules.ngram;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.exist.dom.QName;
import org.exist.dom.persistent.DocumentSet;
import org.exist.dom.persistent.EmptyNodeSet;
import org.exist.dom.persistent.Match;
import org.exist.dom.persistent.NodeProxy;
import org.exist.dom.persistent.NodeSet;
import org.exist.indexing.ngram.NGramIndex;
import org.exist.indexing.ngram.NGramIndexWorker;
import org.exist.xquery.AnalyzeContextInfo;
import org.exist.xquery.Atomize;
import org.exist.xquery.BasicExpressionVisitor;
import org.exist.xquery.Dependency;
import org.exist.xquery.DynamicCardinalityCheck;
import org.exist.xquery.ErrorCodes;
import org.exist.xquery.Expression;
import org.exist.xquery.Function;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.LocationStep;
import org.exist.xquery.NodeTest;
import org.exist.xquery.Optimizable;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.modules.ngram.query.AlternativeStrings;
import org.exist.xquery.modules.ngram.query.EmptyExpression;
import org.exist.xquery.modules.ngram.query.EndAnchor;
import org.exist.xquery.modules.ngram.query.EvaluatableExpression;
import org.exist.xquery.modules.ngram.query.FixedString;
import org.exist.xquery.modules.ngram.query.StartAnchor;
import org.exist.xquery.modules.ngram.query.Wildcard;
import org.exist.xquery.modules.ngram.query.WildcardedExpression;
import org.exist.xquery.modules.ngram.query.WildcardedExpressionSequence;
import org.exist.xquery.modules.ngram.utils.NodeProxies;
import org.exist.xquery.modules.ngram.utils.NodeSets;
import org.exist.xquery.util.Error;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;

public class NGramSearch
extends Function
implements Optimizable {
    /**
     * Regular expression for an interval qualifier: an opening brace, one or more digits, a comma,
     * one or more digits and a closing brace. The two digit runs are capture groups 1 and 2.
     * Used by tokenizeQuery() to validate a qualifier and by parseQuery() to extract the bounds.
     */
    private static final String INTERVAL_QUALIFIER_PATTERN = "\\{([0-9]+),([0-9]+)\\}";
    /**
     * Shared description text for the search functions. Not referenced by name anywhere in this
     * decompiled class; the same text appears inline in the descriptions inside {@link #signatures}.
     */
    private static final String SEARCH_DESCRIPTION = "Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. Nodes need to have an ngram index to be searched.";
    /**
     * User-facing description of the wildcard query syntax (full stop with optional qualifier,
     * bracketed character classes, start/end anchors and backslash escaping).
     * Not referenced by name anywhere in this decompiled class; the same text appears inline as the
     * description of the queryString parameter of the wildcard-contains entry in {@link #signatures}.
     */
    private static final String WILDCARD_PATTERN_DESCRIPTION = "The string to search for.A full stop, '.', (not between brackets), without any qualifiers: Matches a single arbitrary character.A full stop, '.', (not between brackets), immediately followed by a single question mark, '?': Matches either no characters or one character.A full stop, '.', (not between brackets), immediately followed by a single asterisk, '*': Matches zero or more characters.A full stop, '.', (not between brackets), immediately followed by a single plus sign, '+': Matches one or more characters.A full stop, '.', immediately followed by a sequence of characters that matches the regular expression {[0-9]+,[0-9]+}: Matches a number of characters, where the number is no less than the number represented by the series of digits before the comma, and no greater than the number represented by the series of digits following the comma.An  expression  \"[\u2026]\"  matches a single character, namely any of the charactersenclosed by the brackets.  The string enclosed by the brackets cannot be empty; therefore ']' can be allowed between  the brackets, provided that it is the first character.(Thus, \"[][?]\" matches the three characters '[', ']' and '?'.)A circumflex accent, '^', at the start of the search string matches the start of the element content.A dollar sign, '$', at the end of the search string matches the end of the element content.One can remove the special meaning of any character mentioned above by preceding them by a backslash.Between brackets these characters stand for themselves.  Thus, \"[[?*\\]\" matchesthe four characters '[', '?', '*' and '\\'.'?', '*', '+' and character sequences matching the regular expression {[0-9]+,[0-9]+} not immediately preceeded by an unescaped period, '.', stand for themselves.'^' and '$' not at the very beginning or end of the search string, respectively, stand for themselves.";
    /** Log4j logger for this class; used for trace/debug output while parsing and evaluating queries. */
    protected static final Logger LOG = LogManager.getLogger(NGramSearch.class);
    /**
     * Signatures of the four functions implemented by this class, all in the
     * http://exist-db.org/xquery/ngram namespace with prefix "ngram": contains, ends-with,
     * starts-with and wildcard-contains. Each takes a "nodes" parameter and a "queryString"
     * parameter and returns a set of nodes.
     * NOTE(review): the numeric arguments (-1, 7, 22, 3) are decompiled type and cardinality codes;
     * presumably they correspond to named constants of Type and Cardinality - confirm against those classes.
     * NOTE(review): the initializer below is wrapped across physical lines inside string literals;
     * this looks like an artifact of how the file was extracted - confirm against the original class.
     */
    public static final FunctionSignature[] signatures = new FunctionSignature[]{new FunctionSignature(new QName("contains", "http://exist-db.org/xquery/ngram", "ngram"), "Similar to the standard XQuery fn:contains function, but based on the NGram index. Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. Nodes need to have an ngram index to be searched.The string may appear at any position within the node content.", new SequenceType[]{new FunctionParameterSequenceType("nodes", -1, 7, "The input node set to search"), new FunctionParameterSequenceType("queryString", 22, 3, "The exact string to search for")}, (SequenceType)new FunctionReturnSequenceType(-1, 7, "a set of nodes from the input node set $nodes containing the query string or the empty sequence")), new FunctionSignature(new QName("ends-with", "http://exist-db.org/xquery/ngram", "ngram"), "Similar to the standard XQuery fn:ends-with function, but based on the NGram index. Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. Nodes need to have an ngram index to be searched.The string has to appear at the end of the node's content.", new SequenceType[]{new FunctionParameterSequenceType("nodes", -1, 7, "The input node set to search"), new FunctionParameterSequenceType("queryString", 22, 3, "The exact string to search for")}, (SequenceType)new FunctionReturnSequenceType(-1, 7, "a set of nodes from the input node set $nodes ending with the query string or the empty sequence")), new FunctionSignature(new QName("starts-with", "http://exist-db.org/xquery/ngram", "ngram"), "Similar to the standard XQuery fn:starts-with function, but based on the NGram index. Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. 
Nodes need to have an ngram index to be searched.The string has to appear at the start of the node's content.", new SequenceType[]{new FunctionParameterSequenceType("nodes", -1, 7, "The input node set to search"), new FunctionParameterSequenceType("queryString", 22, 3, "The exact string to search for")}, (SequenceType)new FunctionReturnSequenceType(-1, 7, "a set of nodes from the input node set $nodes starting with the query string or the empty sequence")), new FunctionSignature(new QName("wildcard-contains", "http://exist-db.org/xquery/ngram", "ngram"), "Similar to the standard XQuery fn:matches function, but based on the NGram index and allowing wildcards in the query string. Searches the given $queryString in the index defined on the input node set $nodes. String comparison is case insensitive. Nodes need to have an ngram index to be searched.The string has to match the whole node's content.", new SequenceType[]{new FunctionParameterSequenceType("nodes", -1, 7, "The input node set to search"), new FunctionParameterSequenceType("queryString", 22, 3, "The string to search for.A full stop, '.', (not between brackets), without any qualifiers: Matches a single arbitrary character.A full stop, '.', (not between brackets), immediately followed by a single question mark, '?': Matches either no characters or one character.A full stop, '.', (not between brackets), immediately followed by a single asterisk, '*': Matches zero or more characters.A full stop, '.', (not between brackets), immediately followed by a single plus sign, '+': Matches one or more characters.A full stop, '.', immediately followed by a sequence of characters that matches the regular expression {[0-9]+,[0-9]+}: Matches a number of characters, where the number is no less than the number represented by the series of digits before the comma, and no greater than the number represented by the series of digits following the comma.An  expression  \"[\u2026]\"  matches a single character, namely any of the 
charactersenclosed by the brackets.  The string enclosed by the brackets cannot be empty; therefore ']' can be allowed between  the brackets, provided that it is the first character.(Thus, \"[][?]\" matches the three characters '[', ']' and '?'.)A circumflex accent, '^', at the start of the search string matches the start of the element content.A dollar sign, '$', at the end of the search string matches the end of the element content.One can remove the special meaning of any character mentioned above by preceding them by a backslash.Between brackets these characters stand for themselves.  Thus, \"[[?*\\]\" matchesthe four characters '[', '?', '*' and '\\'.'?', '*', '+' and character sequences matching the regular expression {[0-9]+,[0-9]+} not immediately preceeded by an unescaped period, '.', stand for themselves.'^' and '$' not at the very beginning or end of the search string, respectively, stand for themselves.")}, (SequenceType)new FunctionReturnSequenceType(-1, 7, "a set of nodes from the input node set $nodes matching the query string or the empty sequence"))};
    /** Location step chosen by analyze(); eval() hands the preselected nodes to it via setPreloadedData(). */
    private LocationStep contextStep = null;
    /** Name of the node to look up in the index, derived by analyze(); null means canOptimize() returns false. */
    protected QName contextQName = null;
    /** Axis recorded by analyze() and reported by getOptimizeAxis(); stays -1 until analyze() assigns one. */
    protected int axis = -1;
    /** Result of the last preSelect() call; when non-null, eval() reuses it instead of querying the index again. */
    private NodeSet preselectResult = null;
    /** Set by analyze() when the node argument is a single step on axis 12 inside a named outer step. */
    protected boolean optimizeSelf = false;
    /** Set by analyze() when the node argument is a single named step on axis 5 or 6. */
    protected boolean optimizeChild = false;

    /**
     * Creates a function instance for one of the entries in {@link #signatures}.
     *
     * @param context   the query context, handed to the superclass constructor
     * @param signature the signature this instance implements, handed to the superclass constructor
     */
    public NGramSearch(XQueryContext context, FunctionSignature signature) {
        super(context, signature);
    }

    /**
     * Replaces this function's steps with the two call arguments.
     * <p>
     * The first argument (the node expression) is stored unchanged. The second argument (the query
     * string) is wrapped in a {@link DynamicCardinalityCheck} and, if the wrapped expression's static
     * type is not a subtype of type code 20, additionally wrapped in {@link Atomize}.
     *
     * @param arguments the call arguments; elements 0 and 1 are read
     * @throws XPathException declared by the overridden contract
     */
    public void setArguments(List<Expression> arguments) throws XPathException {
        this.steps.clear();

        // Argument 0: the node set expression, taken as is.
        final Expression nodesExpr = arguments.get(0);
        this.steps.add(nodesExpr);

        // Argument 1: the query string, guarded by a cardinality check (cardinality code 3).
        final Expression rawQuery = arguments.get(1);
        final Error cardinalityError = new Error("D02", (Object)"2", (Object)this.mySignature);
        final Expression checkedQuery = new DynamicCardinalityCheck(this.context, 3, rawQuery, cardinalityError);

        // Type code 20 is presumably the atomic type - TODO confirm against Type.
        final boolean alreadyAtomic = Type.subTypeOf(checkedQuery.returnsType(), 20);
        final Expression queryExpr = alreadyAtomic ? checkedQuery : new Atomize(this.context, checkedQuery);
        this.steps.add(queryExpr);
    }

    /**
     * Runs the superclass analysis and then inspects the location steps of the node argument to
     * decide whether and how the index lookup can be optimized.
     * <p>
     * Two situations set {@link #contextQName}, {@link #contextStep} and {@link #axis}:
     * <ul>
     *   <li>the argument is a single step on axis 12 and the enclosing context step is a
     *       {@link LocationStep} with a named, non-wildcard test: the outer step's name and axis
     *       are used and {@link #optimizeSelf} is set;</li>
     *   <li>otherwise, the last step has a named, non-wildcard test: its name is used together with
     *       the first step's axis, and {@link #optimizeChild} is set when there is exactly one
     *       step on axis 5 or 6.</li>
     * </ul>
     * When the naming step is on axis 6 or 13 the QName is created with name type 1.
     * NOTE(review): the axis numbers are decompiled constants; presumably 12 is the self axis,
     * 5 child, 6 attribute and 13 descendant-attribute, and name type 1 marks an attribute - confirm.
     *
     * @param contextInfo static analysis context supplying the enclosing context step
     * @throws XPathException if the superclass analysis fails
     */
    public void analyze(AnalyzeContextInfo contextInfo) throws XPathException {
        super.analyze(contextInfo);

        final List<?> locationSteps = BasicExpressionVisitor.findLocationSteps(this.getArgument(0));
        if (locationSteps.isEmpty()) {
            return;
        }

        final int stepCount = locationSteps.size();
        final LocationStep firstStep = (LocationStep) locationSteps.get(0);
        final LocationStep lastStep = (LocationStep) locationSteps.get(stepCount - 1);

        if (firstStep != null && stepCount == 1 && firstStep.getAxis() == 12) {
            // Single step on axis 12: take the name from the enclosing step, if it has one.
            final Expression outerExpr = contextInfo.getContextStep();
            if (!(outerExpr instanceof LocationStep)) {
                return;
            }
            final LocationStep outerStep = (LocationStep) outerExpr;
            final NodeTest outerTest = outerStep.getTest();
            if (outerTest.isWildcardTest() || outerTest.getName() == null) {
                return;
            }
            if (outerStep.getAxis() == 6 || outerStep.getAxis() == 13) {
                this.contextQName = new QName(outerTest.getName(), 1);
            } else {
                this.contextQName = new QName(outerTest.getName());
            }
            this.contextStep = firstStep;
            this.axis = outerStep.getAxis();
            this.optimizeSelf = true;
            return;
        }

        if (lastStep == null || firstStep == null) {
            return;
        }
        final NodeTest lastTest = lastStep.getTest();
        if (lastTest.isWildcardTest() || lastTest.getName() == null) {
            return;
        }
        if (lastStep.getAxis() == 6 || lastStep.getAxis() == 13) {
            this.contextQName = new QName(lastTest.getName(), 1);
        } else {
            this.contextQName = new QName(lastTest.getName());
        }
        this.axis = firstStep.getAxis();
        this.optimizeChild = stepCount == 1 && (this.axis == 5 || this.axis == 6);
        this.contextStep = lastStep;
    }

    /**
     * Reports whether analyze() found a named node test to look up in the index.
     *
     * @param contextSequence not used by this implementation
     * @return {@code true} if {@link #contextQName} has been set
     */
    public boolean canOptimize(Sequence contextSequence) {
        return this.contextQName != null;
    }

    /**
     * @return the {@link #optimizeSelf} flag computed by analyze()
     */
    public boolean optimizeOnSelf() {
        return this.optimizeSelf;
    }

    /**
     * @return the {@link #optimizeChild} flag computed by analyze()
     */
    public boolean optimizeOnChild() {
        return this.optimizeChild;
    }

    /**
     * @return the axis recorded by analyze(), or -1 if analyze() did not assign one
     */
    public int getOptimizeAxis() {
        return this.axis;
    }

    /**
     * Queries the ngram index up front for the node name found by analyze() and caches the result
     * so that a later eval() call can reuse it.
     *
     * @param contextSequence the context sequence; supplies the document set, the context for
     *                        evaluating the query string and (optionally) the context node set
     * @param useContext      if {@code true}, the context sequence is passed on as a node set to
     *                        restrict the search; otherwise {@code null} is passed
     * @return the matching nodes, also stored in {@link #preselectResult}
     * @throws XPathException if evaluating the query argument or the index search fails
     */
    public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XPathException {
        // Drop any stale result first so a failure below leaves no cached preselection behind.
        this.preselectResult = null;
        final long startedAt = System.currentTimeMillis();

        final NGramIndexWorker worker = (NGramIndexWorker) this.context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);
        final DocumentSet documents = contextSequence.getDocumentSet();
        final String queryString = this.getArgument(1).eval(contextSequence).getStringValue();

        final List<QName> names = new ArrayList<QName>(1);
        names.add(this.contextQName);

        NodeSet contextNodes = null;
        if (useContext) {
            contextNodes = contextSequence.toNodeSet();
        }

        // The trailing 1 is the axis code handed to the index search - presumably "descendant"; TODO confirm.
        this.preselectResult = this.processMatches(worker, documents, names, queryString, contextNodes, 1);

        if (this.context.getProfiler().traceFunctions()) {
            final long elapsed = System.currentTimeMillis() - startedAt;
            this.context.getProfiler().traceIndexUsage(this.context, "ngram", (Expression) this, 2, elapsed);
        }
        return this.preselectResult;
    }

    /**
     * Evaluates the search.
     * <p>
     * If preSelect() has already produced a result, that result is installed on the context step as
     * preloaded data and the node argument is simply re-evaluated. Otherwise the node argument is
     * evaluated first and the index is searched for the query string within those nodes.
     *
     * @param contextSequence the context sequence; replaced by the context item's sequence when a
     *                        context item is given
     * @param contextItem     the context item, may be {@code null}
     * @return the matching nodes; {@code NodeSet.EMPTY_SET} when the node argument is empty
     * @throws XPathException if evaluating an argument or the index search fails
     */
    public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
        if (contextItem != null) {
            contextSequence = contextItem.toSequence();
        }

        // Fast path: reuse the nodes found by preSelect().
        if (this.preselectResult != null) {
            this.contextStep.setPreloadedData(contextSequence.getDocumentSet(), this.preselectResult);
            return this.getArgument(0).eval(contextSequence).toNodeSet();
        }

        final Sequence input = this.getArgument(0).eval(contextSequence, contextItem);
        if (input.isEmpty()) {
            return NodeSet.EMPTY_SET;
        }

        final long startedAt = System.currentTimeMillis();
        final NodeSet inNodes = input.toNodeSet();
        final DocumentSet documents = inNodes.getDocumentSet();
        final NGramIndexWorker worker = (NGramIndexWorker) this.context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);
        final String queryString = this.getArgument(1).eval(contextSequence, contextItem).getStringValue();

        // Without a known node name the search is passed null for the QName list.
        List<QName> names = null;
        if (this.contextQName != null) {
            names = new ArrayList<QName>(1);
            names.add(this.contextQName);
        }

        // The trailing 0 is the axis code handed to the index search - presumably "ancestor"; TODO confirm.
        final NodeSet found = this.processMatches(worker, documents, names, queryString, inNodes, 0);

        if (this.context.getProfiler().traceFunctions()) {
            final long elapsed = System.currentTimeMillis() - startedAt;
            this.context.getProfiler().traceIndexUsage(this.context, "ngram", (Expression) this, 1, elapsed);
        }
        return found;
    }

    /**
     * @return the local part of this function's name as given by its signature; processMatches()
     *         compares it against "wildcard-contains", "starts-with" and "ends-with"
     */
    private String getLocalName() {
        return this.getSignature().getName().getLocalPart();
    }

    /**
     * Parses the query according to the function being called, evaluates it against the index and
     * post-processes the matches.
     * <p>
     * For {@code wildcard-contains} the query is parsed by {@link #parseQuery(String)}; for every
     * other function it is treated as one {@link FixedString}. Results of functions whose local
     * name starts with {@code starts-with} or {@code ends-with} are additionally passed through
     * {@code NodeSets.getNodesMatchingAtStart} / {@code NodeSets.getNodesMatchingAtEnd}. Finally each
     * node's own matches are transformed with {@code Match::filterOutOverlappingOffsets}.
     *
     * @param index   the ngram index worker to search with
     * @param docs    the documents to search
     * @param qnames  the node names to restrict the search to, may be {@code null}
     * @param query   the raw query string
     * @param nodeSet the context node set, may be {@code null}
     * @param axis    axis code forwarded to the index search
     * @return the nodes matching the query
     * @throws XPathException if the query cannot be parsed or evaluated
     */
    private NodeSet processMatches(NGramIndexWorker index, DocumentSet docs, List<QName> qnames, String query, NodeSet nodeSet, int axis) throws XPathException {
        // Look the function name up once; it selects both the parser and the post-filter.
        final String functionName = this.getLocalName();

        final EvaluatableExpression parsedQuery = functionName.equals("wildcard-contains") ? this.parseQuery(query) : new FixedString(this, query);
        // Parameterized message: the expression is only rendered when debug logging is enabled.
        LOG.debug("Parsed Query: {}", parsedQuery);

        NodeSet result = parsedQuery.eval(index, docs, qnames, nodeSet, axis, this.getExpressionId());
        if (functionName.startsWith("starts-with")) {
            result = NodeSets.getNodesMatchingAtStart(result, this.getExpressionId());
        } else if (functionName.startsWith("ends-with")) {
            result = NodeSets.getNodesMatchingAtEnd(result, this.getExpressionId());
        }
        return NodeSets.transformNodes(result, proxy -> NodeProxies.transformOwnMatches(proxy, Match::filterOutOverlappingOffsets, this.getExpressionId()));
    }

    /**
     * Turns a {@code wildcard-contains} query string into an evaluatable expression.
     * <p>
     * The query is split by {@link #tokenizeQuery(String)}. A leading {@code "^"} token becomes a
     * {@link StartAnchor} and a trailing {@code "$"} token an {@link EndAnchor}. Of the remaining
     * tokens, those starting with {@code '.'} become {@link Wildcard}s, those starting with
     * {@code '['} become {@link AlternativeStrings} and all others become unescaped
     * {@link FixedString}s. If no tokens remain once the anchors are removed, an
     * {@link EmptyExpression} is returned.
     *
     * @param query the raw query string
     * @return the parsed expression
     * @throws XPathException if the query cannot be tokenized or a wildcard qualifier is malformed
     */
    private EvaluatableExpression parseQuery(String query) throws XPathException {
        final List<String> queryTokens = NGramSearch.tokenizeQuery(query);
        // Parameterized message: the token list is only rendered when trace logging is enabled.
        LOG.trace("Tokenized query: {}", queryTokens);
        if (queryTokens.isEmpty()) {
            return new EmptyExpression();
        }

        final ArrayList<WildcardedExpression> expressions = new ArrayList<WildcardedExpression>();

        // Only a "^" in first position is an anchor; anywhere else it is an ordinary token.
        if (queryTokens.get(0).equals("^")) {
            expressions.add(new StartAnchor());
            queryTokens.remove(0);
        }
        if (queryTokens.isEmpty()) {
            return new EmptyExpression();
        }

        // Likewise only a "$" in last position is an anchor; it is appended after all other tokens.
        final int lastIndex = queryTokens.size() - 1;
        final boolean endAnchorPresent = queryTokens.get(lastIndex).equals("$");
        if (endAnchorPresent) {
            queryTokens.remove(lastIndex);
        }
        if (queryTokens.isEmpty()) {
            return new EmptyExpression();
        }

        for (final String token : queryTokens) {
            if (token.startsWith(".")) {
                expressions.add(this.toWildcard(token, query));
            } else if (token.startsWith("[")) {
                expressions.add(this.toAlternativeStrings(token));
            } else {
                expressions.add(new FixedString(this, NGramSearch.unescape(token)));
            }
        }
        if (endAnchorPresent) {
            expressions.add(new EndAnchor());
        }
        return new WildcardedExpressionSequence(expressions);
    }

    /**
     * Converts a wildcard token (a full stop with an optional qualifier) into a {@link Wildcard}.
     *
     * @param token the wildcard token, starting with {@code '.'}
     * @param query the complete query string, attached to the error raised for unparsable bounds
     * @return the wildcard with the minimum and maximum length given by the qualifier
     * @throws XPathException if the qualifier is not one of {@code ?}, {@code *}, {@code +} or a
     *                        well-formed interval whose bounds fit into an {@code int}
     */
    private Wildcard toWildcard(String token, String query) throws XPathException {
        if (token.length() == 1) {
            return new Wildcard(1, 1);
        }
        final String qualifier = token.substring(1);
        switch (qualifier) {
            case "?":
                return new Wildcard(0, 1);
            case "*":
                return new Wildcard(0, Integer.MAX_VALUE);
            case "+":
                return new Wildcard(1, Integer.MAX_VALUE);
            default:
                break;
        }
        final Matcher m = Pattern.compile(INTERVAL_QUALIFIER_PATTERN).matcher(qualifier);
        if (!m.matches()) {
            throw new XPathException((Expression)this, ErrorCodes.FTDY0020, "query string violates wildcard qualifier syntax");
        }
        try {
            return new Wildcard(Integer.parseInt(m.group(1)), Integer.parseInt(m.group(2)));
        }
        catch (NumberFormatException nfe) {
            // The digits matched the pattern but do not fit into an int.
            throw new XPathException((Expression)this, ErrorCodes.FTDY0020, "query string violates wildcard qualifier syntax", (Sequence)new StringValue(query), (Throwable)nfe);
        }
    }

    /**
     * Converts a character-class token such as {@code [abc]} into the set of its member characters.
     * Members are collected per Unicode code point, so a character outside the Basic Multilingual
     * Plane stays one alternative instead of being split into two surrogate halves.
     *
     * @param token the character-class token including the enclosing brackets
     * @return an expression matching any one of the enclosed characters
     * @throws XPathException declared in case the {@link AlternativeStrings} constructor raises it
     */
    private AlternativeStrings toAlternativeStrings(String token) throws XPathException {
        final int closingBracket = token.length() - 1;
        final HashSet<String> strings = new HashSet<String>(token.length() - 2);
        int pos = 1;
        while (pos < closingBracket) {
            final int codePoint = token.codePointAt(pos);
            strings.add(new String(Character.toChars(codePoint)));
            pos += Character.charCount(codePoint);
        }
        return new AlternativeStrings(this, strings);
    }

    /**
     * Removes the escaping backslashes from a literal query token: every backslash that is followed
     * by another character is dropped and that following character is kept verbatim.
     * <p>
     * This includes escaped line terminators, which the tokenizer also treats as escaped. A single
     * backslash at the very end of the string has nothing to escape and is kept.
     *
     * @param s the token as produced by the tokenizer
     * @return the token with escape backslashes removed
     */
    private static String unescape(String s) {
        final int length = s.length();
        final StringBuilder out = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            if (c == '\\' && i + 1 < length) {
                // Skip the backslash and emit the escaped character itself.
                c = s.charAt(++i);
            }
            out.append(c);
        }
        return out.toString();
    }

    /**
     * Splits a wildcard query string into tokens.
     * <p>
     * Token kinds, in the order they appear in the query:
     * <ul>
     *   <li>a wildcard: a full stop, optionally followed by {@code ?}, {@code *}, {@code +} or an
     *       interval qualifier matching {@link #INTERVAL_QUALIFIER_PATTERN};</li>
     *   <li>a character class: everything from {@code '['} up to the first {@code ']'} found at
     *       least two characters later, so that a {@code ']'} directly after the opening bracket
     *       is a member of the class;</li>
     *   <li>a single {@code "^"} or {@code "$"};</li>
     *   <li>a literal run: all other characters, including backslash escapes, which are kept
     *       together with their backslash.</li>
     * </ul>
     *
     * @param query the raw query string
     * @return a mutable list of tokens
     * @throws XPathException if the query ends in a lone backslash, contains an unmatched
     *                        {@code '['}, or has a malformed or unterminated interval qualifier
     */
    private static List<String> tokenizeQuery(String query) throws XPathException {
        final List<String> tokens = new ArrayList<String>();
        final StringBuilder literal = new StringBuilder();
        final int length = query.length();

        int i = 0;
        while (i < length) {
            final char c = query.charAt(i);
            switch (c) {
                case '\\': {
                    if (i + 1 >= length) {
                        throw new XPathException("err:FTDY0020: query string is terminated by an unescaped backslash");
                    }
                    // Keep the backslash and the escaped character together in the literal run.
                    literal.append(query, i, i + 2);
                    i += 2;
                    break;
                }
                case '.': {
                    NGramSearch.flushLiteral(literal, tokens);
                    int wildcardEnd = i;
                    if (i + 1 < length) {
                        final char peek = query.charAt(i + 1);
                        if (peek == '?' || peek == '*' || peek == '+') {
                            wildcardEnd = i + 1;
                        } else if (peek == '{') {
                            wildcardEnd = query.indexOf('}', i + 2);
                            if (wildcardEnd == -1) {
                                throw new XPathException("err:FTDY0020: query string violates wildcard syntax: Unmatched qualifier start { in query string; marked by <-- HERE in \"" + query.substring(0, i + 2) + " <-- HERE " + query.substring(i + 2) + "\"");
                            }
                            if (!query.substring(i + 1, wildcardEnd + 1).matches(INTERVAL_QUALIFIER_PATTERN)) {
                                throw new XPathException("err:FTDY0020: query string violates wildcard qualifier syntax;  marked by <-- HERE in \"" + query.substring(0, wildcardEnd + 1) + " <-- HERE " + query.substring(wildcardEnd + 1) + "\"");
                            }
                        }
                    }
                    tokens.add(query.substring(i, wildcardEnd + 1));
                    i = wildcardEnd + 1;
                    break;
                }
                case '[': {
                    // Start looking two characters on, so "[]" followed by more members is accepted.
                    final int characterClassEnd = query.indexOf(']', i + 2);
                    if (characterClassEnd == -1) {
                        throw new XPathException("err:FTDY0020: query string violates wildcard syntax: Unmatched [ in query string; marked by <-- HERE in \"" + query.substring(0, i + 1) + " <-- HERE " + query.substring(i + 1) + "\"");
                    }
                    NGramSearch.flushLiteral(literal, tokens);
                    tokens.add(query.substring(i, characterClassEnd + 1));
                    i = characterClassEnd + 1;
                    break;
                }
                case '^':
                case '$': {
                    // Always emitted as their own token; the parser decides whether they are anchors.
                    NGramSearch.flushLiteral(literal, tokens);
                    tokens.add(String.valueOf(c));
                    i++;
                    break;
                }
                default: {
                    literal.append(c);
                    i++;
                    break;
                }
            }
        }
        NGramSearch.flushLiteral(literal, tokens);
        return tokens;
    }

    /** Appends the pending literal run, if any, to the token list and resets the buffer. */
    private static void flushLiteral(StringBuilder literal, List<String> tokens) {
        if (literal.length() > 0) {
            tokens.add(literal.toString());
            literal.setLength(0);
        }
    }

    /**
     * Searches the index for a literal string.
     * <p>
     * The query is cut into consecutive ngrams of the index's size. The first ngram is looked up
     * directly; each later ngram is looked up as well, and only nodes in which a match of the
     * preceding ngrams is continued by a match of the new one are kept (see
     * {@link #getContinuousMatches(NodeProxy, NodeProxy)}).
     * <p>
     * A final ngram that is shorter than the index size is looked up in padded form: it is
     * prefixed with the tail of the previous ngram so that the lookup key has full size, while
     * the unpadded ngram is passed alongside as a separate argument.
     *
     * @param index   the ngram index worker to search with
     * @param docs    the documents to search
     * @param qnames  the node names to restrict the search to, may be {@code null}
     * @param query   the literal string to find
     * @param nodeSet the context node set, may be {@code null}
     * @param axis    axis code forwarded to the index search
     * @return the nodes containing the string; an empty node set if the query is empty
     * @throws XPathException if the index search fails
     */
    public NodeSet fixedStringSearch(NGramIndexWorker index, DocumentSet docs, List<QName> qnames, String query, NodeSet nodeSet, int axis) throws XPathException {
        final int ngramSize = index.getN();
        final String[] ngrams = NGramSearch.getDistinctNGrams(query, ngramSize);
        if (ngrams.length == 0) {
            return new EmptyNodeSet();
        }

        final String firstNgram = ngrams[0];
        // Parameterized messages: nothing is formatted unless the log level is enabled.
        LOG.trace("First NGRAM: {}", firstNgram);
        NodeSet result = index.search(this.getExpressionId(), docs, qnames, firstNgram, firstNgram, this.context, nodeSet, axis);

        for (int i = 1; i < ngrams.length; ++i) {
            final String ngram = ngrams[i];
            final int len = ngram.codePointCount(0, ngram.length());
            String filledNgram = ngram;
            if (len < ngramSize) {
                // Only the last ngram can be short. Its predecessor has exactly ngramSize code
                // points, so everything after its first len code points is the missing padding.
                final String filler = ngrams[i - 1];
                filledNgram = filler.substring(filler.offsetByCodePoints(0, len)) + ngram;
                LOG.debug("Filled: {}", filledNgram);
            }
            final NodeSet nodes = index.search(this.getExpressionId(), docs, qnames, filledNgram, ngram, this.context, nodeSet, axis);
            final NodeSet nodesContainingFirstINgrams = result;
            // Keep a node only if it matched before and the new match continues an earlier one.
            result = NodeSets.transformNodes(nodes, proxy -> Optional.ofNullable(nodesContainingFirstINgrams.get(proxy)).map(before -> this.getContinuousMatches((NodeProxy)before, (NodeProxy)proxy)).orElse(null));
        }
        return result;
    }

    /**
     * Looks for the first pair of matches, one from {@code head} and one from {@code tail}, for
     * which {@code headMatch.continuedBy(tailMatch)} yields a combined match.
     * <p>
     * If such a pair exists, {@code tail}'s matches are filtered with a predicate that accepts
     * only matches whose context id differs from this expression's id, the combined match is
     * added to {@code tail}, and {@code tail} is returned.
     *
     * @param head node carrying the matches found so far
     * @param tail the same node as found for the next ngram; modified in place on success
     * @return {@code tail} with the combined match added, or {@code null} if no head match is
     *         continued by a tail match
     */
    private NodeProxy getContinuousMatches(NodeProxy head, NodeProxy tail) {
        Match joined = null;
        search:
        for (Match h = head.getMatches(); h != null; h = h.getNextMatch()) {
            for (Match t = tail.getMatches(); t != null; t = t.getNextMatch()) {
                joined = h.continuedBy(t);
                if (joined != null) {
                    // The first continuation wins; later pairs are not examined.
                    break search;
                }
            }
        }
        if (joined == null) {
            return null;
        }
        NodeProxies.filterMatches(tail, match -> match.getContextId() != this.getExpressionId());
        tail.addMatch(joined);
        return tail;
    }

    /**
     * Reports the dependency flags of this expression.
     * <p>
     * Returns 1 when the node argument's static type is a subtype of type code -1 and the argument
     * does not depend on flag 2; otherwise returns 3.
     * NOTE(review): the numbers are decompiled constants; presumably -1 is the node type, 1 the
     * context-set flag and 2 the context-item flag (so 3 is both) - confirm against Type and Dependency.
     *
     * @return 1 or 3, as described above
     */
    public int getDependencies() {
        final Expression nodesArg = this.getArgument(0);
        final boolean independentNodeExpr = Type.subTypeOf(nodesArg.returnsType(), -1)
                && !Dependency.dependsOn(nodesArg, 2);
        return independentNodeExpr ? 1 : 3;
    }

    /**
     * @return the type code -1, the same code the signatures use for the "nodes" parameter and
     *         the return value (presumably the node type - confirm against Type)
     */
    public int returnsType() {
        return -1;
    }

    /**
     * Cuts {@code text} into consecutive, non-overlapping chunks of {@code ngramSize} Unicode code
     * points, lower-casing every code point. If the text length is not a multiple of
     * {@code ngramSize}, the last chunk holds the remaining code points and is shorter.
     * Despite the method name, equal chunks are not de-duplicated.
     *
     * @param text      the text to split
     * @param ngramSize the number of code points per chunk
     * @return the chunks in text order; an empty array for an empty text
     */
    private static String[] getDistinctNGrams(String text, int ngramSize) {
        final int len = text.codePointCount(0, text.length());
        final int fullCount = len / ngramSize;
        final int remainder = len % ngramSize;
        final String[] ngrams = new String[remainder > 0 ? fullCount + 1 : fullCount];

        int pos = 0;
        for (int i = 0; i < ngrams.length; ++i) {
            // Every chunk has full size except a possible trailing one holding the remainder.
            final int size = i < fullCount ? ngramSize : remainder;
            final StringBuilder bld = new StringBuilder(size);
            for (int j = 0; j < size; ++j) {
                final int codePoint = text.codePointAt(pos);
                // Advance by the width of the code point actually read, not of its lower-case form.
                pos += Character.charCount(codePoint);
                bld.appendCodePoint(Character.toLowerCase(codePoint));
            }
            ngrams[i] = bld.toString();
        }
        return ngrams;
    }
}

