/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hadoop.hive.ql.udf.generic;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Locale;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

/**
 * Generic UDF {@code sentences(str[, lang[, country]])}.
 *
 * <p>Splits a string into sentences and each sentence into words using
 * {@link BreakIterator}. The result is a list of sentences, each of which is a
 * list of {@link Text} words. Tokens whose first code point is not a letter or
 * digit (punctuation, whitespace) are discarded.
 */
@Description(name="sentences", value="_FUNC_(str, lang, country) - Splits str into arrays of sentences, where each sentence is an array of words. The 'lang' and'country' arguments are optional, and if omitted, the default locale is used.", extended="Example:\n  > SELECT _FUNC_('Hello there! I am a UDF.') FROM src LIMIT 1;\n  [ [\"Hello\", \"there\"], [\"I\", \"am\", \"a\", \"UDF\"] ]\n  > SELECT _FUNC_(review, language) FROM movies;\nUnnecessary punctuation, such as periods and commas in English, is automatically stripped. If specified, 'lang' should be a two-letter ISO-639 language code (such as 'en'), and 'country' should be a two-letter ISO-3166 code (such as 'us'). Not all country and language codes are fully supported, and if an unsupported code is specified, a default locale is used to process that string.")
public class GenericUDFSentences
extends GenericUDF {
    /** One converter per call argument, created in {@link #initialize} and used in {@link #evaluate}. */
    private ObjectInspectorConverters.Converter[] converters;

    /**
     * Validates the argument count and prepares a writable-string converter for
     * every argument.
     *
     * @param arguments object inspectors of the call arguments (1 to 3 expected)
     * @return a list-of-list-of-writable-string object inspector describing the result
     * @throws UDFArgumentLengthException if fewer than 1 or more than 3 arguments are given
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length < 1 || arguments.length > 3) {
            throw new UDFArgumentLengthException("The function sentences takes between 1 and 3 arguments.");
        }
        this.converters = new ObjectInspectorConverters.Converter[arguments.length];
        for (int i = 0; i < arguments.length; ++i) {
            this.converters[i] = ObjectInspectorConverters.getConverter(arguments[i], PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        }
        return ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector));
    }

    /**
     * Tokenizes the first argument into sentences of words.
     *
     * @param arguments the text, optionally followed by a language code and a country code
     * @return {@code null} if the text argument is {@code null}; otherwise a list with one
     *         entry per sentence, each entry being the list of words in that sentence
     *         (possibly empty when the sentence holds no letter/digit tokens)
     * @throws HiveException if fetching an argument value fails
     */
    @Override
    public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
        assert (arguments.length >= 1 && arguments.length <= 3);
        // Fetch each deferred value once and reuse it.
        Object rawText = arguments[0].get();
        if (rawText == null) {
            return null;
        }
        Locale locale = this.resolveLocale(arguments);
        Text chunk = (Text)this.converters[0].convert(rawText);
        return GenericUDFSentences.splitIntoSentences(chunk.toString(), locale);
    }

    /**
     * Builds the locale from the optional language (index 1) and country (index 2)
     * arguments; falls back to the JVM default locale when no language is supplied.
     * A country without a language is ignored.
     */
    private Locale resolveLocale(GenericUDF.DeferredObject[] arguments) throws HiveException {
        Object rawLanguage = arguments.length > 1 ? arguments[1].get() : null;
        if (rawLanguage == null) {
            return Locale.getDefault();
        }
        // Locale.ROOT keeps the case mapping of ISO codes independent of the JVM's
        // default locale (e.g. Turkish maps 'I' to a dotless 'i').
        String language = ((Text)this.converters[1].convert(rawLanguage)).toString().toLowerCase(Locale.ROOT);
        Object rawCountry = arguments.length > 2 ? arguments[2].get() : null;
        if (rawCountry == null) {
            return new Locale(language);
        }
        String country = ((Text)this.converters[2].convert(rawCountry)).toString().toUpperCase(Locale.ROOT);
        return new Locale(language, country);
    }

    /**
     * Splits {@code text} at sentence boundaries and each sentence at word boundaries,
     * keeping only tokens that start with a letter or digit.
     */
    private static ArrayList<ArrayList<Text>> splitIntoSentences(String text, Locale locale) {
        ArrayList<ArrayList<Text>> result = new ArrayList<ArrayList<Text>>();
        BreakIterator sentenceIt = BreakIterator.getSentenceInstance(locale);
        // The word iterator does not depend on the sentence; create it once and
        // re-point it at each sentence with setText.
        BreakIterator wordIt = BreakIterator.getWordInstance(locale);
        sentenceIt.setText(text);
        int sentenceStart = 0;
        for (int sentenceEnd = sentenceIt.next(); sentenceEnd != BreakIterator.DONE; sentenceEnd = sentenceIt.next()) {
            String sentence = text.substring(sentenceStart, sentenceEnd);
            sentenceStart = sentenceEnd;
            ArrayList<Text> words = new ArrayList<Text>();
            result.add(words);
            wordIt.setText(sentence);
            int wordStart = 0;
            for (int wordEnd = wordIt.next(); wordEnd != BreakIterator.DONE; wordEnd = wordIt.next()) {
                String token = sentence.substring(wordStart, wordEnd);
                wordStart = wordEnd;
                // Inspect the full first code point: a char-based check would see only a
                // high surrogate for supplementary-plane letters and drop the word.
                if (Character.isLetterOrDigit(token.codePointAt(0))) {
                    words.add(new Text(token));
                }
            }
        }
        return result;
    }

    /**
     * Renders the call as {@code sentences(arg0[, arg1[, arg2]])}.
     *
     * @param children display strings of the call arguments (1 to 3 expected)
     * @return the display string for this function call
     */
    @Override
    public String getDisplayString(String[] children) {
        assert (children.length >= 1 && children.length <= 3);
        String display = "sentences(" + children[0];
        if (children.length > 1) {
            display = display + ", " + children[1];
            if (children.length > 2) {
                display = display + ", " + children[2];
            }
        }
        display = display + ")";
        return display;
    }
}

