Hive UDF Examples

There are two ways to write UDFs in Hive, each by extending one of the following classes:

  • org.apache.hadoop.hive.ql.exec.UDF
  • org.apache.hadoop.hive.ql.udf.generic.GenericUDF

The first example below is a simple one that can be used with Hadoop primitive types.
The second example is a bit more complex, as it can be used with complex types such as arrays, maps, etc.

package hive;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class SimpleUDFExample extends UDF {

       public Text evaluate(Text input) {
              if(input ==null)
                     return null;
              return new Text("Hello "+input.toString());


package hive;

import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

public class ComplexUDFExample extends GenericUDF {

       ListObjectInspector listOI;
       StringObjectInspector elementOI;

       public Object evaluate(DeferredObject[] arguments) throws HiveException {
              // get the list and string from the deferred objects using the object inspectors
           List<Text> list = (List<Text>) this.listOI.getList(arguments[0].get());
           String arg = elementOI.getPrimitiveJavaObject(arguments[1].get());
           // check for nulls
           if (list == null || arg == null) {
             return null;
        // see if our list contains the value we need
           for(Text s: list) {
               if (arg.equals(s.toString())) return new Boolean(true);
           return new Boolean(false);

       public String getDisplayString(String[] arg0) {
              return "arrayContainsExample()";

       public ObjectInspector initialize(ObjectInspector[] arguments)
                     throws UDFArgumentException {
              if (arguments.length != 2) {
                     throw new UDFArgumentLengthException(
                                  "arrayContainsExample only takes 2 arguments: List<T>, T");

              ObjectInspector a = arguments[0];
              ObjectInspector b = arguments[1];

              if (!(a instanceof ListObjectInspector)
                           || !(b instanceof StringObjectInspector)) {
                     throw new UDFArgumentException(
                                  "first argument must be a list / array, second argument must be a string");
              this.listOI = (ListObjectInspector) a;
           this.elementOI = (StringObjectInspector) b;
           if(!(listOI.getListElementObjectInspector() instanceof StringObjectInspector)){
              throw new UDFArgumentException("first argument must be a list of strings");
           // the return type of our function is a boolean, so we provide the correct object inspector
           return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;


Compile the above code, register the resulting jar file, and use the UDFs as shown below:

hive> add jar /root/hadoop-examples.jar;
hive> create temporary function helloworld as 'hive.SimpleUDFExample';
hive> create temporary function containsString as 'hive.ComplexUDFExample';
hive> select helloworld(salary) from sample_07;
hive> select containsString(a.col1, a.col2) from ( select array("a", "b", "c") as col1 , "q" as col2  from sample_07 limit 1 ) a;


