Hive UDF Examples

There are two ways to write UDFs in Hive, each by extending one of the following classes:

org.apache.hadoop.hive.ql.exec.UDF
org.apache.hadoop.hive.ql.udf.generic.GenericUDF

The first example below is a simple one that can be used with Hadoop primitive types.
The second example is a bit more complex, as it can be used with complex types such as arrays, maps, etc.

package hive;

import org.apache.hadoop.hive.ql.exec.UDF;

import org.apache.hadoop.io.Text;

public class SimpleUDFExample extends UDF {

public Text evaluate(Text input) {

if(input ==null)

return null;

return new Text("Hello "+input.toString());

}

package hive;

import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;

import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;

import org.apache.hadoop.hive.ql.metadata.HiveException;

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

import org.apache.hadoop.io.Text;

public class ComplexUDFExample extends GenericUDF {

ListObjectInspector listOI;

StringObjectInspector elementOI;

@Override

public Object evaluate(DeferredObject[] arguments) throws HiveException {

// get the list and string from the deferred objects using the object inspectors

List<Text> list = (List<Text>) this.listOI.getList(arguments[0].get());

String arg = elementOI.getPrimitiveJavaObject(arguments[1].get());

// check for nulls

if (list == null || arg == null) {

return null;

}

// see if our list contains the value we need

for(Text s: list) {

if (arg.equals(s.toString())) return new Boolean(true);

}

return new Boolean(false);

}

@Override

public String getDisplayString(String[] arg0) {

return "arrayContainsExample()";

}

@Override

public ObjectInspector initialize(ObjectInspector[] arguments)

throws UDFArgumentException {

if (arguments.length != 2) {

throw new UDFArgumentLengthException(

"arrayContainsExample only takes 2 arguments: List<T>, T");

}

ObjectInspector a = arguments[0];

ObjectInspector b = arguments[1];

if (!(a instanceof ListObjectInspector)

|| !(b instanceof StringObjectInspector)) {

throw new UDFArgumentException(

"first argument must be a list / array, second argument must be a string");

}

this.listOI = (ListObjectInspector) a;

this.elementOI = (StringObjectInspector) b;

if(!(listOI.getListElementObjectInspector() instanceof StringObjectInspector)){

throw new UDFArgumentException("first argument must be a list of strings");

}

// the return type of our function is a boolean, so we provide the correct object inspector

return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;

}

Compile the above code, register the resulting jar file, and use the UDFs as shown below:

hive> add jar /root/hadoop-examples.jar;

hive> create temporary function helloworld as 'hive.SimpleUDFExample';

hive> create temporary function containsString as 'hive.ComplexUDFExample';

hive> select helloworld(salary) from sample_07;

….

…

hive> select containsString(a.col1, a.col2) from ( select array("a", "b", "c") as col1 , "q" as col2 from sample_07 limit 1 ) a;

…..

…….

QueryDB

Search This Blog

Hive UDF Examples

Comments

Post a Comment

Popular posts

Hive Parse JSON with Array Columns and Explode it in to Multiple rows.

org.apache.spark.sql.AnalysisException: Cannot overwrite a path that is also being read from.;

Read from a hive table and write back to it using spark sql

Hadoop Distcp Error Duplicate files in input path

Scala Spark building Jar leads java.lang.StackOverflowError