1. Create a simple Maven project
2. Declare the required dependencies in pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>hiveTest</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Set these to match the Hadoop and Hive versions of the cluster you will connect to -->
    <properties>
        <hadoop.version>2.6.0-cdh5.13.3</hadoop.version>
        <hive.version>1.1.0-cdh5.13.3</hive.version>
    </properties>

    <!-- Because CDH builds of Hadoop and Hive are used here, Cloudera's official repository
         must be added so the matching artifacts can be resolved.
         With Apache releases of Hadoop and Hive this block is unnecessary. -->
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>http://repository.cloudera.com/artifactory/cloudera-repos</url>
        </repository>
    </repositories>

    <dependencies>
        <!-- hadoop-common and hive-exec are already on the cluster classpath at runtime,
             so they could also be marked <scope>provided</scope> to keep the jar slim -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
3. Implementation code

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class HelloUDF01 extends UDF {

    // Hive resolves a UDF call to a matching evaluate() signature by reflection
    public Text evaluate(Text input) {
        // propagate SQL NULL instead of producing the string "hello:null"
        if (input == null) {
            return null;
        }
        return new Text("hello:" + input);
    }

    // Quick local check without a cluster
    public static void main(String[] args) {
        HelloUDF01 udf = new HelloUDF01();
        Text result = udf.evaluate(new Text("zhangsan"));
        System.out.println(result.toString());
    }
}
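
Since junit is already on the test classpath (see the pom above), the UDF can also be sanity-checked with a unit test before packaging. A minimal sketch, assuming the file lives at src/test/java/HelloUDF01Test.java in the same default package as the UDF:

import org.apache.hadoop.io.Text;
import org.junit.Assert;
import org.junit.Test;

public class HelloUDF01Test {

    @Test
    public void evaluatePrefixesInput() {
        HelloUDF01 udf = new HelloUDF01();
        Assert.assertEquals("hello:zhangsan", udf.evaluate(new Text("zhangsan")).toString());
    }

    @Test
    public void evaluateReturnsNullForNullInput() {
        HelloUDF01 udf = new HelloUDF01();
        Assert.assertNull(udf.evaluate(null));
    }
}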
4. Package the jar
In IntelliJ IDEA, open the Maven tool window (View -> Tool Windows -> Maven), expand Lifecycle, and run the package goal; running mvn package from the project root does the same. The jar is produced under target/.
5. Upload the jar to an HDFS directory
hdfs dfs -put /home/hadoop/read_xwf/hiveTest-1.0-SNAPSHOT.jar /user/xin/
6. Register the UDF
create function func.HelloUDF as 'HelloUDF01' using jar 'hdfs://nameservice/user/xin/hiveTest-1.0-SNAPSHOT.jar';
The name before as (func.HelloUDF) is the function name you define, qualified with a database; the string after as is the fully qualified class name of your UDF class (there is no package declaration here, so plain HelloUDF01 suffices); the path after using jar must be the complete HDFS URI, including the filesystem prefix (the prefix, hdfs://nameservice on this cluster, can be copied from the location field shown in any show create table output).
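The registration can be verified with describe function func.HelloUDF;. If the function is only needed for the current session, a temporary registration from the local copy of the jar is a common alternative. A minimal sketch, reusing the local path from step 5 (temporary functions take no database qualifier):

add jar /home/hadoop/read_xwf/hiveTest-1.0-SNAPSHOT.jar;
create temporary function HelloUDF as 'HelloUDF01';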
7. Test
select 'lisi', func.HelloUDF('lisi');
If everything is wired up correctly, the query returns lisi and hello:lisi.