博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Hive常用函数 傻瓜学习笔记 附完整示例
阅读量:4316 次
发布时间:2019-06-06

本文共 5347 字,大约阅读时间需要 17 分钟。

  • 创建表

drop table if exists mydatabase.test;

create table mydatabase.test
    (id int, name string, timestring string, salary double, bonus double)
    row format delimited
    fields terminated by '\t'
    stored as textfile;

 

  • 插入数据

方式1

vim test.txt
hadoop fs -mkdir test
hadoop fs -put test.txt /user/myname/test
load data inpath '/user/myname/test'
    overwrite into table mydatabase.test;
方式2
insert into mydatabase.test values
    (1,' J ','2018-01-08 10:11:32',128.54,-45.23),
    (2,' J ','2018-02-09 10:51:12',128.54,-78.25),
    (3,' J ','2018-03-05 11:22:21',128.52,null),
    (4,' J ','2018-04-08 15:40:51',256.23,345.23),
    (5,' J ','2018-05-08 10:21:21',128.54,267.12),
    (6,' J ','2018-06-08 10:00:50',256.27,-78.49),
    (7,'Rose','2018-01-08 10:11:32',512.65,-76.44),
    (8,'Rose','2018-02-09 10:51:12',512.54,-45.30),
    (9,'Rose','2018-03-05 11:22:21',512.13,-87.09),
    (10,'Rose','2018-04-08 15:40:51',512.34,19.12),
    (11,'Dickson','2018-01-08 10:21:21',256.87,null),
    (12,'Dickson','2018-02-08 10:00:50',256.52,null),
    (13,'Dickson','2018-04-08 11:00:00',256.12,3.69);

 

  • 数学函数

四舍五入

select id, round(salary) from mydatabase.test;
四舍五入,小数保留
select id, round(salary, 1) from mydatabase.test;
向下,向上取整
select id, floor(salary), ceil(salary) from mydatabase.test;
随机数(0~1)
select id, salary*(1+rand()*0.1) from mydatabase.test;
指数,对数,取模
select id, pow(e(), salary), log(e(), salary), pmod(id, 3) from mydatabase.test;
绝对值,最大值,最小值
select id, abs(bonus), greatest(salary, bonus), least(salary, bonus) from mydatabase.test;

 

  • 类型转换函数

select id, cast(salary as int) from mydatabase.test;

 

  • 日期函数

当前时间

select id, name, unix_timestamp() from mydatabase.test;
时间戳转换为字符串
select id, name, from_unixtime(unix_timestamp(), 'yyyy-MM-dd hh:mm:ss') from mydatabase.test;
字符串转换为时间戳
select id, name, unix_timestamp('2019-02-13 11:22:33') from mydatabase.test;
字符串转换为时间戳
select id, name, unix_timestamp('20190213 11:22:33', 'yyyyMMdd HH:mm:ss') from mydatabase.test;
时间子元素
select id, name, to_date(timestring), year(timestring), month(timestring), day(timestring), hour(timestring), minute(timestring), second(timestring) from mydatabase.test;

 

  • 条件函数

IF条件

select id, if(bonus > 0, 'yes', 'no') from mydatabase.test;
NULL判断
select id, isnull(bonus) from mydatabase.test;
NULL条件,第二参数为默认值
select id, nvl(bonus, 0) from mydatabase.test;
非空查找函数
select id, coalesce(bonus, 0, null) from mydatabase.test;
CASE匹配条件
select id, name,
    (case name
    when 'Jack' then 'A'
    when 'Rose' then 'B'
    else 'C'
    end)
    from mydatabase.test;
CASE搜索条件
select id, salary, bonus,
    (case
    when salary > 500 then 'A'
    when salary > 100 and bonus > 0 then 'B'
    else 'C'
    end)
    from mydatabase.test;

 

  • 聚合函数

去重

select distinct(name) from mydatabase.test;
计数
select count(*) from mydatabase.test;
条件计数
select count(bonus > 0) from mydatabase.test;
求和,求平均,最大,最小,方差
select name, sum(salary), avg(salary), min(salary), max(salary), variance(salary) from mydatabase.test group by name;
生成列表
select name, collect_list(salary) from mydatabase.test group by name;
生成非重列表
select name, collect_set(salary) from mydatabase.test group by name;

 

  • 字符串函数

长度

select name, length(name) from mydatabase.test;
查找
select name, locate('o', name) from mydatabase.test;
左填充,右填充
select name, lpad(name, 4, '_'), rpad(name, 4, '_') from mydatabase.test;
去除左空格,去除右空格,去除左右空格
select name, ltrim(name), rtrim(name), trim(name) from mydatabase.test;
字符距离
select n1, n2, levenshtein(n1, n2) from
    (select distinct(name) as n1 from mydatabase.test)db0
    join
    (select distinct(name) as n2 from mydatabase.test)db1
    on n1 != n2;
分割
select name, split(timestring, '-') from mydatabase.test;
子字符串
select substr(name, 1, -1), substr(timestring, -8) from mydatabase.test;
替换(注意转义替换可能需要四个斜杆)
select regexp_replace(timestring, '\\d+-\\d+-\\d+', '###') from mydatabase.test;
提取(注意转义替换可能需要四个斜杆)
select regexp_extract(timestring, '\\d+', 1) from mydatabase.test;
拼接
select name, concat(year(timestring), '|', month(timestring), '|', cast(salary as string)) from mydatabase.test;
拼接列表
select name,
    concat_ws('|',
    collect_list(cast(salary as string))
    ) from mydatabase.test group by name;
拼接列表(有序)
select name,
    concat_ws('|',
    sort_array(
    collect_list(cast(salary as string))
    )) from mydatabase.test group by name;

 

  • 生成函数

EXPLODE

select id, part from mydatabase.test lateral view explode(split(timestring,' ')) t as part;

 

  • 选择函数

IN

select id, name from mydatabase.test where name in('Dickson', 'Rose');

 

  • 分组排序函数

ROW_NUMBER

select id, name, salary, row_number() over(partition by name order by salary desc) rank from mydatabase.test;
RANK
select id, name, salary, rank() over(partition by name order by salary desc) rank from mydatabase.test;
DENSE_RANK
select id, name, salary, dense_rank() over(partition by name order by salary desc) rank from mydatabase.test;

 

  • 分组函数

GROUPING SETS

select month, day, sum(salary) from
    (select month(timestring) month, day(timestring) day, salary from mydatabase.test) db
    group by month, day grouping sets(month, (month, day)) order by month asc, day asc;
CUBE
select month, day, sum(salary) from
    (select month(timestring) month, day(timestring) day, salary from mydatabase.test) db
    group by month, day with cube order by month asc, day asc;
ROLLUP
select month, day, sum(salary) from
    (select month(timestring) month, day(timestring) day, salary from mydatabase.test) db
    group by month, day with rollup order by month asc, day asc;

 

 

参考文献:

转载于:https://www.cnblogs.com/jhc888007/p/11085012.html

你可能感兴趣的文章
使用jquery去掉时光轴头尾部的线条
查看>>
算法(转)
查看>>
IT职场人生系列之十五:语言与技术II
查看>>
如何在FreePBX ISO 中文版本安装讯时网关,潮流16FXS 网关和潮流话机
查看>>
基于Wolfpack开发业务监控系统
查看>>
通过Jexus 部署 dotnetcore版本MusicStore 示例程序
查看>>
程序员最常见的谎话和我自己的理解
查看>>
mine 数据
查看>>
poj2728 Desert King
查看>>
三个和尚的故事 与 项目机构管理
查看>>
Excel 日期转换
查看>>
js中的NaN、Infinity、null和undefined
查看>>
Runtime
查看>>
struts2 if标签示例
查看>>
Animate CSS
查看>>
.NET安全审核检查表
查看>>
application.properties数据库敏感信息加密这么简单?
查看>>
Language Codes: ISO 639, Microsoft and Macintosh
查看>>
centos6 x64安装elasticsearch5.5.2启动报错
查看>>
公司拷贝回家的工程用sts导入clean package报错java.lang.NoClassDefFoundError
查看>>