- Array
$ vi arrayfile
1,abc,40000,a$b$c,hyd
2,def,3000,d$f,bang
3,abc,40000,a$b$c,hyd
4,def,3000,d$f,bang
5,abc,40000,a$b$c,hyd
6,def,3000,d$f,bang
7,abc,40000,a$b$c,hyd
8,def,3000,d$f,bang
9,abc,40000,a$b$c,hyd
10,def,3000,d$f$d$e$d$e$e$r$g,bang
|
hive> create table array_tab (id int, name string, salary bigint,
sub array<string>, city string)
> row format delimited
> fields terminated by
','
> collection items
terminated by '$';
|
hive> load data local inpath '/root/arrayfile' into table
array_tab;
|
hive> select * from array_tab;
OK
1 abc 40000
["a","b","c"] hyd
2 def 3000
["d","f"]
bang
3 abc 40000
["a","b","c"] hyd
4 def 3000
["d","f"]
bang
5 abc 40000
["a","b","c"] hyd
6 def 3000
["d","f"]
bang
7 abc 40000
["a","b","c"] hyd
8 def 3000
["d","f"]
bang
9 abc 40000
["a","b","c"] hyd
10 def 3000
["d","f","d","e","d","e","e","r","g"] bang
|
hive> describe array_tab;
OK
id int
name string
salary bigint
sub
array<string>
city string
Time taken: 0.79 seconds, Fetched: 5 row(s)
|
hive> select sub[0] from array_tab where id=1;
….
OK
a
|
- Map
$ vi mapfile
1,abc,40000,a$b$c,pf#500$epf#200,hyd
2,def,3000,d$f,pf#500,bang
2,abc,40000,a$b$c,pf#500$epf#200,hyd
3,def,3000,d$f,pf#500,bang
4,abc,40000,a$b$c,pf#500$epf#200,hyd
5,def,3000,d$f,pf#500,bang
6,abc,40000,a$b$c,pf#500$epf#200,hyd
7,def,3000,d$f,pf#500,bang
8,abc,40000,a$b$c,pf#500$epf#200,hyd
|
hive> create table arr_map_tab (id int, name string, salary
bigint, sub array<string>, details map<string, int>, city string)
> row format delimited
> fields terminated by
','
> collection items
terminated by '$'
> map keys terminated by
'#';
|
hive> load data local inpath 'mapfile' into table arr_map_tab;
|
hive> select * from arr_map_tab;
OK
1 abc 40000
["a","b","c"] {"pf":500,"epf":200} hyd
2 def 3000
["d","f"] {"pf":500} bang
2 abc 40000
["a","b","c"] {"pf":500,"epf":200} hyd
3 def 3000
["d","f"]
{"pf":500} bang
4 abc 40000
["a","b","c"] {"pf":500,"epf":200} hyd
5 def 3000
["d","f"] {"pf":500} bang
6 abc 40000
["a","b","c"] {"pf":500,"epf":200} hyd
7 def 3000
["d","f"]
{"pf":500} bang
8 abc 40000
["a","b","c"] {"pf":500,"epf":200} hyd
Time taken: 2.04 seconds, Fetched: 9 row(s)
|
hive> describe arr_map_tab;
OK
id int
name string
salary bigint
sub
array<string>
details
map<string,int>
city string
Time taken: 0.838 seconds, Fetched: 6 row(s)
|
hive> select details["pf"] from arr_map_tab limit 1;
…
…
OK
500
Time taken: 33.805 seconds, Fetched: 1 row(s)
|
- Struct
$ vi structfile
1,abc,40000,a$b$c,pf#500$epf#200,hyd$ap$500001
2,def,3000,d$f,pf#500,bang$kar$600038
4,abc,40000,a$b$c,pf#500$epf#200,bhopal$MP$452013
5,def,3000,d$f,pf#500,Indore$MP$452014
|
hive> create table arr_map_struct_tab (id int, name string, salary
bigint, sub array<string>, details map<string, int>, address
struct<city:string, state:string, pin:int>)
> row format delimited
> fields terminated by ',' > collection items terminated by '$' > map keys terminated by #'; OK
Time taken: 4.982 seconds
|
hive> describe arr_map_struct_tab;
OK
id int
name string
salary bigint
sub
array<string>
details
map<string,int>
address
struct<city:string,state:string,pin:int>
Time taken: 1.416 seconds, Fetched: 6 row(s)
|
hive> load data local inpath 'structfile' into table
arr_map_struct_tab;
|
hive> select * from arr_map_struct_tab;
OK
1 abc 40000
["a","b","c"] {"pf":500,"epf":200}
{"city":"hyd","state":"ap","pin":500001}
2 def 3000
["d","f"]
{"pf":500}
{"city":"bang","state":"kar","pin":600038}
4 abc 40000
["a","b","c"] {"pf":500,"epf":200}
{"city":"bhopal","state":"MP","pin":452013}
5 def 3000
["d","f"]
{"pf":500}
{"city":"Indore","state":"MP","pin":452014}
Time taken: 1.226 seconds, Fetched: 4 row(s)
|
hive> select address.city from arr_map_struct_tab where
details["pf"]="500" and sub[0]="a";
…
…
OK
hyd
bhopal
Time taken: 20.286 seconds, Fetched: 2 row(s)
|
- Uniontype
hive> CREATE TABLE union_tab(col1 UNIONTYPE<INT, DOUBLE,
STRING, ARRAY<string>, STRUCT<a:INT,b:string>>)
> row format delimited
> fields terminated by ','
> COLLECTION ITEMS TERMINATED BY '|'
> LINES TERMINATED BY '\n';
OK
Time taken: 2.356 seconds
|
$ vi unionfile
0|1
0|12
1|1.234
1|2.3456
2|dinesh
2|Dinesh Sachdev
|
hive> load data local inpath 'unionfile' overwrite into
table union_tab;
|
hive> select * from union_tab;
OK
{0:1}
{0:12}
{1:1.234}
{1:2.3456}
{2:"dinesh"}
{2:"Dinesh Sachdev"}
Time taken: 1.211 seconds, Fetched: 6 row(s)
|
It becomes quiet simple to load data into uniontype for
primitives. But what about complex types? For example if we edit ‘unionfile’
and append an array:
$vi unionfile
0|1
0|12
1|1.234
1|2.3456
2|dinesh
2|Dinesh Sachdev
3|din|esh|sach|dev
|
hive> load data local inpath 'unionfile' overwrite into
table union_tab;
|
hive> select * from union_tab;
OK
{0:1}
{0:12}
{1:1.234}
{1:2.3456}
{2:"dinesh"}
{2:"Dinesh Sachdev"}
{3:["din|esh|sach|dev"]}
Time taken: 1.11 seconds, Fetched: 7 row(s)
There is only a single element in array whereas we expected to
have array of 4 strings [“din”,”esh”,”sach”,”dev”]
For this we can use create_union UDF:
hive> insert into table union_tab
> select create_union(4,1, 1.02,"d",
array("d","f"), named_struct('a',1, 'b','dinesh'))
from sample_07 limit 1;
... ...
hive> insert into table union_tab
> select create_union(3,1,
1.02,"d", array("d","f"),
named_struct('a',1, 'b','dinesh')) from sample_07 limit 1;
|
hive> select * from union_tab;
OK {4:{"a":1,"b":"dinesh"}} {3:["d","f"]} {0:1} {0:12} {1:1.234} {1:2.3456} {2:"dinesh"} {2:"Dinesh Sachdev"} {3:["din|esh|sach|dev"]} Time taken: 0.064 seconds, Fetched: 9 row(s) |
boss, how can I query an union? suppose say I want to say "select from union_tab where "
ReplyDelete