Fork me on GitHub

加载json文件到hive表

struct类型应用

准备json文件

simple.json

1
{"foo":"abc","bar":"200901011000000","quux":{"quuxid":1234,"quuxname":"sam"}}

添加hive-hcatalog-core.jar包

1
add jar /home/hadoop/hive/lib/hive-hcatalog-core.jar

建立测试表

1
2
3
4
5
6
7
8
9
create database if not exists mytest;
use mytest;
create table if not exists my_table(
foo string,
bar string,
quux struct<quuxid:int,quuxname:string>
)
row format serde 'org.apache.hive.hcatlog.data.JsonSerde'
stored as textfile;

装载数据

1
load data local inpath '/home/hadoop/data/simple.json' into table my_table;

查询

1
select foo, bar,quux.quuxid,quux.quuname from my_table;

sstruct结合array类型应用

准备json文件

complex.json

1
{"docid":"abc","user":{"id":123,"username":"saml1234","name":"sam","shippingaddress":{"address1":"123mainst","address2:""","city":"durham","state":"nc"},"orders":[{"itemid":6789,"orderdate":"11/11/2012"},{"itemid":4352,"orderdate":"12/12/2012"}]}}

添加hive-hcatalog-core.jar包

1
add jar /home/hadoop/hive/lib/hive-hcatalog-core.jar

建立测试表

1
2
3
4
5
6
7
8
9
10
11
12
13
use mytest;
create table if not exists complex_json(
docid string,
user struct<id: int,
username: string,
shippingaddress:struct<address1:string
address2: string,
city: string,
state: string>,
orders:array<struct<itemid:int,orderdate:String>>>
)
row format serde 'org.apache.hive.hcatlog.data.JsonSerde'
stored as textfile;

装载数据

1
load data local inpath '/home/hadoop/data/complex.json' overwrite table complex_json;

查询

1
2
select docid,user.id,user.shippingaddress.city as city ,user.orders[0].itemid as order0id,user.orders[1].itemid as order1ib    
from complex_json;

动态map类型应用

json文件

a.json

1
2
3
4
5
{"conflict":{"liveid":123,"zhuboid":"456","media":789,"proxy":"ac","result":10000}}
{"conflict":{"liveid":123,"zhuboid":"456","media":789,"proxy":"ac"}}
{"conflict":{"liveid":123,"zhuboid":"456","media":789}}
{"conflict":{"liveid":123,"zhuboid":"456"}}
{"conflict":{"liveid":123}}

添加hive-hcatalog-core.jar包

1
add jar /home/hadoop/hive/lib/hive-hcatalog-core.jar

建立测试表

1
2
3
4
5
6
use mytest;
create table if not exists json_table(
conflict map<string,string>
)
row format serde 'org.apache.hive.hcatlog.data.JsonSerde'
stored as textfile;

查询

1
2
select * from json_table;   
select conflict['media'] from json_table;

本文标题:加载json文件到hive表

文章作者:tang

发布时间:2018年07月28日 - 00:07

最后更新:2018年07月28日 - 08:07

原始链接:https://tgluon.github.io/2018/07/28/加载json文件到hive表/

许可协议: 署名-非商业性使用-禁止演绎 4.0 国际 转载请保留原文链接及作者。

-------------本文结束感谢您的阅读-------------