Monday, July 8, 2019

Hive Partitioning


//create a stage table with out partition.
hive> drop table emp_det_stage;
OK
Time taken: 0.079 seconds

hive> create table emp_det_stage(name string,dept string,exp int, loc string) row format delimited fields terminated by ',';
OK
Time taken: 0.088 seconds

hive> Load data local Inpath "/home/hadoop/Partition.csv" overwrite into table emp_det_stage;
Loading data to table default.emp_det_stage
OK
Time taken: 0.42 seconds
//view the loaded data
hive> select * from emp_det_stage;
OK
emp_det_stage.name      emp_det_stage.dept      emp_det_stage.exp       emp_det_stage.loc
Kartheek        BI      5       Hyd
Raj     Apps    5       Mas
mahesh  BI      5       Hyd
Denesh  BI      6       Hyd
Rajesh  Frontend        7       KOL
Time taken: 0.125 seconds, Fetched: 5 row(s)

//create an actual table with static partition:

hive> create table emp_det_part(name string,dept string,exp int) partitioned by (loc string);
OK
Time taken: 0.067 seconds
hive>  insert overwrite table emp_det_part partition(loc='Hyd') select name,dept,exp from emp_det_stage where loc='Hyd';

//verify data
hive> dfs -ls /user/hive/warehouse/emp_det_part/loc=Hyd;
Found 1 items
-rwxrwxrwt   1 hadoop hadoop         38 2019-07-08 12:08 /user/hive/warehouse/emp_det_part/loc=Hyd/000000_0

//create an actual table with dynamic partition:

hive> set hive.exec.dynamic.partition.mode=nonstrict;
hive> insert overwrite table emp_det_part partition(loc) select * from emp_det_stage;

//verify data files
hive> dfs -ls /user/hive/warehouse/emp_det_part/
    > ;
Found 3 items
drwxrwxrwt   - hadoop hadoop          0 2019-07-08 12:10 /user/hive/warehouse/emp_det_part/loc=Hyd
drwxrwxrwt   - hadoop hadoop          0 2019-07-08 12:10 /user/hive/warehouse/emp_det_part/loc=KOL
drwxrwxrwt   - hadoop hadoop          0 2019-07-08 12:10 /user/hive/warehouse/emp_det_part/loc=Mas




No comments:

Post a Comment