CREATE TABLE hive_hadoop3.zbaranow_db.collectd(

  dstype varchar ,
  host varchar ,
  interval double ,
  plugin varchar ,
  plugin_instance varchar ,
  time bigint ,
  type varchar ,
  type_instance varchar ,
  env varchar ,
  region varchar ,
  dc varchar ,
  value double ,
  value_instance varchar ,
  _id varchar ,
  availability_zone varchar ,
  event_timestamp bigint ,
  submitter_environment varchar ,
  submitter_hostgroup varchar ,
  timestamp bigint ,
  toplevel_hostgroup varchar ,
  version varchar ,
  year int,
  month int,
  day int
)
WITH (
  format = 'PARQUET',
  external_location = '/project/monitoring/collectd/database',
  partitioned_by = ARRAY['year','month','day']
);

use hive_hadoop3;

SELECT * FROM hive_hadoop3.zbaranow_db."collectd$partitions"

call system.sync_partition_metadata('zbaranow_db', 'collectd', 'ADD')

SELECT * FROM hive_hadoop3.zbaranow_db."collectd$partitions"


select count(*) from hive_hadoop3.zbaranow_db.collectd;

select month, count(*) from hive_hadoop3.zbaranow_db.collectd group by month;

select month, avg(value)/1024/1024 from hive_hadoop3.zbaranow_db.collectd where plugin='hadoop' and type_instance='BytesWritten' group by month ;

create table phoenix_hadoop3.default.collectd_hadoop WITH (  rowkeys = 'type_instance,time,submitter_hostgroup,host',  compression = 'SNAPPY',  salt_buckets = 20) as select host,plugin,plugin_instance,time,value as val,type_instance,submitter_hostgroup,submitter_environment from hive_hadoop3.zbaranow_db.collectd where plugin='hadoop';