同一句insert overwrite sql往DBeaver内执行跟使用shell脚本执行写入的数据量不一样,shell写入的数据约比DBeaver内直接执行少了一半:
insert overwrite table dws_jxyl_nps_analyse.dws_jxyl_question_answer_detail_test1 partition(p_date = 'all')
select
count(1)
from
(
select
answer_id,
form_id,
b.form_name,
union_id,
user_tel,
user_name,
user_idcard,
drjzsyks,
drszks,
drszjdys,
drszzd,
question,
control_type,
answer,
answer_index,
c.third_org_id as org_id,
d.name as org_name,
create_time,
create_date,
update_time,
update_date,
matrix_selection_title,
matrix_selection_subtitle,
matrix_selection_answer,
concat(min_date_of_week_natural, ' ~ ', max_date_of_week_natural) as aggr_week
from
(
--+++++++++++++++++++++++++++++++++++解析checkbox控件普通选项+++++++++++++++++++++++++++++++++++++++++
select
answer_id,
form_id,
union_id,
user_tel,
user_name,
user_idcard,
drjzsyks,
drszks,
drszjdys,
drszzd,
title as question,
control_type,
answer,
cast(value_index as int) as answer_index,
create_time,
create_date,
update_time,
update_date,
'' as matrix_selection_title,
'' as matrix_selection_subtitle,
'' as matrix_selection_answer
from
(
select
answer_id,
form_id,
union_id,
user_tel,
user_name,
user_idcard,
drjzsyks,
drszks,
drszjdys,
drszzd,
title,
q_describe,
control_type,
enable_option_assignment,
data_type,
option_type,
question_options,
value,
create_time,
create_date,
update_time,
update_date
from
(
select
t1_1.id as answer_id,
t1_1.form_id,
t1_1.union_id,
user_tel,
user_name,
user_idcard,
drjzsyks,
drszks,
drszjdys,
drszzd,
GET_JSON_OBJECT(questions,'$.base.title') as title,
GET_JSON_OBJECT(questions,'$.base.describe') as q_describe,
GET_JSON_OBJECT(questions,'$.base.controlType') as control_type,
GET_JSON_OBJECT(questions,'$.data.enableOptionAssignment') as enable_option_assignment,
question_options,
data_type,
option_type,
regexp_replace(GET_JSON_OBJECT(questions,'$.data.value'),'\\[|\\"|\\]','') as value,
create_time,
create_date,
update_time,
update_date
from
(
select
za.id,
za.form_id,
union_id,
GET_JSON_OBJECT(user_info,'$.mobile') as user_tel,
GET_JSON_OBJECT(user_info,'$.nickname') as user_name,
GET_JSON_OBJECT(user_info,'$.idCard') as user_idcard,
GET_JSON_OBJECT(user_info,'$.drjzsyks') as drjzsyks,
GET_JSON_OBJECT(user_info,'$.drszks') as drszks,
GET_JSON_OBJECT(user_info,'$.drszjdys') as drszjdys,
GET_JSON_OBJECT(user_info,'$.drszzd') as drszzd,
questions,
GET_JSON_OBJECT(questions,'$.data.type') as data_type,
GET_JSON_OBJECT(questions,'$.data.options.type') as option_type,
GET_JSON_OBJECT(questions,'$.data.options.value') as question_options,
create_time,
substring(create_time,1,10) as create_date,
update_time,
substring(update_time,1,10) as update_date
from
ods_formdb.ods_answer za
lateral view default.explodejsonarray(GET_JSON_OBJECT(answer_content,'$.question')) qd as questions
) t1_1
) t
where
control_type = 'checkbox'
) t
lateral view explode(split(value,',')) v as answer
lateral view posexplode(split(question_options,',')) qo as value_index,option_value
where
answer = regexp_replace(option_value,'\\[|\\"|\\]','')
) a
left join
(
select
id,
client_id,
form_name
from
ods_formdb.ods_form
) b
on a.form_id = b.id
left join
(
select
id,
third_org_id
from
ods_formdb.ods_client
) c
on b.client_id = c.id
left join
(
select
id,
name
from
ods_crm_db.ods_crm_jinxin_organization
) d
on c.third_org_id = d.id
left join
ods_jxyl_smartbi_config_map.ods_date_info_map e
on a.create_date = e.date_str
脚本如下:
hive_sql=$(cat <<!EOF
select
answer_id,
form_id,
b.form_name,
union_id,
user_tel,
user_name,
user_idcard,
drjzsyks,
drszks,
drszjdys,
drszzd,
question,
control_type,
answer,
answer_index,
c.third_org_id as org_id,
d.name as org_name,
create_time,
create_date,
update_time,
update_date,
matrix_selection_title,
matrix_selection_subtitle,
matrix_selection_answer,
concat(min_date_of_week_natural, ' ~ ', max_date_of_week_natural) as aggr_week
from
(
select
answer_id,
form_id,
union_id,
user_tel,
user_name,
user_idcard,
drjzsyks,
drszks,
drszjdys,
drszzd,
title as question,
control_type,
answer,
cast(value_index as int) as answer_index,
create_time,
create_date,
update_time,
update_date,
'' as matrix_selection_title,
'' as matrix_selection_subtitle,
'' as matrix_selection_answer
from
(
select
answer_id,
form_id,
union_id,
user_tel,
user_name,
user_idcard,
drjzsyks,
drszks,
drszjdys,
drszzd,
title,
q_describe,
control_type,
enable_option_assignment,
data_type,
option_type,
question_options,
value,
create_time,
create_date,
update_time,
update_date
from
(
select
t1_1.id as answer_id,
t1_1.form_id,
t1_1.union_id,
user_tel,
user_name,
user_idcard,
drjzsyks,
drszks,
drszjdys,
drszzd,
GET_JSON_OBJECT(questions,'$.base.title') as title,
GET_JSON_OBJECT(questions,'$.base.describe') as q_describe,
GET_JSON_OBJECT(questions,'$.base.controlType') as control_type,
GET_JSON_OBJECT(questions,'$.data.enableOptionAssignment') as enable_option_assignment,
question_options,
data_type,
option_type,
regexp_replace(GET_JSON_OBJECT(questions,'$.data.value'),'\\[|\\"|\\]','') as value,
create_time,
create_date,
update_time,
update_date
from
(
select
za.id,
za.form_id,
union_id,
GET_JSON_OBJECT(user_info,'$.mobile') as user_tel,
GET_JSON_OBJECT(user_info,'$.nickname') as user_name,
GET_JSON_OBJECT(user_info,'$.idCard') as user_idcard,
GET_JSON_OBJECT(user_info,'$.drjzsyks') as drjzsyks,
GET_JSON_OBJECT(user_info,'$.drszks') as drszks,
GET_JSON_OBJECT(user_info,'$.drszjdys') as drszjdys,
GET_JSON_OBJECT(user_info,'$.drszzd') as drszzd,
questions,
GET_JSON_OBJECT(questions,'$.data.type') as data_type,
GET_JSON_OBJECT(questions,'$.data.options.type') as option_type,
GET_JSON_OBJECT(questions,'$.data.options.value') as question_options,
create_time,
substring(create_time,1,10) as create_date,
update_time,
substring(update_time,1,10) as update_date
from
ods_formdb.ods_answer za
lateral view default.explodejsonarray(GET_JSON_OBJECT(answer_content,'$.question')) qd as questions
) t1_1
) t
where
control_type = 'checkbox'
) t
lateral view explode(split(value,',')) v as answer
lateral view posexplode(split(question_options,',')) qo as value_index,option_value
where
answer = regexp_replace(option_value,'\\[|\\"|\\]','')
) a
left join
(
select
id,
client_id,
form_name
from
ods_formdb.ods_form
) b
on a.form_id = b.id
left join
(
select
id,
third_org_id
from
ods_formdb.ods_client
) c
on b.client_id = c.id
left join
(
select
id,
name
from
ods_crm_db.ods_crm_jinxin_organization
) d
on c.third_org_id = d.id
left join
ods_jxyl_smartbi_config_map.ods_date_info_map e
on a.create_date = e.date_str
limit 1000000000
;
!EOF
)
hive -e "${hive_sql}"