hive 通过shell插入数据缺失

同一句insert overwrite sql往DBeaver内执行跟使用shell脚本执行写入的数据量不一样,shell写入的数据约比DBeaver内直接执行少了一半:

insert overwrite table dws_jxyl_nps_analyse.dws_jxyl_question_answer_detail_test1 partition(p_date = 'all')
select 
  count(1)
from 
(
select 
  answer_id,
  form_id,
  b.form_name,
  union_id,
  user_tel,
  user_name,
  user_idcard,
  drjzsyks,
  drszks,
  drszjdys,
  drszzd,
  question,
  control_type,
  answer,
  answer_index,
  c.third_org_id as org_id,
  d.name as org_name,
  create_time,
  create_date,
  update_time,
  update_date,
  matrix_selection_title,
  matrix_selection_subtitle,
  matrix_selection_answer,
  concat(min_date_of_week_natural, ' ~ ', max_date_of_week_natural) as aggr_week
from 
(
  --+++++++++++++++++++++++++++++++++++解析checkbox控件普通选项+++++++++++++++++++++++++++++++++++++++++
  select
    answer_id,
    form_id,
    union_id,
    user_tel,
    user_name,
    user_idcard,
    drjzsyks,
    drszks,
    drszjdys,
    drszzd,
    title as question,
    control_type,
    answer,
    cast(value_index as int) as answer_index,
    create_time,
    create_date,
    update_time,
    update_date,
    '' as matrix_selection_title,
    '' as matrix_selection_subtitle,
    '' as matrix_selection_answer
  from
  (
    select
      answer_id,
      form_id,
      union_id,
      user_tel,
      user_name,
      user_idcard,
      drjzsyks,
      drszks,
      drszjdys,
      drszzd,
      title,
      q_describe,
      control_type,
      enable_option_assignment,
      data_type,
      option_type,
      question_options,
      value,
      create_time,
      create_date,
      update_time,
      update_date
    from
    (
      select
        t1_1.id as answer_id,
        t1_1.form_id,
        t1_1.union_id,
        user_tel,
        user_name,
        user_idcard,
        drjzsyks,
        drszks,
        drszjdys,
        drszzd,
        GET_JSON_OBJECT(questions,'$.base.title') as title,
        GET_JSON_OBJECT(questions,'$.base.describe') as q_describe,
        GET_JSON_OBJECT(questions,'$.base.controlType') as control_type,
        GET_JSON_OBJECT(questions,'$.data.enableOptionAssignment') as enable_option_assignment,
        question_options,
        data_type,
        option_type,
        regexp_replace(GET_JSON_OBJECT(questions,'$.data.value'),'\\[|\\"|\\]','')  as value,
        create_time,
        create_date,
        update_time,
        update_date
      from
      (
        select
          za.id,
          za.form_id,
          union_id,
          GET_JSON_OBJECT(user_info,'$.mobile') as user_tel,
          GET_JSON_OBJECT(user_info,'$.nickname') as user_name,
          GET_JSON_OBJECT(user_info,'$.idCard') as user_idcard,
          GET_JSON_OBJECT(user_info,'$.drjzsyks') as drjzsyks,
          GET_JSON_OBJECT(user_info,'$.drszks') as drszks,
          GET_JSON_OBJECT(user_info,'$.drszjdys') as drszjdys,
          GET_JSON_OBJECT(user_info,'$.drszzd') as drszzd,
          questions,
          GET_JSON_OBJECT(questions,'$.data.type') as data_type,
          GET_JSON_OBJECT(questions,'$.data.options.type') as option_type,
          GET_JSON_OBJECT(questions,'$.data.options.value') as question_options,
          create_time,
          substring(create_time,1,10) as create_date,
          update_time,
          substring(update_time,1,10) as update_date
        from
          ods_formdb.ods_answer za
        lateral view default.explodejsonarray(GET_JSON_OBJECT(answer_content,'$.question')) qd as questions
      ) t1_1
    ) t
    where
      control_type = 'checkbox'
  ) t
  lateral view explode(split(value,',')) v as answer
  lateral view posexplode(split(question_options,',')) qo as value_index,option_value
where
  answer = regexp_replace(option_value,'\\[|\\"|\\]','')
) a

left join 

(
  select
    id,
    client_id,
    form_name
  from
    ods_formdb.ods_form
) b
on a.form_id = b.id
left join 
(
  select
    id,
    third_org_id
  from
    ods_formdb.ods_client
) c
on b.client_id = c.id
left join
(
  select
    id,
    name
  from
    ods_crm_db.ods_crm_jinxin_organization
) d
on c.third_org_id = d.id
left join
  ods_jxyl_smartbi_config_map.ods_date_info_map e
on a.create_date = e.date_str

脚本如下:

hive_sql=$(cat <<!EOF
select 
  answer_id,
  form_id,
  b.form_name,
  union_id,
  user_tel,
  user_name,
  user_idcard,
  drjzsyks,
  drszks,
  drszjdys,
  drszzd,
  question,
  control_type,
  answer,
  answer_index,
  c.third_org_id as org_id,
  d.name as org_name,
  create_time,
  create_date,
  update_time,
  update_date,
  matrix_selection_title,
  matrix_selection_subtitle,
  matrix_selection_answer,
  concat(min_date_of_week_natural, ' ~ ', max_date_of_week_natural) as aggr_week
from 
(
  select
    answer_id,
    form_id,
    union_id,
    user_tel,
    user_name,
    user_idcard,
    drjzsyks,
    drszks,
    drszjdys,
    drszzd,
    title as question,
    control_type,
    answer,
    cast(value_index as int) as answer_index,
    create_time,
    create_date,
    update_time,
    update_date,
    '' as matrix_selection_title,
    '' as matrix_selection_subtitle,
    '' as matrix_selection_answer
  from
  (
    select
      answer_id,
      form_id,
      union_id,
      user_tel,
      user_name,
      user_idcard,
      drjzsyks,
      drszks,
      drszjdys,
      drszzd,
      title,
      q_describe,
      control_type,
      enable_option_assignment,
      data_type,
      option_type,
      question_options,
      value,
      create_time,
      create_date,
      update_time,
      update_date
    from
    (
      select
        t1_1.id as answer_id,
        t1_1.form_id,
        t1_1.union_id,
        user_tel,
        user_name,
        user_idcard,
        drjzsyks,
        drszks,
        drszjdys,
        drszzd,
        GET_JSON_OBJECT(questions,'$.base.title') as title,
        GET_JSON_OBJECT(questions,'$.base.describe') as q_describe,
        GET_JSON_OBJECT(questions,'$.base.controlType') as control_type,
        GET_JSON_OBJECT(questions,'$.data.enableOptionAssignment') as enable_option_assignment,
        question_options,
        data_type,
        option_type,
        regexp_replace(GET_JSON_OBJECT(questions,'$.data.value'),'\\[|\\"|\\]','')  as value,
        create_time,
        create_date,
        update_time,
        update_date
      from
      (
        select
          za.id,
          za.form_id,
          union_id,
          GET_JSON_OBJECT(user_info,'$.mobile') as user_tel,
          GET_JSON_OBJECT(user_info,'$.nickname') as user_name,
          GET_JSON_OBJECT(user_info,'$.idCard') as user_idcard,
          GET_JSON_OBJECT(user_info,'$.drjzsyks') as drjzsyks,
          GET_JSON_OBJECT(user_info,'$.drszks') as drszks,
          GET_JSON_OBJECT(user_info,'$.drszjdys') as drszjdys,
          GET_JSON_OBJECT(user_info,'$.drszzd') as drszzd,
          questions,
          GET_JSON_OBJECT(questions,'$.data.type') as data_type,
          GET_JSON_OBJECT(questions,'$.data.options.type') as option_type,
          GET_JSON_OBJECT(questions,'$.data.options.value') as question_options,
          create_time,
          substring(create_time,1,10) as create_date,
          update_time,
          substring(update_time,1,10) as update_date
        from
          ods_formdb.ods_answer za
        lateral view default.explodejsonarray(GET_JSON_OBJECT(answer_content,'$.question')) qd as questions
      ) t1_1
    ) t
    where
      control_type = 'checkbox'
  ) t
  lateral view explode(split(value,',')) v as answer
  lateral view posexplode(split(question_options,',')) qo as value_index,option_value
where
  answer = regexp_replace(option_value,'\\[|\\"|\\]','')
) a

left join 

(
  select
    id,
    client_id,
    form_name
  from
    ods_formdb.ods_form
) b
on a.form_id = b.id
left join 
(
  select
    id,
    third_org_id
  from
    ods_formdb.ods_client
) c
on b.client_id = c.id
left join
(
  select
    id,
    name
  from
    ods_crm_db.ods_crm_jinxin_organization
) d
on c.third_org_id = d.id
left join
  ods_jxyl_smartbi_config_map.ods_date_info_map e
on a.create_date = e.date_str
limit 1000000000
;
!EOF
)

hive -e "${hive_sql}"