Hive SQL数据查询,子查询

hive SQL数据查询

当一个用户(uid)有Android和iOS的双端播放记录,这个用户(uid)的播放记录信息就是需要筛选出来
表table

ansbitresuidosbitsektscp
42335666android3454762545
6445253789ios345345265225
523453666ios34523424524
33223453android34524525645
24345666android652342342455
35623345android346243234243
53423345ios346243234243
63578123ios346243234243
38489345android346243234243
对表table进行处理,当用户uid的同时由OS中Android和iOS的使用,输出该uid的所有播放信息

输出
结果1:

ansbitresuidosbitsektscp
42335666android3454762545
523453666ios34523424524
24345666android652342342455
35623345android346243234243
53423345ios346243234243
38489345android346243234243

结果2:

uidtotal_playandroid_play_cntios_paly_cnt
666321
666321
select 
    count(os) as os_cnt,
    uid
from table
where day >= '2023-02-01' and day <= '2023-02-05'
group by uid;

select 
    uid,
    os
    --count(*) as os_count, 
    --sum(cnt) as total_playback_cnt,
    --sum(if(os = 'Android', cnt, 0)) as android_playback_cnt,
    --sum(if(os = 'iOS', cnt, 0)) as ios_playback_cnt
from table
where os_cnt >=2
group by 1
--order by 2 desc, 3 desc
limit 1000


运行结果及详细报错内容

img

用groupby 算出同一个 uid 下有几个 os,然后筛出 os_cnt >= 2

列不存在啊。
代码如下,望采纳,谢谢!
第一个结果表,查询明细:

select * from table where uid in(
select 
    uid
from table
group by uid
having count(distinct(os)) >= 2 
)

第一个结果表,方式二:

select * from table t1 where exists (
select 1 from table t2 where t1.uid=t2.uid and t1.os<> t2.os 
)

第二个结果表,查询统计:

select 
    uid, 
    count(*) as total_play,
    sum(case when lower(os) = 'android' then 1 else 0 end) as android_play_cnt,
    sum(case when lower(os) = 'ios' then 1 else 0 end) as ios_play_cnt
from table
group by uid
having count(distinct(os)) >= 2
order by total_play desc, android_play_cnt desc, ios_play_cnt desc
limit 1000

select uid,count(*) from table group by uid having count(*)>1

该回答引用ChatGPT
结果1

SELECT * FROM table WHERE uid = '666'

结果2


SELECT uid, 
       COUNT(*) AS total_play, 
       SUM(CASE WHEN os = 'android' THEN 1 ELSE 0 END) AS android_play_cnt, 
       SUM(CASE WHEN os = 'ios' THEN 1 ELSE 0 END) AS ios_paly_cnt 
FROM table 
WHERE uid = '666'
GROUP BY uid


-- 1 自连,用户id相等且播放设备不相等
select * from table a 
join table b 
on a.uid=b.uid and a.od <> b.os;

-- 2 先分别求出来,在求和
select a.uid ,android_play_cnt+ios_play_cnt total_play
,android_play_cnt,ios_play_cnt from 
(select uid,
count(os) android_play_cnt    
from table where os = 'android'
group by uid) a
join 
(select uid,
count(os) ios_play_cnt    
from table where os = 'ios'
group by uid) b
on a.uid =b.uid;

请问一下,为啥你的表名称叫做table,这不是一个关键词吗?
建议规范命名。
系统学一下:https://blog.csdn.net/weishuai90/category_12159920.html


select * from table t1 where exists (
select 1 from table t2 where t1.uid=t2.uid and t1.os<> t2.os 
)


select * from table t1 where exists (
select 
    1
from table t2
where t1.uid = t2.uid
group by uid
having count(distinct(os)) >= 2
)



select 
    uid, 
    count(*) as total_play,
    sum(case when lower(os) = 'android' then 1 else 0 end) as android_play_cnt,
    sum(case when lower(os) = 'ios' then 1 else 0 end) as ios_play_cnt
from table
group by uid
having count(distinct(os)) >= 2
order by total_play desc, android_play_cnt desc, ios_play_cnt desc
limit 1000