腾讯数据分析面试题 求解

 

1、这题重点在后面,汇总和分地区数据呈现在一张表中,需要用到窗口函数来处理
这里统计日期不太明确,是说哪个日期呢?我这里就写一个不限制日期的吧

select region
    ,subject
    ,pay_cnt
    ,total_cnt
    ,pay_cnt/total_cnt as pay_rate   -- 不同地区不同科目的续报率
    ,region_pay_cnt
    ,region_total_cnt
    ,region_pay_cnt/region_total_cnt as region_pay_rate -- 不同地区的续报率
    ,subject_pay_cnt
    ,subject_total_cnt
    ,subject_pay_cnt/subject_total_cnt as subject_pay_rate  -- 不同科目的续报率
from
(
    select region
        ,subject
        ,pay_cnt
        ,total_cnt
        ,sum(pay_cnt) over(partition by region order by subject rows between unbounded preceding and unbounded following) as region_pay_cnt  -- 窗口函数,计算该分区内所有数据
        ,sum(total_cnt) over(partition by region order by subject rows between unbounded preceding and unbounded following) as region_total_cnt
        ,sum(pay_cnt) over(partition by subject order by region rows between unbounded preceding and unbounded following) as subject_pay_cnt
        ,sum(total_cnt) over(partition by subject order by region rows between unbounded preceding and unbounded following) as subject_total_cnt
    from
    (
        select region
            ,subject
            ,sum(pay_cnt) as pay_cnt
            ,sum(total_cnt) as total_cnt
        from 
        ( 
            select teacher_name
                ,teacher_id
                ,region
                ,course_id
                ,subject
            from teacher
        ) t1
        left join
        ( -- 用户续报表计算续报数量到老师id
            select teacher_id
                ,sum(is_pay) as pay_cnt  -- 续报数
                ,count(is_pay) as total_cnt -- 总数
            from user 
            group by teacher_id
        )t2
        on t1.teacher_id = t2.teacher_id --通过老师id关联取地区和科目
        group by region
            ,subject
    ) t
) t