关于#sql#的问题:基础信息:日活用户表table包含所有每日活跃的用户信息,每个登陆APP的用户会在table中生成一条记录

sql面试题计算活跃最多的是男性还是女性

基础信息:日活用户表table包含所有每日活跃的用户信息,每个登陆APP的用户会在table中生成一条记录。表结构如下
date(活跃日期) user_id(用户id) city(登陆城市) gender(性别) age_tier(年龄段)
2022-03-24 0321438232 BJ Male 0-12
2022-03-25 0321425463 SZ Female 25-30

题目1.求每天每个城市的用户次日留存率
题目2.求3月25日每个城市活跃用户数最多的性别分别是男性还是女性?

该回答引用ChatGPT:题目1.求每天每个城市的用户次日留存率

可以通过以下SQL语句计算每天每个城市的用户次日留存率:

SELECT 
    date, city, 
    COUNT(DISTINCT user_id) AS active_users,
    COUNT(DISTINCT CASE WHEN EXISTS (
        SELECT 1 FROM table t2 
        WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY) 
        AND t2.user_id = t1.user_id
    ) THEN user_id END) AS next_day_retention,
    COUNT(DISTINCT CASE WHEN EXISTS (
        SELECT 1 FROM table t2 
        WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY) 
        AND t2.user_id = t1.user_id 
        AND t2.city = t1.city
    ) THEN user_id END) AS next_day_retention_same_city,
    COUNT(DISTINCT CASE WHEN EXISTS (
        SELECT 1 FROM table t2 
        WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY) 
        AND t2.user_id = t1.user_id 
        AND t2.city <> t1.city
    ) THEN user_id END) AS next_day_retention_diff_city,
    COUNT(DISTINCT CASE WHEN EXISTS (
        SELECT 1 FROM table t2 
        WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY) 
        AND t2.user_id = t1.user_id 
        AND t2.city = t1.city 
        AND t2.gender = t1.gender
    ) THEN user_id END) AS next_day_retention_same_city_gender,
    COUNT(DISTINCT CASE WHEN EXISTS (
        SELECT 1 FROM table t2 
        WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY) 
        AND t2.user_id = t1.user_id 
        AND t2.city = t1.city 
        AND t2.gender <> t1.gender
    ) THEN user_id END) AS next_day_retention_same_city_diff_gender
FROM table t1
GROUP BY date, city

题目2.求3月25日每个城市活跃用户数最多的性别分别是男性还是女性?

可以通过以下SQL语句计算3月25日每个城市活跃用户数最多的性别:

SELECT 
    city,
    CASE WHEN COUNT(DISTINCT CASE WHEN gender = 'Male' THEN user_id END) > COUNT(DISTINCT CASE WHEN gender = 'Female' THEN user_id END) THEN 'Male' ELSE 'Female' END AS dominant_gender
FROM table
WHERE date = '2022-03-25'
GROUP BY city
#1
SELECT t1.date, t1.city, COUNT(DISTINCT t1.user_id) AS day1_users, COUNT(DISTINCT t2.user_id) AS day2_users, 
       COUNT(DISTINCT t2.user_id) / COUNT(DISTINCT t1.user_id) AS retention_rate
FROM your_table t1
LEFT JOIN your_table t2 ON t1.user_id = t2.user_id AND t1.date = DATEADD(day, -1, t2.date)
GROUP BY t1.date, t1.city;
#2
SELECT t.city, t.gender
FROM (
    SELECT city, gender, COUNT(DISTINCT user_id) AS active_users
    FROM your_table
    WHERE date = '2022-03-25'
    GROUP BY city, gender
) t
INNER JOIN (
    SELECT city, MAX(active_users) AS max_active_users
    FROM (
        SELECT city, gender, COUNT(DISTINCT user_id) AS active_users
        FROM your_table
        WHERE date = '2022-03-25'
        GROUP BY city, gender
    ) sub
    GROUP BY city
) max_users ON t.city = max_users.city AND t.active_users = max_users.max_active_users;

第一题:

 SELECT 
    date AS 活跃日期,
    city AS 城市,
    COUNT(DISTINCT user_id) AS 当日活跃用户数,
    SUM(CASE WHEN EXISTS (SELECT 1 FROM 日活用户表 t2 WHERE t1.user_id = t2.user_id AND t2.date = (t1.date + INTERVAL '1 day')) THEN 1 ELSE 0 END) AS 次日留存用户数,
    ROUND(SUM(CASE WHEN EXISTS (SELECT 1 FROM 日活用户表 t2 WHERE t1.user_id = t2.user_id AND t2.date = (t1.date + INTERVAL '1 day')) THEN 1 ELSE 0 END) * 100.0 / COUNT(DISTINCT user_id), 2) AS 次日留存率
FROM 
    日活用户表 t1
GROUP BY 
    date, city;

第二题:

WITH 每个城市活跃用户数 AS (
    SELECT 
        city,
        gender,
        COUNT(DISTINCT user_id) AS 活跃用户数
    FROM 
        日活用户表
    WHERE 
        date = '2022-03-25'
    GROUP BY 
        city, gender
), 每个城市活跃用户排序 AS (
    SELECT 
        *,
        ROW_NUMBER() OVER (PARTITION BY city ORDER BY 活跃用户数 DESC) AS 排名
    FROM 
        每个城市活跃用户数
)
SELECT 
    city AS 城市,
    CASE WHEN gender = 'Male' THEN '男性' ELSE '女性' END AS 最多的性别
FROM 
    每个城市活跃用户排序
WHERE 
    排名 = 1;

思路如下
题目1. 求每天每个城市的用户次日留存率

确定参考日期和对比日期。次日留存率是指在参考日期登录的用户中,在对比日期仍然登录的用户占比。
根据参考日期查询所有活跃用户的记录,并将它们的用户ID和城市保存下来。
根据对比日期查询相同城市中登录的用户,并统计其数量。
将对比日期登录的用户数量除以参考日期登录的用户数量,并乘以100,得到次日留存率。

题目2. 求3月25日每个城市活跃用户数最多的性别分别是男性还是女性?

根据日期为2022-03-25查询所有活跃用户的记录。
对每个城市进行分组,并统计每个城市中不同性别的用户数量。
比较每个城市中不同性别的用户数量,找到每个城市活跃用户数最多的性别。

该回答引用百度ai功能

每天每个城市的用户次日留存率


SELECT 
    date, 
    city, 
    COUNT(DISTINCT user_id) AS active_users, 
    COUNT(DISTINCT CASE WHEN date = DATE_ADD(date, INTERVAL 1 DAY) THEN user_id END) AS retained_users,
    ROUND(COUNT(DISTINCT CASE WHEN date = DATE_ADD(date, INTERVAL 1 DAY) THEN user_id END) / COUNT(DISTINCT user_id) * 100, 2) AS retention_rate
FROM 
    table
GROUP BY 
    date, 
    city;

求3月25日每个城市活跃用户数最多的性别分别是男性还是女性?


SELECT 
    city, 
    MAX(COUNT(DISTINCT user_id)) AS max_active_users, 
    CASE 
        WHEN COUNT(DISTINCT user_id) = MAX(COUNT(DISTINCT user_id)) THEN gender 
        ELSE NULL 
    END AS gender 
FROM 
    table 
WHERE 
    date = '2022-03-25' 
GROUP BY 
    city 
ORDER BY 
    MAX(COUNT(DISTINCT user_id)) DESC 
LIMIT 
    1;
select a.date, a.city , 
count(distinct a.user_id)*1.0/count(distinct b.user_id) as retention_rate
from table a 
join table b 
on a.user_id = b.user_id and a.date = b.date - 1
group by a.date, a.city