sql面试题计算活跃最多的是男性还是女性
基础信息:日活用户表table包含所有每日活跃的用户信息,每个登陆APP的用户会在table中生成一条记录。表结构如下
date(活跃日期) user_id(用户id) city(登陆城市) gender(性别) age_tier(年龄段)
2022-03-24 0321438232 BJ Male 0-12
2022-03-25 0321425463 SZ Female 25-30
题目1.求每天每个城市的用户次日留存率
题目2.求3月25日每个城市活跃用户数最多的性别分别是男性还是女性?
该回答引用ChatGPT:题目1.求每天每个城市的用户次日留存率
可以通过以下SQL语句计算每天每个城市的用户次日留存率:
SELECT
date, city,
COUNT(DISTINCT user_id) AS active_users,
COUNT(DISTINCT CASE WHEN EXISTS (
SELECT 1 FROM table t2
WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY)
AND t2.user_id = t1.user_id
) THEN user_id END) AS next_day_retention,
COUNT(DISTINCT CASE WHEN EXISTS (
SELECT 1 FROM table t2
WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY)
AND t2.user_id = t1.user_id
AND t2.city = t1.city
) THEN user_id END) AS next_day_retention_same_city,
COUNT(DISTINCT CASE WHEN EXISTS (
SELECT 1 FROM table t2
WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY)
AND t2.user_id = t1.user_id
AND t2.city <> t1.city
) THEN user_id END) AS next_day_retention_diff_city,
COUNT(DISTINCT CASE WHEN EXISTS (
SELECT 1 FROM table t2
WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY)
AND t2.user_id = t1.user_id
AND t2.city = t1.city
AND t2.gender = t1.gender
) THEN user_id END) AS next_day_retention_same_city_gender,
COUNT(DISTINCT CASE WHEN EXISTS (
SELECT 1 FROM table t2
WHERE t2.date = DATE_ADD(t1.date, INTERVAL 1 DAY)
AND t2.user_id = t1.user_id
AND t2.city = t1.city
AND t2.gender <> t1.gender
) THEN user_id END) AS next_day_retention_same_city_diff_gender
FROM table t1
GROUP BY date, city
题目2.求3月25日每个城市活跃用户数最多的性别分别是男性还是女性?
可以通过以下SQL语句计算3月25日每个城市活跃用户数最多的性别:
SELECT
city,
CASE WHEN COUNT(DISTINCT CASE WHEN gender = 'Male' THEN user_id END) > COUNT(DISTINCT CASE WHEN gender = 'Female' THEN user_id END) THEN 'Male' ELSE 'Female' END AS dominant_gender
FROM table
WHERE date = '2022-03-25'
GROUP BY city
#1
SELECT t1.date, t1.city, COUNT(DISTINCT t1.user_id) AS day1_users, COUNT(DISTINCT t2.user_id) AS day2_users,
COUNT(DISTINCT t2.user_id) / COUNT(DISTINCT t1.user_id) AS retention_rate
FROM your_table t1
LEFT JOIN your_table t2 ON t1.user_id = t2.user_id AND t1.date = DATEADD(day, -1, t2.date)
GROUP BY t1.date, t1.city;
#2
SELECT t.city, t.gender
FROM (
SELECT city, gender, COUNT(DISTINCT user_id) AS active_users
FROM your_table
WHERE date = '2022-03-25'
GROUP BY city, gender
) t
INNER JOIN (
SELECT city, MAX(active_users) AS max_active_users
FROM (
SELECT city, gender, COUNT(DISTINCT user_id) AS active_users
FROM your_table
WHERE date = '2022-03-25'
GROUP BY city, gender
) sub
GROUP BY city
) max_users ON t.city = max_users.city AND t.active_users = max_users.max_active_users;
第一题:
SELECT
date AS 活跃日期,
city AS 城市,
COUNT(DISTINCT user_id) AS 当日活跃用户数,
SUM(CASE WHEN EXISTS (SELECT 1 FROM 日活用户表 t2 WHERE t1.user_id = t2.user_id AND t2.date = (t1.date + INTERVAL '1 day')) THEN 1 ELSE 0 END) AS 次日留存用户数,
ROUND(SUM(CASE WHEN EXISTS (SELECT 1 FROM 日活用户表 t2 WHERE t1.user_id = t2.user_id AND t2.date = (t1.date + INTERVAL '1 day')) THEN 1 ELSE 0 END) * 100.0 / COUNT(DISTINCT user_id), 2) AS 次日留存率
FROM
日活用户表 t1
GROUP BY
date, city;
第二题:
WITH 每个城市活跃用户数 AS (
SELECT
city,
gender,
COUNT(DISTINCT user_id) AS 活跃用户数
FROM
日活用户表
WHERE
date = '2022-03-25'
GROUP BY
city, gender
), 每个城市活跃用户排序 AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY city ORDER BY 活跃用户数 DESC) AS 排名
FROM
每个城市活跃用户数
)
SELECT
city AS 城市,
CASE WHEN gender = 'Male' THEN '男性' ELSE '女性' END AS 最多的性别
FROM
每个城市活跃用户排序
WHERE
排名 = 1;
思路如下
题目1. 求每天每个城市的用户次日留存率
确定参考日期和对比日期。次日留存率是指在参考日期登录的用户中,在对比日期仍然登录的用户占比。
根据参考日期查询所有活跃用户的记录,并将它们的用户ID和城市保存下来。
根据对比日期查询相同城市中登录的用户,并统计其数量。
将对比日期登录的用户数量除以参考日期登录的用户数量,并乘以100,得到次日留存率。
题目2. 求3月25日每个城市活跃用户数最多的性别分别是男性还是女性?
根据日期为2022-03-25查询所有活跃用户的记录。
对每个城市进行分组,并统计每个城市中不同性别的用户数量。
比较每个城市中不同性别的用户数量,找到每个城市活跃用户数最多的性别。
该回答引用百度ai功能
每天每个城市的用户次日留存率
SELECT
date,
city,
COUNT(DISTINCT user_id) AS active_users,
COUNT(DISTINCT CASE WHEN date = DATE_ADD(date, INTERVAL 1 DAY) THEN user_id END) AS retained_users,
ROUND(COUNT(DISTINCT CASE WHEN date = DATE_ADD(date, INTERVAL 1 DAY) THEN user_id END) / COUNT(DISTINCT user_id) * 100, 2) AS retention_rate
FROM
table
GROUP BY
date,
city;
求3月25日每个城市活跃用户数最多的性别分别是男性还是女性?
SELECT
city,
MAX(COUNT(DISTINCT user_id)) AS max_active_users,
CASE
WHEN COUNT(DISTINCT user_id) = MAX(COUNT(DISTINCT user_id)) THEN gender
ELSE NULL
END AS gender
FROM
table
WHERE
date = '2022-03-25'
GROUP BY
city
ORDER BY
MAX(COUNT(DISTINCT user_id)) DESC
LIMIT
1;
select a.date, a.city ,
count(distinct a.user_id)*1.0/count(distinct b.user_id) as retention_rate
from table a
join table b
on a.user_id = b.user_id and a.date = b.date - 1
group by a.date, a.city