现有用户行为表,包含设备id、账号id 、用户行为、行为发生时间、进程id四个字段。其中用户行为包含:点击下单按钮(click_order_button) 、打开提交订单页 (open_submit_order_page)、点击提交订单按钮(click_submit_order_button) 、打开支付页(open_payment_page) 、点击确认支付(click_confirm_payment) 、跳转支付成功页面(open_payment_success_page)。
在每个阶段,都有用户的流失。计算该产品漏斗的数据(即每个步骤的用户留存量和留存率)以及每个步骤间的用户平均停留时间。
```sql
CREATE EXTERNAL TABLE dwd_user_behavior
(
device_id STRING COMMENT '设备ID',
account_id STRING COMMENT '账号ID',
action_id STRING COMMENT '行为类型ID',
action_time STRING COMMENT '行为时间',
pid STRING COMMENT '进程id'
)
COMMENT '用户行为表'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdfs://hadoop01:8020/test/dwd_user_behavior'
;
要计算该产品漏斗的数据,包括每个步骤的用户留存量、留存率以及每个步骤间的用户平均停留时间,您可以按照以下步骤进行操作:
确定要计算的漏斗步骤。根据您提供的信息,可以将用户行为分为以下步骤:点击下单按钮、打开提交订单页、点击提交订单按钮、打开支付页、点击确认支付、跳转支付成功页面。
使用SQL查询语句从用户行为表中筛选出相关数据,并按照用户和行为发生时间排序。查询如下:
SELECT device_id, account_id, user_action, action_time, process_id
FROM dwd_user_behavior
ORDER BY device_id, action_time;
根据设备ID和账号ID对数据进行分组,以便计算每个用户在每个阶段的留存量和留存率。
WITH funnel_cte AS (
SELECT device_id, account_id,
MAX(CASE WHEN user_action = 'click_order_button' THEN action_time END) AS order_click_time,
MAX(CASE WHEN user_action = 'open_submit_order_page' THEN action_time END) AS open_order_page_time,
MAX(CASE WHEN user_action = 'click_submit_order_button' THEN action_time END) AS submit_order_click_time,
MAX(CASE WHEN user_action = 'open_payment_page' THEN action_time END) AS open_payment_page_time,
MAX(CASE WHEN user_action = 'click_confirm_payment' THEN action_time END) AS confirm_payment_click_time,
MAX(CASE WHEN user_action = 'open_payment_success_page' THEN action_time END) AS payment_success_page_time
FROM dwd_user_behavior
GROUP BY device_id, account_id
)
SELECT COUNT(*) AS user_count,
COUNT(order_click_time) AS step1_count,
COUNT(open_order_page_time) AS step2_count,
COUNT(submit_order_click_time) AS step3_count,
COUNT(open_payment_page_time) AS step4_count,
COUNT(confirm_payment_click_time) AS step5_count,
COUNT(payment_success_page_time) AS step6_count,
COUNT(order_click_time) / COUNT(*) AS step1_retention_rate,
COUNT(open_order_page_time) / COUNT(order_click_time) AS step2_retention_rate,
COUNT(submit_order_click_time) / COUNT(open_order_page_time) AS step3_retention_rate,
COUNT(open_payment_page_time) / COUNT(submit_order_click_time) AS step4_retention_rate,
COUNT(confirm_payment_click_time) / COUNT(open_payment_page_time) AS step5_retention_rate,
COUNT(payment_success_page_time) / COUNT(confirm_payment_click_time) AS step6_retention_rate
FROM funnel_cte;
对于每个步骤,计算用户平均停留时间。可以通过计算相邻两个步骤之间的时间差,并取平均值来得到。
WITH funnel_cte AS (
-- 与上述查询中生成的CTE相同
)
SELECT AVG(DATEDIFF(open_order_page_time, order_click_time)) AS avg_step1_to_step2_time,
AVG(DATEDIFF(submit_order_click_time, open_order_page_time)) AS avg_step2_to_step3_time,
AVG(DATEDIFF(open_payment_page_time, submit_order_click_time)) AS avg_step3_to_step4_time,
AVG(DATEDIFF(confirm_payment_click_time, open_payment_page_time)) AS avg_step4_to_step5_time,
AVG(DATEDIFF(payment_success_page_time, confirm_payment_click_time)) AS avg_step5_to_step6_time
FROM funnel_cte;