1. 常见排名和排序
-- 行号 ROW_NUMBER() OVER (ORDER BY sales DESC) AS rank -- 排名(允许并列) RANK() OVER (ORDER BY sales DESC) AS rank -- 密集排名 DENSE_RANK() OVER (ORDER BY sales DESC) AS dense_rank -- 分位数 NTILE(4) OVER (ORDER BY salary DESC) AS quartile2. 时间序列分析
-- 移动平均 SELECT date, sales, AVG(sales) OVER ( ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW ) AS 7_day_avg FROM daily_sales; -- 环比增长 SELECT month, revenue, (revenue - LAG(revenue, 1) OVER (ORDER BY month)) / LAG(revenue, 1) OVER (ORDER BY month) * 100 AS growth_rate FROM monthly_revenue;3. 累计计算
-- 累计求和 SELECT date, sales, SUM(sales) OVER ( ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS running_total FROM sales; -- 累计百分比 SELECT customer, revenue, revenue / SUM(revenue) OVER () * 100 AS pct_total, SUM(revenue) OVER ( ORDER BY revenue DESC ROWS UNBOUNDED PRECEDING ) * 100.0 / SUM(revenue) OVER () AS cum_pct FROM customers;4. 数据比较
-- 与前一行比较 SELECT date, temperature, temperature - LAG(temperature, 1) OVER (ORDER BY date) AS diff_prev, LEAD(temperature, 1) OVER (ORDER BY date) - temperature AS diff_next FROM weather_data; -- 与分组内第一行比较 SELECT department, employee, salary, FIRST_VALUE(salary) OVER (PARTITION BY department ORDER BY salary DESC) as top_salary, salary - FIRST_VALUE(salary) OVER (PARTITION BY department ORDER BY salary DESC) as gap_from_top FROM employees;5. 高级分析场景
-- 会话分析(找出用户连续访问) SELECT user_id, login_time, LAG(logout_time) OVER (PARTITION BY user_id ORDER BY login_time) as prev_logout, CASE WHEN login_time <= LAG(logout_time) OVER (PARTITION BY user_id ORDER BY login_time) THEN 0 ELSE 1 END as is_new_session FROM user_sessions; -- 查找重复记录 SELECT *, ROW_NUMBER() OVER (PARTITION BY email, name ORDER BY created_at) as dup_count FROM users WHERE dup_count > 1; -- 计算留存率 WITH user_activity AS ( SELECT user_id, login_date, MIN(login_date) OVER (PARTITION BY user_id) as first_login, LEAD(login_date) OVER (PARTITION BY user_id ORDER BY login_date) as next_login FROM logins ) SELECT first_login as cohort_date, COUNT(DISTINCT user_id) as cohort_size, COUNT(DISTINCT CASE WHEN next_login = first_login + INTERVAL '1 day' THEN user_id END) as day1_retained FROM user_activity GROUP BY first_login;6. 复杂业务场景
-- 漏斗分析 WITH funnel AS ( SELECT user_id, MAX(CASE WHEN event = 'view' THEN 1 ELSE 0 END) as viewed, MAX(CASE WHEN event = 'click' THEN 1 ELSE 0 END) as clicked, MAX(CASE WHEN event = 'purchase' THEN 1 ELSE 0 END) as purchased FROM events GROUP BY user_id ) SELECT COUNT(*) as total_users, SUM(viewed) as viewers, SUM(clicked) as clickers, SUM(purchased) as buyers, 100.0 * SUM(clicked) / NULLIF(SUM(viewed), 0) as click_rate FROM funnel; -- 间隔计算 SELECT user_id, event_time, EXTRACT(EPOCH FROM (event_time - LAG(event_time) OVER (PARTITION BY user_id ORDER BY event_time) )) as seconds_since_last_event FROM events;7. 性能优化技巧
-- 避免自连接 -- 传统方式(需要自连接) SELECT a.id, a.value, MAX(b.value) as max_so_far FROM table a JOIN table b ON a.id >= b.id GROUP BY a.id, a.value; -- 使用窗口函数(更高效) SELECT id, value, MAX(value) OVER (ORDER BY id ROWS UNBOUNDED PRECEDING) as max_so_far FROM table;最佳实践建议:
注意性能:窗口函数在大量数据上可能较慢,合理使用分区
结合索引:ORDER BY子句中的字段建议有索引
使用FILTER(如果数据库支持):
AVG(salary) FILTER (WHERE department = 'Sales') OVER () as avg_sales_salary明确窗口范围:明确指定ROWS或RANGE避免歧义