import pandas as pd 
from datetime import timedelta

def consecutive_streaks(df: pd.DataFrame, min_days: int = 3) -> pd.Series:
    """Return the length of every run of consecutive active days per user.

    Args:
        df: Frame with a ``user_id`` column and a ``date`` column that
            ``pd.to_datetime`` can parse. Several rows may share the same
            user and calendar day; they count as a single active day.
        min_days: Minimum run length (in days) for a run to be reported.

    Returns:
        A Series named ``question_id`` indexed by ``user_id`` whose values
        are run lengths in days. A user with several qualifying runs
        appears once per run; users without a qualifying run are absent.
    """
    # One row per (user, calendar day). normalize() drops the time of day
    # while keeping the vectorized datetime64 dtype (no Python date objects).
    days = pd.DataFrame({
        'user_id': df['user_id'],
        'date1': pd.to_datetime(df['date']).dt.normalize(),
    }).drop_duplicates()

    # 1-based position of each day within its user's sorted days. Days are
    # unique per user after de-duplication, so there are no ties to break.
    position = days.groupby('user_id')['date1'].rank(method='first')

    # Consecutive days advance in lock-step with their position, so
    # "day - position" is constant inside a run and changes between runs.
    # The column keeps the name 'question_id' because that name becomes the
    # name of the returned (and printed) Series.
    days['question_id'] = days['date1'] - pd.to_timedelta(position, unit='D')

    # Rows sharing (user, anchor) belong to one run; its size is the length.
    num = days.groupby(['user_id', 'question_id'])['question_id'].count()

    # Keep only runs that are long enough and hide the internal anchor level.
    return num[num >= min_days].reset_index('question_id', drop=True)


if __name__ == '__main__':
    df = pd.read_csv('nowcoder.csv')
    print(consecutive_streaks(df))