"""RFM (recency / frequency / monetary) customer segmentation.

Reads ``sales.csv``, scores every row from 1 to 4 on each of the three
dimensions using that column's quartiles, concatenates the three digits into
an ``RFMClass`` string and extracts the best customers (class ``'444'``).
"""
# Reminder from the original author: pay attention to the small details.
import numpy as np
import pandas as pd

# Quartile cut points that separate the four score buckets.
_QUARTILES = [0.25, 0.5, 0.75]


def _quartile_score(values: pd.Series, *, higher_is_better: bool = True) -> pd.Series:
    """Return a quartile-based score ('1'..'4') for every entry of *values*.

    The thresholds are the 25 %, 50 % and 75 % quantiles of *values* itself.
    An entry equal to a threshold belongs to the lower bucket (``<=``).

    Args:
        values: Numeric column to score.
        higher_is_better: When true the lowest bucket scores 1 and the
            highest 4; when false the order is reversed (lowest bucket
            scores 4), which is what recency needs.

    Returns:
        A string Series, aligned with ``values.index``, holding one digit
        per row.
    """
    thresholds = values.quantile(_QUARTILES).to_numpy(dtype='float64')
    # side='left' counts the thresholds strictly below each value, so a value
    # equal to a threshold stays in the lower bucket.  NaN sorts after every
    # threshold and therefore lands in the last bucket.
    bucket = np.searchsorted(
        thresholds, values.to_numpy(dtype='float64'), side='left'
    )
    score = bucket + 1 if higher_is_better else 4 - bucket
    return pd.Series(score, index=values.index).astype('str')


def add_rfm_class(sales: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *sales* with an added ``RFMClass`` column.

    Args:
        sales: Frame with numeric ``recency``, ``frequency`` and ``monetary``
            columns.

    Returns:
        A new frame (the input is left untouched) in which ``monetary`` is
        stored as float32 and ``RFMClass`` is the three-character string
        ``R + F + M``.  Small recency values score high; large frequency and
        monetary values score high.
    """
    scored = sales.copy()
    # The expected result requires `monetary` as float32; cast before the
    # quantiles are taken so the thresholds come from the converted values.
    scored['monetary'] = scored['monetary'].astype('float32')

    recency = _quartile_score(scored['recency'], higher_is_better=False)
    frequency = _quartile_score(scored['frequency'])
    monetary = _quartile_score(scored['monetary'])

    # String concatenation, not addition: '4' + '4' + '4' -> '444'.
    scored['RFMClass'] = recency + frequency + monetary
    return scored


def select_top_customers(scored: pd.DataFrame) -> pd.DataFrame:
    """Return the ``'444'`` rows of *scored*, largest ``monetary`` first.

    The result gets a fresh 0..n-1 index.
    """
    top = scored[scored['RFMClass'] == '444']
    return top.sort_values(by='monetary', ascending=False).reset_index(drop=True)


if __name__ == '__main__':
    # Assignments stay at module level so `sales` and `sales1` remain
    # available as globals after the script has run.
    sales = add_rfm_class(pd.read_csv('sales.csv'))
    sales1 = select_top_customers(sales)