2022.8.16 提交完最后一道 Pandas 数据分析题，有机会从头再做一遍。
"""Quartile-based RFM (recency / frequency / monetary) customer scoring.

Run as a script, this reads ``sales.csv`` from the working directory, adds
``R_Quartile``, ``F_Quartile``, ``M_Quartile`` and ``RFMClass`` columns, then
prints the first five scored rows followed by the five highest-spending
customers whose class is ``"444"``.
"""
import numpy as np
import pandas as pd

# Columns that are scored, and columns shown in the printed reports.
RFM_COLUMNS = ["recency", "frequency", "monetary"]
REPORT_COLUMNS = ["user_id", "recency", "frequency", "monetary", "RFMClass"]

# Score assigned to (<= Q1, <= median, <= Q3, above Q3).
# Recency is inverted: a smaller recency value earns the higher score.
_RECENCY_SCORES = (4, 3, 2, 1)
_VALUE_SCORES = (1, 2, 3, 4)


def _quartile_scores(values, cutoffs, scores):
    """Map each value to a score according to the quartile band it falls in.

    Args:
        values: Series of numbers to score.
        cutoffs: The three cut points (25 %, 50 %, 75 % quantiles), ascending.
        scores: Four scores for the bands ``<= q25``, ``<= q50``, ``<= q75``
            and "everything else", in that order.

    Returns:
        A NumPy integer array with one score per element of ``values``.
    """
    q25, q50, q75 = cutoffs
    first, second, third, last = scores
    # np.select takes the first matching condition, which mirrors a nested
    # np.where ladder.  Values that satisfy no comparison (including NaN)
    # fall through to the last score.
    return np.select(
        [values <= q25, values <= q50, values <= q75],
        [first, second, third],
        default=last,
    )


def add_rfm_classes(sales):
    """Return a copy of ``sales`` with quartile scores and ``RFMClass`` added.

    Args:
        sales: DataFrame containing ``recency``, ``frequency`` and
            ``monetary`` columns.  Other columns are carried over untouched.

    Returns:
        A new DataFrame with integer ``R_Quartile``, ``F_Quartile`` and
        ``M_Quartile`` columns plus the string column ``RFMClass``, which is
        the three scores concatenated (e.g. ``"444"``).
    """
    scored = sales.copy()
    # Quantiles are computed on the RFM columns only, so unrelated
    # non-numeric columns (e.g. string ids) cannot make quantile() raise.
    cutoffs = scored[RFM_COLUMNS].quantile([0.25, 0.5, 0.75])

    scored["R_Quartile"] = _quartile_scores(
        scored["recency"], cutoffs["recency"], _RECENCY_SCORES
    )
    scored["F_Quartile"] = _quartile_scores(
        scored["frequency"], cutoffs["frequency"], _VALUE_SCORES
    )
    scored["M_Quartile"] = _quartile_scores(
        scored["monetary"], cutoffs["monetary"], _VALUE_SCORES
    )
    scored["RFMClass"] = (
        scored["R_Quartile"].map(str)
        + scored["F_Quartile"].map(str)
        + scored["M_Quartile"].map(str)
    )
    return scored


def top_customers(sales, rfm_class="444", n=5):
    """Return the ``n`` highest-``monetary`` rows whose class is ``rfm_class``.

    Args:
        sales: DataFrame that already has an ``RFMClass`` column.
        rfm_class: Class string to keep; defaults to the best class ``"444"``.
        n: Maximum number of rows to return.

    Returns:
        DataFrame restricted to ``REPORT_COLUMNS``, sorted by ``monetary``
        in descending order.
    """
    matching = sales[sales["RFMClass"] == rfm_class]
    ranked = matching.sort_values("monetary", ascending=False)
    return ranked.head(n).loc[:, REPORT_COLUMNS]


def main(path="sales.csv"):
    """Score the CSV at ``path`` and print both reports."""
    sales = add_rfm_classes(pd.read_csv(path))
    print(sales.head(5).loc[:, REPORT_COLUMNS])
    print()
    # The top-customer table is printed explicitly; as a bare expression its
    # value would be silently discarded when run as a script.
    print(top_customers(sales))


if __name__ == "__main__":
    main()