import pandas as pd
df = pd.read_csv("sales.csv")

# Scoring rules: compute the 25th, 50th and 75th percentile of each feature.
# The score_* functions below compare individual values against these
# thresholds.
_QUARTILE_LEVELS = [0.25, 0.5, 0.75]

q1_recency, median_recency, q3_recency = (
    df["recency"].quantile(_QUARTILE_LEVELS).to_numpy()
)
q1_frequency, median_frequency, q3_frequency = (
    df["frequency"].quantile(_QUARTILE_LEVELS).to_numpy()
)
q1_monetary, median_monetary, q3_monetary = (
    df["monetary"].quantile(_QUARTILE_LEVELS).to_numpy()
)
# Score the recency feature.
def score_recency(value):
    """Return the quartile score (4 down to 1) for a recency value.

    Smaller recency values receive higher scores: values at or below the
    first quartile score 4, values above the third quartile score 1.

    Args:
        value: A single recency value to compare against the module-level
            thresholds ``q1_recency``, ``median_recency`` and ``q3_recency``.

    Returns:
        An int from 1 to 4, or ``None`` when ``value`` cannot be ordered
        against the thresholds (for example, when it is NaN).
    """
    if value <= q1_recency:
        return 4
    if value <= median_recency:
        return 3
    if value <= q3_recency:
        return 2
    if value > q3_recency:
        # The original branch evaluated a bare ``1`` without returning it,
        # so the whole top quartile silently came back as None.
        return 1
    # Every comparison was false (NaN value or NaN threshold): leave the
    # value unscored rather than assigning it an arbitrary quartile.
    return None
# Score the frequency feature.
def score_frequency(value):
    """Return the quartile score (1 up to 4) for a frequency value.

    Larger frequency values receive higher scores: values at or below the
    first quartile score 1, values above the third quartile score 4.

    Args:
        value: A single frequency value to compare against the module-level
            thresholds ``q1_frequency``, ``median_frequency`` and
            ``q3_frequency``.

    Returns:
        An int from 1 to 4, or ``None`` when ``value`` cannot be ordered
        against the thresholds (for example, when it is NaN).
    """
    if value <= q1_frequency:
        return 1
    if value <= median_frequency:
        return 2
    if value <= q3_frequency:
        return 3
    if value > q3_frequency:
        # The original branch evaluated a bare ``4`` without returning it,
        # so the whole top quartile silently came back as None.
        return 4
    # Every comparison was false (NaN value or NaN threshold): leave the
    # value unscored rather than awarding it the top score.
    return None
# Score the monetary feature.
def score_monetary(value):
    """Return the quartile score (1 up to 4) for a monetary value.

    Larger monetary values receive higher scores: values at or below the
    first quartile score 1, values above the third quartile score 4.

    Args:
        value: A single monetary value to compare against the module-level
            thresholds ``q1_monetary``, ``median_monetary`` and
            ``q3_monetary``.

    Returns:
        An int from 1 to 4, or ``None`` when ``value`` cannot be ordered
        against the thresholds (for example, when it is NaN).
    """
    if value <= q1_monetary:
        return 1
    if value <= median_monetary:
        return 2
    if value <= q3_monetary:
        return 3
    if value > q3_monetary:
        # The original branch evaluated a bare ``4`` without returning it,
        # so the whole top quartile silently came back as None.
        return 4
    # Every comparison was false (NaN value or NaN threshold): leave the
    # value unscored rather than awarding it the top score.
    return None
# Attach one score column per feature by running each feature's scorer over
# its source column, then show the first five rows.
_SCORE_COLUMNS = (
    ('R_Quartile', 'recency', score_recency),
    ('F_Quartile', 'frequency', score_frequency),
    ('M_Quartile', 'monetary', score_monetary),
)
for _target, _source, _scorer in _SCORE_COLUMNS:
    df[_target] = df[_source].apply(_scorer)

print(df.head(n=5))