import sys

import pandas as pd

# Part 1: for every line on stdin, print the sum of its first two integers.
for line in sys.stdin:
    fields = line.split()
    if not fields:
        # A blank line (e.g. a trailing newline) has no pair to add; skip it
        # instead of failing with IndexError. Malformed non-blank lines still
        # raise so bad input is not hidden.
        continue
    print(int(fields[0]) + int(fields[1]))

# Part 2: RFM segmentation of the rows in sales.csv, which must provide the
# "recency", "frequency" and "monetary" columns.
df = pd.read_csv("sales.csv")

# Bin each metric into quartiles and label the bins with a one-character score.
# qcut assigns labels from the lowest bin upward, so the reversed label list
# gives the smallest recency values a "4", while frequency and monetary score
# "4" for their largest values.
# The scores are held in temporaries (a, b, c) rather than stored as extra
# DataFrame columns, since they are only needed to build RFMClass.
# NOTE: pd.qcut raises ValueError when the quantile edges are not unique
# (heavily tied data); the `duplicates` parameter controls that behavior.
a = pd.qcut(df["recency"], 4, labels=["4", "3", "2", "1"]).astype(str)
b = pd.qcut(df["frequency"], 4, labels=["1", "2", "3", "4"]).astype(str)
c = pd.qcut(df["monetary"], 4, labels=["1", "2", "3", "4"]).astype(str)

# Concatenate the three scores into a single class string such as "444".
df["RFMClass"] = a + b + c
print(df.head())
print()

# Rows that score "4" on all three metrics, largest monetary value first.
is_top_class = df["RFMClass"] == "444"
top_customers = (
    df[is_top_class]
    .sort_values("monetary", ascending=False)
    .reset_index(drop=True)
)
print(top_customers.head())
首先使用 qcut 对 recency、frequency、monetary(即 R、F、M)三列分别按四分位进行离散化,然后将三个结果拼接成一列 RFMClass。这里使用 a、b、c 三个临时变量,是为了偷个懒,不想在后面再把每个字段写一遍。