import pandas as pd
## Sample data: nine consecutive days (2021-3-19 .. 2021-3-27), each with a
## numeric `value` and an 'X'/'Y' `status`.
df = pd.DataFrame(
    {
        'time': [f'2021-3-{day}' for day in range(19, 28)],
        'value': [10, 11, 9, 5, 4, 2, 4, 9, 5],
        'status': list('XXXYXYYXX'),
    }
)
## Build `df_new`: one representative row per run of consecutive identical
## `status` values in `df`.
##   * run of 'X' -> the row with the highest `value`
##   * run of 'Y' -> the row with the lowest `value`
##   * any other status -> the whole run is kept (sorted by `value`)
# A new run starts wherever `status` differs from the previous row; the
# cumulative sum of those change points gives every run its own id.
run_id = (df.status != df.status.shift()).cumsum()

# Collect the selected rows and concatenate once at the end. Appending to a
# DataFrame inside the loop re-copies everything on each pass and starts from
# an empty frame, which degrades dtypes and is deprecated in recent pandas.
picked = []
# The key is passed as a Series (not a one-element list) so group keys stay
# plain scalars on every pandas version.
for _, g in df.groupby(run_id):
    # Stable sort makes ties deterministic: among equal values the original
    # row order is preserved, so 'X' picks the latest of the tied maxima and
    # 'Y' picks the earliest of the tied minima.
    g = g.sort_values(by='value', kind='stable')
    label = g.status.values[0]
    if label == 'X':
        ## keep the highest value for X
        g = g.iloc[[-1]]
    elif label == 'Y':
        ## keep the lowest value for Y
        g = g.iloc[[0]]
    picked.append(g)

if picked:
    df_new = pd.concat(picked).reset_index(drop=True)
else:
    # No rows at all: fall back to an empty frame with the same columns.
    df_new = pd.DataFrame(columns=df.columns)
def inp(x):
    """Return ``x.value`` when ``x.time`` appears in ``df_new.time``.

    Args:
        x: A row-like object exposing ``time`` and ``value`` attributes
            (for example a row Series handed over by ``DataFrame.apply``
            with ``axis=1``).

    Returns:
        ``x.value`` if ``x.time`` is among the values of the module-level
        ``df_new.time`` column, otherwise ``None``.
    """
    # `df_new` is looked up at call time, so it must exist at module level
    # before this function is invoked.
    if x.time in df_new.time.values:
        return x.value
    return None
# Copy `value` into `value_1` only on rows whose `time` appears in
# `df_new.time`; every other row starts out as NaN. This is the vectorised
# form of applying the membership test row by row.
df["value_1"] = df["value"].where(df["time"].isin(df_new["time"]))
# The first row always takes its own value, so the forward fill below has a
# starting point.
df.loc[:0, 'value_1'] = df.head(1).value
# Each remaining NaN takes the `value_1` of the row above it.
# (`fillna(method='ffill')` is deprecated in recent pandas; `ffill()` is the
# supported spelling.)
df["value_1"] = df["value_1"].ffill()
print(df)
# Stop the script here. `exit()` is a helper injected by the `site` module
# and is not guaranteed to exist; raising SystemExit has the same effect.
raise SystemExit
## NOTE(review): from this point the file repeats the script above line for
## line. The first copy ends by terminating the interpreter, so this second
## copy looks unreachable -- confirm before relying on it.
## Sample data: nine consecutive days (2021-3-19 .. 2021-3-27), each with a
## numeric `value` and an 'X'/'Y' `status`.
df = pd.DataFrame(
    {
        'time': [f'2021-3-{day}' for day in range(19, 28)],
        'value': [10, 11, 9, 5, 4, 2, 4, 9, 5],
        'status': list('XXXYXYYXX'),
    }
)
## Build `df_new`: one representative row per run of consecutive identical
## `status` values in `df`.
##   * run of 'X' -> the row with the highest `value`
##   * run of 'Y' -> the row with the lowest `value`
##   * any other status -> the whole run is kept (sorted by `value`)
# A new run starts wherever `status` differs from the previous row; the
# cumulative sum of those change points gives every run its own id.
run_id = (df.status != df.status.shift()).cumsum()

# Collect the selected rows and concatenate once at the end. Appending to a
# DataFrame inside the loop re-copies everything on each pass and starts from
# an empty frame, which degrades dtypes and is deprecated in recent pandas.
picked = []
# The key is passed as a Series (not a one-element list) so group keys stay
# plain scalars on every pandas version.
for _, g in df.groupby(run_id):
    # Stable sort makes ties deterministic: among equal values the original
    # row order is preserved, so 'X' picks the latest of the tied maxima and
    # 'Y' picks the earliest of the tied minima.
    g = g.sort_values(by='value', kind='stable')
    label = g.status.values[0]
    if label == 'X':
        ## keep the highest value for X
        g = g.iloc[[-1]]
    elif label == 'Y':
        ## keep the lowest value for Y
        g = g.iloc[[0]]
    picked.append(g)

if picked:
    df_new = pd.concat(picked).reset_index(drop=True)
else:
    # No rows at all: fall back to an empty frame with the same columns.
    df_new = pd.DataFrame(columns=df.columns)
def inp(x):
    """Return ``x.value`` when ``x.time`` appears in ``df_new.time``.

    Args:
        x: A row-like object exposing ``time`` and ``value`` attributes
            (for example a row Series handed over by ``DataFrame.apply``
            with ``axis=1``).

    Returns:
        ``x.value`` if ``x.time`` is among the values of the module-level
        ``df_new.time`` column, otherwise ``None``.
    """
    # `df_new` is looked up at call time, so it must exist at module level
    # before this function is invoked.
    if x.time in df_new.time.values:
        return x.value
    return None
# Copy `value` into `value_1` only on rows whose `time` appears in
# `df_new.time`; every other row starts out as NaN. This is the vectorised
# form of applying the membership test row by row.
df["value_1"] = df["value"].where(df["time"].isin(df_new["time"]))
# The first row always takes its own value, so the forward fill below has a
# starting point.
df.loc[:0, 'value_1'] = df.head(1).value
# Each remaining NaN takes the `value_1` of the row above it.
# (`fillna(method='ffill')` is deprecated in recent pandas; `ffill()` is the
# supported spelling.)
df["value_1"] = df["value_1"].ffill()
print(df)
# Stop the script here. `exit()` is a helper injected by the `site` module
# and is not guaranteed to exist; raising SystemExit has the same effect.
raise SystemExit