-
sort_values和reset_index
# Sort the rows by the "Age" column in descending order; every row keeps
# the index label it had before sorting.
new_titanic_survival = titanic_survival.sort_values("Age", ascending=False)
print(new_titanic_survival.head(10))

# Drop the old index labels so the sorted rows are renumbered from 0.
titanic_reindexed = new_titanic_survival.reset_index(drop=True)
print(titanic_reindexed.head(10))
运行结果:
-
自定义函数
def hundredth_row(column):
    """Return the hundredth item (position 99) of a Series.

    Args:
        column: A pandas Series; ``DataFrame.apply`` passes one per column.

    Returns:
        The value stored at positional index 99.

    Raises:
        IndexError: If ``column`` holds fewer than 100 items.
    """
    return column.iloc[99]


# Collect the hundredth item from each column. The result is stored under
# its own name so the function above is not overwritten by its own output.
hundredth_items = titanic_survival.apply(hundredth_row)
print(hundredth_items)
运行结果:
-
统计每列缺失值(空值)的个数
def not_null_count(column):
    """Return the number of null entries in ``column``.

    NOTE: despite its name, this function counts the null (missing)
    entries, not the non-null ones.
    """
    missing_mask = pd.isnull(column)
    # Each True in the mask is one null entry, so the sum is the count.
    return int(missing_mask.sum())


# One null count per column of the DataFrame.
column_null_count = titanic_survival.apply(not_null_count)
print(column_null_count)
运行结果:
-
练习
# Passing axis=1 makes DataFrame.apply() call the function once per row
# instead of once per column.
def which_class(row):
    """Translate a row's 'Pclass' value into a readable class label.

    Returns "Unknown" when the value is null, the matching label for
    1, 2 or 3, and None for any other value.
    """
    pclass = row['Pclass']
    if pd.isnull(pclass):
        return "Unknown"

    labels = {
        1: "First Class",
        2: "Second Class",
        3: "Third Class",
    }
    # .get() yields None for values outside the table.
    return labels.get(pclass)


classes = titanic_survival.apply(which_class, axis=1)
print(classes)
运行结果:
-
连续值离散化
def is_minor(row):
    """Return True when the row's "Age" is below 18, otherwise False."""
    return bool(row["Age"] < 18)


minors = titanic_survival.apply(is_minor, axis=1)


def generate_age_label(row):
    """Bucket the row's "Age" into "unknown", "minor" or "adult".

    A null age gives "unknown"; an age below 18 gives "minor"; any other
    age gives "adult".
    """
    age = row["Age"]
    # Null ages are handled first, before any numeric comparison.
    if pd.isnull(age):
        return "unknown"
    return "minor" if age < 18 else "adult"


age_labels = titanic_survival.apply(generate_age_label, axis=1)
print(age_labels)
运行结果:
-
添加列
# Store the per-row age labels as a new column of the DataFrame.
titanic_survival["age_labels"] = age_labels

# Mean of "Survived" within each age label ("mean" is the aggregation
# pivot_table uses by default; it is spelled out here for clarity).
age_group_survival = titanic_survival.pivot_table(
    values="Survived",
    index="age_labels",
    aggfunc="mean",
)
print(age_group_survival)
运行结果: