Pandas
Series
# pandas Series quick reference.
# Assumes the conventional alias is in scope: import pandas as pd

# Build a Series from a list of values with an explicit label index.
data = [1, 2, 3, 4]
index = list('abcd')
s1 = pd.Series(data, index=index, name='books')

# Append: concatenate another Series onto s1 (labels may repeat, e.g. "a").
# Series.append was removed in pandas 2.0; pd.concat is the replacement.
s2 = pd.concat([s1, pd.Series({"e": 100, "a": 200})])

# Add values that share the same index label.
# Labels present on only one side produce NaN in the result.
s3 = s1.add(pd.Series({"e": 100, "a": 200}))

# Sorting (each call returns a new Series; s2 itself is unchanged).
s2.sort_index()
s2.sort_values(ascending=False)  # descending order

# Drop the index: replace the labels with a fresh 0..n-1 integer index.
s4 = s2.reset_index(drop=True)

# Replace the index: labels missing from s3 are filled with NaN.
idx = "this is a test".split()
s3.reindex(idx)

# Basic statistics.
s1.max()
s1.idxmax()  # index label of the maximum value
s1.count()
s1.sum()
s1.var()
s1.mean()
s2.describe()  # table of summary statistics
DataFrame
# pandas DataFrame quick reference.
# Assumes the conventional aliases are in scope:
#   import numpy as np
#   import pandas as pd
# NOTE(review): df1, df2, df3 and colnames are not defined in this snippet;
# they are assumed to exist already — confirm against the full notes.

# --- Construction ---
pd.DataFrame()  # empty DataFrame

# Build a frame from a list of rows (pandas has no top-level pd.append).
l = [[1, 2, 3], [4, 5, 6]]
df = pd.DataFrame(l)

# Append a Series as a new row. ignore_index=True regenerates the row index
# automatically, which avoids duplicate index labels.
s = pd.Series({"x": 1, "y": 2}, name='a')
pd.concat([df, s.to_frame().T], ignore_index=True)

# Build a frame from a 2-D array with explicit column and row labels.
data = np.array([
    [5, 5, 3, 3, 4],
    [3, 4, 5, 5, 4],
    [3, 4, 3, 4, 5],
    [5, 5, 3, 4, 4],
])
df = pd.DataFrame(
    data,
    columns=['The Shawshank Redemption', 'Forrest Gump',
             'Avengers: Endgame', 'Iron Man', 'Titanic'],
    index=['user1', 'user2', 'user3', 'user4'],
)
print(df['Iron Man'])  # single label -> returns a Series
print(df[['Iron Man', 'Titanic']])  # list of labels -> returns a DataFrame

# --- Rename columns ---
df3.rename(columns={'value_x': 'valueX', 'value_y': 'valueY'}, inplace=True)

# --- Add a column, two ways ---
print('增加一列,两种方式')
# 1) reindex with the extra column name, then fill it via eval
colnames.append('memo')
df5 = df3.reindex(columns=colnames)
df5.eval('memo=lkey+rkey', inplace=True)
# 2) direct assignment
df5['total'] = df5['valueX'] + df5['valueY']

# --- Grouping and aggregation ---
print(df2.groupby('grade').count())
print(df2.groupby('grade').sum())
print(df2.groupby('grade')['Iron Man'].count())
print(df2.groupby('grade').count()['Iron Man'])

# --- Index labels as a list ---
df1.index[:3].tolist()

# --- Slice the first rows by position ---
df1.iloc[:3]

# --- Update every value in a column ---
df2['grade'] = 2020

# --- Update a single cell (row label, column label) ---
df2.loc['user1', 'grade'] = 2019
df2.at['user2', 'grade'] = 2019
df2.at['user3', 'grade'] = 2021
df2.at['user5', 'grade'] = 2021

# --- Select one row ---
df1.loc['user1']

# --- Select several rows ---
df1.loc[['user1', 'user2']]

# --- Select one column ---
df1['Iron Man']

# --- Select several columns ---
df1[['Iron Man', 'Titanic']]

# --- SQL-like: select name, age from staff where genda = 'm' ---
# Comparison needs ==; a single = inside the brackets is a syntax error.
# NOTE(review): the mask is built from `df` while the selection is on `df1` —
# presumably the same frame; confirm.
df1[df.genda == 'm'][['name', 'age']]

# --- SQL-like: update staff set age = age + 1 where genda = 'm' and age = 21 ---
# Each condition is parenthesised because & binds tighter than ==.
df1.loc[(df.genda == 'm') & (df.age == 21), 'age'] = df1['age'] + 1

# --- Add a summary row (per-column sums) ---
df1.loc['sum'] = df.apply(lambda x: x.sum())

# --- Add a summary column (per-row sums) ---
df1['sum'] = df.apply(lambda x: x.sum(), axis=1)