Pandas的统计计算
In [212]:
import numpy as np
import pandas as pd
In [216]:
# Small demo frame: four labelled rows, two float columns, with NaN
# entries sprinkled in so the statistics below show how missing values
# are treated.
df = pd.DataFrame(
    {
        "one": [1.4, 7.1, np.nan, 0.75],
        "two": [np.nan, -4.5, np.nan, -1.3],
    },
    index=list("abcd"),
)
df
Out[216]:
| | one | two |
---|---|---|
a | 1.40 | NaN |
b | 7.10 | -4.5 |
c | NaN | NaN |
d | 0.75 | -1.3 |
In [217]:
# Column totals: collapse the rows (axis=0) and return a Series indexed
# by column name. NaN entries are skipped rather than propagated.
df.sum(axis=0)
Out[217]:
one 9.25 two -5.80 dtype: float64
In [218]:
# Row totals: add across the columns of each row. NaN is skipped, so a
# row that is entirely NaN (row 'c') sums to 0.0.
df.sum(axis="columns")
Out[218]:
a 1.40 b 2.60 c 0.00 d -0.55 dtype: float64
In [219]:
# For each column, return the row label at which that column reaches its
# maximum (NaN entries are ignored). This works column-by-column
# (axis=0), not row-by-row; pass axis=1 to get, for each row, the column
# name holding the row's maximum instead.
df.idxmax(axis=0)
Out[219]:
one b two d dtype: object
In [222]:
# Running total down each column. A NaN cell stays NaN in the result,
# but it does not reset the accumulation: the next valid value continues
# from the previous running total.
df.cumsum(axis=0)
Out[222]:
| | one | two |
---|---|---|
a | 1.40 | NaN |
b | 8.50 | -4.5 |
c | NaN | NaN |
d | 9.25 | -5.8 |
In [223]:
# Summary statistics per column: count of non-NaN values, mean, standard
# deviation, min, quartiles (25% / 50% / 75%) and max.
df.describe()
Out[223]:
| | one | two |
---|---|---|
count | 3.000000 | 2.000000 |
mean | 3.083333 | -2.900000 |
std | 3.493685 | 2.262742 |
min | 0.750000 | -4.500000 |
25% | 1.075000 | -3.700000 |
50% | 1.400000 | -2.900000 |
75% | 4.250000 | -2.100000 |
max | 7.100000 | -1.300000 |