pandas缺失值处理

2018-05-18 15:01:01 作者: Frank_07 来源: [链接] 已有: 3528人学习过
import pandas as pd
import numpy as np
df=pd.DataFrame(np.random.rand(5,6))
df

          0         1         2         3         4         5
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017  0.710527  0.361684  0.094026  0.895336  0.848446
2  0.552851  0.456602  0.410653  0.835223  0.769456  0.803724
3  0.392067  0.397841  0.393220  0.745361  0.360859  0.383625
4  0.820769  0.893594  0.312887  0.378115  0.584261  0.214013
# Make a few areas have NaN values
df.iloc[1:3,1] = np.nan
df.iloc[3:,3] = np.nan
df.iloc[2,5]=np.nan
df

          0         1         2         3         4         5
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017       NaN  0.361684  0.094026  0.895336  0.848446
2  0.552851       NaN  0.410653  0.835223  0.769456       NaN
3  0.392067  0.397841  0.393220       NaN  0.360859  0.383625
4  0.820769  0.893594  0.312887       NaN  0.584261  0.214013
df.isnull()

       0      1      2      3      4      5
0  False  False  False  False  False  False
1  False   True  False  False  False  False
2  False   True  False  False  False   True
3  False  False  False   True  False  False
4  False  False  False   True  False  False
#显示存在缺失值的行列,清楚地确定缺失值的位置
#注意:含有多个缺失值的行会按缺失值个数重复出现(如输出中的第2行);如需每行只显示一次,可用 df[df.isnull().any(axis=1)]
df[df.isnull().values==True]

.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }


          0         1         2         3         4         5
1  0.200017       NaN  0.361684  0.094026  0.895336  0.848446
2  0.552851       NaN  0.410653  0.835223  0.769456       NaN
2  0.552851       NaN  0.410653  0.835223  0.769456       NaN
3  0.392067  0.397841  0.393220       NaN  0.360859  0.383625
4  0.820769  0.893594  0.312887       NaN  0.584261  0.214013
#填充缺失数据
df.fillna(0)

.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }


          0         1         2         3         4         5
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017  0.000000  0.361684  0.094026  0.895336  0.848446
2  0.552851  0.000000  0.410653  0.835223  0.769456  0.000000
3  0.392067  0.397841  0.393220  0.000000  0.360859  0.383625
4  0.820769  0.893594  0.312887  0.000000  0.584261  0.214013
#根据前一个值进行填充(axis=0:沿列方向向下填充,即用上一行的值;axis=1:沿行方向向右填充,即用左侧一列的值)
#method : {‘backfill’, ‘bfill’, ‘pad’, ‘ffill’, None}, default None
#注:较新版本的 pandas(2.1 起)已弃用 fillna 的 method 参数,可改用 df.ffill(axis=0)
df.fillna(axis=0,method='ffill')

          A         B         C         D         E         F
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017  0.546635  0.361684  0.094026  0.895336  0.848446
2  0.552851  0.546635  0.410653  0.835223  0.769456  0.848446
3  0.392067  0.397841  0.393220  0.835223  0.360859  0.383625
4  0.820769  0.893594  0.312887  0.835223  0.584261  0.214013
df.columns=['A','B','C','D','E','F']
df

          A         B         C         D         E         F
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017       NaN  0.361684  0.094026  0.895336  0.848446
2  0.552851       NaN  0.410653  0.835223  0.769456       NaN
3  0.392067  0.397841  0.393220       NaN  0.360859  0.383625
4  0.820769  0.893594  0.312887       NaN  0.584261  0.214013
#Replace all NaN elements in columns 'A', 'B', 'C', 'D', 'E', and 'F' with 0, 1, 2, 3, 4, and 5 respectively.
values = {'A': 0, 'B': 1, 'C': 2, 'D': 3,'E':4,'F':5}
df.fillna(value=values)

          A         B         C         D         E         F
0  0.399916  0.546635  0.729255  0.992735  0.747917  0.157603
1  0.200017  1.000000  0.361684  0.094026  0.895336  0.848446
2  0.552851  1.000000  0.410653  0.835223  0.769456  5.000000
3  0.392067  0.397841  0.393220  3.000000  0.360859  0.383625
4  0.820769  0.893594  0.312887  3.000000  0.584261  0.214013


标签(TAG)pandas  

分享到 :

0条评论 添加新评论

后发表评论