import pandas as pd
import numpy as np
df=pd.DataFrame(np.random.rand(5,6))
df
| 0 | 1 | 2 | 3 | 4 | 5 |
---|
0 | 0.399916 | 0.546635 | 0.729255 | 0.992735 | 0.747917 | 0.157603 |
---|
1 | 0.200017 | 0.710527 | 0.361684 | 0.094026 | 0.895336 | 0.848446 |
---|
2 | 0.552851 | 0.456602 | 0.410653 | 0.835223 | 0.769456 | 0.803724 |
---|
3 | 0.392067 | 0.397841 | 0.393220 | 0.745361 | 0.360859 | 0.383625 |
---|
4 | 0.820769 | 0.893594 | 0.312887 | 0.378115 | 0.584261 | 0.214013 |
---|
# Make a few areas have NaN values
df.iloc[1:3,1] = np.nan
df.iloc[3:,3] = np.nan
df.iloc[2,5]=np.nan
df
| 0 | 1 | 2 | 3 | 4 | 5 |
---|
0 | 0.399916 | 0.546635 | 0.729255 | 0.992735 | 0.747917 | 0.157603 |
---|
1 | 0.200017 | NaN | 0.361684 | 0.094026 | 0.895336 | 0.848446 |
---|
2 | 0.552851 | NaN | 0.410653 | 0.835223 | 0.769456 | NaN |
---|
3 | 0.392067 | 0.397841 | 0.393220 | NaN | 0.360859 | 0.383625 |
---|
4 | 0.820769 | 0.893594 | 0.312887 | NaN | 0.584261 | 0.214013 |
---|
df.isnull()
| 0 | 1 | 2 | 3 | 4 | 5 |
---|
0 | False | False | False | False | False | False |
---|
1 | False | True | False | False | False | False |
---|
2 | False | True | False | False | False | True |
---|
3 | False | False | False | True | False | False |
---|
4 | False | False | False | True | False | False |
---|
# Show the rows that contain missing values, to pinpoint where the missing values are.
# Note: a row with more than one NaN is listed once per NaN (row 2 appears twice below);
# df[df.isnull().any(axis=1)] lists each such row only once.
df[df.isnull().values==True]
| 0 | 1 | 2 | 3 | 4 | 5 |
---|
1 | 0.200017 | NaN | 0.361684 | 0.094026 | 0.895336 | 0.848446 |
---|
2 | 0.552851 | NaN | 0.410653 | 0.835223 | 0.769456 | NaN |
---|
2 | 0.552851 | NaN | 0.410653 | 0.835223 | 0.769456 | NaN |
---|
3 | 0.392067 | 0.397841 | 0.393220 | NaN | 0.360859 | 0.383625 |
---|
4 | 0.820769 | 0.893594 | 0.312887 | NaN | 0.584261 | 0.214013 |
---|
# Fill in the missing data
df.fillna(0)
| 0 | 1 | 2 | 3 | 4 | 5 |
---|
0 | 0.399916 | 0.546635 | 0.729255 | 0.992735 | 0.747917 | 0.157603 |
---|
1 | 0.200017 | 0.000000 | 0.361684 | 0.094026 | 0.895336 | 0.848446 |
---|
2 | 0.552851 | 0.000000 | 0.410653 | 0.835223 | 0.769456 | 0.000000 |
---|
3 | 0.392067 | 0.397841 | 0.393220 | 0.000000 | 0.360859 | 0.383625 |
---|
4 | 0.820769 | 0.893594 | 0.312887 | 0.000000 | 0.584261 | 0.214013 |
---|
# Fill each NaN from the previous value along the given axis (axis=0: down each column, axis=1: across each row).
# method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
df.fillna(axis=0,method='ffill')
| 0 | 1 | 2 | 3 | 4 | 5 |
---|
0 | 0.399916 | 0.546635 | 0.729255 | 0.992735 | 0.747917 | 0.157603 |
---|
1 | 0.200017 | 0.546635 | 0.361684 | 0.094026 | 0.895336 | 0.848446 |
---|
2 | 0.552851 | 0.546635 | 0.410653 | 0.835223 | 0.769456 | 0.848446 |
---|
3 | 0.392067 | 0.397841 | 0.393220 | 0.835223 | 0.360859 | 0.383625 |
---|
4 | 0.820769 | 0.893594 | 0.312887 | 0.835223 | 0.584261 | 0.214013 |
---|
df.columns=['A','B','C','D','E','F']
df
| A | B | C | D | E | F |
---|
0 | 0.399916 | 0.546635 | 0.729255 | 0.992735 | 0.747917 | 0.157603 |
---|
1 | 0.200017 | NaN | 0.361684 | 0.094026 | 0.895336 | 0.848446 |
---|
2 | 0.552851 | NaN | 0.410653 | 0.835223 | 0.769456 | NaN |
---|
3 | 0.392067 | 0.397841 | 0.393220 | NaN | 0.360859 | 0.383625 |
---|
4 | 0.820769 | 0.893594 | 0.312887 | NaN | 0.584261 | 0.214013 |
---|
# Replace all NaN elements in columns 'A', 'B', 'C', 'D', 'E', and 'F' with 0, 1, 2, 3, 4, and 5 respectively.
values = {'A': 0, 'B': 1, 'C': 2, 'D': 3,'E':4,'F':5}
df.fillna(value=values)
| A | B | C | D | E | F |
---|
0 | 0.399916 | 0.546635 | 0.729255 | 0.992735 | 0.747917 | 0.157603 |
---|
1 | 0.200017 | 1.000000 | 0.361684 | 0.094026 | 0.895336 | 0.848446 |
---|
2 | 0.552851 | 1.000000 | 0.410653 | 0.835223 | 0.769456 | 5.000000 |
---|
3 | 0.392067 | 0.397841 | 0.393220 | 3.000000 | 0.360859 | 0.383625 |
---|
4 | 0.820769 | 0.893594 | 0.312887 | 3.000000 | 0.584261 | 0.214013 |
---|
0条评论 添加新评论