[파이썬] #11 - pandas 다루기
"""
Created on Mon Dec 9 20:13:36 2019
pandas 는 데이터 분석을 위해 사용하는 라이브러리 패키지
데이터의 간단한 통계 정보 및 시각화를 위한 다양한 기능제공
- 손쉬운 파일 입출력
- 다양한 파일 포맷 지원
#pip install pandas
#conda install pandas
"""
import pandas as pd
# Build a Series from the odd numbers 1, 3, 5, 7, 9. No index is passed,
# so pandas labels the values with the default positions 0..4.
series_data = [*range(1, 10, 2)]
s = pd.Series(data=series_data)

# Show the object's type first, then its contents.
print(f'type(s) -> {type(s)}', s, sep='\n')
"""
Created on Mon Dec 9 20:38:09 2019
"""
import pandas as pd
# Column-oriented source data: every key becomes one DataFrame column.
# The 2020 row has no GDP figures, so those two cells are None (missing).
data = {}
data['year'] = [2017, 2018, 2019, 2020]
data['GDP Rate'] = [1.8, 3.1, 3.0, None]
data['GDP'] = ['1.637M', '1.859M', '2.237M', None]

df = pd.DataFrame(data=data)

# Show the object's type first, then the table itself.
print(f'type(df) -> {type(df)}', df, sep='\n')
import pandas as pd
# Three complete rows of sample data; every key becomes one DataFrame column.
data = {
    'year': [2017, 2018, 2019],
    'GDP Rate': [1.8, 3.1, 3.0],
    'GDP': ['1.637M', '1.859M', '2.237M'],
}
df = pd.DataFrame(data)
print(f'type(df) -> {type(df)}')
print(df)

# Column access: attribute style works for identifier-like names, while a
# name containing a space ('GDP Rate') needs bracket syntax.
print(df.GDP)
print(df.year)
print(df['GDP Rate'])

# Comparing a column yields a boolean Series, which can be used as a row mask.
print(df.year >= 2018)
print("="*30)
print(df[df.year >= 2018])
print("="*30)
print(df['GDP'][df.year >= 2018])
print("="*30)
print(df[['year', 'GDP Rate']][df.year >= 2018])
print("="*30)
print(df.GDP)

# A Series does not expose str methods directly: calling startswith on it
# raises AttributeError, so fall back to the vectorised .str accessor.
try:
    print(df.GDP.startswith('1.'))
except AttributeError:
    print('AttributeError 발생')
    print(df.GDP.str.startswith('1.'))

# This compares the whole first GDP string ('1.637M') with '1', so it is False.
print(df.GDP[0] == '1')
print("="*30)
print(df.GDP.str.startswith('1.'))
print("="*30)
# to_string() renders the entire column as one piece of text, so this only
# checks the last character of that rendering.
print(df.year.to_string().endswith('9'))
# Years whose GDP string begins with '1'.
print(df.year[df.GDP.str.startswith('1')])
"""
Created on Mon Dec 9 21:15:59 2019
"""
import pandas as pd
import pandas_data as pdData
# Add a 2020 row with no GDP figures. These calls append to the objects held
# by the imported module, so the change is visible to anything else that
# reads pdData.data in the same process.
pdData.data['year'].append(2020)
pdData.data['GDP'].append(None)
pdData.data['GDP Rate'].append(None)
df = pd.DataFrame(pdData.data)
print("="*30)
print(df)

print("="*10+" df.info() "+"="*10)
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
df.info()

print("="*10+" df.describe() "+"="*10)
print(df.describe())

# Basic aggregates over the year column.
print(df.year.sum())
print(df.year.mean())
print(df.year.max())
print(df.year.min())

# Select the row(s) holding the earliest year.
minYear = df.year.min()
print(df[df.year == minYear])

print("="*10+" df.value_counts() "+"="*10)
print(df['GDP Rate'].value_counts())
# Dividing by len(df) keeps the missing row in the denominator.
print(df['GDP Rate'].value_counts()/len(df))
"""
Created on Mon Dec 9 21:37:47 2019
"""
import pandas as pd
import pandas_data as pdData
# Show the raw data object as imported, before it is wrapped in a DataFrame.
print(pdData.data)
df = pd.DataFrame(data=pdData.data)

# Print the full table, then its first two rows, then its last row.
print(df, df.head(n=2), df.tail(n=1), sep='\n')
"""
Created on Mon Dec 9 21:46:35 2019
"""
import pandas as pd
# Load the iris CSV; the path is relative to the current working directory.
fname = './data/iris.csv'
iris = pd.read_csv(fname)
print(iris)

# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
iris.info()
print(iris.describe())
"""
Created on Mon Dec 9 21:59:12 2019
"""
import pandas as pd
# Load the red-wine quality CSV, which is read with ';' as the delimiter.
# The path is relative to the current working directory.
fname = './data/winequality-red.csv'
wine = pd.read_csv(fname, sep=";")

# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
wine.info()
print(wine.quality.value_counts())

# Keep only the rows that carry the lowest quality score.
quality_min = wine.quality.min()
low_quality = wine[wine.quality == quality_min]
low_quality.info()

# to_csv option tour. Every call targets the same path, so each one
# overwrites the previous file and only the last call's output remains.
low_quality.to_csv('./data/low_quality.csv')  # defaults: index column included
low_quality.to_csv('./data/low_quality.csv', index=False)  # omit the index
low_quality.to_csv('./data/low_quality.csv', index=False, sep=';')  # ';' delimiter
low_quality.to_csv('./data/low_quality.csv', index=False, header=False)  # no header row
low_quality.to_csv('./data/low_quality.csv', index=False, encoding='utf-8')  # explicit encoding
댓글
댓글 쓰기