import pandas as pd


# Load the bank CSV (stored under /content/drive/MyDrive) into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Study/Pytorch/PYTORCH_NOTEBOOKS/00-Crash-Course-Topics/01-Crash-Course-Pandas/bank.csv',
)


# Show only the first 5 rows of the DataFrame.
df.head(n=5)


# Mean of the 'age' column (missing values, if any, are skipped).
df['age'].mean(skipna=True)

41.17009511170095


# Find the marital status of the youngest person.
# idxmin() returns the index label of the first row holding the minimum age,
# so when several people share that age only the first one is reported.
# A single .loc[row_label, column] lookup is explicitly label-based and
# avoids chained df[column][row] indexing.
df.loc[df['age'].idxmin(), 'marital']

'single'


# Walk-through
# Find the lowest age in the dataset (missing values, if any, are skipped).
df['age'].min(skipna=True)

19


# List every row whose age is 19, the minimum age found above.
df.loc[df['age'].eq(19)]


# 503 is the index label of the first age-19 row returned by the filter above.
# .loc makes the lookup explicitly label-based instead of relying on how a
# Series interprets a bare integer key.
df.loc[503, 'marital']

'single'


# Same lookup in one step: idxmin() yields the label of the first row with the
# minimum age, and .loc reads that row's 'marital' value by label.
df.loc[df['age'].idxmin(), 'marital']

'single'


# Number of distinct job categories (missing values are not counted).
df['job'].nunique(dropna=True)

12


# Head count per job category, largest group first.
df['job'].value_counts(ascending=False)

management       969
blue-collar      946
technician       768
admin.           478
services         417
retired          230
self-employed    183
entrepreneur     168
unemployed       128
housemaid        112
student           84
unknown           38
Name: job, dtype: int64


# Percentage of people in the dataset who are married.
# The mean of a boolean mask is the fraction of True values, so there is no
# need to build a filtered copy of the DataFrame just to count its rows.
(df['marital'] == 'married').mean() * 100

61.86684361866843


# Walk-through
# Build a boolean mask that is True wherever 'marital' equals 'married'.
df['marital'].eq('married')

0        True
1        True
2       False
3        True
4        True
        ...  
4516     True
4517     True
4518     True
4519     True
4520    False
Name: marital, Length: 4521, dtype: bool


# Keep only the rows where the mask is True, i.e. the married people.
df.loc[df['marital'].eq('married')]


# Number of married people: row count of the filtered frame.
df.loc[df['marital'].eq('married')].shape[0]

2797


# Married share as a percentage: 100 * (married rows) / (all rows).
100 * df.loc[df['marital'].eq('married')].shape[0] / df.shape[0]

61.86684361866843


# Use .apply to build a 'marital code' column holding the first letter of each
# marital status (e.g. 's' for single, 'm' for married), then display the frame.
df['marital code'] = df['marital'].apply(
    lambda marital_status: marital_status[0]
)
df


# Largest value in the 'duration' column (missing values, if any, are skipped).
df['duration'].max(skipna=True)

3025


# Education breakdown of unemployed people.
# One .loc[row_mask, column] call selects the rows and the column together,
# instead of filtering the whole frame and then indexing the result again.
df.loc[df['job'] == 'unemployed', 'education'].value_counts()

secondary    68
tertiary     32
primary      26
unknown       2
Name: education, dtype: int64


# Mean age of unemployed people.
# One .loc[row_mask, column] call selects the rows and the column together,
# instead of filtering the whole frame and then indexing the result again.
df.loc[df['job'] == 'unemployed', 'age'].mean()

40.90625

	age	job	marital	education	default	balance	housing	loan	contact	day	month	duration	campaign	pdays	previous	poutcome	y
0	30	unemployed	married	primary	no	1787	no	no	cellular	19	oct	79	1	-1	0	unknown	no
1	33	services	married	secondary	no	4789	yes	yes	cellular	11	may	220	1	339	4	failure	no
2	35	management	single	tertiary	no	1350	yes	no	cellular	16	apr	185	1	330	1	failure	no
3	30	management	married	tertiary	no	1476	yes	yes	unknown	3	jun	199	4	-1	0	unknown	no
4	59	blue-collar	married	secondary	no	0	yes	no	unknown	5	may	226	1	-1	0	unknown	no

	age	job	marital	education	default	balance	housing	loan	contact	day	month	duration	campaign	pdays	poutcome	y
503	19	student	single	primary	no	103	no	no	cellular	10	jul	104	2	-1	unknown	yes
1900	19	student	single	unknown	no	0	no	no	cellular	11	feb	123	3	-1	unknown	no
2780	19	student	single	secondary	no	302	no	no	cellular	16	jul	205	1	-1	unknown	yes
3233	19	student	single	unknown	no	1169	no	no	cellular	6	feb	463	18	-1	unknown	no

[Pandas 기초] Section 04. Pandas Exercise