In [1]:
import pandas as pd
In [2]:
# Sample roster used by the rest of the notebook: one record per student,
# each with the keys 'name', 'major' and 'sex' (in that order).
student_list = [
    dict(name=name, major=major, sex=sex)
    for name, major, sex in [
        ('John', 'Computer Science', 'male'),
        ('Nate', 'Computer Science', 'male'),
        ('Abraham', 'Physics', 'male'),
        ('Brian', 'Psychology', 'male'),
        ('Janny', 'Economics', 'female'),
        ('Yuna', 'Economics', 'female'),
        ('Jeniffer', 'Computer Science', 'female'),
        ('Edward', 'Computer Science', 'male'),
        ('Zara', 'Psychology', 'female'),
        ('Wendy', 'Economics', 'female'),
        ('Sera', 'Psychology', 'female'),
    ]
]

# Pin the column order explicitly instead of relying on dict key order.
df = pd.DataFrame(student_list, columns=['name', 'major', 'sex'])
df
Out[2]:
| | name | major | sex |
---|---|---|---|
0 | John | Computer Science | male |
1 | Nate | Computer Science | male |
2 | Abraham | Physics | male |
3 | Brian | Psychology | male |
4 | Janny | Economics | female |
5 | Yuna | Economics | female |
6 | Jeniffer | Computer Science | female |
7 | Edward | Computer Science | male |
8 | Zara | Psychology | female |
9 | Wendy | Economics | female |
10 | Sera | Psychology | female |
In [3]:
# Group the students by their major.
groupby_major = df.groupby(by='major')
In [4]:
# Show which row index labels fall under each major (group key -> row labels).
groupby_major.groups
Out[4]:
{'Computer Science': [0, 1, 6, 7], 'Economics': [4, 5, 9], 'Physics': [2], 'Psychology': [3, 8, 10]}
In [5]:
# Pretty-print every major: a "<major>:<row count>" header line, the rows that
# belong to the group, then a blank separator line.
for name, group in groupby_major:
    # An f-string produces the same text as name + ":" + str(len(group)) for
    # string keys, and does not raise TypeError when the group key is not a str.
    print(f"{name}:{len(group)}")
    print(group)
    print()
Computer Science:4
       name             major     sex
0      John  Computer Science    male
1      Nate  Computer Science    male
6  Jeniffer  Computer Science  female
7    Edward  Computer Science    male

Economics:3
    name      major     sex
4  Janny  Economics  female
5   Yuna  Economics  female
9  Wendy  Economics  female

Physics:1
      name    major   sex
2  Abraham  Physics  male

Psychology:3
     name       major     sex
3   Brian  Psychology    male
8    Zara  Psychology  female
10   Sera  Psychology  female
In [8]:
# Count the students in each major and collect the counts in a new DataFrame.
# reset_index() moves the group key ('major') out of the index into an ordinary
# column and gives the result a fresh 0..n-1 index; the counts land in 'count'.
df_major_cnt = groupby_major.size().reset_index(name='count')
df_major_cnt
Out[8]:
| | major | count |
---|---|---|
0 | Computer Science | 4 |
1 | Economics | 3 |
2 | Physics | 1 |
3 | Psychology | 3 |
In [9]:
# Group the students by sex.
groupby_sex = df.groupby('sex')

# For each group print a "<sex>:<row count>" header line, the rows that belong
# to the group, then a blank separator line.
for name, group in groupby_sex:
    # An f-string produces the same text as name + ":" + str(len(group)) for
    # string keys, and does not raise TypeError when the group key is not a str.
    print(f"{name}:{len(group)}")
    print(group)
    print()
female:6
        name             major     sex
4      Janny         Economics  female
5       Yuna         Economics  female
6   Jeniffer  Computer Science  female
8       Zara        Psychology  female
9      Wendy         Economics  female
10      Sera        Psychology  female

male:5
      name             major   sex
0     John  Computer Science  male
1     Nate  Computer Science  male
2  Abraham           Physics  male
3    Brian        Psychology  male
7   Edward  Computer Science  male
In [ ]:
'데이터 전처리(Python Pandas)' 카테고리의 다른 글
Pandas 기초 10 (널 데이터 변경) (0) | 2021.01.27 |
---|---|
Pandas 기초 9 (중복데이터 삭제) (0) | 2021.01.27 |
Pandas 기초7 (데이터프레임 생성 및 추가) (0) | 2021.01.26 |
Pandas 기초 5 (데이터프레임 행,열 삭제) (0) | 2021.01.26 |
Pandas 기초 4(데이터프레임 필터링) (0) | 2021.01.26 |