-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollege_majors
38 lines (30 loc) · 2 KB
/
college_majors
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#Use the /Total column to calculate the number of people who fall under each /Major_category and
#store the result as a separate dictionary for each dataset. The key for the dictionary should be
#the /Major_category and the value should be the total count. For the counts from /all_ages, store
#the results as a dictionary named /all_ages_major_categories and for the counts from /recent_grads,
#store the results as a dictionary named /recent_grads_major_categories.
import pandas as pd
import numpy as np
#I don't have this data locally because the download link is broken, but the code should work
#within the Dataquest challenge browser.
#Create a function to summarize the Major categories
def _total_by_major_category(csv_path: str) -> dict:
    """Return a ``{Major_category: summed Total}`` dict for one CSV file."""
    frame = pd.read_csv(csv_path)
    # Selecting the single "Total" column before summing yields a Series
    # indexed by Major_category, so .to_dict() maps category -> total.
    # (pivot_table would return a DataFrame, whose dict form is keyed by
    # column name rather than by category.)
    return frame.groupby("Major_category")["Total"].sum().to_dict()


def summarize_major_categories(
    all_ages_path: str = "all-ages.csv",
    recent_grads_path: str = "recent-grads.csv",
) -> tuple:
    """Sum the ``Total`` column per ``Major_category`` for both datasets.

    Args:
        all_ages_path: Path of the all-ages CSV file. Defaults to the
            filename the original script read.
        recent_grads_path: Path of the recent-grads CSV file. Defaults to
            the filename the original script read.

    Returns:
        A 2-tuple ``(all_ages_major_categories,
        recent_grads_major_categories)``. Each element is a dict whose keys
        are the ``Major_category`` strings and whose values are the sum of
        ``Total`` over the rows in that category.

    Note:
        Both results are returned together; a second ``return`` statement
        placed after the first can never execute, so the recent-grads
        result would otherwise be lost.
    """
    all_ages_major_categories = _total_by_major_category(all_ages_path)
    recent_grads_major_categories = _total_by_major_category(recent_grads_path)
    return all_ages_major_categories, recent_grads_major_categories