def f(x):
    """Return the square of ``x``."""
    return pow(x, 2)


print(f(3))

# The squaring function again, this time written as an anonymous (lambda)
# expression and bound to the name f.
f = lambda x: pow(x, 2)
print(f(3))

9
9

def f(x):
    """Return ``2*x + 3``, continuing the expression with a backslash."""
    doubled = 2 * x
    # A trailing backslash joins the next physical line to this statement.
    return doubled \
        + 3


print(f(2))

def f(x):
    """Return ``2*x + 3``, continuing the expression inside parentheses."""
    doubled = 2 * x
    # Inside parentheses an expression may span lines without a backslash.
    return (
        doubled
        + 3
    )


print(f(2))

# Inside brackets a literal can span several lines without a backslash.
lst = [
    "a",
    "b",
    "c",
]
print(lst)

7
7
['a', 'b', 'c']

import pandas as pd 
# Read the Stata file straight into a DataFrame with a single pandas call.
df = pd.read_stata("WAGE1.DTA")
# Bare expression: displays the DataFrame when this is the last line of a
# notebook cell (it has no effect in a plain script).
df

from pandas.io.stata import StataReader

# Second way to read the file: go through StataReader so that the variable
# labels stored in the .dta file can be retrieved along with the data.
# The reader is used as a context manager so the underlying file handle is
# closed as soon as both the labels and the data have been read.
# NOTE(review): the earlier pd.read_stata call spells the file name
# "WAGE1.DTA"; on a case-sensitive filesystem only one spelling can match
# the file on disk -- confirm which one is correct.
with StataReader("WAGE1.dta") as file:
    # Mapping of column name -> descriptive label.
    variables = file.variable_labels()
    # The full dataset as a DataFrame.
    df = file.read()

# Bare expression: displays the label mapping in a notebook cell.
variables

{'wage': 'average hourly earnings',
 'educ': 'years of education',
 'exper': 'years potential experience',
 'tenure': 'years with current employer',
 'nonwhite': '=1 if nonwhite',
 'female': '=1 if female',
 'married': '=1 if married',
 'numdep': 'number of dependents',
 'smsa': '=1 if live in SMSA',
 'northcen': '=1 if live in north central U.S',
 'south': '=1 if live in southern region',
 'west': '=1 if live in western region',
 'construc': '=1 if work in construc. indus.',
 'ndurman': '=1 if in nondur. manuf. indus.',
 'trcommpu': '=1 if in trans, commun, pub ut',
 'trade': '=1 if in wholesale or retail',
 'services': '=1 if in services indus.',
 'profserv': '=1 if in prof. serv. indus.',
 'profocc': '=1 if in profess. occupation',
 'clerocc': '=1 if in clerical occupation',
 'servocc': '=1 if in service occupation',
 'lwage': 'log(wage)',
 'expersq': 'exper^2',
 'tenursq': 'tenure^2'}

# Bare expression: displays the current DataFrame in a notebook cell
# (presumably the one just read through StataReader -- the latest assignment
# to df above).
df

# Within each value of "female", cut wages into five quantile-based bins
# labelled 1 through 5, and store the label in a new "group" column.
# group_keys=False keeps the original row index so the result aligns with df.
df["group"] = (
    df.groupby(["female"], group_keys=False)["wage"]
    .apply(lambda wages: pd.qcut(wages, q=5, labels=range(1, 6)))
)
# Show only the columns involved in the grouping.
df[["female", "wage", "group"]]

	wage	educ	exper	tenure	nonwhite	female	married	numdep	smsa	northcen	...	trcommpu	trade	services	profserv	profocc	clerocc	servocc	lwage	expersq	tenursq
0	3.10	11	2	0	0	1	0	2	1	0	...	0	0	0	0	0	0	0	1.131402	4	0
1	3.24	12	22	2	0	1	1	3	1	0	...	0	0	1	0	0	0	1	1.175573	484	4
2	3.00	11	2	0	0	0	0	2	0	0	...	0	1	0	0	0	0	0	1.098612	4	0
3	6.00	8	44	28	0	0	1	0	1	0	...	0	0	0	0	0	1	0	1.791759	1936	784
4	5.30	12	7	2	0	0	1	1	0	0	...	0	0	0	0	0	0	0	1.667707	49	4
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
521	15.00	16	14	2	0	1	1	2	0	0	...	0	0	0	1	1	0	0	2.708050	196	4
522	2.27	10	2	0	0	1	0	3	0	0	...	0	1	0	0	1	0	0	0.819780	4	0
523	4.67	15	13	18	0	0	1	3	0	0	...	0	0	0	0	1	0	0	1.541159	169	324
524	11.56	16	5	1	0	0	1	0	0	0	...	0	0	0	0	0	0	0	2.447551	25	1
525	3.50	14	5	4	1	1	0	2	0	0	...	0	0	0	1	0	1	0	1.252763	25	16

	wage	educ	exper	tenure	nonwhite	female	married	numdep	smsa	northcen	...	trcommpu	trade	services	profserv	profocc	clerocc	servocc	lwage	expersq	tenursq
0	3.10	11	2	0	0	1	0	2	1	0	...	0	0	0	0	0	0	0	1.131402	4	0
1	3.24	12	22	2	0	1	1	3	1	0	...	0	0	1	0	0	0	1	1.175573	484	4
2	3.00	11	2	0	0	0	0	2	0	0	...	0	1	0	0	0	0	0	1.098612	4	0
3	6.00	8	44	28	0	0	1	0	1	0	...	0	0	0	0	0	1	0	1.791759	1936	784
4	5.30	12	7	2	0	0	1	1	0	0	...	0	0	0	0	0	0	0	1.667707	49	4
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
521	15.00	16	14	2	0	1	1	2	0	0	...	0	0	0	1	1	0	0	2.708050	196	4
522	2.27	10	2	0	0	1	0	3	0	0	...	0	1	0	0	1	0	0	0.819780	4	0
523	4.67	15	13	18	0	0	1	3	0	0	...	0	0	0	0	1	0	0	1.541159	169	324
524	11.56	16	5	1	0	0	1	0	0	0	...	0	0	0	0	0	0	0	2.447551	25	1
525	3.50	14	5	4	1	1	0	2	0	0	...	0	0	0	1	0	1	0	1.252763	25	16

Day 2

BUSI 520: Python for Business Research

Kerry Back, JGSB, Rice University

Lambda functions

Line continuations

Reading and writing dataframes with pandas

Two ways to read stata dta files

What can we do with pandas?

Explore

Select

Transform

Aggregate

Sort, rank, and cut

Filter

Aggregate by group

Ask Julius