import numpy as np
import pandas as pd
import wrds
# open a database connection to WRDS (uses a .pgpass file for credentials if present)
conn = wrds.Connection()
WRDS recommends setting up a .pgpass file. You can create this file yourself at any time with the create_pgpass_file() function. Loading library list... Done
An example of pulling data from the annual Compustat table. datadate is the end of the fiscal year. We impose standard filters. See https://wrds-web.wharton.upenn.edu/wrds/demo/demoform_compustat.cfm for a full list of Compustat variable definitions. Quarterly data is also available.
# annual Compustat fundamentals: firm identifier (gvkey), fiscal year end
# (datadate), and total assets (at), with the standard screens on industry
# format, data format, population source, and consolidation level
comp = conn.raw_sql(
"""
select gvkey, datadate, at
from comp.funda
where datadate >= '2000-01-01' and at>0
and indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C'
order by gvkey, datadate
""",
date_cols=['datadate']  # parse datadate into a pandas datetime column
)
# convert string or float to int
comp.gvkey = comp.gvkey.astype(int)
def days_between(dates: pd.Series) -> pd.Series:
    """Return the number of days between consecutive entries of a datetime Series.

    The first element is NaN because there is no prior date to difference
    against (diff() yields NaT there).
    """
    # .diff() produces Timedeltas; the vectorized .dt.days accessor converts
    # them to float days, mapping NaT to NaN — no per-element lambda needed.
    return dates.diff().dt.days
comp["days"] = comp.groupby("gvkey").datadate.apply(days_between)
Set the asset-growth variable to missing if more than 720 days elapsed between consecutive datadates.
# Asset growth: percentage change in total assets within each firm.
comp["inv"] = comp.groupby("gvkey")["at"].pct_change()
# Invalidate growth computed over a gap of more than 720 days between
# annual reports (e.g., missing fiscal years for the firm).
long_gap = comp["days"].notnull() & (comp["days"] > 720)
comp["inv"] = np.where(long_gap, np.nan, comp["inv"])
Example
comp[comp.gvkey==1019].head()
gvkey | datadate | at | days | inv | |
---|---|---|---|---|---|
38 | 1019 | 2000-12-31 | 28.638 | NaN | NaN |
39 | 1019 | 2001-12-31 | 30.836 | 365.0 | 0.076751 |
40 | 1019 | 2007-12-31 | 34.180 | 2191.0 | NaN |
41 | 1019 | 2008-12-31 | 33.486 | 366.0 | -0.020304 |
42 | 1019 | 2009-12-31 | 32.678 | 365.0 | -0.024129 |
Shift all annual reports in a calendar year to June 30 of the following year.
# define date as June 30 of year following datadate
comp['date'] = pd.to_datetime(comp.datadate.map(lambda d: str(d.year+1)+'-06-30'))
# if two annual reports in one calendar year (due to change of fiscal year), keep last one
# (rows are ordered by gvkey, datadate from the SQL, so 'last' is the most recent datadate)
comp = comp.drop_duplicates(subset=['gvkey', 'date'], keep='last')
# CRSP/Compustat link table: each gvkey mapped to a permno together with the
# date range (linkdt to linkenddt) over which the link is valid
link = conn.raw_sql(
"""
select distinct gvkey, lpermno as permno, linkdt, linkenddt
from crsp.Ccmxpf_linktable
where linktype in ('LU', 'LC')
and LINKPRIM in ('P', 'C')
"""
)
# convert strings or floats to ints
link['gvkey'] = link.gvkey.astype(int)
link['permno'] = link.permno.astype(int)
# fill in missing end dates with a future date
# (a missing linkenddt indicates the link is still in effect)
link['linkenddt'] = pd.to_datetime(link.linkenddt).fillna(pd.Timestamp('21000101'))
# merge with Compustat data and keep rows with Compustat datadate between link date and link end date
comp = comp.merge(link, on='gvkey', how='inner')
comp = comp[(comp.datadate>=comp.linkdt) & (comp.datadate<=comp.linkenddt)]
comp = comp.drop(columns=['days', 'gvkey', 'datadate', 'linkdt', 'linkenddt'])
Get stock prices and returns from CRSP (Center for Research in Security Prices). We use standard filters. See http://www.crsp.com/files/data_descriptions_guide_0.pdf for a complete set of variable definitions. CRSP uses PERMCO as a permanent company identifier and PERMNO as a permanent security identifier. Some companies have multiple classes of common stock, which means multiple common stock PERMNOs can be associated with a single PERMCO. Both monthly and daily data are available.
# monthly CRSP stock file joined with header info: returns (ret) and market
# equity (me = abs(prc) * shrout — abs() because some CRSP prices are stored
# as negative values), restricted to NYSE/AMEX/Nasdaq (exchcd 1,2,3)
# ordinary common shares (shrcd 10,11), with the name record valid on the date
crsp = conn.raw_sql(
"""
select a.permno, a.permco, a.date, a.ret, abs(a.prc)*a.shrout as me, b.exchcd
from crsp.msf a inner join crsp.msenames b
on a.permno=b.permno and a.date between b.namedt and b.nameendt
and b.exchcd in (1,2,3) and b.shrcd in (10,11)
where a.date >= '2000-01-01'
order by a.permno, a.date
""",
date_cols=['date']
)
# change strings or floats to integers
for col in ['permno','permco']:
    crsp[col] = crsp[col].astype(int)
# define market equity as sum of market equities of all permnos associated
# with a permco (multiple share classes roll up to one company-level value).
# Pass the string 'sum' rather than the builtin sum: pandas dispatches the
# string to its fast cythonized groupby sum, and passing builtins to
# transform is deprecated in recent pandas versions. The result is identical.
crsp['me'] = crsp.groupby(['date','permco']).me.transform('sum')
This is always done, but there are some different ways to do it. Here, we follow some of the literature and assign a lower delisting return to Nasdaq stocks than to NYSE/AMEX stocks if the delisting return is missing and delisting was due to poor performance.
# delisting events: delisting return (dlret) and delisting code (dlstcd);
# dlstcd > 100 keeps actual delistings (presumably 100 denotes a still-active
# issue — confirm against the CRSP code documentation)
mse = conn.raw_sql(
"""
select permno, dlret, dlstcd
from crsp.mse
where event='DELIST' and dlstcd>100
order by permno
"""
)
# change string or float to int
mse['permno'] = mse.permno.astype(int)
# merge with crsp, keeping all rows of crsp
# (dlret/dlstcd attach to every monthly row of a delisted permno; the code
# below only uses them on each permno's last observation)
crsp = crsp.merge(mse, how='left', on='permno')
del mse
# series of True and False, True if it is the last date for a stock
# (rows are sorted by permno, date, so a change in permno marks the last row)
LastObs = crsp.permno != crsp.permno.shift(-1)
# series of True and False, True if delisted for poor performance
DLCode = (crsp.dlstcd==500) | ((crsp.dlstcd >=520)&(crsp.dlstcd<=584))
# -35% if no delisting return, delisted for poor performance and NYSE/AMEX
crsp['dlret'] = np.where(DLCode & crsp.dlret.isnull() & crsp.exchcd.isin([1,2]), -0.35, crsp.dlret)
# -55% if no delisting return, delisted for poor performance and Nasdaq
crsp['dlret'] = np.where(DLCode & crsp.dlret.isnull() & (crsp.exchcd==3), -0.55, crsp.dlret)
# if delisting return exists and < -1, change to -1 (cannot lose more than 100%)
# BUGFIX: the comparison must be parenthesized. The original
# `crsp.dlret.notnull() & crsp.dlret<-1` parses as
# `(crsp.dlret.notnull() & crsp.dlret) < -1` because `&` binds more
# tightly than `<` in Python, which is not the intended condition.
crsp['dlret'] = np.where(crsp.dlret.notnull() & (crsp.dlret < -1), -1, crsp.dlret)
# if last day and return exists, define return by compounding with delisting return (if exists)
crsp['ret'] = np.where(LastObs & crsp.ret.notnull(), (1+crsp.ret)*(1+crsp.dlret.fillna(0))-1, crsp.ret)
# if last day and return does not exist, define return as delisting return
crsp['ret'] = np.where(LastObs & crsp.ret.isnull(), crsp.dlret, crsp.ret)
# convert both date columns to monthly periods so the merge keys on year-month
crsp.date = crsp.date.dt.to_period('M')
comp.date = comp.date.dt.to_period('M')
# left merge: keep every CRSP month, attach Compustat data where available
df = crsp.merge(comp, on=['permno', 'date'], how='left')
# carry each annual observation forward for at most 11 additional months,
# i.e. until the next June-30-aligned observation would take over
df[['at', 'inv']] = df.groupby('permno')[['at', 'inv']].ffill(limit=11)
df.head()
permno | permco | date | ret | me | exchcd | dlret | dlstcd | at | days | inv | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10001 | 7953 | 2000-01 | -0.044118 | 19906.25 | 3.0 | 0.011583 | 233.0 | NaN | NaN | NaN |
1 | 10001 | 7953 | 2000-02 | 0.015385 | 20212.50 | 3.0 | 0.011583 | 233.0 | NaN | NaN | NaN |
2 | 10001 | 7953 | 2000-03 | -0.015758 | 19712.00 | 3.0 | 0.011583 | 233.0 | NaN | NaN | NaN |
3 | 10001 | 7953 | 2000-04 | 0.011719 | 19943.00 | 3.0 | 0.011583 | 233.0 | NaN | NaN | NaN |
4 | 10001 | 7953 | 2000-05 | -0.023166 | 19481.00 | 3.0 | 0.011583 | 233.0 | NaN | NaN | NaN |
df[df.days>720].gvkey.unique()
array([177405., 28877., 30950., 15253., 61697., 120354., 114747., 135425., 25632.])
Often one fills in a missing Compustat variable with another variable or with some calculation involving other variables. An example is the calculation of preferred stock by Fama and French when they calculate book equity. They use pstkrv if it is not missing; if it is missing, they use pstkl; if both of those are missing, they use pstk; if all three of those are missing they set it to zero.
Here is an implementation using bfill. axis=1 means fill along each row, across the columns. With axis=1, bfill pulls values from the right (later columns) toward the left. It produces a frame of the same shape that you started with. The .iloc[:,0] selects the first (zero-th) column of that frame.
# pull the three preferred-stock measures used by Fama and French:
# pstkrv (redemption value), pstkl (liquidating value), pstk (par value),
# in their order of preference
comp = conn.raw_sql(
"""
select gvkey, datadate, pstkrv, pstkl, pstk
from comp.funda
where datadate >= '2000-01-01'
and indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C'
order by gvkey, datadate
""",
date_cols=['datadate']
)
# first non-missing of pstkrv, pstkl, pstk (in that order), else zero:
# bfill(axis=1) pulls the first available value leftward into the pstkrv
# position, .iloc[:, 0] takes that first column, and fillna(0) handles
# rows where all three measures are missing
comp['preferred'] = comp[['pstkrv','pstkl','pstk']].bfill(axis=1).iloc[:, 0].fillna(0)
# rows where at least one preferred-stock measure was available
comp[comp.preferred != 0].head()
gvkey | datadate | pstkrv | pstkl | pstk | preferred | |
---|---|---|---|---|---|---|
23 | 001010 | 2000-12-31 | 11.225 | 9.600 | 9.600 | 11.225 |
24 | 001010 | 2001-12-31 | 48.070 | 48.070 | 48.100 | 48.070 |
25 | 001010 | 2002-12-31 | 70.209 | 70.209 | 70.200 | 70.209 |
65 | 001021 | 2006-06-30 | 4.744 | 4.744 | 4.744 | 4.744 |
77 | 001037 | 2000-03-31 | 2.583 | 2.583 | 2.583 | 2.583 |
comp[(comp.pstkrv.isna()) & (comp.preferred != 0)].head()
gvkey | datadate | pstkrv | pstkl | pstk | preferred | |
---|---|---|---|---|---|---|
1303 | 001429 | 2000-08-31 | NaN | NaN | 38.275 | 38.275 |
1304 | 001429 | 2001-08-31 | NaN | NaN | 38.275 | 38.275 |
2857 | 001932 | 2000-12-31 | NaN | NaN | 44.865 | 44.865 |
2858 | 001932 | 2001-12-31 | NaN | NaN | 43.629 | 43.629 |
2859 | 001932 | 2002-12-31 | NaN | 49.895 | 49.895 | 49.895 |