Step 1: Get Input Data
We assume the user already has the relevant 990 Efile data downloaded. For instructions on downloading the needed 990 Efile data, see the project's data-download instructions.
We will use a subset of a test set that was already created. See the script `data-raw/01-get-example-data.R` for details on how this data set was created.
Step 2: Get Features Matrix
Use the `get_features()` function to clean the data and transform it into a features matrix.
features_example <- get_features(dat_example)
head(features_example) %>% data.table()
#> OBJECTID
#> 1: 3.380285e-295
#> 2: 3.367334e-295
#> 3: 3.380176e-295
#> 4: 3.418468e-295
#> 5: 3.383922e-295
#> 6: 3.418376e-295
#> URL
#> 1: https://s3.amazonaws.com/irs-form-990/201432979349300843_public.xml
#> 2: https://s3.amazonaws.com/irs-form-990/201403179349300430_public.xml
#> 3: https://s3.amazonaws.com/irs-form-990/201432729349300543_public.xml
#> 4: https://s3.amazonaws.com/irs-form-990/201520839349300612_public.xml
#> 5: https://s3.amazonaws.com/irs-form-990/201441349349301624_public.xml
#> 6: https://s3.amazonaws.com/irs-form-990/201520629349300707_public.xml
#> RETURN_VERSION ORG_EIN year RETURN_TYPE F9_04_AFS_IND_X F9_04_AFS_CONSOL_X
#> 1: 2013v3.1 770489324 2013 990 false false
#> 2: 2013v3.1 591829834 2013 990 false false
#> 3: 2013v3.0 362852762 2013 990 1 0
#> 4: 2013v4.0 200345249 2013 990 1 0
#> 5: 2013v3.1 232550366 2013 990 true false
#> 6: 2013v4.0 540788685 2013 990 0 0
#> F9_04_BIZ_TRANSAC_DTK_X F9_04_BIZ_TRANSAC_DTK_FAM_X
#> 1: false false
#> 2: false false
#> 3: 0 0
#> 4: 0 1
#> 5: false false
#> 6: 0 0
#> F9_04_BIZ_TRANSAC_DTK_ENTITY_X F9_04_CONTR_NONCSH_MT_25K_X
#> 1: false false
#> 2: false false
#> 3: 0 0
#> 4: 0 0
#> 5: false false
#> 6: 0 0
#> F9_04_CONTR_ART_HIST_X F9_06_GVRN_NUM_VOTING_MEMB
#> 1: false 5
#> 2: false 7
#> 3: 0 13
#> 4: 0 10
#> 5: false 8
#> 6: 0 9
#> F9_06_GVRN_NUM_VOTING_MEMB_IND F9_06_GVRN_DTK_FAMBIZ_RELATION_X
#> 1: 0 false
#> 2: 7 true
#> 3: 12 1
#> 4: 10 1
#> 5: 8 false
#> 6: 9 0
#> F9_06_GVRN_DELEGATE_MGMT_DUTY_X F9_06_GVRN_DOC_GVRN_BODY_X
#> 1: false false
#> 2: false true
#> 3: 0 1
#> 4: 0 1
#> 5: false true
#> 6: 0 1
#> F9_06_POLICY_FORM990_GVRN_BODY_X F9_06_POLICY_COI_X
#> 1: false false
#> 2: true false
#> 3: 1 0
#> 4: 1 1
#> 5: true true
#> 6: 0 0
#> F9_06_POLICY_COI_DISCLOSURE_X F9_06_POLICY_COI_MONITOR_X
#> 1: false false
#> 2: <NA> <NA>
#> 3: <NA> <NA>
#> 4: 1 1
#> 5: true true
#> 6: <NA> <NA>
#> F9_06_POLICY_WHSTLBLWR_X F9_06_POLICY_DOC_RETENTION_X
#> 1: false false
#> 2: false false
#> 3: 0 0
#> 4: 1 1
#> 5: true false
#> 6: 0 0
#> F9_06_POLICY_COMP_PROCESS_CEO_X F9_06_DISCLOSURE_AVBL_OTH_X
#> 1: false <NA>
#> 2: false <NA>
#> 3: 0 <NA>
#> 4: 1 <NA>
#> 5: false <NA>
#> 6: 0 <NA>
#> F9_06_DISCLOSURE_AVBL_OTH_WEB_X F9_06_DISCLOSURE_AVBL_REQUEST_X
#> 1: <NA> <NA>
#> 2: <NA> X
#> 3: <NA> X
#> 4: X X
#> 5: X X
#> 6: <NA> X
#> F9_06_DISCLOSURE_AVBL_OWN_WEB_X F9_12_FINSTAT_METHOD_ACC_OTH
#> 1: <NA> <NA>
#> 2: <NA> <NA>
#> 3: <NA> <NA>
#> 4: <NA> <NA>
#> 5: <NA> <NA>
#> 6: <NA> <NA>
#> F9_12_FINSTAT_METHOD_ACC_ACCRU_X F9_12_FINSTAT_METHOD_ACC_CASH_X
#> 1: <NA> X
#> 2: <NA> X
#> 3: X <NA>
#> 4: X <NA>
#> 5: X <NA>
#> 6: <NA> X
#> SM_01_REVIEW_PROCESS_UNUSUAL_X P12_LINE_1 P4_LINE_12 P4_LINE_28
#> 1: <NA> 0 0 1
#> 2: <NA> 0 0 1
#> 3: <NA> 1 1 1
#> 4: <NA> 1 1 0
#> 5: <NA> 1 1 1
#> 6: <NA> 0 0 1
#> P4_LINE_29_30 P6_LINE_1 P6_LINE_11A P6_LINE_12_13_14 P6_LINE_15A P6_LINE_18
#> 1: 1 0 0 0 0 0
#> 2: 1 1 1 0 0 0
#> 3: 1 1 1 0 0 0
#> 4: 1 1 1 1 1 0
#> 5: 1 1 1 0 0 0
#> 6: 1 1 0 0 0 0
#> P6_LINE_2 P6_LINE_3 P6_LINE_8A
#> 1: 1 1 0
#> 2: 0 1 1
#> 3: 0 1 1
#> 4: 0 1 1
#> 5: 1 1 1
#> 6: 1 1 1
Step 3: Calculate the Scores
Use the `get_scores()` function to get the governance scores.
scores_example <- get_scores(features_example)
head(scores_example) %>% data.table()
#> OBJECTID
#> 1: 3.380285e-295
#> 2: 3.367334e-295
#> 3: 3.380176e-295
#> 4: 3.418468e-295
#> 5: 3.383922e-295
#> 6: 3.418376e-295
#> URL
#> 1: https://s3.amazonaws.com/irs-form-990/201432979349300843_public.xml
#> 2: https://s3.amazonaws.com/irs-form-990/201403179349300430_public.xml
#> 3: https://s3.amazonaws.com/irs-form-990/201432729349300543_public.xml
#> 4: https://s3.amazonaws.com/irs-form-990/201520839349300612_public.xml
#> 5: https://s3.amazonaws.com/irs-form-990/201441349349301624_public.xml
#> 6: https://s3.amazonaws.com/irs-form-990/201520629349300707_public.xml
#> RETURN_VERSION ORG_EIN year RETURN_TYPE F9_04_AFS_IND_X F9_04_AFS_CONSOL_X
#> 1: 2013v3.1 770489324 2013 990 false false
#> 2: 2013v3.1 591829834 2013 990 false false
#> 3: 2013v3.0 362852762 2013 990 1 0
#> 4: 2013v4.0 200345249 2013 990 1 0
#> 5: 2013v3.1 232550366 2013 990 true false
#> 6: 2013v4.0 540788685 2013 990 0 0
#> F9_04_BIZ_TRANSAC_DTK_X F9_04_BIZ_TRANSAC_DTK_FAM_X
#> 1: false false
#> 2: false false
#> 3: 0 0
#> 4: 0 1
#> 5: false false
#> 6: 0 0
#> F9_04_BIZ_TRANSAC_DTK_ENTITY_X F9_04_CONTR_NONCSH_MT_25K_X
#> 1: false false
#> 2: false false
#> 3: 0 0
#> 4: 0 0
#> 5: false false
#> 6: 0 0
#> F9_04_CONTR_ART_HIST_X F9_06_GVRN_NUM_VOTING_MEMB
#> 1: false 5
#> 2: false 7
#> 3: 0 13
#> 4: 0 10
#> 5: false 8
#> 6: 0 9
#> F9_06_GVRN_NUM_VOTING_MEMB_IND F9_06_GVRN_DTK_FAMBIZ_RELATION_X
#> 1: 0 false
#> 2: 7 true
#> 3: 12 1
#> 4: 10 1
#> 5: 8 false
#> 6: 9 0
#> F9_06_GVRN_DELEGATE_MGMT_DUTY_X F9_06_GVRN_DOC_GVRN_BODY_X
#> 1: false false
#> 2: false true
#> 3: 0 1
#> 4: 0 1
#> 5: false true
#> 6: 0 1
#> F9_06_POLICY_FORM990_GVRN_BODY_X F9_06_POLICY_COI_X
#> 1: false false
#> 2: true false
#> 3: 1 0
#> 4: 1 1
#> 5: true true
#> 6: 0 0
#> F9_06_POLICY_COI_DISCLOSURE_X F9_06_POLICY_COI_MONITOR_X
#> 1: false false
#> 2: <NA> <NA>
#> 3: <NA> <NA>
#> 4: 1 1
#> 5: true true
#> 6: <NA> <NA>
#> F9_06_POLICY_WHSTLBLWR_X F9_06_POLICY_DOC_RETENTION_X
#> 1: false false
#> 2: false false
#> 3: 0 0
#> 4: 1 1
#> 5: true false
#> 6: 0 0
#> F9_06_POLICY_COMP_PROCESS_CEO_X F9_06_DISCLOSURE_AVBL_OTH_X
#> 1: false <NA>
#> 2: false <NA>
#> 3: 0 <NA>
#> 4: 1 <NA>
#> 5: false <NA>
#> 6: 0 <NA>
#> F9_06_DISCLOSURE_AVBL_OTH_WEB_X F9_06_DISCLOSURE_AVBL_REQUEST_X
#> 1: <NA> <NA>
#> 2: <NA> X
#> 3: <NA> X
#> 4: X X
#> 5: X X
#> 6: <NA> X
#> F9_06_DISCLOSURE_AVBL_OWN_WEB_X F9_12_FINSTAT_METHOD_ACC_OTH
#> 1: <NA> <NA>
#> 2: <NA> <NA>
#> 3: <NA> <NA>
#> 4: <NA> <NA>
#> 5: <NA> <NA>
#> 6: <NA> <NA>
#> F9_12_FINSTAT_METHOD_ACC_ACCRU_X F9_12_FINSTAT_METHOD_ACC_CASH_X
#> 1: <NA> X
#> 2: <NA> X
#> 3: X <NA>
#> 4: X <NA>
#> 5: X <NA>
#> 6: <NA> X
#> SM_01_REVIEW_PROCESS_UNUSUAL_X P12_LINE_1 P4_LINE_12 P4_LINE_28
#> 1: <NA> 0 0 1
#> 2: <NA> 0 0 1
#> 3: <NA> 1 1 1
#> 4: <NA> 1 1 0
#> 5: <NA> 1 1 1
#> 6: <NA> 0 0 1
#> P4_LINE_29_30 P6_LINE_1 P6_LINE_11A P6_LINE_12_13_14 P6_LINE_15A P6_LINE_18
#> 1: 1 0 0 0 0 0
#> 2: 1 1 1 0 0 0
#> 3: 1 1 1 0 0 0
#> 4: 1 1 1 1 1 0
#> 5: 1 1 1 0 0 0
#> 6: 1 1 0 0 0 0
#> P6_LINE_2 P6_LINE_3 P6_LINE_8A V1 V2 V3 V4
#> 1: 1 1 0 0.8325382 -2.9259074 -0.5902706 -0.4582607
#> 2: 0 1 1 -0.6448697 0.7312625 -1.4034831 -0.3713453
#> 3: 0 1 1 1.3598926 0.2140944 -1.7717910 -0.5622358
#> 4: 0 1 1 1.0819772 -0.2759980 0.2945735 -0.5700580
#> 5: 1 1 1 1.3425175 0.3457935 -1.4090468 -0.5709550
#> 6: 1 1 1 -0.5686197 1.0435046 -0.7928681 -0.3724342
#> V5 V6 total.score
#> 1: -0.6044349 -1.63897424 -5.38530968
#> 2: 0.3947381 0.60617695 -0.68752060
#> 3: -0.0817542 0.93277582 0.09098179
#> 4: 0.2251638 0.89678327 1.65244176
#> 5: -0.2402117 -0.06382375 -0.59572631
#> 6: -1.2646891 -0.31298162 -2.26808810
We can explore our governance scores.
hist(scores_example$total.score,
main = "Histogram of Governance Scores",
xlab = "Total Score")