Skip to contents
# Needed for Functionality
library(governance)
library(dplyr)
# Needed for presentation
library(data.table)
library(ggplot2)
# Needed for additional visualizations
library(stringr)
library(readr)

Governance Workflow

Step 1: Get Input data

We assume the user already has the relevant 990 Efile data downloaded. See vignette("download-data") for instructions on downloading the needed 990 efile data.

We will use a subset of a test set that was already created. See data-raw/01-get-example-data.R here for details on how this data set was created.

# Load the example data set bundled with the governance package
data("dat_example", package = "governance")

# Draw a reproducible random subset of 200 rows
set.seed(57)
keep_rows <- sample(seq_len(nrow(dat_example)), size = 200)
dat_example <- dat_example[keep_rows, ]

Step 2: Get Features Matrix

Use the get_features() function to clean the data and transform it into a features matrix.

# Clean the input data and turn it into the features matrix
features_example <- get_features(dat_example)
# Preview the first rows as a data.table
data.table(head(features_example))
##         OBJECTID
## 1: 3.380285e-295
## 2: 3.367334e-295
## 3: 3.380176e-295
## 4: 3.418468e-295
## 5: 3.383922e-295
## 6: 3.418376e-295
##                                                                    URL
## 1: https://s3.amazonaws.com/irs-form-990/201432979349300843_public.xml
## 2: https://s3.amazonaws.com/irs-form-990/201403179349300430_public.xml
## 3: https://s3.amazonaws.com/irs-form-990/201432729349300543_public.xml
## 4: https://s3.amazonaws.com/irs-form-990/201520839349300612_public.xml
## 5: https://s3.amazonaws.com/irs-form-990/201441349349301624_public.xml
## 6: https://s3.amazonaws.com/irs-form-990/201520629349300707_public.xml
##    RETURN_VERSION   ORG_EIN year RETURN_TYPE F9_04_AFS_IND_X F9_04_AFS_CONSOL_X
## 1:       2013v3.1 770489324 2013         990           false              false
## 2:       2013v3.1 591829834 2013         990           false              false
## 3:       2013v3.0 362852762 2013         990               1                  0
## 4:       2013v4.0 200345249 2013         990               1                  0
## 5:       2013v3.1 232550366 2013         990            true              false
## 6:       2013v4.0 540788685 2013         990               0                  0
##    F9_04_BIZ_TRANSAC_DTK_X F9_04_BIZ_TRANSAC_DTK_FAM_X
## 1:                   false                       false
## 2:                   false                       false
## 3:                       0                           0
## 4:                       0                           1
## 5:                   false                       false
## 6:                       0                           0
##    F9_04_BIZ_TRANSAC_DTK_ENTITY_X F9_04_CONTR_NONCSH_MT_25K_X
## 1:                          false                       false
## 2:                          false                       false
## 3:                              0                           0
## 4:                              0                           0
## 5:                          false                       false
## 6:                              0                           0
##    F9_04_CONTR_ART_HIST_X F9_06_GVRN_NUM_VOTING_MEMB
## 1:                  false                          5
## 2:                  false                          7
## 3:                      0                         13
## 4:                      0                         10
## 5:                  false                          8
## 6:                      0                          9
##    F9_06_GVRN_NUM_VOTING_MEMB_IND F9_06_GVRN_DTK_FAMBIZ_RELATION_X
## 1:                              0                            false
## 2:                              7                             true
## 3:                             12                                1
## 4:                             10                                1
## 5:                              8                            false
## 6:                              9                                0
##    F9_06_GVRN_DELEGATE_MGMT_DUTY_X F9_06_GVRN_DOC_GVRN_BODY_X
## 1:                           false                      false
## 2:                           false                       true
## 3:                               0                          1
## 4:                               0                          1
## 5:                           false                       true
## 6:                               0                          1
##    F9_06_POLICY_FORM990_GVRN_BODY_X F9_06_POLICY_COI_X
## 1:                            false              false
## 2:                             true              false
## 3:                                1                  0
## 4:                                1                  1
## 5:                             true               true
## 6:                                0                  0
##    F9_06_POLICY_COI_DISCLOSURE_X F9_06_POLICY_COI_MONITOR_X
## 1:                         false                      false
## 2:                          <NA>                       <NA>
## 3:                          <NA>                       <NA>
## 4:                             1                          1
## 5:                          true                       true
## 6:                          <NA>                       <NA>
##    F9_06_POLICY_WHSTLBLWR_X F9_06_POLICY_DOC_RETENTION_X
## 1:                    false                        false
## 2:                    false                        false
## 3:                        0                            0
## 4:                        1                            1
## 5:                     true                        false
## 6:                        0                            0
##    F9_06_POLICY_COMP_PROCESS_CEO_X F9_06_DISCLOSURE_AVBL_OTH_X
## 1:                           false                        <NA>
## 2:                           false                        <NA>
## 3:                               0                        <NA>
## 4:                               1                        <NA>
## 5:                           false                        <NA>
## 6:                               0                        <NA>
##    F9_06_DISCLOSURE_AVBL_OTH_WEB_X F9_06_DISCLOSURE_AVBL_REQUEST_X
## 1:                            <NA>                            <NA>
## 2:                            <NA>                               X
## 3:                            <NA>                               X
## 4:                               X                               X
## 5:                               X                               X
## 6:                            <NA>                               X
##    F9_06_DISCLOSURE_AVBL_OWN_WEB_X F9_12_FINSTAT_METHOD_ACC_OTH
## 1:                            <NA>                         <NA>
## 2:                            <NA>                         <NA>
## 3:                            <NA>                         <NA>
## 4:                            <NA>                         <NA>
## 5:                            <NA>                         <NA>
## 6:                            <NA>                         <NA>
##    F9_12_FINSTAT_METHOD_ACC_ACCRU_X F9_12_FINSTAT_METHOD_ACC_CASH_X
## 1:                             <NA>                               X
## 2:                             <NA>                               X
## 3:                                X                            <NA>
## 4:                                X                            <NA>
## 5:                                X                            <NA>
## 6:                             <NA>                               X
##    SM_01_REVIEW_PROCESS_UNUSUAL_X P12_LINE_1 P4_LINE_12 P4_LINE_28
## 1:                           <NA>          0          0          1
## 2:                           <NA>          0          0          1
## 3:                           <NA>          1          1          1
## 4:                           <NA>          1          1          0
## 5:                           <NA>          1          1          1
## 6:                           <NA>          0          0          1
##    P4_LINE_29_30 P6_LINE_1 P6_LINE_11A P6_LINE_12_13_14 P6_LINE_15A P6_LINE_18
## 1:             1         0           0                0           0          0
## 2:             1         1           1                0           0          0
## 3:             1         1           1                0           0          0
## 4:             1         1           1                1           1          0
## 5:             1         1           1                0           0          0
## 6:             1         1           0                0           0          0
##    P6_LINE_2 P6_LINE_3 P6_LINE_8A
## 1:         1         1          0
## 2:         0         1          1
## 3:         0         1          1
## 4:         0         1          1
## 5:         1         1          1
## 6:         1         1          1

Step 3: Calculate the Scores

Use the get_scores() function to get the governance scores.

# Compute the governance scores from the features matrix
scores_example <- get_scores(features_example)
# Preview the first rows as a data.table
data.table(head(scores_example))
##         OBJECTID
## 1: 3.380285e-295
## 2: 3.367334e-295
## 3: 3.380176e-295
## 4: 3.418468e-295
## 5: 3.383922e-295
## 6: 3.418376e-295
##                                                                    URL
## 1: https://s3.amazonaws.com/irs-form-990/201432979349300843_public.xml
## 2: https://s3.amazonaws.com/irs-form-990/201403179349300430_public.xml
## 3: https://s3.amazonaws.com/irs-form-990/201432729349300543_public.xml
## 4: https://s3.amazonaws.com/irs-form-990/201520839349300612_public.xml
## 5: https://s3.amazonaws.com/irs-form-990/201441349349301624_public.xml
## 6: https://s3.amazonaws.com/irs-form-990/201520629349300707_public.xml
##    RETURN_VERSION   ORG_EIN year RETURN_TYPE F9_04_AFS_IND_X F9_04_AFS_CONSOL_X
## 1:       2013v3.1 770489324 2013         990           false              false
## 2:       2013v3.1 591829834 2013         990           false              false
## 3:       2013v3.0 362852762 2013         990               1                  0
## 4:       2013v4.0 200345249 2013         990               1                  0
## 5:       2013v3.1 232550366 2013         990            true              false
## 6:       2013v4.0 540788685 2013         990               0                  0
##    F9_04_BIZ_TRANSAC_DTK_X F9_04_BIZ_TRANSAC_DTK_FAM_X
## 1:                   false                       false
## 2:                   false                       false
## 3:                       0                           0
## 4:                       0                           1
## 5:                   false                       false
## 6:                       0                           0
##    F9_04_BIZ_TRANSAC_DTK_ENTITY_X F9_04_CONTR_NONCSH_MT_25K_X
## 1:                          false                       false
## 2:                          false                       false
## 3:                              0                           0
## 4:                              0                           0
## 5:                          false                       false
## 6:                              0                           0
##    F9_04_CONTR_ART_HIST_X F9_06_GVRN_NUM_VOTING_MEMB
## 1:                  false                          5
## 2:                  false                          7
## 3:                      0                         13
## 4:                      0                         10
## 5:                  false                          8
## 6:                      0                          9
##    F9_06_GVRN_NUM_VOTING_MEMB_IND F9_06_GVRN_DTK_FAMBIZ_RELATION_X
## 1:                              0                            false
## 2:                              7                             true
## 3:                             12                                1
## 4:                             10                                1
## 5:                              8                            false
## 6:                              9                                0
##    F9_06_GVRN_DELEGATE_MGMT_DUTY_X F9_06_GVRN_DOC_GVRN_BODY_X
## 1:                           false                      false
## 2:                           false                       true
## 3:                               0                          1
## 4:                               0                          1
## 5:                           false                       true
## 6:                               0                          1
##    F9_06_POLICY_FORM990_GVRN_BODY_X F9_06_POLICY_COI_X
## 1:                            false              false
## 2:                             true              false
## 3:                                1                  0
## 4:                                1                  1
## 5:                             true               true
## 6:                                0                  0
##    F9_06_POLICY_COI_DISCLOSURE_X F9_06_POLICY_COI_MONITOR_X
## 1:                         false                      false
## 2:                          <NA>                       <NA>
## 3:                          <NA>                       <NA>
## 4:                             1                          1
## 5:                          true                       true
## 6:                          <NA>                       <NA>
##    F9_06_POLICY_WHSTLBLWR_X F9_06_POLICY_DOC_RETENTION_X
## 1:                    false                        false
## 2:                    false                        false
## 3:                        0                            0
## 4:                        1                            1
## 5:                     true                        false
## 6:                        0                            0
##    F9_06_POLICY_COMP_PROCESS_CEO_X F9_06_DISCLOSURE_AVBL_OTH_X
## 1:                           false                        <NA>
## 2:                           false                        <NA>
## 3:                               0                        <NA>
## 4:                               1                        <NA>
## 5:                           false                        <NA>
## 6:                               0                        <NA>
##    F9_06_DISCLOSURE_AVBL_OTH_WEB_X F9_06_DISCLOSURE_AVBL_REQUEST_X
## 1:                            <NA>                            <NA>
## 2:                            <NA>                               X
## 3:                            <NA>                               X
## 4:                               X                               X
## 5:                               X                               X
## 6:                            <NA>                               X
##    F9_06_DISCLOSURE_AVBL_OWN_WEB_X F9_12_FINSTAT_METHOD_ACC_OTH
## 1:                            <NA>                         <NA>
## 2:                            <NA>                         <NA>
## 3:                            <NA>                         <NA>
## 4:                            <NA>                         <NA>
## 5:                            <NA>                         <NA>
## 6:                            <NA>                         <NA>
##    F9_12_FINSTAT_METHOD_ACC_ACCRU_X F9_12_FINSTAT_METHOD_ACC_CASH_X
## 1:                             <NA>                               X
## 2:                             <NA>                               X
## 3:                                X                            <NA>
## 4:                                X                            <NA>
## 5:                                X                            <NA>
## 6:                             <NA>                               X
##    SM_01_REVIEW_PROCESS_UNUSUAL_X P12_LINE_1 P4_LINE_12 P4_LINE_28
## 1:                           <NA>          0          0          1
## 2:                           <NA>          0          0          1
## 3:                           <NA>          1          1          1
## 4:                           <NA>          1          1          0
## 5:                           <NA>          1          1          1
## 6:                           <NA>          0          0          1
##    P4_LINE_29_30 P6_LINE_1 P6_LINE_11A P6_LINE_12_13_14 P6_LINE_15A P6_LINE_18
## 1:             1         0           0                0           0          0
## 2:             1         1           1                0           0          0
## 3:             1         1           1                0           0          0
## 4:             1         1           1                1           1          0
## 5:             1         1           1                0           0          0
## 6:             1         1           0                0           0          0
##    P6_LINE_2 P6_LINE_3 P6_LINE_8A         V1         V2         V3         V4
## 1:         1         1          0  0.8325382 -2.9259074 -0.5902706 -0.4582607
## 2:         0         1          1 -0.6448697  0.7312625 -1.4034831 -0.3713453
## 3:         0         1          1  1.3598926  0.2140944 -1.7717910 -0.5622358
## 4:         0         1          1  1.0819772 -0.2759980  0.2945735 -0.5700580
## 5:         1         1          1  1.3425175  0.3457935 -1.4090468 -0.5709550
## 6:         1         1          1 -0.5686197  1.0435046 -0.7928681 -0.3724342
##            V5          V6 total.score
## 1: -0.6044349 -1.63897424 -5.38530968
## 2:  0.3947381  0.60617695 -0.68752060
## 3: -0.0817542  0.93277582  0.09098179
## 4:  0.2251638  0.89678327  1.65244176
## 5: -0.2402117 -0.06382375 -0.59572631
## 6: -1.2646891 -0.31298162 -2.26808810

We can visualize the distribution of our governance scores.

# Histogram of the total governance score for the scored organizations
hist(scores_example$total.score,
     main = "Histogram of Governance Scores",
     xlab = "Total Score")

Additional Visualizations

We can also join our data with the relevant Business Master File (BMF) data here to explore other aspects of the organizations relative to their scores.

Loading in the BMF Data

Since our test data set is from 2013, we first load in the 2013 BMF and only keep data from the organizations that are in our test set.

# Location of the 2013 Business Master File (BMF) CSV
link2013 <- "https://nccsdata.s3.us-east-1.amazonaws.com/legacy/bmf/BMF-2013-12-501CX-NONPROFIT-PX.csv"
# Read the BMF from that URL
bmf.2013 <- readr::read_csv(file = link2013)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 1412018 Columns: 48
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (37): EIN, FIPS, NTEECC, FILER, ZFILER, NAME, ADDRESS, CITY, STATE, ZIP5...
## dbl (10): TAXPER, ASSETS, INCOME, RandNum, cTaxPer, cAssets, cTotRev, FISYR_...
## lgl  (1): OUTREAS
## 
##  Use `spec()` to retrieve the full column specification for this data.
##  Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Left-pad the BMF EINs with zeros to 9 characters, then convert to numeric
# NOTE(review): as.numeric() discards the leading zeros added by str_pad();
# presumably the numeric conversion is what lets EIN match ORG_EIN - confirm
bmf.2013$EIN <- as.numeric(
  stringr::str_pad(bmf.2013$EIN, width = 9, pad = "0")
)

# Keep only the BMF rows whose EIN appears in the example data
EIN.keep <- dat_example$ORG_EIN
bmf.keep <- bmf.2013[bmf.2013$EIN %in% EIN.keep, ]

# Attach the BMF columns to the scores, matching ORG_EIN to EIN
plot_example <- left_join(
  scores_example,
  bmf.keep,
  by = join_by(ORG_EIN == EIN)
)

We can plot the distribution of scores by various characteristics of the organizations.

# Scatter plot of the total governance score (y) against log total assets (x).
# The axis labels follow the aesthetics: x is log(ASSETS), y is total.score.
plot_example %>%
  select(total.score, ASSETS) %>%
  ggplot(aes(x = log(ASSETS), y = total.score)) +
  geom_point() +
  ggtitle("Governance Scores vs. Total Assets") +
  xlab("log(Total Assets)") +
  ylab("Total Governance Score")
## Warning: Removed 7 rows containing missing values (`geom_point()`).