knitr::opts_chunk$set(echo = TRUE)

# Data analysis libraries
library(tidyverse)
library(magrittr) # extended pipe functionality
library(lubridate) # dealing with date objects
library(here) # for OS-agnostic filepaths
library(readxl) # for reading Excel files

# Visualization libraries
library(ggrepel) # for neater text labels
library(gghighlight) # for easier highlighting
library(geofacet) # for making facets looks like the USA
library(ggridges)
library(extrafont) # for... extra fonts
library(gridExtra) # for combining plots

# Geospatial tools and related libraries
library(sf)
library(zipcode) # for geocoding zipcodes
library(noncensus) # for more geocoding data 
library(rnaturalearth)
library(rnaturalearthdata)

# Load previously-imported data
# Data assemby process captured in /scripts/data-import.R
load(here('data', 'solar_data_master.RData'))
# Set custom theme
theme_custom <- function(base_size = 12, base_family = "", 
                         base_line_size = base_size/22,
                         base_rect_size = base_size/22) {
  theme_minimal(base_size = base_size, base_family = base_family,
             base_line_size = base_line_size,
             base_rect_size = base_rect_size) %+replace%
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_line(color = "#EEEEEE"),
          plot.title = element_text(family = "Asap SemiBold",
                                    size = 16,
                                    hjust = 0,
                                    margin = margin(5, 0, 5, 0),
                                    color = "#0F0F0F"),
          plot.subtitle = element_text(family = "Asap",
                                       size = 12,
                                       hjust = 0,
                                       margin = margin(0, 0, 10, 0),
                                       color = "#3F3F3F"),
          plot.caption = element_text(family = "Asap",
                                      face = "italic",
                                      size = 8,
                                      hjust = 1,
                                      margin = margin(5, 0, 0, 0)),
          axis.ticks = element_blank(),
          axis.title = element_text(family = "Asap SemiBold",
                                   size = 12,
                                   face = "plain",
                                   margin = margin(10, 10, 10, 10)),
          axis.text.x = element_text(family = "Asap",
                                     size = 10,
                                     face = "plain",
                                     margin = margin(0, 0, 5, 0)),
          axis.text.y = element_text(family = "Asap",
                                     size = 10,
                                     face = "plain",
                                     margin = margin(0, 0, 0, 5)),          
          legend.title = element_text(family = "Asap SemiBold",
                                      size = 10,
                                      color = "#3F3F3F"),
          legend.text = element_text(family = "Asap",
                                     size = 10,
                                     color = "#3F3F3F"),
          strip.text = element_text(family = "Asap Medium",
                                    size = 10,
                                    color = "#3F3F3F",
                                    margin = margin(10, 10, 10, 10)))
}

# Set custom colors
theme_purple <- "#5D2BF0"
theme_orange <- "#FF810F"
theme_green <- "#4DAF4A"
theme_blue <- "#377EB8"
theme_bg_gray <- "#DDDDDD"
text_black <- "#000000"

Introduction

Investment in renewable energy is such a fascinating policy area. It’s one of the few that feature the interplay of energy as a national utility, yet with semi-reliance on individuals and corporations as contributors to industry supply - e.g. in installing solar panels or, at a greater scale, solar farms.

Much of the national conversation around renewable energy today tends to revolve around switching from gasoline power to low-emission alternatives (say, a regular petrol car vs. a Tesla, or a city transit system switching to electric buses). In many cases, this makes sense - the Department of Energy states that “electricity is less expensive than gasoline and [electric vehicles] are more efficient than gasoline vehicles”.

More efficient or not, however, this still shifts the energy load from gasoline to the electrical grid, which itself is mostly generated from non-renewable sources. The generation of energy from renewable sources is a critical part of reducing emissions when we as a species increasingly consume more and more energy (looking at you, bitcoin miners). The question I’m more preoccupied with is: where is all this energy coming from?

Where all this energy is coming from

First, we’ll take a look at data on solar photovoltaic energy generated by the commercial, industrial, power and residential sectors, courtesy of the U.S. Energy Information Administration’s State Energy Data Systems (SEDS) database.

Why focus on solar energy? Of all the common sources of renewable energy - solar, wind, hydroelectric, geothermal, etc. - solar energy fascinates me because it’s so participatory. You can’t install a wind turbine in your backyard or dam up the local river, but you could invest in a set of solar panels for your house fairly easily. In the SEDS data, solar energy is also the only renewable source of energy to have a line item for “generation by small-scale applications in the residential sector”.

gen %>%
  group_by(Year, source, energy_type) %>%
  summarize(total_btu = sum(Data)) %>% 
  mutate(full_source = paste0(energy_type, ", ", source)) %>%
  mutate(highlight_col = ifelse(source == "Residential", "2",
                              ifelse(energy_type == "Solar", "1", "0"))) %>%
  ##### PLOT BEGINS
  ggplot(aes(x = Year, y = total_btu, group = full_source, color = highlight_col)) +
  geom_line(show.legend = FALSE, 
            na.rm = TRUE) +
  scale_color_manual(values = c(theme_bg_gray, theme_orange, theme_purple)) +
  labs(title = "Residential systems are the 2nd-largest source of solar energy",
       subtitle = "While utility-owned solar farms were the largest generators of solar energy in 2016, small-scale \nresidential systems overtook the commercial sector in 2014 to become the 2nd-largest source \nof solar energy and the 5th-largest source of renewable energy overall.",
       x = "Year",
       y = "Log Annual Energy Generated (BTUs)",
       caption = "Data source: SEDS (U.S. Energy Information Administration)") +
  geom_text_repel(data = . %>% filter(Year == 2016),
                  aes(x = Year, y = total_btu, label = full_source),
                  size = 3,
                  hjust = 0,
                  nudge_x = 1,
                  family = "Asap Medium",
                  show.legend = FALSE,
                  na.rm = TRUE) +
  scale_y_log10(breaks = c(10, 100, 1000, 10000, 100000),
                labels = c("10", "100", "1K", "10K", "100K")) +
  scale_x_continuous(limits = c(1960, 2030),
                     breaks = seq(1960, 2010, 10)) +
  annotate(geom = "text",
           x = 2017, y = 5,
           label = "Solar energy is the only \nrenewable energy source in \nthe data with a Residential \ncategory.",
           size = 3,
           lineheight = 1,
           color = "#000000",
           hjust = 0, vjust = 1,
           family = "Asap",
           fontface = "italic") +
  theme_custom() +
  theme(panel.grid.major.x = element_blank())

ggsave(here('output', '01-gen-by-source.png'), width = 8, height = 6)

A logical next question is: who’s actually capturing all this solar energy?

gen %>%
  filter(Year == 2016) %>%
  filter(source == "Residential") %>%
  group_by(StateCode) %>%
  summarize(total_btu = sum(Data)) %>%
  left_join(pop %>%
              filter(year == 2016) %>%
              ungroup() %>%
              select(state, total_pop),
            by = c("StateCode" = "state")) %>%
  mutate(btu_per_10k = (total_btu / total_pop) * 10000) %>%
  mutate(dev_fr_median = btu_per_10k / median(btu_per_10k, na.rm = TRUE)) %>%
  filter(dev_fr_median != 0) %>% # SD and ND mess with log scale; omit
  mutate(highlight = ifelse(dev_fr_median > 1, TRUE, FALSE)) %>%
  ##### PLOT BEGINS
  ggplot(aes(x = reorder(StateCode, dev_fr_median), y = dev_fr_median, color = highlight)) +
  geom_point(stat = 'identity', 
             show.legend = FALSE,
             na.rm = TRUE) + 
  geom_text(aes(label = StateCode, 
                 y = ifelse(dev_fr_median > 1, dev_fr_median * 1.2, dev_fr_median / 1.2)),
            show.legend = FALSE,
            size = 3) +
  scale_y_log10(position = "right",
                breaks = c(0.01, 0.1, 1, 10, 100),
                labels = c("0.01x", "0.1x", "1x", "10x", "100x")) +
  scale_x_discrete(expand = c(0.03, 0)) +
  scale_color_manual(values = c(theme_purple, theme_orange)) +
  labs(title = "Hawaii generates over 50x more solar energy per capita than natl. median",
       subtitle = "California was the single largest generator of solar energy from small-scale residential systems, \nproducing 48.6% of all solar energy in the US in 2016. After controlling for state population, however, \nHawaii emerges as the state with the largest solar energy generation per capita.",
       x = "", 
       y = "Solar Energy per Capita Generated, Relative to U.S. Median",
       caption = "Data source: SEDS (U.S. Energy Information Administration)") +
  annotate("segment", 
           x = "HI", xend = "HI", 
           y = 1.2, yend = 3.2, 
           arrow = arrow(length = unit(0.2, "cm")),
           color = text_black) +
  annotate("segment", 
           x = "HI", xend = "HI",
           y = 0.8, yend = 0.3, 
           arrow = arrow(length = unit(0.2, "cm")),
           color = text_black) +
  annotate("text", 
           x = "AZ", y = 1.2, 
           label = "Generated \nmore solar energy \nthan median",
           family = "Asap",
           fontface = "italic",
           color = text_black, 
           size = 3,
           lineheight = 1,
           hjust = 0, vjust = 1) +
  annotate("text", 
           x = "AZ", y = 0.8, 
           label = "Generated \nless solar energy \nthan median",
           family = "Asap",
           fontface = "italic",
           color = text_black, 
           size = 3,
           lineheight = 1,
           hjust = 1, vjust = 1) +
  annotate("text",
           x = "NE", y = 12,
           label = "Note: \nSouth Dakota and North Dakota \nare omitted above, having \nproduced no solar energy in 2016.",
           family = "Asap",
           fontface = "italic",
           color = text_black,
           size = 3,
           lineheight = 1,
           hjust = 0, vjust = 1) +
  annotate("text",
           x = "NC", y = 1.2,
           label = "Iowa, the median state, \ngenerated 0.064 BTUs \nof solar energy per \n10k people in 2016.",
           family = "Asap",
           fontface = "italic",
           color = text_black,
           size = 3,
           lineheight = 1,
           hjust = 0, vjust = 1) +
  annotate("segment",
           x = 22.5, xend = "IA",
           y = 1.2, yend= 1,
           color = text_black,
           size = 0.2) +
  annotate("text",
           x = "VT", y = 20,
           label = "Hawaii generated 3.35 BTUs \nof solar energy per 10k \npeople in 2016 - about \n52 times that of Iowa.",
           family = "Asap",
           fontface = "italic",
           color = text_black,
           size = 3,
           lineheight = 1,
           hjust = 0, vjust = 1) +
  annotate("segment",
           x = 45.5, xend = "HI",
           y = 40, yend = 52.55365894,
           color = text_black,
           size = 0.2) +
  theme_custom() + 
  theme(panel.grid.major.y = element_blank(),
        axis.text.y = element_blank()) +
  coord_flip()

ggsave(here('output', '02-gen-by-state-mul.png'), width = 8, height = 8)

Unsurprisingly, California was the single largest residential generator of solar energy in 2016 - in fact, California and Arizona combined generated more than half of the national total. What is a bit more of a surprise is how high on the list the Northeast states like Vermont (#5), Massachussetts (#6), and New Jersey (#7) lie, especially in comparison to known sunny states like Texas (#23) or Florida (#27).

That brings me to my next question: does the amount of sunlight each state receives correlate with the solar energy it generates?

Do sunny states generate more solar energy?

For this, we’ll use solar irradiance data from the Department of Energy’s National Renewable Energy Laboratory. Specifically, we’re looking at Global Horizontal Irradiance (GHI), which is the amount of solar radiation a surface receives if placed horizonal to the ground, measured in kilowatt-hours per square meter per day (\(kWh/m^2/day\)). Since we’re dealing with residential installations here, roofs might be sloped in a particular direction, but we’ll assume that they more or less average out to horizontal position in the full dataset.

gen %>%
  filter(Year == 2016) %>%
  filter(source == "Residential") %>%
  group_by(StateCode) %>%
  summarize(total_btu = sum(Data)) %>%
  left_join(ghi %>% 
               mutate(StateCode = state.abb[match(State, state.name)]) %>%
               filter(period == "Ann", !is.na(avg)) %>%
               select(StateCode, avg),
             by = c("StateCode" = "StateCode")) %>% 
  left_join(pop %>%
              filter(year == 2016) %>%
              ungroup() %>%
              select(state, total_pop),
            by = c("StateCode" = "state")) %>%
  filter(total_btu != 0) %>%  # SD and ND mess with log scale; omit
  mutate(btu_per_10k = (total_btu / total_pop) * 10000) %>%
  mutate(region = state.region[match(StateCode, state.abb)]) %>%
  mutate(highlight = ifelse(region == "Northeast", "Northeast", "North Central, \nSouth & West")) %>%
  ##### PLOT BEGINS
  ggplot(aes(x = avg, y = btu_per_10k, label = StateCode, color = highlight)) +
  geom_point(show.legend = FALSE,
             na.rm = TRUE) +
  geom_text_repel(show.legend = FALSE,
                  na.rm = TRUE,
                  family = "Asap") +
  geom_smooth(aes(x = avg, y = btu_per_10k),
              method = "lm",
              color = "black",
              size = 0.5,
              linetype = "dashed",
              se = FALSE,
              inherit.aes = FALSE,
              na.rm = TRUE) +
  scale_y_log10(expand = c(0.1, 0),
                breaks = c(0.01, 0.1, 1, 10),
                labels = c("0,01", "0.1", "1", "10")) +
  scale_color_manual(values = c(theme_bg_gray, theme_purple),
                     name = "Region") +
  labs(title = "Northeast states generate more solar energy from less sunlight",
       subtitle = "Relative to states in the North Central, South, and West regions, states in the Northeast generated \nmore solar energy in 2016 than what their solar irradiance levels might suggest.",
       x = "Average Annual Global Horizonal Irradiance (kWh/m^2/day)",
       y = "Solar Energy Generated in 2016 (BTUs per 10K people)",
       caption = "Data sources: NREL (U.S. Dept of Energy), SEDS (U.S. Energy Information Administration)") +
  annotate("segment", 
           x = 3.8, xend = 3.55, y = 8, yend = 8,
           arrow = arrow(length = unit(0.2, "cm")),
           color = text_black) +
  annotate("segment", 
           x = 5.45, xend = 5.7, y = 8, yend = 8,
           arrow = arrow(length = unit(0.2, "cm")),
           color = text_black) +
  annotate("text",
           x = 3.8, y = 6, 
           label = "Gets less sun",
           color = text_black,
           family = "Asap",
           fontface = "italic",
           size = 3, 
           hjust = 1) +
  annotate("text", 
           x = 5.45, y = 6, 
           label = "Gets more sun",
           color = text_black,
           family = "Asap",
           fontface = "italic",
           size = 3, 
           hjust = 0) +
  annotate("text",
           x = 3.7, y = 2,
           label = "Northeast states",
           color = theme_purple,
           family = "Asap Bold",
           size = 4,
           hjust = 0) +
  annotate("text",
           x = 5.0, y = 0.035,
           label = "North Central, \nSouth & West \nstates",
           lineheight = 1,
           color = theme_bg_gray,
           family = "Asap Bold",
           size = 4,
           hjust = 0.5, vjust = 1) +
  theme_custom() +
  theme(panel.grid.major.x = element_blank())

ggsave(here('output', '03-ghi-vs-gen.png'), width = 8, height = 6)

A rough upward trend seems to hold for the Western states, although there seems to be a lot of unexplained variation towards the lower end of the irradiance range. Among the Northeast states in red above, Vermont, Massachussetts, and New Jersey in particular are generating way more solar energy than neighboring states that get just as much sunlight, if not more.

While there are plenty of factors that impact the efficiency with which you generate solar energy - tilt, weather, temperature, age of technology, etc. - my immediate guess is that the most straightforward way to ramp up your output is to have more solar panels.

Which states have the most solar panels?

To answer this question, we’ll use historical solar panel installation data from OpenPV, the open-source database maintained by NREL.

An easy way for a state to have lots of solar panels is to have lots of people, but that’s not a particularly novel insight. Thus, a critical second step is to normalize our solar panel counts by population size - this lets us identify the states with high numbers of solar panels relative to how many people they have.

This data is normally collected by the Census bureau, but the National Cancer Institute has a nicely-bundled dataset that combines census counts and inter-census estimates from 1969 to 2016. So that is the dataset we’ll use to wrangle both solar panel data into count-per-10,000-people estimates.

Now, let’s briefly check which states have the largest number of residential solar panels installed relative to the number of residents they have.

panels_by_pop %>%
  mutate(region = state.region[match(state, state.abb)]) %>%
  mutate(region = factor(region,
                         levels = c("West", "Northeast", "South", "North Central"))) %>%
  mutate(highlight = ifelse(region == "Northeast", TRUE, FALSE)) %>%
  filter(year == 2016) %>%
  ##### PLOT BEGINS
  ggplot(aes(x = reorder(state, -panels_per_10k), 
             y = panels_per_10k,
             fill = highlight)) +
  geom_bar(stat = 'identity') +
  geom_text(aes(label = state,
                y = ifelse(panels_per_10k >= 1, 
                           panels_per_10k * 1.6,
                           panels_per_10k * 0.6),
                color = highlight),
            angle = 90,
            show.legend = FALSE) +
  scale_fill_manual(name = "Region",
                    breaks = TRUE,
                    labels = "Northeast",
                    values = c(theme_bg_gray, theme_purple),
                    na.value = theme_bg_gray) +
  scale_color_manual(values = c(theme_bg_gray, theme_purple),
                    na.value = theme_bg_gray) +
  scale_y_log10(breaks = c(0.01, 0.1, 1, 10, 100),
                labels = c("0.01", "0.1", "1", "10", "100")) +
  labs(title = 'Northeast states are surprisingly well-represented in solar panel counts',
       subtitle = 'Adjusting for state population size, MA, NJ, and CT have the 3rd, 5th and 7th-most residential \nsolar panel installations per capita respectively.',
       x = 'State',
       y = 'Log Panels per 10,000 people',
       caption = 'Data sources: NREL (U.S. Dept of Energy), National Cancer Institute') +
  annotate("segment", 
           x = 38.5, xend = Inf,
           y = 0.1, yend = 0.1,
           color = "#FFFFFF") +
  annotate("segment",
           x = 0.5, xend = 13.5,
           y = 10, yend = 10,
           color = "#FFFFFF") +
  annotate("segment",
           x = 0.5, xend = 2.5,
           y = 100, yend = 100,
           color = "#FFFFFF") +
  theme_custom() +
  theme(axis.text.x = element_blank(),
        panel.grid.major.x = element_blank(),
        legend.position = c(0.85, 0.97),
        legend.direction = "horizontal")

ggsave(here('output', '04-panels-by-pop-state.png'), width = 8, height = 6)

Sure enough - Northeast states like Massachussetts, New Jersey, and Connecticut are among states with the most solar panels per capita in 2016. The question now becomes - why? As we saw before, the Northeast region as a group receives the lowest annual solar irradiance among all the states. I definitely wouldn’t go to Boston for a beach weekend.

# Specify map bounds 
bounds <- tibble(LONG = c(-122, -60),
                 LAT = c(22, 46)) %>%
  st_as_sf(coords = c("LONG", "LAT"),
           crs = 4326) %>%
  st_transform(crs = 2163) %>%
  st_coordinates()

# Plot
county_map %>%
  st_transform(2163) %>%
  mutate(STATEFP = as.character(STATEFP),
         GEOID = as.character(GEOID)) %>%
  select(STATEFP, COUNTYFP, GEOID, geometry) %>%
  left_join(us_states %>%
              select(fips, postal) %>%
              mutate(fips = substr(fips, 3, 4)) %>%
              st_set_geometry(NULL),
            by = c("STATEFP" = "fips")) %>%
  left_join(ghi %>%
              filter(period == "Ann") %>%
              select('State FIPS', avg) %>%
              rename(StateFIPS = 'State FIPS') %>%
              mutate(StateFIPS = as.character(
                ifelse(nchar(StateFIPS) == 1, 
                       paste0('0', StateFIPS),
                       StateFIPS)
              )),
            by = c("STATEFP" = "StateFIPS")) %>%
  left_join(zip_codes %>%
              select(fips, zip),
            by = c("GEOID" = "fips")) %>%
  left_join(panels %>%
              group_by(zipcode) %>%
              count(),
            by = c("zip" = "zipcode")) %>%
  left_join(counties %>%
              mutate(geoid = paste0(state_fips, county_fips)) %>%
              select(geoid, population),
            by = c("GEOID" = "geoid")) %>%
  group_by(GEOID) %>%
  mutate(total_n = sum(n, na.rm = TRUE)) %>%
  mutate(panels_per_10k = 10000 * (total_n / population)) %>%
  mutate(panels_per_10k = ifelse(is.na(panels_per_10k), 0, panels_per_10k)) %>%
  select(GEOID, avg, panels_per_10k, geometry) %>%
  unique() %>%
  mutate(cent = st_centroid(geometry)) %>%
  arrange(desc(panels_per_10k)) %>% 
  ##### PLOT BEGINS
  ggplot() +
  geom_sf(fill = "#FFFFFF",
          color = theme_bg_gray,
          size = 0.1) +
  geom_sf(aes(geometry = cent,
              size = panels_per_10k,
              fill = avg),
          colour = "#FFFFFF",
          alpha = 0.8,
          pch = 21,
          show.legend = "point",
          na.rm = TRUE) +
  scale_fill_gradient(low = theme_purple, high = theme_orange,
                      na.value = "#FFFFFF",
                      name = "Annual Solar \nRadiation \n(kWh/m^2/day)",
                      guide = guide_colorbar(
                        barheight = unit(40, units = "mm"),
                        barwidth = unit(3, units = "mm"),
                        title.hjust = 0,
                        ticks.linewidth = 1
                      )) +
  scale_size_area(name = "Panels per 10K \nper county",
             guide = guide_legend(
               override.aes = list(shape = 16, color = "#000000"),
               title.hjust = 0
             )) +
  coord_sf(xlim = bounds[, 1], 
         ylim = bounds[, 2],
         expand = FALSE) +
  labs(title = "Large panels-per-capita in the Northeast, despite low annual solar radiation",
       subtitle = "While the Northeast region receives the least solar irradiance in the US, it also has counties with \ndisproportionately high solar panel installation rates.",
       caption = "Data source: NREL (U.S. Dept of Energy) \nNote: HI and AK are omitted due to lack of comparable solar resource data available.") +
  annotate("text",
           x = 2300000, y = -150000,
           label = "Dukes County, MA \nhas 460 panels per \n10K residents, vs. \nnational avg. of 28",
           family = "Asap",
           fontface = "italic",
           size = 2,
           lineheight = 1,
           hjust = 0, vjust = 1) +
  annotate("segment",
           x = 2500000, xend = 2400000, y = -100000, yend = 0,
           color = "#000000",
           size = 0.2) +
  theme_custom() +
  theme(legend.title = element_text(size = 8),
        panel.grid.major = element_line(color = "#FFFFFF"),
        axis.text = element_blank(),
        axis.title = element_blank())

ggsave(here('output', '05-panels-vs-ghi-map.png'), width = 8, height = 6)

A simple hypothesis is that some of these states pursue more “green” policies compared to other states. In order to compare them, however, how do we quantify a state’s commitment to environmental policy?

One way we’ll explore below is to look at how they spend their money. Specifically, many states create financial incentives for residents to invest in solar panels by offering loan programs, surplus solar energy buybacks (more on this later), and otherwise allowing for tax exemptions or credits. Which states do this, and to what extent?

Have state tax incentives boosted solar panel adoption?

Here, I’m interested in the effect of state and federal policies that provide tax incentives for installing renewable energy systems. We know that the federal solar tax credit incentive was implemented in 2005 and is still in effect. For individual states, we’ll have to look at data from the Database of State Incentives for Renewables & Efficiency (DSIRE) maintained by N.C. State University.

Let’s look at which states provided renewable tax incentives specifically for residential photovoltaic systems (i.e. household-scale solar panels) and when:

policy %>%
  filter(sector == "Residential") %>% 
  filter(!program_type %in% c("Corporate Tax Credit")) %>%
  select(state, start_date, end_date) %>%
  mutate(start_year = year(start_date),
         end_year = ifelse(end_date > now(), NA, year(end_date))) %>% 
  select(state, start_year, end_year) %>%
  group_by(state) %>% 
  mutate(start_year = min(start_year), 
         end_year = max(end_year)) %>%
  mutate(end_year = ifelse(is.na(end_year), 2019, end_year)) %>%
  unique() %>% 
  ungroup() %>% 
  rowwise() %>%
  do(data.frame(state = .$state, 
                year = seq(.$start_year, .$end_year, by = 1))) %>%
  mutate(policy = 1) %>%
  complete(nesting(state), year, fill = list(policy = 0)) %>%
  mutate(region = state.region[match(state, state.name)]) %>%
  mutate(region = factor(region, levels = c("West", "Northeast", "South", "North Central"))) %>%
  mutate(stateabb = state.abb[match(state, state.name)]) %>%
  filter(!is.na(stateabb)) %>%
  group_by(state) %>%
  mutate(num_years = sum(policy)) %>%
  mutate(policy = ifelse(policy == 1, TRUE, FALSE)) %>%
  ##### PLOT BEGINS
  ggplot(aes(x = year, y = reorder(stateabb, num_years))) +
  geom_tile(aes(fill = policy),
            color = "white",
            size = 0.5) +
  scale_fill_manual(breaks = c(TRUE),
                    values = c(theme_bg_gray, theme_orange),
                    name = "Residential ITCs in Effect?") +
  scale_x_continuous(position = "top",
                     breaks = c(1980, 1990, 2000, 2010)) +
  labs(title = "Renewable tax incentives only took off in mid-2000s",
       subtitle = "Massachussetts, New York, and Iowa were the first states to offer tax incentives to households \nthat installed solar panels. Most other states followed after the federal Solar Investment Tax \nCredits (ITCs) began in 2006.",
       x = "Year",
       y = "State",
       caption = "Data source: DSIRE (NC State Univ.)") +
  annotate("rect",
           xmin = 1975.5, xmax = 1987.5, ymin = 36.5, ymax = 40.5,
           color = "#FFFFFF",
           fill = "#FFFFFF", 
           alpha = 0.7) +
  annotate("text",
           x = 1976, y = "WY",
           label = "MA, NH, and IA first introduced \nproperty tax exemptions for \nsolar panel owners between \n1975 and 1978.",
           size = 3,
           lineheight = 1,
           hjust = 0, vjust = 1,
           family = "Asap",
           fontface = "italic",
           color = text_black) +
  annotate("rect",
           xmin = 1992.5, xmax = 2003.5, ymin = 21.5, ymax = 25.5,
           color = "#FFFFFF",
           fill = "#FFFFFF",
           alpha = 0.7) +
  annotate("text",
           x = 1993, y = "WA",
           label = "More states began offering \ntax incentives in the early \n2000s, most of which have \ncontinued to present day.",
           size = 3,
           lineheight = 1,
           hjust = 0, vjust = 1,
           family = "Asap",
           fontface = "italic",
           color = text_black) +
  annotate("rect",
           xmin = 1995.5, xmax = 2007.5, ymin = 5.5, ymax = 9.5,
           color = "#FFFFFF",
           fill = "#FFFFFF",
           alpha = 0.7) +
  annotate("text",
           x = 1996, y = "WI",
           label = "In other states, budgets for \nincentives have run out and \nthe bills providing funding for \nthem have not been renewed.",
           size = 3,
           lineheight = 1,
           hjust = 0, vjust = 1,
           family = "Asap",
           fontface = "italic",
           color = text_black) +
  theme_custom() +
  theme(panel.grid = element_blank(),
        axis.title.y = element_blank(),
        legend.position = c(0.8, 0.93),
        legend.background = element_rect(fill = "#FFFFFF",
                                         linetype = "blank"),
        axis.text.y.left = element_text(margin = margin(0, 0, 0, 20)),
        axis.ticks.x.top = element_line(color = theme_bg_gray))

ggsave(here('output', '06-policy-by-state-year.png'), width = 8, height = 10)

It appears that the availability of renewable ITCs only tell part of the story. Among the states that we were previously interested in, Massachussetts (along with Iowa and New Hampshire) was among the earliest states in the country to implement tax incentive programs in the form of property tax exemptions between 1975 and 1980. New York only implemented a similar property tax exemption in 1988. Meanwhile, the earliest incentive program available to the residential sector in New Jersey was the adoption of Solar Renewable Energy Certificates (SREC) in 2004, where the state could purchase surplus solar energy from households via SRECs in order to meet the renewable energy production quota set in its Renewable Portfolio Standard.

How does the availability of these incentives correspond with solar panel installation rates in each state? Do installation rates increase when they’re financially incentivized?

# Plot of solar panel installation growth vs. 
panels_by_pop %>%
  select(state, year, panels_per_10k) %>%
  mutate(region = state.region[match(state, state.abb)]) %>%
  ##### PLOT BEGINS 
  ggplot() +
  geom_step(aes(x = year, y = panels_per_10k)) +
  geom_rect(data = policy %>%
              filter(sector == "Residential") %>%
              filter(!program_type %in% c("Corporate Tax Credit")) %>%
              select(state, start_date, end_date) %>%
              mutate(state = state.abb[match(state, state.name)]) %>%
              filter(!is.na(state)) %>%
              mutate(region = state.region[match(state, state.abb)]) %>%
              mutate(region = factor(region,
                       levels = c("North Central", "West", "Northeast", "South"))) %>%
              mutate(start_year = year(start_date),
                     end_year = ifelse(end_date > now(), NA, year(end_date))) %>%
              select(state, start_year, end_year, region) %>%
              group_by(state) %>%
              mutate(start_year = min(start_year),
                     end_year = max(end_year)) %>%
              mutate(end_year = ifelse(is.na(end_year), 2019, end_year)) %>%
              unique(),
            aes(xmin = start_year, xmax = end_year, ymin = 0, ymax = Inf,
                fill = region), 
            alpha = 0.5) +
  facet_geo(~ state) +
  scale_y_log10(breaks = c(0.01, 1, 100),
                labels = c(0.01, 1, 100)) +
  scale_x_continuous(breaks = c(1970, 1990, 2010),
                     labels = c("'70", "'90", "'10")) +
  scale_fill_manual(name = "Region",
                    values = c(theme_orange, theme_blue, theme_purple, theme_green),
                    guide = guide_legend(nrow = 2,
                                         title.position = "top",
                                         title.hjust = 0)) +
  labs(title = "No clear link between residential tax incentives and solar panel uptake",
       subtitle = "Most states appear to have periods of solar panel growth unrelated to the period at which state-based tax incentives \nfor residential renewable energy systems are available in that state. These periods are highlighted below.",
       x = "Year",
       y = "Log Panels per 10,000 people",
       caption = "Data sources: NREL (US Dept of Energy), DSIRE (NC State Univ.)") +
  theme_custom() +
  theme(strip.text = element_text(margin = margin(2, 0, 2, 0)),
        legend.direction = "horizontal",
        legend.position = c(0.12, 0.97))

ggsave(here('output', '07-panels-vs-policy.png'), width = 10, height = 8)

It appears that the periods where state-based financial incentives were available do not meaningfully coincide with periods of rapid solar panel installation growth. We are, however, painting with a very broad brush - the periods we’ve been looking at have been for all types of state incentives, be they property tax exemptions, tax credits, low-interest financing plans, etc. We’ll need a bit more contextual research to be able to meaningfully distinguish between them.

Meanwhile, installing a solar panel system may or may not be environmentally-motivated, but we can safely say it is a decision with a large financial component. Buyers put up with a high fixed cost upfront in order to enjoy long-term savings in the future, so the cheaper the cost of installation, the more likely installing solar panels will make financial sense. That suggests we should look at other factors impacting the cost of installing solar panels. We can start with the most obvious one - the cost of the panels themselves.

How have solar panel costs changed?

As with any technology, we can generally expect efficiency and capacity to rise over time. Since solar panels have at least grown in their literal efficiency (i.e. amount of energy generated from the same amount of sunlight), it makes more sense to look at how much it costs to generate a single watt of electricity and how that’s changed over time.

panels %>%
  filter(install_type == "residential") %>%
  filter(year_installed >= 1997) %>% 
  mutate(year_installed = factor(year_installed)) %>%
  ##### PLOT BEGINS
  ggplot(aes(x = cost_per_watt, y = fct_rev(year_installed))) +
  geom_density_ridges_gradient(aes(fill = ..x..),
                      color = "#FFFFFF",
                      na.rm = TRUE,
                      show.legend = FALSE,
                      scale = 5) +
  scale_x_continuous(limits = c(0, 25),
                     breaks = seq(0, 30, 5),
                     labels = c("$0", "$5", "$10", "$15", "$20", "$25", "$30")) +
  scale_fill_gradient(low = theme_purple, high = theme_orange,
                      name = "Cost per watt",
                      breaks = seq(0, 30, 10),
                      labels = c("$0", "$10", "$20", "$30")) + 
  labs(title = "Residential solar panels have become steadily cheaper",
       subtitle = "When accounting for a solar panel's generative capacity (using cost-per-watt), the cost of \nresidential solar panels have decreased and become more consistent over the last 20 years.",
       x = "Cost per watt",
       caption = "Data sources: NREL (U.S. Dept of Energy)") +
  annotate("text",
           x = 18.5, y = 18,
           label = "Prices in the late 1990s and \nearly 2000s varied wildly.",
           size = 3,
           lineheight = 1,
           hjust = 0, vjust = 1,
           family = "Asap",
           fontface = "italic",
           color = text_black) +
  annotate("text",
           x = 11, y = 5,
           label = "By the late 2010s, prices decreased to \nunder $5 per watt and their spread \ndiminished, suggesting more \nstandardized pricing.",
           size = 3,
           lineheight = 1,
           hjust = 0, vjust = 1,
           family = "Asap",
           fontface = "italic",
           color = text_black) +
  theme_custom() +
  theme(panel.grid.major.y = element_blank(),
        axis.title.y = element_blank())

ggsave(here('output', '08-panel-cpw-by-year.png'), width = 8, height = 6)

While panels have gotten cheaper overall, what does this change look like for specific states? In particular, how has cost-per-watt changed for the three Northeast states we identified above - Connecticut, Massachussetts, and New Jersey - that generated disproportionately high levels of solar energy relative to the solar irradiance they received?

panels %>%
  filter(install_type == "residential") %>%
  filter(year_installed >= 1990) %>%
  group_by(state, year_installed) %>%
  summarize(med_cpw = median(cost_per_watt, na.rm = TRUE)) %>%
  ##### PLOT BEGINS
  ggplot(aes(x = year_installed, y = med_cpw)) +
  geom_step(aes(color = state),
            alpha = 0.8,
            na.rm = T,
            show.legend = FALSE) +
  gghighlight(state %in% c("CT", "MA", "NJ"),
              unhighlighted_colour = theme_bg_gray,
              use_group_by = F,
              use_direct_label = F) +
  facet_wrap(~ state) +
  scale_color_manual(values = c(theme_orange, theme_green, theme_purple)) +
  scale_y_continuous(labels = c("$0", "$5", "$10", "$15", "$20")) +
  labs(title = "CT, MA, NJ still among more expensive states for solar panels",
       subtitle = "Looking at how the median cost-per-watt has changed for each state over the last 20 years, \nsolar generation in CT, MA and NJ - three of the Northeast states that showed solar panel \nownership disproportionate to their solar irradiance - is also relatively high.",
       x = "Year",
       y = "Median cost per watt",
       caption = "Data source: NREL (U.S. Dept of Energy)") +
  theme_custom() +
  theme(panel.grid.major.x = element_blank())

ggsave(here('output', '09-panel-cpw-nestates.png'), width = 8, height = 6)

Interestingly enough, when looking at the path of each state’s median cost-per-watt over the course of the past 20 years, CT, MA, and NJ all have had median costs on the high end of each year’s distribution. This places a wrench in my original hypothesis. It seems like we might need different data - say, cost-per-watt relative to each state’s median income.

Next steps

As an ongoing project, I’d next like to look at the following factors as possible correlates of solar panel installation rates:

  • Cost of electricity in each state

  • A deeper look into the types of state tax incentives enacted, i.e. whether it was a tax deduction, a tax credit, a tax exemption, a low-interest loan program, size of the state’s renewable portfolio standard, and so on.