Covid19 cases by US State

The chart above shows how average daily cases per 100,000 people have changed in different parts of the United States. The state with the highest recent average cases per 100,000 people is included for reference.

This chart was modelled after the cases-by-region plot from the New York Times. For this visualisation we've used open sources to get our data, along with a few calculations of our own for the aggregates.

All credit for the design and data goes to the amazing people over at NYT; any mistakes are ours.

import pandas as pd     # everything is easier with pandas
import numpy as np      # numpy for some number manipulation
from novem import Plot  # get the novem plot

# N.B. below we are fetching live data hosted by others
#      be aware that this information can change at any time
#      so consider hosting a local copy

# rolling-average covid data per US state, as standardized by NYT
nyt_url = (
    "https://raw.githubusercontent.com/nytimes/"
    "covid-19-data/master/rolling-averages/us-states.csv"
)
df = pd.read_csv(nyt_url)

# lookup table used to map a state name to its region
regions_url = (
    "https://raw.githubusercontent.com/cphalpert/"
    "census-regions/master/us%20census%20bureau%20regions%20and%20divisions.csv"
)
cmap = pd.read_csv(regions_url)

# attach the State/Region columns to every NYT row; the left join keeps
# rows whose state name has no match in the lookup (Region is NaN there)
region_lookup = cmap[['State', 'Region']]
data = df.merge(region_lookup, left_on='state', right_on='State', how='left')

# per usual we're missing some geo mapping, but for now we will ignore
# those locations. You can use the below commented out code to review the
# missing states
# missing = data.loc[pd.isna(data.Region),'state'].unique()

# remove rows that did not get a region in the merge
data = data.loc[data['Region'].notna()].copy()

# let's use our cases and cases per 100k as a rough proxy for state
# population size
data['pop'] = data['cases_avg'] / (data['cases_avg_per_100k'] / 1e5)

# the proxy is undefined when cases per 100k is 0: x/0 gives +-inf and
# 0/0 gives nan, so normalise every undefined value to nan
data['pop'] = data['pop'].replace([np.inf, -np.inf], np.nan)

# no case data is no cases. 'pop' is deliberately excluded: a blanket
# fillna(0) would turn every gap into a population of 0 and leave nothing
# for the fill below to repair, deflating the regional population sums
data = data.fillna({col: 0 for col in data.columns if col != 'pop'})

# carry population backwards within each state, then forwards so gaps at
# the end of a state's rows are covered as well
# NOTE(review): relies on rows being in date order within each state, as
# they appear to be in the NYT file -- confirm
data['pop'] = data.groupby('state')['pop'].transform(
    lambda s: s.bfill().ffill()
)

# region aggregates: sum of average cases and of estimated population
# for every date/region combination
pdata = data.pivot_table(
    index='date',
    columns='Region',
    values=['cases_avg', 'pop'],
    aggfunc='sum',
)

# aggregate cases avg per 100k: regional cases over regional population,
# dates with a missing ratio in any region are dropped
region_cases = pdata['cases_avg']
region_pop = pdata['pop']
df = region_cases.div(region_pop).mul(1e5).dropna()

# state overview: cases per 100k with one column per state
sdata = data.pivot_table(
    index='date',
    columns='State',
    values='cases_avg_per_100k',
    aggfunc='mean',  # there should only be one value
)
sdata = sdata.fillna(0)

# start and end date of the window shown in the chart
sd = '2020-03-10'
ed = '2021-12-20'

# apply the same date window to the regional and the state level data
window = slice(sd, ed)
df = df.loc[window]
sdf = sdata.loc[window]

# the current worst state is the one ranked first on the last date
# in the window
latest = sdf.iloc[-1]
ranked = latest.sort_values(ascending=False)
ws = ranked.index[0]

# place the worst state's series next to the regional ones for reference
adf = pd.concat([df, sdf[ws]], axis=1)



# construct novem plot, if the name already exists it will
# be updated
linechart = Plot(
    "covid_us_trend",
    type="line",
    title="Covid19 cases by US State",
    name="Covid19 cases by US State",
    # adjacent string literals are joined without any separator, so every
    # fragment that ends a word or sentence needs its own trailing space
    caption=(
        "This chart shows how average daily cases per capita "
        "have changed in different parts of the United States. "
        "The state with the highest recent average cases per "
        f"capita ({ws}) is shown. Data from the New York Times, "
        "calculations by New York Times, Novem. "
        "Data last updated 20 December 2021"
    ),
)

# send data to novem
adf.pipe(linechart)

# get novem url
print(linechart.url)  # https://novem.no/p/Kwjdv