I am using BERTopic to perform the topic modelling, everything works perfectly fine. However, since I am forcing the algorithm to give me 10 topics using nr_topics=10 as output, and when I visualize the topics overtime using
topic_model.visualize_topics_over_time(topics_over_time, top_n_topics=10, width=1250, height=450), some colors are repeated for topics as there are only 7 colors mentioned in the function visualize_topics_over_time. I tried executing the same function in my python notebook with additional color values, but it gives me the following error:
Can someone please help me update the function with additional four colors?
To add colors to the function, you will indeed have to copy the function and change it to include more colors:
import pandas as pd
from typing import List
import plotly.graph_objects as go
from sklearn.preprocessing import normalize
def visualize_topics_over_time(topic_model,
topics_over_time: pd.DataFrame,
top_n_topics: int = None,
topics: List[int] = None,
normalize_frequency: bool = False,
width: int = 1250,
height: int = 450) -> go.Figure:
""" Visualize topics over time
Arguments:
topic_model: A fitted BERTopic instance.
topics_over_time: The topics you would like to be visualized with the
corresponding topic representation
top_n_topics: To visualize the most frequent topics instead of all
topics: Select which topics you would like to be visualized
normalize_frequency: Whether to normalize each topic's frequency individually
width: The width of the figure.
height: The height of the figure.
Returns:
A plotly.graph_objects.Figure including all traces
Usage:
To visualize the topics over time, simply run:
```python
topics_over_time = topic_model.topics_over_time(docs, topics, timestamps)
topic_model.visualize_topics_over_time(topics_over_time)
```
Or if you want to save the resulting figure:
```python
fig = topic_model.visualize_topics_over_time(topics_over_time)
fig.write_html("path/to/file.html")
```
<iframe src="../../getting_started/visualization/trump.html"
style="width:1000px; height: 680px; border: 0px;""></iframe>
"""
colors = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#D55E00", "#0072B2", "#CC79A7"] # DEFINE MORE COLORS HERE
# Select topics
if topics:
selected_topics = topics
elif top_n_topics:
selected_topics = topic_model.get_topic_freq().head(top_n_topics + 1)[1:].Topic.values
else:
selected_topics = topic_model.get_topic_freq().Topic.values
# Prepare data
topic_names = {key: value[:40] + "..." if len(value) > 40 else value
for key, value in topic_model.topic_names.items()}
topics_over_time["Name"] = topics_over_time.Topic.map(topic_names)
data = topics_over_time.loc[topics_over_time.Topic.isin(selected_topics), :]
# Add traces
fig = go.Figure()
for index, topic in enumerate(data.Topic.unique()):
trace_data = data.loc[data.Topic == topic, :]
topic_name = trace_data.Name.values[0]
words = trace_data.Words.values
if normalize_frequency:
y = normalize(trace_data.Frequency.values.reshape(1, -1))[0]
else:
y = trace_data.Frequency
fig.add_trace(go.Scatter(x=trace_data.Timestamp, y=y,
mode='lines',
marker_color=colors[index % 7],
hoverinfo="text",
name=topic_name,
hovertext=[f'<b>Topic {topic}</b><br>Words: {word}' for word in words]))
# Styling of the visualization
fig.update_xaxes(showgrid=True)
fig.update_yaxes(showgrid=True)
fig.update_layout(
yaxis_title="Normalized Frequency" if normalize_frequency else "Frequency",
title={
'text': "<b>Topics over Time",
'y': .95,
'x': 0.40,
'xanchor': 'center',
'yanchor': 'top',
'font': dict(
size=22,
color="Black")
},
template="simple_white",
width=width,
height=height,
hoverlabel=dict(
bgcolor="white",
font_size=16,
font_family="Rockwell"
),
legend=dict(
title="<b>Global Topic Representation",
)
)
return fig
Then, you can use the function as follows:
import re
import pandas as pd
from bertopic import BERTopic
# Prepare data
trump = pd.read_csv('https://drive.google.com/uc?export=download&id=1xRKHaP-QwACMydlDnyFPEaFdtskJuBa6')
trump.text = trump.apply(lambda row: re.sub(r"http\S+", "", row.text).lower(), 1)
trump.text = trump.apply(lambda row: " ".join(filter(lambda x:x[0]!="#", row.text.split())), 1)
trump.text = trump.apply(lambda row: " ".join(re.sub("[^a-zA-Z]+", " ", row.text).split()), 1)
trump = trump.loc[(trump.isRetweet == "f") & (trump.text != ""), :]
timestamps = trump.date.to_list()
tweets = trump.text.to_list()
# Create topics over time
model = BERTopic(verbose=True)
topics, probs = model.fit_transform(tweets)
topics_over_time = model.topics_over_time(tweets, topics, timestamps)
# Visualize topics over time with the updated colors
visualize_topics_over_time(model, topics_over_time)
Related
I have made a scatter plot of the word2vec model using plotly.
I want functionality of highlighting the specific data point on hover along with the top 3 nearest vectors to that.
It would be of great help if anyone can guide me with this or suggest any other option
model
csv
Code:
import gensim
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import plotly.express as px
def get_2d_coordinates(model, words):
arr = np.empty((0,100), dtype='f')
labels = []
for wrd_score in words:
try:
wrd_vector = model.wv.get_vector(wrd_score)
arr = np.append(arr, np.array([wrd_vector]), axis=0)
labels.append(wrd_score)
except:
pass
tsne = TSNE(n_components=2, random_state=0)
np.set_printoptions(suppress=True)
Y = tsne.fit_transform(arr)
x_coords = Y[:, 0]
y_coords = Y[:, 1]
return x_coords, y_coords
ic_model = gensim.models.Word2Vec.load("w2v_IceCream.model")
ic = pd.read_csv('ic_prods.csv')
icx, icy = get_2d_coordinates(ic_model, ic['ITEM_DESC'])
ic_data = {'Category': ic['SUB_CATEGORY'],
'Words':ic['ITEM_DESC'],
'X':icx,
'Y':icy}
ic_df = pd.DataFrame(ic_data)
ic_df.head()
ic_fig = px.scatter(ic_df, x=icx, y=icy, color=ic_df['Category'], hover_name=ic_df['Words'], title='IceCream Data')
ic_fig.show()
In plotly-python, I don't think there's an easy way of retrieving the location of the cursor. You can attempt to use go.FigureWidget to highlight a trace as described in this answer, but i think you're going to be limited with with plotly-python and i'm not sure if highlighting the closest n points will be possible.
However, I believe that you can accomplish what you want in plotly-dash since callbacks are supported - meaning you would be able to retrieve location of your cursor and then calculate the n closest data points to your cursor and highlight the data points as needed.
Below is an example of such a solution. If you haven't seen it before, it looks complicated, but what is happening is that I am taking the point where you clicked as an input. plotly is plotly.js under the hood so it comes us in the form of a dictionary (and not some kind of plotly-python object). Then I calculate the closest three data points to the clicked input point by comparing the coordinates of every other point in the dataframe, add the information from the three closest points as traces to the input with the color teal (or any color of your choosing), and send this modified input back as the output, and update the figure.
I am using click instead of hover because hover would cause the highlighted points to flicker too much as you drag your mouse through the points.
Also the dash app doesn't work perfectly as I believe there is some issue when you double click on points (you can see me click once in the gif below before getting it to start working), but this basic framework is hopefully close enough to what you want. Cheers!
import gensim
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import plotly.express as px
import plotly.graph_objects as go
import json
import dash
from dash import dcc, html, Input, Output
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
def get_2d_coordinates(model, words):
arr = np.empty((0,100), dtype='f')
labels = []
for wrd_score in words:
try:
wrd_vector = model.wv.get_vector(wrd_score)
arr = np.append(arr, np.array([wrd_vector]), axis=0)
labels.append(wrd_score)
except:
pass
tsne = TSNE(n_components=2, random_state=0)
np.set_printoptions(suppress=True)
Y = tsne.fit_transform(arr)
x_coords = Y[:, 0]
y_coords = Y[:, 1]
return x_coords, y_coords
ic_model = gensim.models.Word2Vec.load("w2v_IceCream.model")
ic = pd.read_csv('ic_prods.csv')
icx, icy = get_2d_coordinates(ic_model, ic['ITEM_DESC'])
ic_data = {'Category': ic['SUB_CATEGORY'],
'Words':ic['ITEM_DESC'],
'X':icx,
'Y':icy}
ic_df = pd.DataFrame(ic_data)
ic_fig = px.scatter(ic_df, x=icx, y=icy, color=ic_df['Category'], hover_name=ic_df['Words'], title='IceCream Data')
NUMBER_OF_TRACES = len(ic_df['Category'].unique())
ic_fig.update_layout(clickmode='event+select')
app.layout = html.Div([
dcc.Graph(
id='ic_figure',
figure=ic_fig)
])
## we take the 4 closest points because the 1st closest point will be the point itself
def get_n_closest_points(x0, y0, df=ic_df[['X','Y']].copy(), n=4):
"""we can save some computation time by looking for the smallest distance^2 instead of distance"""
"""distance = sqrt[(x1-x0)^2 + (y1-y0)^2]"""
"""distance^2 = [(x1-x0)^2 + (y1-y0)^2]"""
df["dist"] = (df["X"]-x0)**2 + (df["Y"]-y0)**2
## we don't return the point itself which will always be closest to itself
return df.sort_values(by="dist")[1:n][["X","Y"]].values
#app.callback(
Output('ic_figure', 'figure'),
[Input('ic_figure', 'clickData'),
Input('ic_figure', 'figure')]
)
def display_hover_data(clickData, figure):
print(clickData)
if clickData is None:
# print("nothing was clicked")
return figure
else:
hover_x, hover_y = clickData['points'][0]['x'], clickData['points'][0]['y']
closest_points = get_n_closest_points(hover_x, hover_y)
## this means that this function has ALREADY added another trace, so we reduce the number of traces down the original number
if len(figure['data']) > NUMBER_OF_TRACES:
# print(f'reducing the number of traces to {NUMBER_OF_TRACES}')
figure['data'] = figure['data'][:NUMBER_OF_TRACES]
# print(figure['data'])
new_traces = [{
'marker': {'color': 'teal', 'symbol': 'circle'},
'mode': 'markers',
'orientation': 'v',
'showlegend': False,
'x': [x],
'xaxis': 'x',
'y': [y],
'yaxis': 'y',
'type': 'scatter',
'selectedpoints': [0]
} for x,y in closest_points]
figure['data'].extend(new_traces)
# print("after\n")
# print(figure['data'])
return figure
if __name__ == '__main__':
app.run_server(debug=True)
I am trying to build a visual that tracks widget counts by category using hbar. The source data is not aggregated. This is what it looks like:
This data is aggregated at MktCatKey level, but I want to group by category and then perform a calculation on the counts. Lets say if the category is Category_A, I want to add +10 to the counts. Finally, I want to display both current and projected on a visual.
This is how far I have gotten:
query = open('workingsql.sql')
dataset = pd.read_sql_query(query.read(), cnxn)
query.close()
p = figure()
CurrentCount = dataset.Current
ProjCount = dataset.Projected
Cat = dataset.Category
grouped = dataset.groupby('Category')['Current','Projected'].sum()
source = ColumnDataSource(grouped)
p = figure(y_range=Cat)
p.hbar(y=Cat, right = CurrentCount, left = 0, height = 0.5,source=source, fill_color="#D7D7D7")
p.hbar(y=Cat, right = ProjCount, left = 0, height = 0.5,source=source, fill_color="#E21150")
hover = HoverTool()
hover.tooltips = [("Totals", "#Current Current Count")]
hover.mode = 'hline'
p.add_tools(hover)
show(p)
I was able to get this to work if I source directly from the dataset. But since I’m trying to perform a calculation, I cant use the source directly. I’m not fully familiar on how to do an if statement on CurrentCount to see if it’s for Category_A or not but that’s where I’m at.
I have additional things I want to do on this dataset (like bring in a goals dataset and plot against that), but taking small steps for now. Any help is appreciated.
Working code below:
import pyodbc
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, Div, Select, Slider, TextInput
from bokeh.embed import components
from bokeh.models.tools import HoverTool
query = open('workingsql.sql')
dataset = pd.read_sql_query(query.read(), cnxn)
query.close()
p = figure()
CurrentCount = dataset.Current
ProjCount = dataset.Projected
Cat = dataset.Category
grouped = dataset.groupby('Category')['Current','Projected'].sum()
source = ColumnDataSource(pd.DataFrame(grouped))
Category = source.data['Category'].tolist()
p = figure(y_range=Category)
p.hbar(y='Category', right = 'Current', left = 0, height = 0.5,source=source, fill_color="#D7D7D7")
p.hbar(y='Category', right = 'Projected', left = 0, height = 0.5,source=source, fill_color="#E21150")
hover = HoverTool()
hover.tooltips = [("Totals", "#Current Current Count")]
hover.mode = 'hline'
p.add_tools(hover)
show(p)
I would like to create a scattermapbox for indonesia for various statistics (population, GDP, etc.) on a regional basis.
I am working with a geopandas file from github.
The example on the plotly website creates multiple files for each layer and then uses the github link as source.
#republican counties
source = 'https://raw.githubusercontent.com/plotly/datasets/master/florida-red-data.json'
#democrat counties
source = 'https://raw.githubusercontent.com/plotly/datasets/master/florida-blue-data.json'
My question therefore is, how can I use the pandas dataframe to create layer dict for every region and use that as a source (also colouring of each region by specific values in other dataframes).
Should that not be possible at all and it is necessary to create a seperate file for each region how would I do that? My attempt (lines 16-20) doesn't seem to work
import pandas as pd
import json
import string
import plotly
from plotly.graph_objs import Scattermapbox, Layout
ID_regions = pd.read_json('https://raw.githubusercontent.com/N1x0/indonesia-geojson/master/indonesia-edit.geojson')
region_names = []
for region in ID_regions['features']:
region_names.append(state['properties']['name'])
print(region_names)
#This shit creates json and doesn't work
def create_region_files():
for i in range(len(ID_regions)):
region_data = ID_regions.iloc[i,:]
region_data.to_json(f'C:\\Users\\nicho\\Desktop\\Waste Management\\Map_Maker\\ID_regions\\{region_names[i]}.json')
i += 1
def create_Chloropleth():
mapbox_access_token = 'My Access Key'
data = [
Scattermapbox(
lat=['45.5017'],
lon=['-73.5673'],
mode='markers',
)
]
layout = Layout(
height=900,
autosize=True,
showlegend=False,
hovermode='closest',
mapbox=dict(
layers=[
dict(
sourcetype = 'geojson',
source = 'https://raw.githubusercontent.com/N1x0/indonesia-geojson/master/indonesia-edit.geojson',
type = 'fill',
color = 'green'
),
dict(
sourcetype = 'geojson',
source = 'https://raw.githubusercontent.com/N1x0/indonesia-geojson/master/west-sulawesi.json',
type = ' fill',
color = 'red',
)
],
accesstoken=mapbox_access_token,
bearing=0,
center=dict(
lat=0.7893,
lon=113.9213
),
pitch=0,
zoom=4.5,
style='light'
),
)
fig = dict(data=data, layout=layout)
plotly.offline.plot(fig, filename='Chloropleth_Province_Population.html')
create_Chloropleth()
Thank you for the help!
Ok took me a while but i figured it all out. Big thanks to Emma Grimaldi over at Medium and Vince Pota. Their posts were what helped me through most of it.
So here the answers to my own question in order:
It is not necessary to create an individual file for each region. I.e. you can use a pandas dataframe to match names of the regions in the json and that'll work just fine.
with open('indonesia-en.geojson') as f:
geojson = json.load(f)
def make_sources(downsample = 0):
sources = []
geojson_copy = copy.deepcopy(geojson['features']) # do not overwrite the original file
for feature in geojson_copy:
if downsample > 0:
coords = np.array(feature['geometry']['coordinates'][0][0])
coords = coords[::downsample]
feature['geometry']['coordinates'] = [[coords]]
sources.append(dict(type = 'FeatureCollection',
features = [feature])
)
return sources
So you just extract the coordinates from the geojson and append them to a a list of dicts[{}].
How to use this list to dynamically create layers:
MAPBOX_APIKEY = "Your API Key"
data = dict(type='scattermapbox',
lat=lats,
lon=lons,
mode='markers',
text=hover_text,
marker=dict(size=1,
color=scatter_colors,
showscale = True,
cmin = minpop/1000000,
cmax = maxpop/1000000,
colorscale = colorscale,
colorbar = dict(
title='Population in Millions'
)
),
showlegend=False,
hoverinfo='text'
)
layers=([dict(sourcetype = 'geojson',
source =sources[k],
below="water",
type = 'line', # the borders
line = dict(width = 1),
color = 'black',
) for k in range(n_provinces) # where n_provinces = len(geojson['features'])
] +
[dict(sourcetype = 'geojson',
source =sources[k],
type = 'fill', # the area inside the borders
color = scatter_colors[k],
opacity=0.8
) for k in range(n_provinces) # where n_provinces = len(geojson['features'])
]
)
So the solution here is too set sources = sources[k] I.e. the list with the dict of lat/long values created in make_sources()
How to color the layers accordingly color=scatter_colors[k]
Using the linked example I used 3 functions
3.1 scalarmappable
#sets colors based on min and max values
def scalarmappable(cmap, cmin, cmax):
colormap = cm.get_cmap(cmap)
norm = Normalize(vmin=cmin, vmax=cmax+(cmax*0.10)) #vmax get's increased 10 percent because otherwise the most populous region doesnt get colored
return cm.ScalarMappable(norm=norm, cmap=colormap)
3.2 scatter_colors
#uses matplotlib to create colors based on values and sets grey for isnan value
def get_scatter_colors(sm, df):
grey = 'rgba(128,128,128,1)'
return ['rgba' + str(sm.to_rgba(m, bytes = True, alpha = 1)) if not np.isnan(m) else grey for m in df]
3.3 colorscale
#defines horizontal range and corresponding values for colorscale
def get_colorscale(sm, df, cmin, cmax):
xrange = np.linspace(0, 1, len(df))
values = np.linspace(cmin, cmax, len(df))
return [[i, 'rgba' + str(sm.to_rgba(v, bytes = True))] for i,v in zip(xrange, values) ]
Then variables using the functions are set
#assigning values
colormap = 'nipy_spectral'
minpop = stats['population'].min()
maxpop = stats['population'].max()
sources = make_sources(downsample=0)
lons, lats = get_centers()
sm = scalarmappable(colormap, minpop, maxpop)
scatter_colors = get_scatter_colors(sm, stats['population'])
colorscale = get_colorscale(sm, stats, minpop, maxpop)
hover_text = get_hover_text(stats['population'])
So if anyone had some problems with this answer can help you progress :)
I'm trying to create an interactive timeseries chart with more than 20 lines of data using the Altair module in Python.
The code to create the dataframe of the shape I'm looking at is here:
import numpy as np
import altair as alt
year = np.arange(1995, 2020)
day = np.arange(1, 91)
def gen_next_number(previous, limit, max_reached):
if max_reached:
return np.NAN, True
increment = np.random.randint(0, 10)
output = previous + increment
if output >= 100:
output = 100
max_reached = True
return output, max_reached
def gen_list():
output_list = []
initial = 0
limit = 100
max_reached = False
value = 0
for i in range(1, 91):
value, max_reached = gen_next_number(value, limit, max_reached)
if max_reached:
value = np.NAN
output_list.append(value)
return output_list
df = pd.DataFrame(index = day, columns=year )
for y in year:
data = gen_list()
df[y] = data
df['day'] = df.index
df = df.melt("day")
df = df.dropna(subset=["value"])
I can use the following Altair code to produce the initial plot, but it's not pretty:
alt.Chart(df).mark_line().encode(
x='day:N',
color="variable:N",
y='value:Q',
tooltip=["variable:N", "value"]
)
But when I've tried this code to create something interactive, it fails:
highlight = alt.selection(type='single', on='mouseover',
fields='variable', nearest=True, empty="none")
alt.Chart(plottable).mark_line().encode(
x='day:N',
color="variable:N",
y=alt.condition(highlight, 'value:Q', alt.value("lightgray")),
tooltip=["variable:N", "value"]
).add_selection(
highlight
)
It fails with the error:
TypeError: sequence item 1: expected str instance, int found
Can someone help me out?
Also, is it possible to make the legend interactive? So a hover over a year highlights a line?
Two issues:
In alt.condition, you need to provide a list of fields rather than a single field
The y encoding does not accept a condition. I suspect you meant to put the condition on color.
With these two fixes, your chart works:
highlight = alt.selection(type='single', on='mouseover',
fields=['variable'], nearest=True, empty="none")
alt.Chart(df).mark_line().encode(
x='day:N',
y='value:Q',
color=alt.condition(highlight, 'variable:N', alt.value("lightgray")),
tooltip=["variable:N", "value"]
).add_selection(
highlight
)
Because the selection doesn't change z-order, you'll find that the highlighted line is often hidden behind other gray lines. If you want it to pop out in front, you could use an approach similar to the one in https://stackoverflow.com/a/55796860/2937831
I would like to create a multi-line plot similar to the one above
without a legend
without hovering or mouseover.
Would simply like to pass a highlighted_value and have a single line be highlighted.
I have modified the code because I am not terribly familiar with the proper use of "selection" and recognize that it is somewhat kludgy to get the result that I want.
Is there a cleaner way to do this?
highlight = alt.selection(type='single', on='mouseover',
fields=['variable'], nearest=True, empty="none")
background = alt.Chart(df[df['variable'] != 1995]).mark_line().encode(
x='day:N',
y='value:Q',
color=alt.condition( highlight, 'variable:N', alt.value("lightgray")),
tooltip=["variable:N", "value"],
).add_selection(
highlight
)
foreground = alt.Chart(df[df['variable'] == 1995]).mark_line(color= "blue").encode(
x='day:N',
y='value:Q',
color=alt.Color('variable',legend=None)
)
foreground + background
I am new to plotly and working on a script to generate a graph based on some results pulled from a database. However when I send the data over to plotly, only the first data point for each of the three traces is being graphed. I've verified that the lists contain the right data, I've even simply pasted the lists in instead of dynamically creating the variables. Unfortunately each time only the first data point is being graphed. Does anyone know what I am missing here? I am also open to another library if needed.
Is it also possible to have the x axis show as a string?
import plotly.plotly as py
import plotly.graph_objs as go
# Custom database class, works fine.
from classes.database import DatabaseConnection
# Database Connections and instances
db_instance = DatabaseConnection()
db_conn = db_instance.conn
db_cur = db_instance.cur
def main():
# Get a list of versions and their stats.
db_cur.execute(
"""
select row_to_json(x) from
(SELECT
versions.version_number,
cast(AVG(results.average) as double precision) as average,
cast(AVG(results.minimum) as double precision) as minimum,
cast(AVG(results.maximum) as double precision) as maximum
FROM versions,results
WHERE
versions.version_number = results.version_number
GROUP BY
versions.version_number) x;
"""
)
versions = []
average = []
minimum = []
maximum = []
unclean = db_cur.fetchall()
# Create lists for x and y coordinates.
for row in unclean:
versions.append(row[0]['version_number'])
average.append(int(row[0]['average']))
minimum.append(int(row[0]['minimum']))
maximum.append(int(row[0]['maximum']))
grph_average = go.Scatter(
x=versions,
y=average,
name = 'Average',
mode='lines',
)
grph_minimum = go.Scatter(
x=versions,
y=minimum,
name = 'Minimum',
mode='lines',
)
grph_maximum = go.Scatter(
x=versions,
y=maximum,
name = 'Maximum',
mode='lines',
)
data = go.Data([grph_average, grph_minimum, grph_maximum])
# Edit the layout
layout = dict(title = 'Responses',
xaxis = dict(title = 'Versions'),
yaxis = dict(title = 'Ms'),
)
fig = dict(data=data, layout=layout)
py.plot(fig, filename='response-times', auto_open=False)
if __name__ == '__main__':
main()
The data that query returns is as follows, if you want to plug in the values :
versions = ['6.1', '5.0', '5.2']
average = [11232, 29391, 10429]
minimum = [3641, 7729, 3483]
maximum = [57440, 62535, 45201]
Here is some matplotlib that might get you started on this:
import matplotlib.pyplot as plt
versions = ['6.1', '5.0', '5.2']
average = [11232, 29391, 10429]
minimum = [3641, 7729, 3483]
maximum = [57440, 62535, 45201]
plt.plot(minimum)
plt.plot(average)
plt.plot(maximum)
plt.xticks(range(len(versions)), versions)
It looks like it was an issue with my x axis. By adding some text before the version number and specifically type casting to a string I was able to get the graphs to generate properly.
# Create lists for x and y coordinates.
for row in unclean:
versions.append("Version: " + str(row[0]['version_number']))
average.append(int(row[0]['average']))
minimum.append(int(row[0]['minimum']))
maximum.append(int(row[0]['maximum']))