How can I make this matplotlib "word cloud" graph look better? - python

For the sake of learning, I'm working on this word cloud program, that counts the number of times a word appears in a text, and prints it out in a kind of "word-cloud" image.
The program works fine, but I would like to address a couple of aesthetic things, such as:
How do I remove the numbers on the x-axis and the y-axis?
Is it possible to remove the axes completely?
Sometimes a word is positioned on top of another. Does anyone know how to keep the words from printing over each other and have them positioned nicely next to each other instead?
The output is
... and...
I would like it to look something like this (or at least as close to it as possible)
The code in question is
import matplotlib.pyplot as plt
import numpy as np

filename = "adventure.txt"

# Count how often each word occurs, ignoring case and every non-letter character.
wordcounts = {}
with open(filename) as infile:  # the context manager closes the file when done
    for line in infile:
        for word in line.split():
            w = "".join(e for e in word if e.isalpha()).lower()
            if not w:
                # Tokens made only of digits/punctuation collapse to "" and are not words
                continue
            wordcounts[w] = wordcounts.get(w, 0) + 1

# Put all words in a list, most frequent first
words = sorted(wordcounts, key=wordcounts.get, reverse=True)

# Set maximum font size to 50: the most frequent word (words[0]) gets size 50
# and every other word is scaled down in proportion to its count.
scale = 50 / wordcounts[words[0]] if words else 1

# Set up empty plot with limits on x-axis and y-axis
plt.axes(xlim=(0, 100), ylim=(0, 100))

# Plot 50 most frequent words with size=frequency
N = min(len(words), 50)
colors = ["r", "g", "b", "m", "c", "k"]
for i in range(N):
    # Random position; the upper bound of 90 leaves some room for the text itself
    x = np.random.uniform(0, 90)
    y = np.random.uniform(0, 90)
    freq = wordcounts[words[i]]
    col = colors[i % len(colors)]  # cycle through all six colours
    plt.text(x, y, words[i], fontsize=scale * freq, color=col)
plt.show()
All help is welcomed and appreciated.

Define the figure object without axes' ticks and labels:
# Create the figure and a single frameless axes without ticks or tick labels.
# The data limits are set here as well: text artists do not take part in
# autoscaling, so without explicit limits the axes would keep its default 0..1
# range and words placed at coordinates 0..90 would end up outside the view.
fig = plt.figure(figsize=(10, 10), num=1, clear=True)
ax = plt.subplot(1, 1, 1, xticks=[], yticks=[], frameon=False,
                 xlim=(0, 100), ylim=(0, 100))
Remove this line:
plt.axes(xlim=(0,100), ylim=(0,100) )
Concluding lines:
colors = ["r", "g", "b", "m", "c", "k"]
for i in range(N):
    # Random position for each word inside the 0..100 plotting area
    x = np.random.uniform(0, 90)
    y = np.random.uniform(0, 90)
    freq = wordcounts[words[i]]
    # Cycle through every colour in the list ("i % 5" never reached "k")
    col = colors[i % len(colors)]
    ax.text(x, y, words[i], fontsize=scale * freq, color=col)
plt.show()
To make your plot look similar to the example you provided, it will take a lot of manual trial and error: you'll have to plug in coordinates for each word and decide where you think each word looks best in terms of x-y coordinates. One tip is that the largest word should be plotted last and the smallest word first, so that the larger text won't have smaller text overlaying it. Because `words` is sorted from most to least frequent, i == 0 is the largest word and i == N - 1 the smallest, so this means running the loop in reverse (for example `for i in reversed(range(N))`). You could also focus on having non-overlapping coordinates with enough distance so that the words aren't too close to one another; alternatively, if you want the words to be touching, you could scale the degree to which they overlap one another. Have a colormap that randomizes the RGB list so that the colors are more "scattered" (rather than having all of the yellow text in the top right corner, for example). Maybe center the larger text and have the smaller text more towards the periphery. The list goes on, but I think you get the idea.

Related

Plotly.py: fill to zero, different color for positive/negative

With Plotly, I can easily plot a single line and fill the area between the line and y == 0:
import plotly.graph_objects as go
# A single line trace whose area down to y == 0 is filled
trace = go.Scatter(
    x=[1, 2, 3, 4],
    y=[-2, -1.5, 1, 2.5],
    fill='tozeroy',
    mode='lines',
)
fig = go.Figure()
fig.add_trace(trace)
fig.show()
How can I split the filled area in two? In particular, filling with red where y < 0 and with green where y > 0.
I would like to maintain the line as continuous. That means, I am not interested in just drawing two separate filled polygons.
Note that the line does not necessarily have values at y == 0.
I was in need of such a graph so I wrote a function.
def resid_fig(resid, tickvalues):
    """
    Plot `resid` as a line filled to zero, red where it is negative and green where it is positive.

    The series is cut at every sign change and each run is drawn as its own filled trace. A zero is
    padded onto both sides of every cut so that neighbouring polygons meet on the x-axis.

    resid: The y-axis points to be plotted. x-axis is assumed to be linearly increasing
    tickvalues: The values you want to be displayed as ticklabels on x-axis. Works for hoverlabels too. Has to be
                same length as `resid`. (This is necessary to ignore 'gaps' in graph where two polygons meet.)

    Returns nothing; the figure is displayed with fig.show().
    Values in `resid` that are exactly 0 are not treated as a sign change, so no cut is made at them.
    """
    # Imported here so the function can be pasted and run on its own
    import numpy as np
    import plotly.graph_objects as go

    resid = np.asarray(resid)

    #Adjusting array with paddings to connect polygons at zero line on x-axis
    sign_flips = np.where(np.abs(np.diff(np.sign(resid))) == 2)[0] + 1
    split_array = [np.insert(np.append(run, 0), 0, 0) for run in np.split(resid, sign_flips)]
    # The outer ends of the series do not meet another polygon, so drop their padding
    split_array[0] = np.delete(split_array[0], 0)
    split_array[-1] = np.delete(split_array[-1], -1)

    # x positions for each run; consecutive runs share one position (the padded zero)
    index_array = []
    start_digit = 0
    for run in split_array:
        index_array.append(np.arange(start_digit, start_digit + len(run)))
        start_digit += len(run) - 1

    #Making an array for ticklabels
    # Every run after the first starts on the position it shares with its predecessor;
    # those junction positions get an empty tick label.
    junctions = {int(idx[0]) for idx in index_array[1:]}
    custom_tickdata = []
    junctions_passed = 0
    for pos in range(int(index_array[-1][-1]) + 1):
        if pos in junctions:
            custom_tickdata.append('')
            junctions_passed += 1
        else:
            # Shift back by the number of padded positions to find the matching tick value
            custom_tickdata.append(tickvalues[pos - junctions_passed])

    #Making an array for hoverlabels
    # One sub-list per trace; each '' opens the sub-list of the next trace
    custom_hoverdata = []
    sublist = []
    for label in custom_tickdata:
        if label == '':
            custom_hoverdata.append(sublist)
            sublist = []
        sublist.append(label)
    custom_hoverdata.append(sublist)

    #Creating figure
    fig = go.Figure()
    for idx, (x, y) in enumerate(zip(split_array, index_array)):
        # x[1] is the first real value of a run that starts with a padded zero
        color = 'rgba(219,43,57,0.8)' if x[1]<0 else 'rgba(47,191,113,0.8)'
        # The first run has no leading padding, so its first real value is x[0]
        if (idx==0 and x[0] < 0):
            color = 'rgba(219,43,57,0.8)'
        fig.add_scatter(y=x, x=y, fill='tozeroy', fillcolor=color, line_color=color, customdata=custom_hoverdata[idx],
                        hovertemplate='%{customdata}<extra></extra>',legendgroup='mytrace',
                        showlegend=False if idx>0 else True)
    fig.update_layout()
    fig.update_xaxes(tickformat='', hoverformat='',tickmode = 'array',
                     tickvals = np.arange(index_array[-1][-1]+1),
                     ticktext = custom_tickdata)
    fig.update_traces(mode='lines')
    fig.show()
Example-
resid_fig([-2,-5,7,11,3,2,-1,1,-1,1], [1,2,3,4,5,6,7,8,9,10])
Now, for the caveats-
It does use separate polygons but I have combined all the traces into a single legendgroup so clicking on legend turns all of them on or off. About the legend colour, one way is to change 0 to 1 in showlegend=False if idx>0 in the fig.add_scatter() call. It then shows two legends, red and green still in the same legendgroup though so they still turn on and off together.
The function works by first separating continuous positive and negative values into arrays, adding 0 to the end and start of each array so the polygons can meet at the x-axis. This means the figure does not scale as well but depending on the use-case, it might not matter as much. This does not affect the hoverlabels or ticklabels as they are blank on these converging points.
The most important one: the graph does not work as intended when any of the points in the passed array is 0. I'm sure it can be modified to handle that, but I have no use for it and the question doesn't ask for it either.

matplotlib widget updates the wrong data

I'm making a plot to compare band structure calculations from two different methods. This means plotting multiple lines for each set of data. I want to have a set of widgets that controls each set of data separately. The code below works if I only plot one set of data, but I can't get the widgets to work properly for two sets of data.
#!/usr/bin/env python3
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, TextBox
#cols = ['blue', 'red', 'green', 'purple']
# One colour per data file, written as '#rrggbb' hex strings
cols = ['#3f54bf','#c14142','#59bf3f','#b83fbf']
# Data files to compare
finam = ['wan_band.dat','wan_band.pwx.dat']
#finam = ['wan_band.dat'] # this works
# Bottom margin of the plot area; grows with the number of data files
lbot = len(finam)*0.09 + 0.06
fig, ax = plt.subplots()
plt.subplots_adjust(bottom=lbot)
ax.margins(x=0) # lines go to the edge of the horizontal axes
def setlines(lines, txbx1, txbx2):
    ''' Show the lines whose 1-based position lies between txbx1 and txbx2 (inclusive); hide the others.

    txbx1 and txbx2 are the text box contents; if either cannot be converted
    to an integer, 'Invalid range' is printed and nothing is changed.
    '''
    try:
        lowest = int(txbx1) - 1
        highest = int(txbx2) - 1
        for position, band in enumerate(lines):
            band.set_visible(lowest <= position <= highest)
        plt.draw()
    except ValueError:
        print('Invalid range')
#end def setlines(lines, txbx1, txbx2):
def alphalines(lines, valin):
    ''' set lines' opacity

    lines: the Line2D objects to update
    valin: opacity from 0 (transparent) to 1 (opaque), e.g. the slider value
    '''
    # Work only on the lines that were passed in and set their alpha directly.
    # This avoids reading the module-level loop variables bcnt and cnt, which
    # hold the values of the last file loaded once the loading loop has finished.
    for line in lines:
        line.set_alpha(valin)
    plt.draw()
#end def alphalines(lines, valin):
# Per-file containers: element i belongs to finam[i]
lines = [0]*len(finam) # 2d list to hold Line2Ds
txbox1 = [0]*len(finam) # list of Lo Band TextBoxes
txbox2 = [0]*len(finam) # list of Hi Band TextBoxes
alslid = [0]*len(finam) # list of Line Opacity Sliders
for cnt, fnam in enumerate(finam):
    # Count the rows before the first blank line
    # (presumably the number of k-points in one band -- confirm against the file format)
    ptcnt = 0 # point count
    fid = open(fnam, 'r')
    fiit = iter(fid)
    for line in fiit:
        if line.strip() == '' :
            break
        ptcnt += 1
    fid.close()
    bandat_raw = np.loadtxt(fnam)
    # Number of bands = total number of rows / rows per band
    bcnt = int(np.round((bandat_raw.shape[0] / (ptcnt))))
    print(ptcnt)
    print(bcnt)
    # get views of the raw data that are easier to work with
    kbandat = bandat_raw[:ptcnt,0] # k point length along path
    ebandat = bandat_raw.reshape((bcnt,ptcnt,2))[:,:,1] # band energy # k-points
    lines[cnt] = [0]*bcnt # point this list element to another list
    for ib in range(bcnt):
        #l, = plt.plot(kbandat, ebandat[ib], c=cols[cnt],lw=1.0)
        l, = ax.plot(kbandat, ebandat[ib], c=cols[cnt],lw=1.0)
        lines[cnt][ib] = l
    # One row of widgets per file, stacked upwards from the bottom of the figure
    y0 = 0.03 + 0.07*cnt
    bxht = 0.035
    axbox1 = plt.axes([0.03, y0, 0.08, bxht]) # x0, y0, width, height
    axbox2 = plt.axes([0.13, y0, 0.08, bxht])
    txbox1[cnt] = TextBox(axbox1, '', initial=str(1))
    txbox2[cnt] = TextBox(axbox2, '', initial=str(bcnt))
    # NOTE: the lambdas below look up cnt when they are *called*, not when they
    # are created, so once the loop has finished every callback sees the last
    # value of cnt and acts on the last data set.
    txbox1[cnt].on_submit( lambda x: setlines(lines[cnt], x, txbox2[cnt].text) )
    txbox2[cnt].on_submit( lambda x: setlines(lines[cnt], txbox1[cnt].text, x) )
    axalpha = plt.axes([0.25, y0, 0.65, bxht])
    alslid[cnt] = Slider(axalpha, '', 0.1, 1.0, valinit=1.0)
    salpha = alslid[cnt]
    alslid[cnt].on_changed( lambda x: alphalines(lines[cnt], x) )
#end for cnt, fnam in enumerate(finam):
# Column headings, placed relative to the widget axes created in the last iteration
plt.text(0.01, 1.2, 'Lo Band', transform=axbox1.transAxes)
plt.text(0.01, 1.2, 'Hi Band', transform=axbox2.transAxes)
plt.text(0.01, 1.2, 'Line Opacity', transform=axalpha.transAxes)
plt.show()
All the widgets only control the last data set plotted instead of the individual data sets I tried to associate with each widget. Here is a sample output:
Here the bottom slider should be changing the blue lines' opacity, but instead it changes the red lines' opacity. Originally the variables txbox1, txbox2, and alslid were not lists. I changed them to lists though to ensure they weren't garbage collected but it didn't change anything.
Here is the test data set1 and set2 I've been using. They should be saved as files 'wan_band.dat' and 'wan_band.pwx.dat' as per the hard coded list finam in the code.
I figured it out: the lambdas look up the loop variable (cnt) when they are called, not when they are created, so every callback was being evaluated with the last value of the loop variable. Switching to functools.partial, which binds the argument values at creation time, fixed the issue.

Adding a single label to the legend for a series of different data points plotted inside a designated bin in Python using matplotlib.pyplot.plot()

I have a script for plotting astronomical data of redmapping clusters using a csv file. I can read the data points from it and want to plot them using different colors depending on their redshift values: I am binning the dataset into 3 bins (0.1-0.2, 0.2-0.25, 0.25-0.31) based on the redshift.
The problem arises with my code after I distinguish to what bin the datapoint belongs: I want to have 3 labels in the legend corresponding to red, green and blue data points, but this is not happening and I don't know why. I am using plot() instead of scatter() as I also had to do the best fit from the data in the same figure. So everything needs to be in 1 figure.
import numpy as np
import matplotlib.pyplot as py
import csv
# Read the cluster table. NOTE(review): the "rU" open mode and xrange() below
# indicate this was written for Python 2.
z = open("Sheet4CSV.csv","rU")
data = csv.reader(z)
x = []
y = []
ylow = []
yupp = []
xlow = []
xupp = []
redshift = []
# Columns used: 1 = redshift, 2 = x, 3/4 = lower/upper x bound, 5 = y, 6/7 = lower/upper y bound
for r in data:
    x.append(float(r[2]))
    y.append(float(r[5]))
    xlow.append(float(r[3]))
    xupp.append(float(r[4]))
    ylow.append(float(r[6]))
    yupp.append(float(r[7]))
    redshift.append(float(r[1]))
from operator import sub
# Asymmetric error bar lengths: distance from each value to its lower/upper bound
xerr_l = map(sub,x,xlow)
xerr_u = map(sub,xupp,x)
yerr_l = map(sub,y,ylow)
yerr_u = map(sub,yupp,y)
py.xlabel("$Original\ Tx\ XCS\ pipeline\ Tx\ keV$")
py.ylabel("$Iterative\ Tx\ pipeline\ keV$")
py.xlim(0,12)
py.ylim(0,12)
py.title("Redmapper Clusters comparison of Tx pipelines")
ax1 = py.subplot(111)
##Problem starts here after the previous line##
# NOTE(review): the outer loop variable p is overwritten on the first line of
# the inner loop, so the outer loop only repeats the inner loop once per entry
# of redshift. Every ax1.plot() call below creates a separate artist for a
# single point, which is why enabling label= gives one legend entry per call.
for p in redshift:
    for i in xrange(84):
        p=redshift[i]
        if 0.1<=p<0.2:
            ax1.plot(x[i],y[i],color="b", marker='.', linestyle = " ")#, label = "$z < 0.2$")
            exit  # bare name, not a call: this statement does nothing
        if 0.2<=p<0.25:
            ax1.plot(x[i],y[i],color="g", marker='.', linestyle = " ")#, label="$0.2 \leq z < 0.25$")
            exit
        if 0.25<=p<=0.3:
            ax1.plot(x[i],y[i],color="r", marker='.', linestyle = " ")#, label="$z \geq 0.25$")
            exit
##There seems nothing wrong after this point##
py.errorbar(x,y,yerr=[yerr_l,yerr_u],xerr=[xerr_l,xerr_u], fmt= " ",ecolor='magenta', label="Error bars")
# Straight-line (degree 1) least-squares fit, evaluated on 100 points from 0 to 12
cof = np.polyfit(x,y,1)
p = np.poly1d(cof)
l = np.linspace(0,12,100)
py.plot(l,p(l),"black",label="Best fit")
py.plot([0,15],[0,15],"black", linestyle="dotted", linewidth=2.0, label="line $y=x$")
py.grid()
box = ax1.get_position()
# NOTE(review): x1/y1 are the upper-right corner of the box; x0/y0 (with a
# reduced width) may have been intended to make room for the legend -- confirm.
ax1.set_position([box.x1,box.y1,box.width, box.height])
py.legend(loc='center left',bbox_to_anchor=(1,0.5))
py.show()
In the 1st 'for' loop, I have indexed every value 'p' in the list 'redshift' so that bins can be created using 'if' statement. But if I add the labels that are hashed out against each py.plot() inside the 'if' statements, each data point 'i' that gets plotted in the figure as an intersection of (x[i],y[i]) takes the label and my entire legend attains in total 87 labels (including the 3 mentioned in the code at other places)!!!!!!
I essentially need 1 label for each bin...
Please tell me what needs to be done after the bins are created and the py.plot() commands are used... Thanks in advance :-)
Sorry I cannot post my image here due to low reputation!
The data 'appended' for x, y and redshift lists from the csv file are as follows:
x=[5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547]
y=[5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677]
redshift = [0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19]
Working with numerical data like this, you should really consider using a numerical library, like numpy.
The problem in your code arises from processing each record (a coordinate (x,y) and the corresponding value redshift) one at a time. You are calling plot for each point, thereby creating legends for each of those 84 datapoints. You should consider your "bins" as groups of data that belong to the same dataset and process them as such. You could use "logical masks" to distinguish between your "bins", as shown below.
It's also not clear why you call exit after each plotting action.
import numpy as np
import matplotlib.pyplot as plt
x = np.array([5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547])
y = np.array([5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677])
redshift = np.array([0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19])
# Boolean masks selecting the points that fall into each redshift bin
bin1 = (0.1 <= redshift) & (redshift < 0.2)
bin2 = (0.2 <= redshift) & (redshift < 0.25)
bin3 = 0.25 <= redshift
plt.ion()
# Raw strings keep the LaTeX backslashes (\leq, \geq) literal instead of
# relying on Python leaving unknown escape sequences untouched
labels = (r"$z < 0.2$", r"$0.2 \leq z < 0.25$", r"$z \geq 0.25$")
colors = ('r', 'g', 'b')
# One plot call per bin, so each bin contributes exactly one legend entry
for mask, label, co in zip((bin1, bin2, bin3), labels, colors):
    plt.plot(x[mask], y[mask], color=co, ls='none', marker='o', label=label)
plt.legend()
plt.show()

How to visualize high-volume 3-dimensional data

I have a data set like the following:
import numpy as np
from pandas import DataFrame
# 100 integer (x, y) position pairs with coordinates drawn from 0..9
mypos = np.random.randint(10, size=(100, 2))
# One continuous value in [0, 1) for each position pair
myres = np.random.rand(100, 1)
# Assemble the table column by column
mydata = DataFrame({'x': mypos[:, 0], 'y': mypos[:, 1]})
mydata['res'] = myres
The res variable is continuous, the x and y variables are integers representing
positions (therefore largely repetitive),
and res represents kind of correlations between pairs of positions.
I am wondering what are the best ways of visualizing this data set?
Possible approaches already considered:
Scatter plot, with the res variable visualized by a color gradient.
Parallel coordinates plot.
The first approach is problematic when the number of positions gets large,
because high values (which are the values we care about) of the res variable would be drowned in a sea of
small dots.
The second approach could be promising, but I am having trouble producing it.
I have tried the parallel_coordinates function from the pandas module,
but it's not behaving as I would like it to. (see this question here:
parallel coordinates plot for continous data in pandas
)
I hope this helps to find a solution in R. Good luck.
# you need this package for the colour palette
library(RColorBrewer)
# create the random data
dd <- data.frame(
x = round(runif(100, 0, 10), 0),
y = round(runif(100, 0, 10), 0),
res = runif(100)
)
# pick the number of colours (granularity of colour scale)
nColors <- 100
# create the colour pallete
cols <-colorRampPalette(colors=c("white","blue"))(nColors)
# get a zScale for the colours
zScale <- seq(min(dd$res), max(dd$res), length.out = nColors)
# function that returns the nearest colour given a value of res
findNearestColour <- function(x) {
  # x: a single value of res; returns the colour from cols whose zScale entry
  # is closest to x.
  # which.min() yields exactly one index even when x lies exactly halfway
  # between two zScale values, so a single colour is always returned.
  colorIndex <- which.min(abs(zScale - x))
  return(cols[colorIndex])
}
# the first plot is the scatterplot
### this has problems because points come out on top of eachother
# empty frame first, then one coloured point per row
plot(y ~ x, dd, type = "n")
# seq_len() gives an empty sequence for a data frame without rows,
# whereas 1:0 would count down through 1 and 0
for (i in seq_len(nrow(dd))) {
  with(dd[i, ],
       points(y ~ x, col = findNearestColour(res), pch = 19)
  )
}
# this is your parallel coordinates plot (a little better)
# each row becomes a segment from its x value (left edge) to its y value (right edge)
plot(1, 1, xlim = c(0, 1), ylim = c(min(dd$x, dd$y), max(dd$x, dd$y)),
     type = "n", axes = FALSE, ylab = "", xlab = "")
for (i in seq_len(nrow(dd))) {
  with(dd[i, ],
       segments(0, x, 1, y, col = findNearestColour(res))
  )
}

understanding default usage of `pylab.legend`

I am using pylab to produce this image:
where the legend is not what I wanted. The dots represent actual data points, the lines are made with polyfit. I would like the legend to contain either ten entries with the lines and dots merged together for each color or just the ten dot-lines.
The associated piece of code:
# Fragment of a method: self, data, values, scalarMap, start, size, chunks,
# x, y, v1, title, axis and mylegend are defined outside the code shown here.
for i in range(start, start + size*chunks):
    colorVal = scalarMap.to_rgba(values[i])
    # Bounds of the slice of data belonging to this iteration
    slc1, slc2 = start + i*size, start + (i+1)*size
    # NOTE(review): labels are collected in `mylegend` here, but `my_legend`
    # is what gets passed to pl.legend() below -- confirm they are the same list.
    mylegend.append(" = ".join([self.dtypes[v1],
                                "%.2f" %data[v1, slc1]]))
    jx = data[x, slc1:slc2]
    jy = data[y, slc1:slc2]
    # Quadratic least-squares fit, evaluated on 1000 points across the x range
    p = np.polyfit(jx, jy, deg = 2)
    lx = np.linspace(jx[0], jx[-1], 1000)
    ly = p[0]*lx**2 + p[1]*lx + p[2]
    # Two artists per iteration: the data points, then the fitted curve
    pl.plot(jx, jy, "o", color = colorVal)
    pl.plot(lx, ly, color = colorVal)
pl.xlabel(self.dtypes[x])
pl.ylabel(self.dtypes[y])
pl.title(title)
pl.axis(axis)
pl.legend(my_legend, loc = "upper left", shadow = True)
pl.grid("on")
pl.show()
I realize what the mistake is: I add ten points to the my_legend list, and the legend function of pylab is then reading from it until the list ends. Therefore, only half of them make it. However, I don't know how to fix it. Is there a way I can make the legend function only register one entry for each iteration of the loop?
Also, I would like the points listed in reverse order. I tried
pl.legend(my_legend[::-1])
but that didn't work.
Any ideas to these two issues?
The behavior of pylab.legend makes sense once you understand how it works. When you call pylab.legend(my_legend, ...), the strings in the list are assigned, in order, to the first 10 lines drawn. With your code, the first 10 lines are the ones added in the first 5 iterations of the loop (two per iteration: the dots and the fitted curve).
To show just the dots you can do this:
# [...] marks parts of the original loop that are left unchanged
for i in range(start, start + size*chunks):
    [...]
    label = " = ".join([self.dtypes[v1], "%.2f" %data[v1, slc1]])
    [...]
    # Only the dots carry a label, so only they are given to the legend
    pl.plot(jx, jy, "o", color = colorVal, label=label)
    pl.plot(lx, ly, color = colorVal)
[...]
# No label list is passed: the legend uses the labels attached to the artists
pl.legend(loc = "upper left", shadow = True)
If you want the legend for the lines, you just put the label=label into the other plot command.
An alternative approach is to create a mylines list (similar to mylegend), append to it the line returned by just one of the two plot commands in each iteration, and then call pl.legend(mylines, mylegend, ...)

Categories