Why does matplotlib draw the new plot on top of the old one? - python

I'm working on a Django project and using the matplotlib library. I have created a filter where you can choose the day and the "node" that you want to graph; with this information a Python script is executed that, together with pandas and matplotlib, creates a graph.
The values of "node" and "day" arrive correctly at the script, and it generates the graph correctly. The only problem is that, instead of overwriting the old image (with the previous graph), it draws the new lines on top of it. Below is an image of how it looks.
As you can see, each line is equivalent to a different day, because it has been overlapping the different tests I have done. Can anyone tell me where I fail?
Below I attach code
def bateria2(node, day):
    """Plot the battery curve of one node for one day and save it as a PNG.

    Reads ``dataframe.csv``, keeps the rows whose ``Dia``, ``Mes``, ``Año``
    and ``Node name`` columns match ``day``, December 2019 and ``node``, and
    plots ``Bateria`` against ``Hora[UTC]``.

    Args:
        node: Value compared against the ``Node name`` column.
        day: Value compared against the ``Dia`` column.

    Returns:
        1 if matching rows were found and the image was written, 0 otherwise.
    """
    csv_path = os.path.join(os.path.dirname(__file__), '..\\data\\csv\\dataframe.csv')
    df = pd.read_csv(csv_path)
    mes, anyo = 12, 2019
    matches = (
        (df['Dia'] == day)
        & (df['Mes'] == mes)
        & (df['Año'] == anyo)
        & (df['Node name'] == node)
    )
    new_df = df[matches]
    if new_df.empty:
        return 0

    x = new_df['Hora[UTC]'].tolist()
    y = new_df['Bateria'].tolist()

    # pyplot draws on a process-wide "current figure". Without a fresh figure
    # every call would add its line on top of the ones drawn by earlier calls,
    # so start from a new figure and always release it afterwards.
    fig = plt.figure()
    try:
        plt.title(f'Carga/Descarga de la batería día {day}/{mes}/{anyo} de {node}')
        plt.xlabel('Hora [UTC]')
        plt.ylabel('Batería')
        plt.plot(x, y)
        plt.xticks(x, rotation='vertical')
        plt.savefig('static\\img\\bateria2.png', transparent=True)
    finally:
        plt.close(fig)
    return 1
Basically, what I'm doing is accessing the .csv file that contains the data and filtering it down to the rows I want. If the resulting dataframe has data, I create the graph and finally save it.
Regards thank you very much.

Try to clear the current figure, plt.clf() after your savefig command. This should keep your plots from stacking up on top of each other.

Related

How to make subplots from multiple files? Python matplot lib

I'm a student researcher who's running simulations on exoplanets to determine if they might be viable for life. The software I'm using, outputs a file with several columns of various types of data. So far, I've written a python script that goes through one file and grabs two columns of data. In this case, time and global temperature of the planet.
What I want to do is:
Write a python script that goes through multiple files, and grabs the same two columns that my current script does.
Then, I want to create subplots of all these files
The thing that stays consistent across all of the files is that the times don't change: the x axis will always be time (from 0 to 1 million years). The y axis values will change across simulations, though.
This is what I got so far for my code:
import math as m
import matplotlib.pylab as plt
import numpy as np
## Path of the simulation output file used as the data source
datafile = r"C:\Users\sasuk\OneDrive\Desktop\GJ 229 Planet Data\Gj 229 b - [SemiMajor 0.867][Ecc][Mass][Luminosity]\solarsys.Earth.forward"

# Values for the x and y axes of the graph
years = []
GlobalT = []

# Read the 1st and 8th space-separated columns of every line. The ``with``
# block closes the file even when a line cannot be converted to float.
with open(datafile, "r") as file:
    for line in file:
        data = line.split(' ')
        years.append(float(data[0]))
        GlobalT.append(float(data[7]))

# Plot the graph on the current figure
fig = plt.gcf()
plt.plot(years, GlobalT)
plt.title('Global Temperature of GJ 229 b over time')
fig.set_size_inches(10, 6, forward=True)
plt.figtext(0.5, 0.0002, "This shows the global temperature of GJ 229 b when it's semi-major axis is 0.929 au, \n"
            " and it's actual mass relative to the sun (~8 Earth Masses)", wrap=True, horizontalalignment='center', fontsize=12)
plt.xlabel(" Years ")
plt.ylabel("Global Temp")
plt.show()
I think the simplest thing to do is to turn your code for one file into a function, and then call it in a loop that iterates over the files.
from pathlib import Path
def parse_datafile(pth):
    """Parse one space-separated simulation output file.

    Args:
        pth: ``pathlib.Path`` of the file to read.

    Returns:
        A list with one dict per usable line. Each dict holds ``'f'`` (the
        file name without its last suffix), ``'y'`` (1st column) and ``'t'``
        (8th column). Column values are returned as unconverted strings.
        Lines with fewer than eight columns are skipped.
    """
    results = []
    with pth.open('r') as f:
        for line in f:
            # Drop the line terminator so it cannot leak into the last column.
            data = line.rstrip('\r\n').split(' ')
            if len(data) < 8:
                # Blank or truncated line: skip it instead of raising IndexError.
                continue
            results.append({'f': pth.stem,
                            'y': data[0],
                            't': data[7]})
    return results
import pandas as pd  # this snippet uses ``pd`` but did not import it

basedir = Path(r'C:\Users\sasuk\OneDrive\Desktop\GJ 229 Planet Data\Gj 229 b - [SemiMajor 0.867][Ecc][Mass][Luminosity]')
# assuming you want to parse all files in directory
# if not, can change glob string for files you want
# parse_datafile() returns a list of row dicts for each file. Flatten those
# lists so the DataFrame gets one row per data line with columns f, y and t
# (a list of lists would not produce those columns). Directories matched by
# the glob are skipped because they cannot be opened as files.
all_results = [
    row
    for pth in basedir.glob('*')
    if pth.is_file()
    for row in parse_datafile(pth)
]
# Naming the columns keeps the lookups below valid even when nothing was parsed.
df = pd.DataFrame(all_results, columns=['f', 'y', 't'])
df['y'] = pd.to_numeric(df['y'], errors='coerce')
df['t'] = pd.to_numeric(df['t'], errors='coerce')
This will give you a dataframe with three columns - f (the filename), y (the year), and t (the temperature). You then have to convert y and t to numeric dtypes. This will be faster and handle errors more gracefully than your code, which will raise an error with any malformed data.
You can further manipulate this as needed to generate your plots. Definitely check if there are any NaN values and address them accordingly, either by dropping those rows or using fillna.

Filtering CSV data using python and storing different values in array

I am trying to filter a CSV file so that the prices of different commodities that are > 1000 are stored in different arrays. I am able to get the values of only one commodity correctly; the arrays for the other commodities are just duplicates of the first one.
CSV file looks like below figure:
CODE
import matplotlib.pyplot as plt
import csv
import pandas as pd
import numpy as np
# csv file name
filename = "CommodityPrice.csv"

df = pd.read_csv(filename)
commodity = df["Commodity"]
price = df['Price']


def prices_above(commodity_name, threshold=1000):
    """Return the prices of one commodity that are strictly above *threshold*.

    Args:
        commodity_name: Value compared against the ``Commodity`` column.
        threshold: Lower bound (exclusive) for the ``Price`` column.

    Returns:
        A list with the matching prices, in file order.
    """
    # Combine both conditions row by row so each price stays tied to its own
    # commodity; looping over the two columns independently loses that link.
    selected = (commodity == commodity_name) & (price > threshold)
    return price[selected].tolist()


# One list per commodity, each filtered on its own commodity name.
# NOTE(review): 'Palladiun' is kept exactly as in the original code; confirm
# it matches the spelling used in the CSV.
gold_price_above_1000 = prices_above('Gold')
palladiun_price_above_1000 = prices_above('Palladiun')
gold_futr_price_above_1000 = prices_above('Gold Futr')
cocoa_future_price_above_1000 = prices_above('Cocoa Future')

print(gold_price_above_1000)
print(palladiun_price_above_1000)
print(gold_futr_price_above_1000)
print(cocoa_future_price_above_1000)

plt.ylim(1000, 3000)
plt.plot(gold_price_above_1000)
plt.plot(palladiun_price_above_1000)
plt.plot(gold_futr_price_above_1000)
plt.plot(cocoa_future_price_above_1000)
plt.title('Commodity Price(>=1000)')
plt.ylabel("Price")
plt.show()
print("SUCCESS")
Here is my question in detail,
Please use pandas and matplotlib to sort out the data in the csv and output and store the sorted data into the process chart. The output results are shown in the following figures.
Figure 1 The upper picture is to take all the products with Price> = 1000 in csv, mark all their prices in April and May and draw them into a linear graph. When outputting, the year in the date needs to be removed. The label name is marked and displayed. The title names of the chart, x-axis, and y- axis need to be marked. The range of the y-axis falls within 1000 ~ 3000, and the color of the line is not specified.
Figure 1 The picture below is from all the products with Price> = 1000 in csv. Mark their Change% in April and May and draw them into a dotted line graph. The dots need to be in a dot style other than '.' And 'o'. To mark, please mark the line with a line other than a solid line. When outputting, you need to remove the year from the date. You need to mark and display the label name of each line. The title names of the chart, x-axis, and y-axis must be marked. You need to add a grid line, the y-axis range falls from -15 to +15, and the color of the line is not specified.
The upper and lower two pictures in Figure 2 are changed to 1000> Price> = 500. The other conditions are basically the same as in Figure 1, except that the points and lines of the dot and line diagrams below Figure 2 need to use different styles from Figure 1.
The first and second pictures in Figure 1 must be displayed in the same window, as is the picture in Figure 2.
All of your blocks of code are doing the exact same thing. Changing the name of the loop variable doesn't change what the loop does.
for gold_price in price:
for palladiun_price in price:
for gold_futr_price in price:
for cocoa_future_price in price:
This is going through the exact same data. You haven't subsetted for specific commodities.
Using the break statement in that loop doesn't make sense either. It should be a pass.
Basically for every number above 1000, you iterate through your entire Commodities column and add number to the list for every time you see a specific commodity.
Read how to index and select data in pandas.
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html
gold_price_above_1000 = df[(df.Commodity=='Gold') & (df.Price>1000)]

Separate TAP and HOVER tool for Edges of hv.Graph. Edge description data is missing

Trying to get hv graph with ability to tap edges separately from nodes. In my case - all meaningful data bound to edges.
# Node element built from the x, y, nid and name columns of nodes_data;
# 'name' is declared as a value dimension.
gNodes = hv.Nodes((nodes_data.x,nodes_data.y, nodes_data.nid, nodes_data.name),\
vdims=['name'])
# Graph built from the (source, target, name) columns of edges_data together
# with the node element above; 'name' is again declared as a value dimension.
gGraph = hv.Graph(((edges_data.source, edges_data.target, edges_data.name),gNodes),vdims=['name'])
# Display options applied to the final overlay below.
opts = dict(width=1200,height=800,xaxis=None,yaxis=None,bgcolor='black',show_grid=True)
# NOTE(review): gEdges is assigned here but not used anywhere in this snippet.
gEdges = gGraph.edgepaths
# Tile source that forms the first element of the overlay.
tiles = gv.tile_sources.Wikipedia()
# Overlay the tiles, the graph's edge paths and its nodes (node size 12),
# then apply the display options.
(tiles * gGraph.edgepaths * gGraph.nodes.opts(size=12)).opts(**opts)
If I use gGraph.edgepaths * gGraph.nodes, there is no edge information displayed with the Hover tool.
Inspection policy 'edges' for hv.Graph is not suitable for my task, because no single edge selection available.
Where has the edge label information in the edgepaths property gone? How can I add it?
Thank you!
I've created a separate dataframe for the links, then I grouped it by unique link label and inserted an empty row between each group (each edge has two rows - source and target), as in this case: Pandas: Inserting an empty row after every 2nd row in a data frame
# One all-NaN row with the same columns as edges_data; it is appended after
# every group as a separator.
emty_row = pd.Series(np.nan, edges_data.columns)


def insert_f(d):
    """Return *d* with the all-NaN separator row added at the end."""
    # DataFrame.append was removed in pandas 2.0 (and np.NaN in NumPy 2.0),
    # so the row is added with pd.concat instead.
    return pd.concat([d, pd.DataFrame([emty_row])], ignore_index=True)


# NOTE(review): the separator row is built from edges_data but the grouping
# runs on edges_test — confirm both frames share the same columns.
edges_df = edges_test.groupby(by='name', group_keys=False).apply(insert_f).reset_index(drop=True)
and created hv.EdgePaths from the dataframe:
gPaths2= hv.EdgePaths(edges_df, kdims=['lon_conv_a','lat_conv_a'])
TAP and HOVER works fine for me.

Matplotlib Spyder3.2.6 Plotting a lot of information and creating space

I am just starting out using Spyder and doing some simple data analysis. I have some census data that I have filtered. The data is pretty large 32k entries. As you will see I have filtered the census data into age, and hours per week. But when I went to plot it, the information is really scrunched together. I have been searching the internet trying to find a way to separate the values, but I am just coming up short. Any help would be great! Thanks
Picture of Plot
Data information
# Positional column labels (the file is read without a header row). The
# meaning of each column is taken from the variable names used below.
AGE_COL = 0
HOURS_COL = 12
COUNTRY_COL = 13

df = pd.read_csv('adult.data.csv', header=None, delimiter=',')

# The country value found in the first row is used as the reference value.
native_country = np.array(df[COUNTRY_COL])
united_states = native_country[0]

# Keep only the rows whose country equals that reference value.
is_reference_country = df[COUNTRY_COL] == united_states
native_country_us = df.loc[is_reference_country]

native_country_us_hours_per_week = np.array(native_country_us[HOURS_COL])
native_country_us_age = np.array(native_country_us[AGE_COL])

# Green circle markers: hours per week on the x axis, age on the y axis.
plt.plot(
    native_country_us_hours_per_week,
    native_country_us_age,
    "go",
)
plt.title('Hours Per Week US Citizen Works')
plt.xlabel('Hours per week')
plt.ylabel('Age of US Citizen')
plt.show()
I don't see you plugging the Y values into the plot.
try:
plt.plot(native_country_us_hours_per_week, native_country_us_age , "go")

Python: Creating Excel worksheets with charts

Is there any module for creating Excel worksheets with embedded charts in Python? The modules mentioned in this question don't seem to have that capability.
I prefer a generic module that would work under Ubuntu, not a Windows-dependent one.
EDIT: I will also appreciate ways to embed images within the created charts, as I can create the charts in an external program and place them within the right sheet.
Thanks,
Adam
I recently found xlsxwriter. It's the most capable xlsx python module I've found and works with charts and graphs. It also doesn't require any non standard python modules and works on any type of box. No need for windows or to have charting software installed.
On Windows, you'd need to use pywin32 and COM. On a *nix box, you may find that a combination of IronPython, Mono, and an Excel-manipulation library written for .NET may do the job. In either case, good luck.
It's a little bit convoluted (and/or evil), but something like this will work cross-platform (including under Linux) using JPype to wrap the SmartXLS Excel Java library.
This example uses the simple chart creation (in Charts/ChartSample.class) example from SmartXLS.
#!/usr/bin/env python
import os
import os.path
import jpype
# or wherever your java is installed
os.environ['JAVA_HOME'] = "/usr/lib64/jvm/default-java"

# The SmartXLS jar is expected next to this script.
root = os.path.abspath(os.path.dirname(__file__))
SX_JAR = os.path.join(root, 'SX.jar')
options = [
    '-Djava.class.path=%s' % SX_JAR
]

jpype.startJVM(jpype.getDefaultJVMPath(), *options)
try:
    WorkBook = jpype.JClass('com.smartxls.WorkBook')
    ChartShape = jpype.JClass('com.smartxls.ChartShape')
    ChartFormat = jpype.JClass('com.smartxls.ChartFormat')
    Color = jpype.JClass('java.awt.Color')

    workbook = WorkBook()

    # Header row (row 0, columns 1-5).
    # NOTE(review): "Jun" follows "Apr" in the original sample — possibly
    # meant to be "May"; kept unchanged.
    for col, label in enumerate(["Jan", "Feb", "Mar", "Apr", "Jun"], start=1):
        workbook.setText(0, col, label)

    # Row labels (column 0, rows 1-5).
    row_labels = ["Comfrey", "Bananas", "Papaya", "Mango", "Lilikoi"]
    for row, label in enumerate(row_labels, start=1):
        workbook.setText(row, 0, label)

    # Fill the 5x5 data area with the RAND() formula.
    for col in range(1, 5 + 1):
        for row in range(1, 5 + 1):
            workbook.setFormula(row, col, "RAND()")

    workbook.setText(6, 0, "Total")
    workbook.setFormula(6, 1, "SUM(B2:B6)")
    workbook.setSelection("B7:F7")
    # auto fill the range with the first cell's formula or data
    workbook.editCopyRight()

    left = 1.0
    top = 7.0
    right = 13.0
    bottom = 31.0

    # create chart with its location
    chart = workbook.addChart(left, top, right, bottom)
    chart.setChartType(ChartShape.Column)
    # link data source, link each series to columns(true to rows).
    chart.setLinkRange("Sheet1!$a$1:$F$6", False)
    # set axis title
    chart.setAxisTitle(ChartShape.XAxis, 0, "X-axis data")
    chart.setAxisTitle(ChartShape.YAxis, 0, "Y-axis data")
    # set series name
    for index in range(5):
        chart.setSeriesName(index, "My Series number %d" % (index + 1))
    chart.setTitle("My Chart")

    # set plot area's color to darkgray
    chartFormat = chart.getPlotFormat()
    chartFormat.setSolid()
    chartFormat.setForeColor(Color.DARK_GRAY.getRGB())
    chart.setPlotFormat(chartFormat)

    # set series 0's color to blue
    seriesformat = chart.getSeriesFormat(0)
    seriesformat.setSolid()
    seriesformat.setForeColor(Color.BLUE.getRGB())
    chart.setSeriesFormat(0, seriesformat)

    # set series 1's color to red
    seriesformat = chart.getSeriesFormat(1)
    seriesformat.setSolid()
    seriesformat.setForeColor(Color.RED.getRGB())
    chart.setSeriesFormat(1, seriesformat)

    # set chart title's font property
    titleformat = chart.getTitleFormat()
    titleformat.setFontSize(14*20)
    titleformat.setFontUnderline(True)
    chart.setTitleFormat(titleformat)

    workbook.write("./Chart.xls")
finally:
    # Shut the JVM down even if building or writing the workbook fails.
    jpype.shutdownJVM()

Categories