import pandas as pd
import quandl as qndl
import datetime as dt
import matplotlib.pyplot as plt
import QuandlAPIKey  # my Quandl API key module; ignore this
# Fetch the 'AUSBS/D' table through quandl.get_table and wrap it in a DataFrame.
data = qndl.get_table('AUSBS/D')
dataframe = pd.DataFrame(data)
# Order the rows by the 'date' column before selecting what to plot.
sorteddataframe = dataframe.sort_values(by='date')
# NOTE(review): double brackets select one-column DataFrames, not Series.
dfdate = sorteddataframe[['date']]
dfvalue = sorteddataframe[['value']]
# dfx / dfy are plain aliases of the two selections above.
dfx = dfdate
dfy = dfvalue
# Only the first `values_to_read` rows are printed and plotted.
values_to_read = 100
print(sorteddataframe.head(values_to_read))
# NOTE(review): the author reports this draws a single vertical line. If the
# first 100 date-sorted rows all share one date that would explain it --
# confirm against the frame printed above.
plt.plot(dfx.head(values_to_read),dfy.head(values_to_read))
plt.xlabel("Years")
plt.ylabel("Stock Values (Scaled down by 10%)")
plt.show()
I have checked the dataframes (dfx, dfy, sorteddataframe), all of them are properly sorted but the graph being generated is just a simple vertical line. Pic is posted.
Related
I have this part of the code done, but I would like to be able to add more columns like the Volume, Open, High.
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
# Plot styling and pandas/matplotlib date-converter registration.
plt.style.use("fivethirtyeight")
from pandas.plotting import register_matplotlib_converters
# The loop below calls web.DataReader, but the snippet never imported `web`.
import pandas_datareader.data as web

register_matplotlib_converters()

# Tickers to download and an equal-valued weights array (one entry per ticker).
assets = ["LAC", "NIO"]
weights = np.array([0.5, 0.5])

# Download window: fixed start date up to today's date as YYYY-MM-DD.
# (The bare `today` expression that followed was a no-op in a script and
# has been dropped.)
stockStartDate = "2016-01-01"
today = datetime.today().strftime("%Y-%m-%d")

# Build one "Adj Close" column per ticker; the loop body must be indented.
df = pd.DataFrame()
for stock in assets:
    df[stock] = web.DataReader(
        stock, data_source="yahoo", start=stockStartDate, end=today
    )["Adj Close"]
I'm not entirely sure what you want to do, but in essence, if you want the flexibility to visualize more variables, it's easier to keep the data in a long format, with a column (I used `stock` below) indicating the data source:
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import datetime
assets = ["LAC", "NIO"]
stockStartDate = "2016-01-01"
today = datetime.date.today().strftime("%Y-%m-%d")

# Collect one frame per ticker, tagging every row with its ticker so the
# frames can be stacked into a single long-format table. A separate list
# name keeps `df` from switching type between list and DataFrame.
frames = []
for stock in assets:
    x = web.DataReader(stock, data_source="yahoo",
                       start=stockStartDate, end=today)
    x['stock'] = stock
    frames.append(x)

# Stack the per-ticker frames row-wise.
df = pd.concat(frames)
Then pivot it on the fly to plot:
# Spread 'High' into one column per value of 'stock' (the existing index is
# kept as the row index), then draw one line per column.
high_by_stock = df.pivot(columns='stock', values='High')
high_by_stock.plot.line()
Or use seaborn:
import seaborn as sns
# One line per 'stock' value: the frame's index on x, the 'High' column on y.
sns.lineplot(
    data=df,
    x=df.index,
    y="High",
    hue="stock",
)
I have categorized data. At specific dates I have data (A to E) that is counted every 15 minutes.
When I want to plot with seaborn I get this:
Bigger bubbles cover smaller ones, and the whole plot is not easily readable (e.g. 2020-05-12 at 21:15). Is it possible to display the bubbles for each 15-minute class next to each other, with a little bit of overlap?
My code:
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import os
# Load the categorised counts (columns: date, time, amount, category).
df = pd.read_csv("test_df.csv")
#print(df)

sns.set_theme()

# Bubble chart: colour encodes the category, marker size encodes the amount.
ax = sns.scatterplot(
    data=df,
    x="date",
    y="time",
    hue="category",
    size="amount",
    sizes=(15, 200),
)

# Flip the y axis of the axes the scatterplot was drawn on.
ax.invert_yaxis()
plt.show()
My CSV file:
date,time,amount,category
2020-05-12,21:15,13,A
2020-05-12,21:15,2,B
2020-05-12,21:15,5,C
2020-05-12,21:15,1,D
2020-05-12,21:30,4,A
2020-05-12,21:30,2,C
2020-05-12,21:30,1,D
2020-05-12,21:45,3,B
2020-05-12,22:15,4,A
2020-05-12,22:15,2,D
2020-05-12,22:15,9,E
2020-05-12,00:15,21,D
2020-05-12,00:30,11,E
2020-05-12,04:15,7,A
2020-05-12,04:30,1,B
2020-05-12,04:30,2,C
2020-05-12,04:45,1,A
2020-05-14,21:15,1,A
2020-05-14,21:15,5,C
2020-05-14,21:15,3,D
2020-05-14,21:30,4,A
2020-05-14,21:30,1,D
2020-05-14,21:45,5,B
2020-05-14,22:15,4,A
2020-05-14,22:15,11,E
2020-05-14,00:15,2,D
2020-05-14,00:30,11,E
2020-05-14,04:15,9,A
2020-05-14,04:30,11,B
2020-05-14,04:30,5,C
2020-05-14,05:00,7,A
You can use a seaborn swarmplot for this. You first have to expand each row into `amount` separate entries (one row per counted item), using .reindex and .repeat. Then you can plot.
Here is the code:
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
counts = pd.read_csv("test.csv")

# Repeat every row `amount` times so each counted item becomes its own point.
repeated_index = counts.index.repeat(counts.amount)
df = counts.reindex(repeated_index)

# Swarm plot places same-position points side by side instead of stacking them.
ax = sns.swarmplot(data=df, x="date", y="time", hue="category")
ax.invert_yaxis()
plt.show()
Here is the output:
I am trying to make a simple bar graph out of a 2 column CSV file. One column is the x axis names, the other column is the actual data which will be used for the bars. The CSV looks like this:
count,team
21,group1
15,group2
63,group3
22,group4
42,group5
72,group6
21,group7
23,group8
24,group9
31,group10
32,group11
I am using this code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the two-column CSV, then move 'count' out of the columns into the index.
df = pd.read_csv("sampleData.csv", sep=",")
df = df.set_index('count')

# Map each index value to the list of remaining values in its row.
d = {count: row for count, row in zip(df.index, df.values.tolist())}

# 'count' is the index here, not a column, so this call is the one that
# fails with KeyError: 'count'.
df.plot.bar(x='count', y='team')
print(d)
However, I get an error
KeyError: 'count' from this line :
df.plot.bar(x = 'count', y = 'team')
I don't understand how there is an error for something that exists.
When you set `count` as the index, only a single column, `team`, is left in your DataFrame, so `x = 'count'` no longer refers to a column. Don't set `count` as the index, and swap the x and y arguments when plotting the bar chart:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Keep both 'count' and 'team' as ordinary columns.
df = pd.read_csv("sampleData.csv", sep=",")

# Team names along the x axis, counts as the bar heights.
df.plot(kind='bar', x='team', y='count')
Matplotlib solution
# Same chart with matplotlib directly: one bar per team, height = count.
plt.bar(x=df['team'], height=df['count'])
# Rotate the tick labels for better visualization.
plt.xticks(rotation=45)
import sys
import ConfigParser
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as DT
import bokeh
# Add the project's SOURCE directory to the import path. The raw string keeps
# the backslashes literal instead of relying on invalid escape sequences.
sys.path.extend([r'..\..\myProj\SOURCE'])
fullfilepath = "../../myProj/SOURCE/" + 'myparts.txt'

# Tab-separated input file.
ohg_df = pd.read_csv(fullfilepath, sep="\t")

# Keep only the date and quantity columns. The original read from `temp_df`
# before it existed; the loaded frame is `ohg_df`.
temp_df = ohg_df[['as_on_date', 'ohg_qty']].copy()

# assumes as_on_date holds parseable date strings -- TODO confirm
temp_df['as_on_date'] = pd.to_datetime(temp_df['as_on_date'])

# DataFrame.sort was removed from pandas; sort_values is its replacement
# (ascending is the default order).
temp_df = temp_df.sort_values('as_on_date')

# set_index returns a new frame, so the result has to be kept for the
# dates to become the x values below.
temp_df = temp_df.set_index('as_on_date')

plt.plot(temp_df.index, temp_df.ohg_qty)
plt.show()
This is my dataframe after importing.
I am trying to plot the line graph with x axis as date mentioned in the dataframe.
Can someone guide me... I am new to pandas.
dataframe picture
output picture
Easier:
# Set index directly (the read_csv keyword is `index_col`; `index` is not accepted)
ohg_df = pd.read_csv(fullfilepath, sep="\t", index_col='as_on_date')
# Convert string index to dates
ohg_df.index = pd.to_datetime(ohg_df.index)
# Get a column and plot it (taking a column keeps the index)
plt.plot(ohg_df.ohg_qty)
Hi all, I'm trying to work with a data set that has two columns: one is names and the other is the number of births for each name. What I want to do is import a CSV file, perform some basic operations on it, such as finding the baby name with the maximum number of births, and then plot the data in a bar graph. But when the dataframe has an index, the bar graph uses that as the x axis instead of the names. So I removed the index, and now I get all kinds of errors. Below is my code: first the version with the index, and then the one without. Thanks in advance. This is really driving me crazy.
import pandas as pd
import matplotlib.pyplot as plt
import pdb
import matplotlib as p
import os
from pandas import DataFrame
# Path to the births CSV, built from the filesystem root downwards.
path_parts = ['Users', "Mark's Computer", 'Desktop', 'projects', 'data', 'births1880.csv']
Location = os.path.join(os.path.sep, *path_parts)

a = pd.read_csv(Location, index_col=False)
print(a)  # show the loaded dataframe

# Largest birth count, and the name(s) on the rows that reach it.
MaxValue = a['Births'].max()
is_max = a['Births'] == MaxValue
MaxName = a.loc[is_max, 'Names'].values
print(MaxValue, ' ', MaxName)

# Bar chart with no explicit x column given.
a.plot.bar()
plt.show()
This code works but spits out a bar graph with the index as the x axis instead of the names?
import pandas as pd
import matplotlib.pyplot as plt
import pdb
import matplotlib as p
import os
from pandas import DataFrame
# Second attempt: same births CSV, but read with index_col=True and plotted
# with explicit x/y columns.
Location = os.path.join(os.path.sep,'Users', 'Mark\'s Computer','Desktop','projects','data','births1880.csv')
# NOTE(review): index_col is documented to take a column label/position, a
# sequence of them, or False; True is not one of those -- confirm how the
# installed pandas version treats it.
a = pd.read_csv(Location, index_col = True) #why is setting the index column to true removing it?
print(a) #print the dataframe just to see what I'm getting.
# Largest value in 'Births' and the 'Names' entries on the matching rows.
MaxValue = a['Births'].max()
MaxName = a['Names'][a['Births'] == MaxValue].values
print(MaxValue, ' ', MaxName)
# Bar chart with 'Names' along x and 'Births' as the bar heights.
a.plot(kind ='bar', x='Names', y = 'Births' )
plt.show()
edited for solution.
It would have been nice if you had provided a sample CSV file; I made one up, and it took me a while to figure out what format pandas expects.
I used a test.csv that looked like:
names,births
mike,3
mark,4
Then my python code:
import pandas
import numpy
import matplotlib.pyplot as plt
# index_col=False stops pandas from using the first column as the index,
# so 'names' stays available as an ordinary column.
a = pandas.read_csv('test.csv', index_col=False)

# Let pandas label the bars from the 'names' column. The previous manual
# xticks at index + 0.5 put each label half a step to the right of its bar,
# because pandas centres bars on the integer positions 0..n-1.
a.plot(kind='bar', x='names')
plt.show()