I am using Pandas to create 3 HTML tables out of 3 dataframes. The output I want is an HTML file. The code I'm currently using prints tables one under the other. I want to print one table on top, and then the other two tables side by side. What could I change in the code to achieve that?
import numpy as np
from numpy.random import randn
import pandas as pd
import matplotlib.pyplot as plt
# Three independent 5x4 frames of random samples, one per HTML table.
df = pd.DataFrame(randn(5, 4), columns='W X Y Z'.split())
df1 = pd.DataFrame(randn(5, 4), columns='A B C D'.split())
df2 = pd.DataFrame(randn(5, 4), columns='E F G K'.split())

# Write the three rendered tables back to back, separated only by blank
# lines, so they appear one under the other in the resulting page.
with open("a.html", 'w') as _file:
    _file.write("\n\n".join(frame.head().to_html() for frame in (df, df1, df2)))
Here's my proposal based on your original code:
import numpy as np
from numpy.random import randn
import pandas as pd
import matplotlib.pyplot as plt
# Three independent 5x4 frames of random samples, one per HTML table.
df = pd.DataFrame(randn(5, 4), columns='W X Y Z'.split())
df1 = pd.DataFrame(randn(5, 4), columns='A B C D'.split())
df2 = pd.DataFrame(randn(5, 4), columns='E F G K'.split())

# Page layout: the first table stands alone on top; the other two are placed
# in the two cells of a single-row wrapper table so they sit side by side.
html = """
{table1}
<table>
<tr>
<td>{table2}</td>
<td>{table3}</td>
</tr>
</table>
""".format(
    table1=df.head().to_html(),
    table2=df1.head().to_html(),
    table3=df2.head().to_html(),
)

with open("a.html", 'w') as _file:
    _file.write(html)
Related
I have the arrays below and I need to calculate req[i] and JJ[i]. I'm getting zeros for all the values except req[2] and req[8], and even those have wrong values. Why is that happening?
The second issue is with the second JJ calculation (`JJ[i]= 0.00633*(2*np.pi*math.sqrt(kz[i]*ky[i])*dx[i])/(pvt['muo']*(np.log(req/well['rw']) + PP['s_h']))`): it says invalid syntax. What is wrong there?
these are the arrays I'm using:
and this is the code:
import numpy as np
#%% Importing modules Loading .yml/.yaml file
import yaml
import numpy as np
import matplotlib.pyplot as plt
import math
import scipy.special as sc
import pandas as pd
# Productivity index:
# i=len(n)
# req=np.zeros((n))
# JJ=np.zeros((n))
def Prod_index(res, pvt,num,well, PP, w, well_drc,i):
    """Fill ``req`` and ``JJ`` for every grid block and return both.

    The function relies on module-level state that is not passed in: the
    block count ``n``, the per-block sequences ``kx``, ``ky``, ``kz``, ``dx``,
    ``dy``, ``dz`` and the pre-allocated output arrays ``req`` and ``JJ``,
    which are written in place.

    Args:
        res: mapping; only ``res['h']`` is read (vertical case).
        pvt: mapping; only ``pvt['muo']`` is read.
        num: unused.
        well: mapping; only ``well['rw']`` is read.
        PP: mapping; ``PP['s_v']`` or ``PP['s_h']`` is read, depending on
            ``well_drc``.
        w: unused.
        well_drc: ``'v'`` or ``'h'``. Any other value leaves ``req`` and
            ``JJ`` untouched.
        i: unused; kept so existing call sites keep working.

    Returns:
        The ``(req, JJ)`` pair.
    """
    for blk in range(n):
        if well_drc == 'v':
            req[blk] = 0.28*math.sqrt(
                math.sqrt(ky[blk]/kx[blk])*dx[blk]**2
                + math.sqrt(kx[blk]/ky[blk])*dy[blk]**2
            )/((ky[blk]/kx[blk])**0.25 + (kx[blk]/ky[blk])**0.25)
            # The skin term is added to the log term *inside* the viscosity
            # product, matching the horizontal branch below.
            # NOTE(review): sqrt(kx*kz) is kept from the original; for a
            # vertical well sqrt(kx*ky) may have been intended - confirm.
            JJ[blk] = 0.00633*(2*np.pi*math.sqrt(kx[blk]*kz[blk])*res['h'])/(
                pvt['muo']*(np.log(req[blk]/well['rw']) + PP['s_v'])
            )
        elif well_drc == 'h':
            # The inner sqrt(kz/ky) was missing its closing parenthesis; the
            # interpreter reported that as "invalid syntax" on the next line.
            # NOTE(review): by symmetry with the vertical branch the first
            # term may have been meant as sqrt(ky/kz)*dz**2 - confirm.
            req[blk] = 0.28*math.sqrt(
                math.sqrt(kx[blk]*ky[blk])*dz[blk]**2
                + math.sqrt(kz[blk]/ky[blk])*dy[blk]**2
            )/((ky[blk]/kz[blk])**0.25 + (kz[blk]/ky[blk])**0.25)
            # Use this block's radius req[blk], not the whole req array.
            JJ[blk] = 0.00633*(2*np.pi*math.sqrt(kz[blk]*ky[blk])*dx[blk])/(
                pvt['muo']*(np.log(req[blk]/well['rw']) + PP['s_h'])
            )
    return req, JJ
My code runs properly but it will not provide output as it should. I am not sure where the issue is occurring. Could someone help me correct it? Do you need the CSV too?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
df = pd.read_csv("/content/drive/MyDrive/replicates/Replicate 3 Gilts just measures.csv")
df.info()
df.head()
# Discard the columns that are not needed for the analysis.
cols_to_drop = ["animal"]
df = df.drop(columns=cols_to_drop, axis=1)
# Peek at the first five rows now that those columns are gone.
df.head()
# Independent snapshot of the trimmed frame.
deep_df = df.copy(deep=True)
# Keep only the columns stored as 64-bit integers or floats.
numerical_columns = [
    name
    for name in df.columns
    if df[name].dtype in ('int64', 'float64')
]
# Minimum, maximum, mean and median of every numeric column.
df[numerical_columns].describe().loc[['min', 'max', 'mean', '50%'], :]
# Rows holding the smallest "i1000.0" value.
df[df['i1000.0'] == df['i1000.0'].min()]
This is where the issue occurs
# Bucket the rows by their "i1000.0" value into nine 10-wide bins
# (10-20, 20-30, ..., 90-100). observed=False keeps empty bins in the result,
# so it always has nine entries.
bin_edges = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
i1000_bucket = df.groupby(pd.cut(df["i1000.0"], bins=bin_edges), observed=False)
number_bucket = df.groupby(pd.cut(df["i1000.0"], bins=bin_edges), observed=False)
# Per bucket: (sum of i1000.0 / number of rows) * 100, rounded to 2 decimals.
# The original first line lacked `round`, which turned i1000_bucket into a
# (Series, 2) tuple. Selecting the column before summing avoids aggregating
# every other column of the frame.
# NOTE: a bucket with no rows evaluates to 0/0 = NaN.
i1000_bucket = round((i1000_bucket["i1000.0"].sum() / i1000_bucket.size())*100, 2)
number_bucket = round((number_bucket["i1000.0"].sum() / number_bucket.size())*100, 2)
The graph appears but nothing actually plots
# One "low-high" tick label per 10-wide bucket: "10-20", "20-30", ..., "90-100".
x = ["{}-{}".format(low, low + 10) for low in range(10, 91, 10)]
# Line plot of the per-bucket values against those labels.
plt.plot(x, number_bucket.values)
plt.title("1000.0 comparisons")
plt.xlabel("i1000.0")
plt.ylabel("p1000.0")
I'm not sure whether my graphs are done properly, or what would happen if I wanted to flip them upside down. I would also like to print them and generate a .pdf file, but I'm not quite sure how to accomplish that. Please give me some advice if you have any. I'd appreciate it. All the best.
Changing variables countlessly
import numpy as np
np.__version__
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from operator import itemgetter
sns.set(style="darkgrid")
# t 1
# Multiply a 3x3 matrix by a 3x2 matrix and print the 3x2 product.
m1 = np.array([
    [1, 2, 2],
    [-4, 3, 8],
    [-1, 0, 1],
])
m2 = np.array([
    [1, 4],
    [-2, 2],
    [3, -6],
])
print(m1 @ m2)
# t 2
# Undirected graph on the nodes A-E with eight edges.
G = nx.Graph()
G.add_edges_from([
    ('A', 'D'),
    ('A', 'B'),
    ('B', 'D'),
    ('B', 'C'),
    ('B', 'E'),
    ('C', 'D'),
    ('C', 'E'),
    ('D', 'E'),
])
nx.draw(G, with_labels=True)
# Betweenness centrality per node; look up the score of node 'B'.
array = nx.betweenness_centrality(G)
array['B']
# t 3
# Load the first data set and index it by OBJECTID.
df = pd.read_csv('xxx.csv')
df.set_index('OBJECTID', inplace=True)
df.head(1)
# t 4
# Total PT_ENROLL per NAME, largest first. The result gets a descriptive name
# so the built-in sorted() is not shadowed.
pt_enroll_by_name = df.groupby('NAME')['PT_ENROLL'].sum().sort_values(ascending=False)
pt_enroll_by_name.head(7)
# t 5
# Number of NAME entries per NAICS_DESC, largest first.
df.groupby('NAICS_DESC')['NAME'].count().sort_values(ascending=False)
# t 6
# Scatter TOT_ENROLL (x) against POPULATION (y).
df1 = df['TOT_ENROLL']
df2 = df['POPULATION']
plt.scatter(df1, df2)
# T3
# Load the second data set and index it by OBJECTID.
df = pd.read_csv('Hospitals.csv')
df.set_index('OBJECTID', inplace=True)
df.head(5)
# T4
# Number of NAME entries per CITY, largest first.
names_per_city = df.groupby('CITY')['NAME'].count().sort_values(ascending=False)
names_per_city.head(6)
# T5
# Largest Y value per NAME, largest first.
max_y_by_name = df.groupby('NAME')['Y'].max().sort_values(ascending=False)
max_y_by_name.head(5)
# T6
# Bar chart of total BEDS per OWNER, largest first.
df.groupby('OWNER')['BEDS'].sum().sort_values(ascending=False).plot(kind='bar')
Sorry, I can't google how to get my aim so I am here.
see some sandbox datatable:
mode X Y
0 1 3 10
1 1 4 11
2 1 3 12
3 1 4 13
4 2 3 14
5 2 4 15
6 2 3 16
7 2 4 17
I created the following sandbox code. I want a plot with TWO lines, one for each mode ('mode 1' and 'mode 2'). The x-axis should show 3 and 4, and Y should be averaged within each (mode, X) pair: the line (3,(10+12)/2)--(4,(11+13)/2) for mode 1 and, analogously, (3,15)--(4,16) for mode 2.
But this code doesn't even work.
#!/usr/bin/python3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Build the 8-row sample table: three input rows (mode, X, Y) transposed into columns.
df = pd.DataFrame([[1,1,1,1,2,2,2,2],[3,4,3,4,3,4,3,4],list(range(10,18))]).T
df.columns = ['mode','X','Y']
# Mean of the 'mode' column within each mode group (one entry per mode).
mode = df.groupby(['mode'])['mode'].mean()
# Mean of the 'X' column within each X group (one entry per X value).
Ox = df.groupby(['X'])['X'].mean()
# Mean Y for every (mode, X) pair.
Oy = df.groupby(['mode','X'])['Y'].mean()
# NOTE(review): the loop body below lost its indentation in this listing.
for x in mode:
# NOTE(review): 'mode' == x compares a string literal with a number and is always False,
# so Oy['mode'== x] is Oy[False] rather than a per-mode selection.
# NOTE(review): x is numeric, so 'test' + x raises TypeError unless x is wrapped in str().
plt.plot(Ox, Oy[Oy['mode'== x]] , label = 'test' + x)
plt.savefig('testpandas.pdf')
You might want to try the seaborn package, which has a lot of functionality for stuff like this
import seaborn as sns
# One series per mode, with the Y values at each X reduced by np.mean.
sns.lmplot(
    x='X',
    y='Y',
    hue='mode',
    data=df,
    x_estimator=np.mean,
)
Here's one way to do it in plain pandas:
# Average every remaining column within each (mode, X) pair, keeping 'mode'
# and 'X' as ordinary columns rather than as the index.
y_means = df.groupby(['mode', 'X'], as_index=False).mean()
# Draw one marked line per mode.
for mode, g in y_means.groupby('mode'):
    plt.plot(g['X'], g['Y'], 'o-', label='mode = ' + str(mode))
This is an answer from the person who asked the question.
Actually, I found a solution myself.
#!/usr/bin/python3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Build the 8-row sample table: three input rows (mode, X, Y) transposed into columns.
df = pd.DataFrame([[1,1,1,1,2,2,2,2],[3,4,3,4,3,4,3,4],list(range(10,18))]).T
df.columns = ['mode','X','Y']
# One entry per mode; the values are the mode numbers themselves (as means).
mode = df.groupby(['mode'])['mode'].mean()
# One entry per X value; used as the shared x coordinates.
Ox = df.groupby(['X'])['X'].mean()
# Mean Y for every (mode, X) pair.
Oy = df.groupby(['mode','X'])['Y'].mean()
for x in mode:
    # Oy[mode[x]] selects the per-X means that belong to this mode;
    # str(x) is required because x is numeric.
    plt.plot(Ox, Oy[mode[x]], label='test' + str(x))
plt.savefig('testpandas.png')
I would guess the easiest way to do this is to use a pivot_table. This reduces the whole thing to two lines:
# Reshape to one row per X and one column per mode, then plot every column as a line.
piv = df.pivot_table(index="X", columns="mode")
plt.plot(piv)
or even only one, if you use pandas integrated plotting functionality:
# Same pivot, drawn with the plotting method built into pandas.
df.pivot_table(index="X", columns="mode").plot()
The complete solution using matplotlib:
import pandas as pd
import matplotlib.pyplot as plt
# Build the 8-row sample table: three input rows (mode, X, Y) transposed into columns.
df = pd.DataFrame([[1,1,1,1,2,2,2,2],[3,4,3,4,3,4,3,4],list(range(10,18))]).T
df.columns = ['mode','X','Y']
# One row per X, one column per mode.
piv = pd.pivot_table(df, columns="mode", index="X")
# print is a function in Python 3; the bare `print piv` statement is a SyntaxError there.
print(piv)
plt.plot(piv)
# The pivot's column labels are ('Y', mode) pairs, so c[1] is the mode value.
plt.legend(labels=["mode {}".format(c[1]) for c in piv.columns.values])
plt.show()
which prints the pivot table as
Y
mode 1 2
X
3 11 15
4 12 16
and creates the plot
If I have a multiindex set up like:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from io import StringIO
# Inline sample data: a label, a month/day/year timestamp with am/pm, and a value.
csv = """string,date,number
a string1,2/5/11 9:16am,1.0
a string2,3/5/11 10:44pm,2.0
a string3,4/22/11 12:07pm,3.0
a string4,4/22/11 12:10pm,4.0
a string5,4/29/11 11:59am,1.0
a string6,5/2/11 1:41pm,2.0
a string7,5/2/11 2:02pm,3.0
a string8,5/2/11 2:56pm,4.0
a string9,5/2/11 3:00pm,5.0
a string10,5/2/14 3:02pm,6.0
a string11,5/2/14 3:18pm,7.0"""
df = pd.read_csv(StringIO(csv))
# Parse the text timestamps into datetimes.
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%y %I:%M%p')
# Index every row by its (timestamp, label) pair; the levels are named
# 'alpha' and 'bravo'.
df.index = pd.MultiIndex.from_tuples(
    list(zip(df['date'], df['string'])),
    names=['alpha', 'bravo'],
)
How can I do a groupby on the alpha index by month and then sum? What I've tried is:
df.groupby(level='alpha').sum().groupby(df.index.month).sum()
which clearly doesn't work.
Like this?
# Group by the calendar month of the 'alpha' level and total the 'number' column.
df.groupby(df.index.get_level_values('alpha').month)['number'].sum()