I'm trying to measure distance between CA atoms in a particular PDB file. The output appears as it should in Spyder, but the written text file appears strange and I don't know how to search for what the output is because I honestly do not know what it is written in. Below is my code:
import sys
import os
from Bio.PDB.PDBParser import PDBParser
import numpy as np
import warnings
def plot(data):
# import matplotlib # matplotlib.use( 'Agg' )
import pylab
fig = pylab.figure()
ax = fig.add_subplot(111)
cax = ax.imshow(data, interpolation='nearest')
ax.set_title('Ca-Ca distance plot')
# Add colorbar, make sure to specify tick locations to match desired ticklabels
min = np.min(data)
max = np.max(data)
cbar = fig.colorbar(cax, ticks=[min, max])
cbar.set_ticks([min,max])
pylab.show()
# pylab.savefig( 'distmat.png', format='png' )
if __name__ == '__main__':
fnam = 'myfile.pdb'
if not os.path.exists(fnam):
print "file not found"
raise SystemExit
id = 'myfile'
warnings.simplefilter('ignore')
parser = PDBParser()
s = parser.get_structure(id, fnam)
chains = [c for c in s.get_chains()]
x = []
for r in chains[0]:
if 'CA' in r.child_dict:
ca = r.child_dict['CA']
x.append(ca.get_coord())
coords = np.array(x)
print (coords)
import scipy.spatial
data = scipy.spatial.distance.cdist(coords, coords, 'euclidean')
print data.shape
plot(data)
print(data)
with open('some file.txt', 'w') as f:
f.write(data)
f.close()
Below is part of the output. What is it and how do I fix it? Thank you for your help!:
‹ñ:(tO#m f\⁄l#cÿÖ∂r!#²«1pi"#ß!•=E6'#=∫;oÆ)#cQ¥Ë¢a(#O)[hÉû/#Æ$≠€781#‹∞'å©X1#x±Mjû+#…‘0™û¥)#Ø1”P/#·uùπ„,#SrÆ¡·˙%#-‰Ÿ^>Â*#QCy‘O/#B?˜+#͵øµÇ|'#e∫≤òÌÄ.#ú?1#Õú…È.#«wX˜¥,#´i¥Ç–è+#◊s°(¯%#βø¶·+#¨ÉSίz.#k`rÕ=C(#åܪÆ'#ï¯ÛU’.#kc÷é.#(¢·F£%*#óqÖîMµ0#Xö1Â
[
Related
So i have an issue. I have two Scripts running one which is a CPU and Time logger of every second to record the CPU usage to a text file. The other is a Script that reads the text file into a graph but the graph is not a uniform axis and does not increase in units and i get the wrong output view.
Script1: logs PSU and time to txt file
import psutil import time
print(str(time.strftime("%H:%M:%S", time.localtime())) + ", " +
str(psutil.cpu_percent(interval=1)))
f = open("example.txt", "a+")
f.write(str(time.strftime("%H:%M:%S", time.localtime())) +
", " + str(psutil.cpu_percent(interval=1)) + " \n")
Program 2: plots to a graph
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
style.use('fivethirtyeight')
fig = plt.figure() ax1 = fig.add_subplot(1,1,1)
def animate(i):
graph_data = open ('example.txt','r').read()
lines = graph_data.split('\n')
xs = []
ys = []
for line in lines:
if len(line) > 1:
x, y = line.split(',')
xs.append(x)
ys.append(y)
ax1.clear()
ax1.plot(xs, ys)
ani = animation.FuncAnimation(fig, animate, interval=1000) plt.show()
Script2 output
If this can be made into one Script then great but i am trying to learn the basics. I think it is a String issue with writing to txt files but dont know why a string would matter in a txt file.
I suppose that the string you need to write in the TXT o CSV file will be generated by (in CSV is much easier to read before):
import time
import psutil
import csv
num_measures = 10
with open("cpu_pcnt.csv", mode='w') as file:
for _ in range(num_measures):
str_time = time.strftime("%H:%M:%S")
cpu_pcnt = psutil.cpu_percent(interval=1)
str_data = f"{str_time},{cpu_pcnt}\n"
file.write(str_data)
Then, convert the time in datetime object to the plot and look to cast the pcu percent into float:
def animate(i):
xs = []
ys = []
with open("cpu_pcnt.csv") as file:
reader = csv.reader(file)
for line in reader:
xs.append(datetime.strptime(line[0], "%H:%M:%S"))
ys.append(float(line[1]))
ax1.clear()
ax1.plot(xs, ys)
I have a file with a table. I am trying to plot a velDisp vs. ABSMAG. Here is my code:
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy.io.fits import getdata
from astropy.table import Table
data = getdata("Subset.fits")
data, hdr = getdata("Subset.fits",1,header = True)
table = fits.open('Subset.fits')
data1 = Table(table[1].data)
#print("Columnns:", data1[0].columns)
graph = Table.read('Subset.fits')
mag = data1['ABSMAG']
r_mag = mag[:,2]
x = graph['ABSMAG']
y = graph['velDisp']
plt.scatter(x, y, color = 'r')
plt.title('Velocity Dispersion vs Absolute Magnitude')
plt.xlabel('Abs Mag(r_band)')
plt.ylabel('Velocity Dispersion')
plt.grid()
plt.show()
It's giving me the error that x and y must be the same size.The velDisp I believe is in 3D so this may need to be done in log space. Any idea how to do this?
Here is my code so far:
with open(logfile,'rb') as f:
while True:
lines = sum(1 for line in f)
print lines
X = np.arange(lines)
data = []
for line in f:
a = line.split(',')
data.append(a[1][:-2])
print data
Y = np.array(data)
plt.ion()
graph = plt.plot(X,Y)[0]
graph.set_y_data(Y)
plt.plot(data)
plt.draw()
plt.pause(0.01)
Right now when I print data or Y, it prints an empty array. Then it complains about X not being the same dimension as Y of course. I wonder if perhaps this is because data is not filled quickly enough before the print command is called? But python is supposed to execute sequentially, right?
In any case, I think the logic itself here is probably at fault. This is my best guess - open the file, and while True, try and read everything in and send it into the plot for plot.draw to use. Then as the file is growing as log files do, the chart data and the chart itself will update. How can I ensure that this works?
Use matplotlibs animation features
You need to make an animation like this example.
Version updating the data
Create an empty plot first and update along the way:
import time
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
def read(logfile):
with open(logfile) as f:
data = []
while True:
line = f.readline()
time.sleep(0.1)
if line:
data.append(float(line.split(',')[1][:-2]))
yield data
def animate(values):
x = list(range(len(values)))
line.set_data(x, values)
ax.set_xlim(x[0], x[-1])
ax.set_ylim(min(values), max(values))
return line,
fig, ax = plt.subplots()
line, = ax.plot([])
ani = FuncAnimation(fig, animate, frames=read('log.txt'), interval=10)
plt.show()
Version creating a new plot each time
Less code, but works only for a few steps:
import time
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
def read(logfile):
with open(logfile) as f:
data = []
while True:
line = f.readline()
time.sleep(0.1)
if line:
data.append(float(line.split(',')[1][:-2]))
yield data
def animate(values):
line, = plt.plot(values, color='blue')
return line,
fig = plt.figure(figsize = (5,5))
ani = FuncAnimation(fig, animate, frames=read('log.txt'), interval=10)
plt.show()
Here is the plot I have currently:
The 'time' strings I import are like this: 08:12:46, so I would like to cut the zeros at the end, but I can't seem to find the problem. Also, is there a way to show the floats on the Y axis in the exponential format, which is the one I am importing from the csv?
I just started to look into matplotlib and numpy for work, so if you have some advice it would be fantastic.
Thank you in advance!
import numpy as np
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
print(plt.style.available)
style.use('ggplot')
fig = plt.figure()
ax1 = fig.add_subplot(1,1,1)
def animate(i):
graph_data = open('C:\\Users\\arzuffi pc test\\Desktop\\VMI WIP - Copia (2)\\Cycle info\\_Current Cycle.csv','r').read()
#graph_data = open('C:\\Users\\arzuffi pc test\\Desktop\\Visual Machine Interface Alpha 1.4.3\\Cycle info\\_Current Cycle.csv','r').read()
lines = graph_data.split('\n')
xs = []
ys = []
skip = 0
for line in lines:
if skip < 7:
skip += 1
else:
if len(line) > 1:
time, cycle, pc, pd, co, hv, cr, ph, gd_volt, gd_amp, gd_power, eva_amp, eva_volt, p_rpm, p_amp, r1_rpm, r1_amp, r2_rpm, r2_amp, hmdso, gas, ahc, diff_l, diff_r = line.split(';')
#x, y = line.split(';')
print(time)
print(pc)
xs.append(dt.datetime.strptime(time,'%H:%M:%S'))#.date())
ys.append(pc)
#print(i)
#xs = matplotlib.dates.date2num(xs)
print(xs)
if len (xs) > 100:
xs = xs[-100:]
if len (ys) > 100:
ys = ys[-100:]
ax1.clear()
ax1.plot(xs, ys)
plt.gcf().autofmt_xdate()
ani = animation.FuncAnimation(fig, animate,interval = 1000)
plt.show()
these are the data:
You can specify the format to be used as follows:
xs = matplotlib.dates.date2num(xs) # You need to keep this line
hfmt = matplotlib.dates.DateFormatter('%H:%M:%S')
ax1.xaxis.set_major_formatter(hfmt)
ax1.plot(xs, ys) # You have this already
This would give you an output as follows:
I have lot of binary and ascii files in one folder. I am reading them using glob module. Doing processing of the binary data so that I can plot them. And finally, I am trying to plot simplified binary data in one subplot and normal ascii file in another subplot. The problem I am facing is that it can generate plots for the corresponding binary files. But for the ascii files it just simply override the previous files and always generates the same plot. Here is the simplied version of the code for an example-
import glob
import numpy as np
from struct import unpack
import matplotlib.pyplot as plt
chi = sorted(glob.glob('C:/Users/Desktop/bin/*.chi'))
for index,fh in enumerate(chi):
data = np.genfromtxt(fh, dtype = float)
x = [row[0] for row in data]
y = [row[1] for row in data]
binary = sorted(glob.glob('C:/Users/Desktop/bin/*.bin'))
for count,FILE in enumerate(binary):
F = open(FILE,'rb')
B = unpack('f'*1023183, F.read(4*1023183))
A = np.array(B).reshape(1043, 981)
F.close()
#a = something column 1 # some further processing
#b = something column 2 # and generates 1D data
fig = plt.figure(figsize=(11, 8.0))
ax1 =fig.add_subplot(211,axisbg='w')
ax1.plot(a,b)
ax2 =fig.add_subplot(212, axisbg ='w')
ax2.plot(x,y)
plt.show()
Can somebody please explain why the files are replacing each other during plotting only for one set of data where the other set is plotting correctly?
the structures of the loops is not correct in your example, you must have the plot command inside the loop over the ascii file, else only the last one is plotted. This should work:
try it like this:
import glob
import numpy as np
from struct import unpack
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(11, 8.0))
chi = sorted(glob.glob('C:/Users/Desktop/bin/*.chi'))
for index,fh in enumerate(chi):
data = np.genfromtxt(fh, dtype = float)
x = [row[0] for row in data]
y = [row[1] for row in data]
ax1 =fig.add_subplot(211, axisbg ='w')
ax1.plot(x,y)
binary = sorted(glob.glob('C:/Users/Desktop/bin/*.bin'))
for count,FILE in enumerate(binary):
F = open(FILE,'rb')
B = unpack('f'*1023183, F.read(4*1023183))
A = np.array(B).reshape(1043, 981)
F.close()
#a = something column 1 # some further processing
#b = something column 2 # and generates 1D data
ax2 =fig.add_subplot(212,axisbg='w')
ax2.plot(a,b)
plt.show()