We are getting this below error while migrating the data from slack channel to a file, when we execute the script for fetching the data for one day, it executing perfectly.
But when we execute the script for 2 months data, it gives 10 days data in separate file but getting throwing an error on particular date. It might be possible that the source data on slack is bit different from expected
Traceback (most recent call last):
File "C:\Users\Slack SCript\script.py", line 218, in <module>
main()
File "C:\Users\Slack SCript\script.py", line 201, in main
parse(message['text'])
File "C:\Users\Slack SCript\script.py", line 114, in parse
size = float(elements[1])
ValueError: could not convert string to float:
As per the source data we found that some value is 0 maybe the error we got because of this value. is there any way to skip or continue future.
from slackclient import SlackClient
import time
import os
import sys
import datetime
from dateutil.relativedelta import relativedelta
servers = ("fd2a", "ff1a", "hh3b", "kw1a", "kw1b", "lo8a", "os5a", "os5b", "sg2a", "sg2b", 'sy1a', 'va1a', 'va1b')
types = ("", "nfs", "cluster")
currser = "d"
currtype = ""
used = {}
total = {}
available = {}
ts = 0
dir_name = "data"
def savedata(dir_path, filename, data):
f = open(dir_path + filename, "w") # opens file with name of "test.txt"
print(dir_path + filename)
f.write(data)
f.close()
def reset_data():
print("datareset")
for i in range(0, len(servers)):
for j in range(0, len(types)):
used[servers[i] + types[j]] = 0
total[servers[i] + types[j]] = 0
available[servers[i] + types[j]] = 0
def write_data(ts):
datastr = ''
global used
global total
ttotaltotalsum = 0
for j in range(0, len(types)):
datastr += types[j] + '\n'
datastr += "Name\t" + "Region\t" + "total(TB)\t" + "used(TB)\t" + "available(TB)\t" + "Used(%)\n"
for i in range(0, len(servers)):
tused = used[servers[i] + types[j]]
ttotal = total[servers[i] + types[j]]
ttotaltotalsum += ttotal
if (ttotal != 0):
datastr += (
servers[i][0:len(servers[i]) - 1] + "\t\t" +
servers[i][len(servers[i]) - 1] + "\t\t" +
"{:.1f}".format(ttotal / 1024) + " \t\t" +
"{:.1f}".format(tused / 1024) + " \t\t" +
"{:.1f}".format((ttotal - tused) / 1024) +"\t\t"+
"{:.1f}".format(tused / ttotal * 100) + " \t\t" +
" \n")
print("..")
if (ttotaltotalsum > 0):
hour= datetime.datetime.fromtimestamp(int(ts)).hour
day= datetime.datetime.fromtimestamp(int(ts)).day
month= datetime.datetime.fromtimestamp(int(ts)).month
year=datetime.datetime.fromtimestamp(int(ts)).year
if hour < 12:
savedata("data/", "Storage-Update-M-" +
str(day) + "-" +
str(month) + "-" +
str(year) + ".txt", datastr)
else:
savedata("data/", "Storage-Update-E-" +
str(day) + "-" +
str(month) + "-" +
str(year) + ".txt", datastr)
def parse(text):
global currser
global currtype
global used
global total
global available
global ts
content = text.split("\n")
for line in content:
line = line[:len(line)]
if line.__contains__("Netapp Cluster"):
for server in servers:
if line.__contains__(server):
currser = server
for type in types:
if line.__contains__(type):
currtype = type
# print(line)
if line.__contains__("Total available capacity"):
# print(line)
# print ("contains","Total available capacity------")
elements = line.split(":")
# print (elements)
size = float(elements[1])
# print(size)
total[currser + currtype] += size
# print(size,"TOTAL capacity",total)
elif line.__contains__("size provisioned"):
# print(line)
# print("contains", "Total LUN size provisioned------- ")
elements = line.split(":")
# print(elements)
size = float(elements[1])
# print(size)
used[currser + currtype] += size
# print(size, "Used", used)
# print( currser)
# print( currtype)
# print( used)
# print(total)
# print(available)
return (used, total)
def make_dir(dir_name):
if not os.path.exists(dir_name):
os.makedirs(dir_name)
def main():
slack_token = ""
channel_name = ''
time_on_last_message = time.time()
channel_id = ""
ts = 0.000
threshmins = 20
channels_call = SlackClient(slack_token).api_call("channels.list")
print(channels_call)
print(channels_call.keys())
for channel in channels_call["channels"]:
if channel["name"] == channel_name:
channel_id = channel["id"]
print(channel)
make_dir(dir_name)
print(channel_id)
reset_data()
time_since_last_update = time.time() - time_on_last_message
print("Waiting for new data....", time.time() - time_on_last_message)
if time_since_last_update > threshmins * 60:
write_data(ts)
reset_data()
sc = SlackClient(slack_token)
date_after_month = datetime.datetime.now() + relativedelta(months=-6)
date_after_month=date_after_month.timestamp()
while True:
breakflag=0
data = sc.api_call(
"channels.history",
channel=channel_id,
oldest=date_after_month,
count=1000,
)
if (data['ok'] == True):
messages = data['messages']
for message in reversed(messages):
# print(message['ts'])
if float(message['ts']) > ts:
print("difference=", float(message['ts']) - ts)
if float(message['ts']) - ts > (threshmins * 60):
print("greater diffrrece>reset................")
write_data(ts)
print(ts)
reset_data()
time_on_last_message = time.time()
ts = float(message['ts'])
parse(message['text'])
if (data["has_more"] == True):
print("has more")
date_after_month=message['ts']
else:
breakflag=1
else:
print("No data returned or error")
time.sleep(1) # in Seconds
if(breakflag==1):
break
main()
Based on the error message, elements[1] is empty. And Python cannot convert an empty string to float:
>>> float("")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: could not convert string to float:
The elements[1] element is a string that can't be parsed to a float. The easiest way would be to attach a debugger and investigate what is being parsed. Then change your code to parse it better.
The second easiest way would be to binary search for the record that makes it fail and fix your code to parse it better.
The totally absolutely preferred way would be to, when you found what the case was that your code didn't support, you would write a test that proves that that case was added:
def test_parse_xyz():
assert [("blablabla", None)] == parse(["blablabla: -certainly_not_a_float"])
These tests can automatically be detected by e.g. pytest:
$ pytest parser.py
Related
Writing a python script to parse an incoming string from a text file and output word by word to serial (serial parts commented out)
Getting the following error:
Traceback (most recent call last):
File "/Users/di/Desktop/RBDS/rbdsScroll.py", line 23, in <module>
wordLength = len(wordList[stringInc])
IndexError: list index out of range
I know it has to do with the new list (when the contents of the text file have far less words than the previous list) not having a high enough index number to work. I'm not quite sure how to go about remedying this. Any help would be appreciated. Full code below:
import time
#import serial
from config import dataPath
from config import scrollTime
from config import preText
from config import postText
from config import port
stringInc=wordFirst=wordLast=wordList=wordLength=word2Length=0
while True:
f = open(dataPath, 'r')
file_contents = f.read()
f.close()
wordList = file_contents.split()
maxLength = len(wordList)
wordLength = len(wordList[stringInc])
if stringInc < maxLength - 1:
word2Length = len(wordList[stringInc + 1])
wordFirst = 0
wordLast = 8
if wordLength > 8:
longString = (wordList[stringInc])
while wordLength + 1 > wordLast:
# ser = serial.Serial(port)
# ser.write(preText + longString[wordFirst:wordLast] + postText)
# ser.close()
print(preText + longString[wordFirst:wordLast] + postText)
wordFirst = wordFirst + 1
wordLast = wordLast + 1
time.sleep(scrollTime / 1000)
elif (wordLength + word2Length < 8) and (stringInc + 1 < maxLength):
# ser = serial.Serial(port)
# ser.write(preText + wordList[stringInc] + " " + wordList[stringInc + 1] + postText)
# ser.close()
print(preText + wordList[stringInc] + " " + wordList[stringInc + 1] + postText)
stringInc = stringInc + 1
time.sleep(scrollTime / 1000)
else:
# ser = serial.Serial(port)
# ser.write(preText + wordList[stringInc] + postText)
# ser.close()
print(preText + wordList[stringInc] + postText)
time.sleep(scrollTime / 1000)
stringInc = stringInc + 1
if stringInc == maxLength:
stringInc = 0
If the file content is fixed, you should read it only once, and move the while just before setting wordLength. That sends all the read words one after the other.
In case the file content changes, which would explain the error, if the new content is shorter than the current value of stringInc, you lower maxLength (just after reading the file) but never force stringInc to be lower than this new value, hence the error message...
Adding a check like if stringInc >= maxLength: after changing maxLength should help correcting the code (in that case set stringInc to 0, I guess).
But the behavior is still strange, and I would rather send all the words of the file after reading it once, not reading it in the while, and then later would read the file again, to send if only if modified...
I've a problem with my Python code: when I call the function from another class, the class where I am calling the function restart, and the compiler gives this error message:
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
The class where I launch the code is this:
finestre = creatore_finestre()
print(finestre[0])
The code of the function is:
DIR_DATA = '../../../data/'
SIGNALS_INDEX = {
'HR': 0,
'ABPSys': 1,
'ABPDias': 2,
'ABPMean': 3,
'CVP': 4,
'PULSE': 5,
'RESP': 6,
'SpO2': 7,
'NBPSys': 8,
'NBPDias': 9,
'NBPMean': 10,
}
def download_information_database(id_patient):
wfdb.dldatabase('mimic2db/numerics', DIR_DATA + id_patient, records=[id_patient])
def create_csv(id_patient, signal):
# Download the patient information
download_information_database(id_patient)
# Firstly, we read the patient information
patient_dir = DIR_DATA + id_patient + "/"
record = wfdb.rdsamp(patient_dir + id_patient, channels=[SIGNALS_INDEX[signal]])
# We calculate the datetime base
date_str = record.basedate + ' ' + record.basetime
date = datetime.datetime.strptime(date_str, '%d/%m/%Y %H:%M:%S')
# We read the signal values
signal = record.p_signals
# We write the csv file
with open(patient_dir + id_patient + '.csv', 'w+') as csvfile:
writer = csv.writer(csvfile, delimiter=',',lineterminator="\n")
for s in signal:
date = date + datetime.timedelta(minutes=1)
if not math.isnan(float(s)):
writer.writerow([date.strftime("'[%H:%M:%S %d/%m/%Y]'"),str(int(s[0]) * 1000)])
def creatore_finestre():
#Open the file of information for each patients
in_file = open("../../../data/CodePatientsTraining.csv", "r+")
lettore_file = csv.reader(in_file)
#Create dictionary of list
finestre = defaultdict(list)
for nomeFile in lettore_file:
print(nomeFile[0])
create_csv(nomeFile[0],"ABPMean")
f = open("../../../data/" + nomeFile[0] + "/" + nomeFile[0] + ".csv", "r+")
reader = csv.reader(f)
line = in_file.readline()
lista = list(reader)
i = 0
for timestamp in lista:
if (timestamp[0] != nomeFile[1]):
i += 1
else:
print(timestamp[0], nomeFile[1], i)
break
decade = 0
somma = 0
arrivo = 1
minute = 10
while (i != 0):
i -= 1
if (lista[i][1] != '0' and lista[i][1] != '-' and int(lista[i][1]) > 0):
somma += float(lista[i][1])
decade += 1
if (decade == minute):
f = SlidingWindows((somma / minute), nomeFile[4])
finestre[arrivo].append(f)
print("T[" + str(arrivo) + "]:")
for value in finestre[arrivo]:
print(value)
decade = 0
arrivo += 1
somma = 0
return finestre
My idea is to create a SlidingWindows for each CSV file in the function, and take all the sliding windows from the other class.
import your_module
if __name__ == '__main__':
extractor = your_Module.your_function()
extractor.runInParallel(numProcesses=2, numThreads=4)
The script, originally taken and modified from (http://globplot.embl.de/):
#!/usr/bin/env python
# Copyright (C) 2003 Rune Linding - EMBL
# GlobPlot TM
# GlobPlot is licensed under the Academic Free license
from string import *
from sys import argv
from Bio import File
from Bio import SeqIO
import fpformat
import sys
import tempfile
import os
from os import system,popen3
import math
# Russell/Linding
RL = {'N':0.229885057471264,'P':0.552316012226663,'Q':-0.187676577424997,'A':-0.261538461538462,'R':-0.176592654077609, \
'S':0.142883029808825,'C':-0.0151515151515152,'T':0.00887797506611258,'D':0.227629796839729,'E':-0.204684629516228, \
'V':-0.386174834235195,'F':-0.225572305974316,'W':-0.243375458622095,'G':0.433225711769886,'H':-0.00121743364986608, \
'Y':-0.20750516775322,'I':-0.422234699606962,'K':-0.100092289621613,'L':-0.337933495925287,'M':-0.225903614457831}
def Sum(seq,par_dict):
sum = 0
results = []
raws = []
sums = []
p = 1
for residue in seq:
try:
parameter = par_dict[residue]
except:
parameter = 0
if p == 1:
sum = parameter
else:
sum = sum + parameter#*math.log10(p)
ssum = float(fpformat.fix(sum,10))
sums.append(ssum)
p +=1
return sums
def getSlices(dydx_data, DOM_join_frame, DOM_peak_frame, DIS_join_frame, DIS_peak_frame):
DOMslices = []
DISslices = []
in_DOMslice = 0
in_DISslice = 0
beginDOMslice = 0
endDOMslice = 0
beginDISslice = 0
endDISslice = 0
for i in range( len(dydx_data) ):
#close dom slice
if in_DOMslice and dydx_data[i] > 0:
DOMslices.append([beginDOMslice, endDOMslice])
in_DOMslice = 0
#close dis slice
elif in_DISslice and dydx_data[i] < 0:
DISslices.append([beginDISslice, endDISslice])
in_DISslice = 0
# elseif inSlice expandslice
elif in_DOMslice:
endDOMslice += 1
elif in_DISslice:
endDISslice += 1
# if not in slice and dydx !== 0 start slice
if dydx_data[i] > 0 and not in_DISslice:
beginDISslice = i
endDISslice = i
in_DISslice = 1
elif dydx_data[i] < 0 and not in_DOMslice:
beginDOMslice = i
endDOMslice = i
in_DOMslice = 1
#last slice
if in_DOMslice:
DOMslices.append([beginDOMslice, endDOMslice])
if in_DISslice:
DISslices.append([beginDISslice,endDISslice])
k = 0
l = 0
while k < len(DOMslices):
if k+1 < len(DOMslices) and DOMslices[k+1][0]-DOMslices[k][1] < DOM_join_frame:
DOMslices[k] = [ DOMslices[k][0], DOMslices[k+1][1] ]
del DOMslices[k+1]
elif DOMslices[k][1]-DOMslices[k][0]+1 < DOM_peak_frame:
del DOMslices[k]
else:
k += 1
while l < len(DISslices):
if l+1 < len(DISslices) and DISslices[l+1][0]-DISslices[l][1] < DIS_join_frame:
DISslices[l] = [ DISslices[l][0], DISslices[l+1][1] ]
del DISslices[l+1]
elif DISslices[l][1]-DISslices[l][0]+1 < DIS_peak_frame:
del DISslices[l]
else:
l += 1
return DOMslices, DISslices
def SavitzkyGolay(window,derivative,datalist):
SG_bin = 'sav_gol'
stdin, stdout, stderr = popen3(SG_bin + '-D' + str(derivative) + ' -n' + str(window)+','+str(window))
for data in datalist:
stdin.write(`data`+'\n')
try:
stdin.close()
except:
print stderr.readlines()
results = stdout.readlines()
stdout.close()
SG_results = []
for result in results:
SG_results.append(float(fpformat.fix(result,6)))
return SG_results
def reportSlicesTXT(slices, sequence, maskFlag):
if maskFlag == 'DOM':
coordstr = '|GlobDoms:'
elif maskFlag == 'DIS':
coordstr = '|Disorder:'
else:
raise SystemExit
if slices == []:
#by default the sequence is in uppercase which is our search space
s = sequence
else:
# insert seq before first slide
if slices[0][0] > 0:
s = sequence[0:slices[0][0]]
else:
s = ''
for i in range(len(slices)):
#skip first slice
if i > 0:
coordstr = coordstr + ', '
coordstr = coordstr + str(slices[i][0]+1) + '-' + str(slices[i][1]+1)
#insert the actual slice
if maskFlag == 'DOM':
s = s + lower(sequence[slices[i][0]:(slices[i][1]+1)])
if i < len(slices)-1:
s = s + upper(sequence[(slices[i][1]+1):(slices[i+1][0])])
#last slice
elif slices[i][1] < len(sequence)-1:
s = s + lower(sequence[(slices[i][1]+1):(len(sequence))])
elif maskFlag == 'DIS':
s = s + upper(sequence[slices[i][0]:(slices[i][1]+1)])
#insert untouched seq between disorder segments, 2-run labelling
if i < len(slices)-1:
s = s + sequence[(slices[i][1]+1):(slices[i+1][0])]
#last slice
elif slices[i][1] < len(sequence)-1:
s = s + sequence[(slices[i][1]+1):(len(sequence))]
return s,coordstr
def runGlobPlot():
try:
smoothFrame = int(sys.argv[1])
DOM_joinFrame = int(sys.argv[2])
DOM_peakFrame = int(sys.argv[3])
DIS_joinFrame = int(sys.argv[4])
DIS_peakFrame = int(sys.argv[5])
file = str(sys.argv[6])
db = open(file,'r')
except:
print 'Usage:'
print ' ./GlobPipe.py SmoothFrame DOMjoinFrame DOMpeakFrame DISjoinFrame DISpeakFrame FASTAfile'
print ' Optimised for ELM: ./GlobPlot.py 10 8 75 8 8 sequence_file'
print ' Webserver settings: ./GlobPlot.py 10 15 74 4 5 sequence_file'
raise SystemExit
for cur_record in SeqIO.parse(db, "fasta"):
#uppercase is searchspace
seq = upper(str(cur_record.seq))
# sum function
sum_vector = Sum(seq,RL)
# Run Savitzky-Golay
smooth = SavitzkyGolay('smoothFrame',0, sum_vector)
dydx_vector = SavitzkyGolay('smoothFrame',1, sum_vector)
#test
sumHEAD = sum_vector[:smoothFrame]
sumTAIL = sum_vector[len(sum_vector)-smoothFrame:]
newHEAD = []
newTAIL = []
for i in range(len(sumHEAD)):
try:
dHEAD = (sumHEAD[i+1]-sumHEAD[i])/2
except:
dHEAD = (sumHEAD[i]-sumHEAD[i-1])/2
try:
dTAIL = (sumTAIL[i+1]-sumTAIL[i])/2
except:
dTAIL = (sumTAIL[i]-sumTAIL[i-1])/2
newHEAD.append(dHEAD)
newTAIL.append(dTAIL)
dydx_vector[:smoothFrame] = newHEAD
dydx_vector[len(dydx_vector)-smoothFrame:] = newTAIL
globdoms, globdis = getSlices(dydx_vector, DOM_joinFrame, DOM_peakFrame, DIS_joinFrame, DIS_peakFrame)
s_domMask, coordstrDOM = reportSlicesTXT(globdoms, seq, 'DOM')
s_final, coordstrDIS = reportSlicesTXT(globdis, s_domMask, 'DIS')
sys.stdout.write('>'+cur_record.id+coordstrDOM+coordstrDIS+'\n')
print s_final
print '\n'
return
runGlobPlot()
My input and output files are here: link
This script takes a input (input1.fa) and gives following output output1.txt
But when I try to run this script with similar type but larger input file (input2.fa) .. It shows following error:
Traceback (most recent call last):
File "final_script_globpipe.py", line 207, in <module>
runGlobPlot()
File "final_script_globpipe.py", line 179, in runGlobPlot
smooth = SavitzkyGolay('smoothFrame',0, sum_vector)
File "final_script_globpipe.py", line 105, in SavitzkyGolay
stdin.write(`data`+'\n')
IOError: [Errno 22] Invalid argument
I have no idea where the problem is. Any type of suggestion is appriciated.
I am using python 2.7 in windows 7 machine. I have also attached the Savitzky Golay module which is needed to run the script.
Thanks
UPDATE:
After trying to reproduce the error on linux it's showing a similar behavior, working fine with the first file but with the second is returning Errno32.
Traceback:
Traceback (most recent call last):
File "Glob.py", line 207, in <module>
runGlobPlot()
File "Glob.py", line 179, in runGlobPlot
smooth = SavitzkyGolay('smoothFrame',0, sum_vector)
File "Glob.py", line 105, in SavitzkyGolay
stdin.write(`data`+'\n')
IOError: [Errno 32] Broken pipe
Update:
Some calls of the SG_bin return that the -n parameter is the wrong type.
Wrong type of parameter for flag -n. Has to be unsigned,unsigned
This parameter comes from the window variable that is passed to the SavitzkyGolay function.
Surrounding the stdin.write with a trycatch block reveals that it breaks a hadnfull of times.
try:
for data in datalist:
stdin.write(repr(data)+'\n')
except:
print "It broke"
I put trailing print() methods right next to my write() method lines at the end of my code to test why my output files were incomplete. But, the print() output is "all the stuff" I expect; while the write() output is off by a confusing amount (only 150 out of 200 'things'). Reference Image of Output: IDLE versus external output file
FYI: Win 7 64 // Python 3.4.2
My modules take an SRT captions file ('test.srt') and returns a list object I create from it; in particular, one with 220 list entries of the form: [[(index), [time], string]]
times = open('times.txt', 'w')
### A portion of Riobard's SRT Parser: srt.py
import re
def tc2ms(tc):
''' convert timecode to millisecond '''
sign = 1
if tc[0] in "+-":
sign = -1 if tc[0] == "-" else 1
tc = tc[1:]
TIMECODE_RE = re.compile('(?:(?:(?:(\d?\d):)?(\d?\d):)?(\d?\d))?(?:[,.](\d?\d?\d))?')
match = TIMECODE_RE.match(tc)
try:
assert match is not None
except AssertionError:
print(tc)
hh,mm,ss,ms = map(lambda x: 0 if x==None else int(x), match.groups())
return ((hh*3600 + mm*60 + ss) * 1000 + ms) * sign
# my code
with open('test.srt') as f:
file = f.read()
srt = []
for line in file:
splitter = file.split("\n\n")
# SRT splitter
i = 0
j = len(splitter)
for items in splitter:
while i <= j - 2:
split_point_1 = splitter[i].index("\n")
split_point_2 = splitter[i].index("\n", split_point_1 + 1)
index = splitter[i][:split_point_1]
time = [splitter[i][split_point_1:split_point_2]]
time = time[0][1:]
string = splitter[i][split_point_2:]
string = string[1:]
list = [[(index), [time], string]]
srt += list
i += 1
# time info outputter
i = 0
j = 1
for line in srt:
if i != len(srt) - 1:
indexer = srt[i][1][0].index(" --> ")
timein = srt[i][1][0][:indexer]
timeout = srt[i][1][0][-indexer:]
line_time = (tc2ms(timeout) - tc2ms(timein))/1000
space_time = ((tc2ms((srt[j][1][0][:indexer]))) - (tc2ms(srt[i][1][0][-indexer:])))/1000
out1 = "The space between Line " + str(i) + " and Line " + str(j) + " lasts " + str(space_time) + " seconds." + "\n"
out2 = "Line " + str(i) + ": " + str(srt[i][2]) + "\n\n"
times.write(out1)
times.write(out2)
print(out1, end="")
print(out2)
i += 1
j += 1
else:
indexer = srt[i][1][0].index(" --> ")
timein = srt[i][1][0][:indexer]
timeout = srt[i][1][0][-indexer:]
line_time = (tc2ms(timeout) - tc2ms(timein))/1000
outend = "Line " + str(i) + ": " + str(srt[i][2]) + "\n<End of File>"
times.write(outend)
print(outend)
My two write() method output files, respectively, only print out either ~150 or ~200 items of the 220 things it otherwise correctly prints to the screen.
You want to close your times file when done writing; operating systems use write buffers to speed up file I/O, collecting larger blocks of data to be written to disk in one go; closing the file flushes that buffer:
times.close()
Consider opening the file in a with block:
with open('times.txt', 'w') as times:
# all code that needs to write to times
I am new to python and I am trying to run this code,which I found on github ,but it does not work, is something wrong with the code?Or is it my fault? I am always getting the
"no data found"
message.
skyscanner.py :
#!/usr/bin/python
"""The script obtains prices and flight information for a given
input (departure, arrival airports and date), outputs this
data to the console and writes it to a csv file."""
__author__ = "Ingvaras Merkys"
import json
import urllib2
import re
import sys
import time
# Global vars:
AUTOSUGGEST_URL = "http://www.skyscanner.net/dataservices/geo/v1.0/autosuggest/uk/en/"
# e. g. http://www.skyscanner.net/dataservices/geo/v1.0/autosuggest/uk/en/edinb
SKYSCANNER_URL = "http://www.skyscanner.net/flights/"
# e. g. http://www.skyscanner.net/flights/vno/edi/130419
ROUTEDATA_URL = "http://www.skyscanner.net/dataservices/routedate/v2.0/"
# e. g. http://www.skyscanner.net/dataservices/routedate/v2.0/a00765d2-7a39-404b-86c0-e8d79cc5f7e3
SUGGESTIONS_URL = "http://www.skyscanner.net/db.ashx?ucy=UK&lid=en&ccy=GBP"
# e. g. http://www.skyscanner.net/db.ashx?ucy=UK&lid=en&ccy=GBP&fp=KAUN&tp=EDIN&dd=20130410
def main(argv):
input_from = argv[0].replace(" ", "%20").replace("\"", "")
input_to = argv[1].replace(" ", "%20").replace("\"", "")
date = argv[2].replace("/", "")
place_id_from, place_id_to, name_from, name_to = get_codes(input_from, input_to)
# testjuly = map (lambda x: len(x) == 1 and '13070'+x or '1307'+x, [ str(i+1) for i in range(31) ])
# for date in testjuly:
session_key = get_session_key(place_id_from, place_id_to, date)
for attempt in range(3):
# if script is run repeatedly sometimes an empty html is returned
try:
response = urllib2.urlopen(ROUTEDATA_URL + session_key)
html = response.read()
data = json.loads(html)
except ValueError:
f = open("error.log", "a")
f.write(ROUTEDATA_URL + session_key + "\n")
f.write("Returned:\n" + html + "\n")
time.sleep(1)
else:
break
else:
sys.exit(1)
query = data['Query']
if data['Stats']['OutboundLegStats']['TotalCount'] == 0:
print "No flights found from", name_from, "to", name_to
return 0
#show_suggestions(query['OriginPlace'], query['DestinationPlace'], date)
#sys.exit(2)
stations = data['Stations']
quotes = data['Quotes']
carriers = data['Carriers']
cheapest_price = data['Stats']['ItineraryStats']['Total']['CheapestPrice']
print "Results for flight from", name_from, "to", name_to
print "Outbound date:", re.split('T', query['OutboundDate'])[0]
print "Cheapest Journey:", cheapest_price, "RMB"
return cheapest_price
# f = open(place_id_from + '-' + place_id_to + '-' + date + '.csv','w')
# for leg in data['OutboundItineraryLegs']:
# leg_price = get_leg_price(leg['PricingOptions'], quotes)
# depart_time = leg['DepartureDateTime'].replace("T", " ")
# arrive_time = leg['ArrivalDateTime'].replace("T", " ")
# duration = leg['Duration']
# carrier_names = get_carrier_names(leg['MarketingCarrierIds'], carriers)[1]
# print "\n\tPrice:", leg_price, "GBP"
# print "\tDeparting:", depart_time
# print "\tArriving:", arrive_time
# print "\tDuration:", duration/60, "h", duration%60, "min"
# print "\tCarriers:", carrier_names
# print "\t# of stops: ", leg['StopsCount']
# stop_ids = leg.get('StopIds', [])
# stop_ids_string = ", ".join([ get_station_name(stop_id, stations) for stop_id in stop_ids ])
# print "\t\t", stop_ids_string
# row = str(leg_price) + "\t" + depart_time + "\t" + arrive_time + "\t" + str(duration) + "\t" + carrier_names + "\t" + stop_ids_string
# f.write(row + "\n")
# Functions
def get_codes(input_from, input_to):
"""Returns place id codes and names, e. g. ("EDI", "KUN", "Edinburgh", "Kaunas")"""
try:
i = 0
autosuggest_json_from = json.load(urllib2.urlopen(AUTOSUGGEST_URL + input_from))
if len(autosuggest_json_from[0]['PlaceId']) == 4:
# for cases where the first result is abstract (e. g. Glasgow (Any))
i = 1
place_id_from = autosuggest_json_from[i]['PlaceId']
name_from = autosuggest_json_from[i]['PlaceName']
j = 0
autosuggest_json_to = json.load(urllib2.urlopen(AUTOSUGGEST_URL + input_to))
if len(autosuggest_json_to[0]['PlaceId']) == 4:
j = 1
place_id_to = autosuggest_json_to[j]['PlaceId']
name_to = autosuggest_json_to[j]['PlaceName']
except IndexError:
print "No code found for:"
print input_from, "AND/OR", input_to
sys.exit(3)
return (place_id_from, place_id_to, name_from, name_to)
def get_session_key(place_id_from, place_id_to, date):
"""Returns a session key for a given query, on failure exits
NB. distant or past dates cause failures"""
response = urllib2.urlopen(SKYSCANNER_URL + place_id_from + "/" + place_id_to + "/" + date)
html = response.read()
regex = ur'"SessionKey":"(.+?)"'
# e. g. "SessionKey":"a00765d2-7a39-404b-86c0-e8d79cc5f7e3"
try:
session_key = re.findall(regex, html)[0]
except IndexError:
print "No data found for this date"
sys.exit(4)
return session_key
def show_suggestions(from_id, to_id, date):
"""Prints alternative departure airports"""
suggest_places_string = ""
suggestions_json = json.load(urllib2.urlopen(SUGGESTIONS_URL + "&fp=" + from_id + "&tp=" + to_id + "&dd=20" + date))
try:
suggest_places = suggestions_json['rs']
for place in suggest_places:
if place['fpid'] != from_id:
suggest_places_string += place['fan'] + ", "
if suggest_places_string[:-2] != "":
print "Try airports: ", suggest_places_string[:-2]
except (KeyError, IndexError):
print "Suggestions unavailable"
def get_station_name(station_id, stations):
"""Returns the name of the (intermediate) station,
e. g. "London Heathrow" """
for station in stations:
if station['Id'] == station_id:
return station['Name']
return ""
def get_leg_price(pricing, quotes):
"""Returns lowest leg price"""
prices = []
for price in pricing:
prices.append(get_quote_price(price['QuoteIds'], quotes))
return min(prices)
def get_quote_price(quote_ids, quotes):
"""Finds quotes by quote id and returns their price sum"""
price = 0;
for quote_id in quote_ids:
for quote in quotes:
if quote['Id'] == quote_id:
price += quote['Price']
return price
def get_carrier_names(carrier_ids, carriers):
"""Returns a tuple (list, string) with carrier names
e.g. (["airBaltic", "KLM"], "airBaltic, KLM")"""
carrier_names = []
carrier_names_string = ""
for carrier_id in carrier_ids:
carrierName = get_carrier_name(carrier_id, carriers)
carrier_names.append(carrierName)
carrier_names_string += carrierName + ", "
return (carrier_names, carrier_names_string[:-2])
def get_carrier_name(carrier_id, carriers):
"""Returns carrier name by id"""
for carrier in carriers:
if carrier['Id'] == carrier_id:
return carrier['Name']
return ""
if __name__ == "__main__":
if len(sys.argv) == 4:
main(sys.argv[1:])
else:
print "Enter arguments in this way:\n"
print "python skyscanner.py {departure airport} {arrival airport} {departure date (yy/mm/dd)}\n\n"
print "e. g. python skyscanner.py \"glasgow prestwick\" kaunas 13/07/21\n"
sys.exit()
These endpoints are not supported as external APIs, they are used by the site itself. They can/do change without notice and some require a level of "state" to operate.
However, we do have an API that would allow you access to the same auto-suggest / flight data that the site is driven from. More details can be found at http://business.skyscanner.net