Working with dictionaries - python

I have a dictionary that takes data from a file and puts it in a list. I want to make a search engine so that when I type the name, quantity, or price of a component, it finds all matching components and prints the info each one holds (price, quantity, category).
Input
I just can't make my script read info from lines in the file. The file's text looks like:
AMD A4-3300 2.5GHz 2-Core Fusion APU Box|5.179,00 din|58|opis|Procesor
AMD Athlon II X2 340 3.2GHz Box|4.299,00 din|8|opis|Procesor
INTEL Celeron G465 1.9GHz Box|3.339,00 din|46|opis|Procesor
INTEL Celeron Dual Core G550 2.6GHz Box|1.439,00 din|13|opis|Procesor
Output
Here is my code, which should be a search engine for my components. I just don't know how to take the data from the list and look up a component's full info. For example, when I type a keyword like AMD, the search engine should print all components that have AMD in their name; when I enter a price range, I should get all components with prices in that range. I tried some things but it won't work. Sorry for taking a long time to respond. I translated my code; there may be some lines left out, but I hope you get the picture.
# Interactive menu loop for working with components (Python 2: raw_input and
# print statements).
# NOTE(review): indentation was lost in this listing, so the nesting of the
# branches below cannot be determined from the text alone.
# NOTE(review): several names do not match as written: `Option_p_d` vs
# `option_p_d`, `u_komponenta_*` vs `u_component_*`, and `option_p_komponenti`
# vs `option_p_components`. The first "Chose option" call is also missing its
# closing parenthesis.
def option_p_components():
option = 0
#component = []
components = []
while option == 0 :
option_comp = option_p_components_str()
option_k = int(raw_input("Chose option : ")
print "" \
""
# Anything other than 1 or 2 shows an error banner and keeps the loop going.
if option_k != 1 and option_k != 2 :
error = "!!!Error!!!"
error_p = " you typed wrong command please try again ."
print "-" * 80
print error.center(80)
print error_p.center(80)
print "-" * 80
option = 0
if option_k == 1 :
option_p_d = 0
print "Components search "
print"-" * 80
cu = temp_comp(components)
print cu
print "X)Working with components(editing, deleting )"
print"-" * 80
print "1)Change components "
print "2)Editing components"
print "3)Delating componetns"
print "4)Components search "
print "5)Back"
print"-" * 80
option_p_d = int(raw_input("Chose option :"))
if Option_p_d == 2 :
option_d = 0
# Prompts for the five fields of a component, up to five times.
for I in range(5):
u_component_name = raw_input("Unesite naziv komponente :")
u_component_price= raw_input("Unestie cenu komponente:")
u_component_quantity = raw_input("Unesite kolicinu komponente :")
u_component_opis = raw_input("Unesite opis komponente :")
u_component_category = raw_input("Unesite kategoriju komponente:")
component = {"name_compo":u_komponenta_ime,
"price":u_komponenta_cena,
"quantity":u_komponenta_kolicina,
"opis":u_komponenta_opis,
"category":u_komponenta_kategorija}
upis_komponente = saving_components(component)
# NOTE(review): this appends the function object `saving_components`, not the
# component dict built above.
components.append(saving_components)
print"-" * 80
print "1)New component"
print "2)Back"
print"-" * 80
option_d = int(raw_input("Odaberite opciju :"))
if option_d == 1 :
option_k = 0
elif option_d == 2 :
option_p_komponenti()
elif option_k == 2 :
print "Back"
def saving_components(component):
    """Append one component to Data/component.txt as a pipe-delimited line.

    ``component`` is a dict with the string values "name_compo", "price",
    "quantity", "opis" and "category"; they are written in that order,
    separated by "|". Returns None.
    """
    field_order = ("name_compo", "price", "quantity", "opis", "category")
    final_component = "|".join(component[key] for key in field_order)
    # `with` guarantees the file is flushed and closed; the original referenced
    # `file.close` without calling it.
    with open("Data/component.txt", "a") as output:
        # One record per line, so that line-by-line reading sees separate records.
        output.write(final_component + "\n")
def reading_component(component):
    """Load every record of Data/component.txt into the list ``component``.

    Each non-blank line is split on "|" into name, price, quantity,
    description ("opis") and category, and a dict with the keys
    "name_compo", "price", "quantity", "opis" and "category" is appended
    to ``component``. The same list is returned.

    Raises ValueError if a non-blank line does not have exactly five fields.
    """
    with open("Data/component.txt", "r") as source:
        for line in source:
            # Drop the line terminator so it does not end up inside "category".
            line = line.rstrip("\r\n")
            if not line:
                continue
            name_comp, price, quantity, opis, category = line.split("|")
            component.append({"name_compo": name_comp,
                              "price": price,
                              "quantity": quantity,
                              "opis": opis,
                              "category": category})
    return component
def temp_comp(components):
    """Run the interactive search over ``components`` and return its result."""
    # The original passed the undefined name `komponente` instead of the parameter.
    return pretraga_po_opisu(components)
def pretraga_po_opisu(komponente):
    """Prompt for a quantity and print the quantity of each matching component.

    ``komponente`` is an iterable of component dicts. Quantities are compared
    as strings, exactly as typed. Returns None.
    """
    kolicina = raw_input("Unesite kolicinu:")
    for komponenta in komponente:
        # The component dicts in this file are built with the key "quantity";
        # "kolicina" is never set, so the original lookup raised KeyError.
        if komponenta["quantity"] == kolicina:
            print(komponenta["quantity"])
    return None
# NOTE(review): unfinished stub. `ulaz` is read but not used in the lines shown,
# and the result of the second statement is never returned.
# NOTE(review): `komponente.pera(...)` calls a method named `pera` on the
# argument; presumably this fails if `komponente` is a plain list -- confirm
# the intent. The local name `list` also shadows the builtin.
def pera(komponente, cena):
ulaz = input("Unesi")
list = komponente.pera("cena",cena)

All you need is csv.DictReader() together with a sequence of key names for each column:
with open(inputfilename, 'rb') as fileobj:
reader = csv.DictReader(fileobj,
('name_compon', 'price', 'quantity', 'something_else', 'category'),
delimiter='|')
for row in reader:
print row
where row is the dictionary you wanted.

If you want to look into using zip, you could always use it here:
component_dicts = []
components = ("name_compon", "price", "quanity", "category")
with open('/path/to/data') as f:
for line in f.readlines():
components_dicts.append(dict(zip(components, line.split("|")[:4])))
#slicing the first four elements because you never say which 4 out of 5 you wanted.
for c in components_dict:
print c
Here the line.split("|") method is creating a list of str's, dividing the string being read wherever the "|" character is found.
Then zip will return a list of tuples which you then feed into a dict:
# This is what it would look like after you zip the components tuple and the line.split("|") data
[('name_compon', 'AMD A4-3300 2.5GHz 2-Core Fusion APU Box'), ('price', '5.179,00 din'), ('quanity', '58'), ('category', 'opis')]

Related

^# shows up randomly in Bash on Ubuntu on Windows while running Python program

I have a python program that basically parses through some CSVs and prints out a line and then stops until the user hits enter. Here is the full code:
#!/usr/bin/python
import os
import csv
import sys
from datetime import datetime
# Python 2 script (uses xrange, `<>` and print statements).
# Step 1: collect four columns (16, 3, 0, 4) from rows of t26.csv into `teams`.
# Step 2: for each collected entry, scan every CSV in Uploads/ for a row that
# contains the entry's id, compare team values, print a summary line and wait
# for Enter.
# NOTE(review): indentation was lost in this listing, so the nesting shown here
# is not authoritative.
teams = [[] for x in xrange(0, 400)]
counter = 0
with open('t26.csv', 'rb') as f:
# Skips the first line of the file before handing it to csv.reader.
next(f)
reader = csv.reader(f)
for row in reader:
if row:
if row[1] <> "" and row[1] <> "TEAM AVERAGES:":
teams[counter].append(row[16])
teams[counter].append(row[3])
teams[counter].append(row[0])
teams[counter].append(row[4])
counter += 1
# NOTE(review): range(0, counter - 1) stops one short of index counter - 1;
# confirm whether the last collected entry is meant to be skipped.
for i in range(0, counter - 1):
diff = False
lastTeam = ""
firstDate = ""
eid = teams[i][0]
date = teams[i][1]
team = teams[i][2]
pc = teams[i][3]
for csvfile in os.listdir('Uploads'):
with open('Uploads/' + csvfile, 'rb') as f:
reader = csv.reader(f)
team_index = 0
eid_am_index = 0
eid_pm_index = 0
find = False
for row in reader:
index = 0
# Records the column positions whose cell text is "team" or contains
# "eid_am" / "eid_pm".
for column_name in row:
if "team" == column_name:
team_index = index
if "eid_am" in column_name:
eid_am_index = index
if "eid_pm" in column_name:
eid_pm_index = index
index += 1
if eid in row:
#print row[team_index] + ', ' + row[eid_am_index] + ', ' + row[eid_pm_index] + ', ' + ' ----> ' + csvfile
if row[team_index] <> lastTeam and lastTeam <> "":
diff = True
lastTeam = row[team_index]
if firstDate == "":
firstDate = csvfile
break
if diff:
print "\n*diff"
else: #teams are the same
team = team[5:]
if "(" in team:
team = team[:team.index('(') - 1]
try:
lastTeam = lastTeam[:lastTeam.index(' ')]
# NOTE(review): bare except; `g = 0` is only a placeholder so the handler has
# a body (str.index raises ValueError when no space is present).
except:
g = 0
print "\n*no diff: " + eid + " --> " + firstDate + " | " + date + "\tTeam: " + team + " | " + lastTeam + "\tPC: " + pc
if team <> lastTeam and lastTeam <> "":
print "*(!) teams not equal"
# Pauses until Enter is pressed; this is the call that raises EOFError in the
# traceback quoted in the question.
f = raw_input('') #read user input and do nothing with it
I run this program on Bash on Ubuntu on Windows, and sometimes the symbols "^#" will pop up randomly on the terminal, and then when I click enter I get an error.
Here's an example of what the terminal looks like (with some #comments to explain):
*no diff: 4903 --> 6-27-2017 3_44_01 PM.csv | 8/1/2017 1:56:39 PM Team: 180-A | 180-A PC: AGENT3-102 #this line is printed out by the python program
^# #this randomly show up
Traceback (most recent call last): #when i hit enter i get this error
File "parse.py", line 127, in <module>
f = raw_input('')
EOFError
Here's a screenshot as well:

Python function performance

I have 130 lines of code. Everything except lines 79 to 89 works fine and runs in ~0.16 seconds; however, after adding a 10-line function (lines 79-89), the program takes 70-75 seconds. In that function, the data file (u.data) is 100000 lines of numerical data in this format:
>196 242 3 881250949
4 grouped numbers in every line. The thing is that when I ran that function in another Python file while testing (before implementing it in the main program) it showed that it works in 0.15 seconds however when I implemented it in main one (same code) it takes whole program 70 seconds almost.
Here is my code:
""" Assignment 5: Movie Reviews
Date: 30.12.2016
"""
import os.path
import time
start_time = time.time()
""" FUNCTIONS """
# Getting film names in film folder
def get_film_name():
    """Return the leading words of the global ``read_data`` before the first word containing "(".

    The text is split on single spaces; words are kept up to, but not
    including, the first one that contains an opening parenthesis. The kept
    words are joined with spaces and surrounding spaces are stripped.
    """
    kept = []
    for word in read_data.split(' '):
        if '(' in word:
            break
        kept.append(word)
    return ' '.join(kept).strip(' ')
# Function for removing date for comparison
def throw_date(string):
    """Return ``string`` without its last whitespace-separated word.

    The remaining words are joined with single spaces. Used to drop the
    trailing "(year)" from a title so titles can be compared. An empty or
    single-word input yields an empty string.
    """
    return ' '.join(string.split()[:-1])
def film_genre(film_name):
    """Return the space-separated genre names flagged '1' for ``film_name``.

    Scans the global ``u_item_list``; for every row whose title (column 1,
    with its last word removed by ``throw_date``) equals ``film_name``, the
    values from column 4 onward are collected and paired positionally with
    the genre names below. Returns an empty string when nothing matches.
    """
    genr_list = ['unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime', 'Documentary', 'Drama',
                 'Fantasy',
                 'Movie-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
    flags = []
    for item in u_item_list:
        # Rows with fewer than two columns (e.g. from a trailing blank line)
        # have no title and are skipped instead of raising IndexError.
        if len(item) > 1 and throw_date(str(item[1])) == film_name:
            flags.extend(item[4:])
    # zip() stops at the shorter sequence, so extra flag values are ignored.
    return ' '.join(genre for genre, flag in zip(genr_list, flags) if flag == '1')
def film_link(film_name):
    """Return column 3 of every ``u_item_list`` row whose title matches ``film_name``.

    Matching values are concatenated without a separator; the result is an
    empty string when no row matches.
    """
    return ''.join(
        item[3]
        for item in u_item_list
        # Short rows (e.g. from a trailing blank line) cannot match and are skipped.
        if len(item) > 3 and throw_date(str(item[1])) == film_name
    )
def film_review(film_name):
    """Return the text of every file under 'film' whose first line mentions ``film_name``.

    The comparison is case-insensitive. For each matching file the content
    minus its first two characters is appended to the result.
    """
    review = ''
    for r, d, filess in os.walk('film'):
        for fs in filess:
            fullpat = os.path.join(r, fs)
            with open(fullpat, 'r') as a_file:
                data = a_file.read()
            if str(film_name).lower() in str(data.split('\n', 1)[0]).lower():
                # NOTE(review): the original enumerated `data` (a string), so it
                # skipped the first two *characters*, not lines. That behaviour is
                # kept here as a slice; confirm whether skipping lines was intended.
                review += data[2:]
    return review
def film_id(film_name):
    """Return column 0 of the first ``u_item_list`` row whose title matches ``film_name``.

    The title is column 1 with its last word removed by ``throw_date``.
    Returns None when no row matches.
    """
    for film in u_item_list:
        # Rows without a title column (e.g. from a trailing blank line) are skipped.
        if len(film) > 1 and throw_date(film[1]) == film_name:
            return film[0]
    return None
# Print how many lines of 'u.data' refer to `film_name` and the mean of their
# third tab-separated column (labelled "Total User" and "Total Rate").
# NOTE: film_id(film_name) is evaluated again for every line of the file, and
# each line is split on tabs twice.
def total_user_and_rate(film_name):
rate = 0
user = 0
with open('u.data', 'r') as data_file:
# Reads the whole file into memory, then walks it line by line.
rate_data = data_file.read()
for l in rate_data.split('\n'):
if l.split('\t')[1] == film_id(film_name):
user += 1
rate += int(l.split('\t')[2])
data_file.close()
print('Total User:' + str(int(user)) + '\nTotal Rate: ' + str(rate / user))
""" MAIN CODE"""
# Main script: collect film names from the text files under 'film', load
# 'u.item' into `u_item_list` (one list of '|'-separated fields per line),
# then write one found / not-found line per item to review.txt and print the
# rating summary for 'Titanic'.
# NOTE(review): indentation was lost in this listing; the nesting is not
# authoritative. `review_file` is not closed in the lines shown.
review_file = open("review.txt", 'w')
film_name_list = []
# Look for txt files and extract the film names
for root, dirs, files in os.walk('film'):
for f in files:
fullpath = os.path.join(root, f)
with open(fullpath, 'r') as file:
# get_film_name() reads this global rather than taking an argument.
read_data = file.read()
film_name_list.append(get_film_name())
file.close()
with open('u.item', 'r') as item_file:
item_data = item_file.read()
item_file.close()
u_item_list = []
for line in item_data.split('\n'):
temp = [word for word in line.split('|')]
u_item_list.append(temp)
# Lower-cased so the membership test below is case-insensitive.
film_name_list = [i.lower() for i in film_name_list]
updated_film_list = []
print(u_item_list)
# Operation for review.txt
for film_data_list in u_item_list:
if throw_date(str(film_data_list[1]).lower()) in film_name_list:
strin = film_data_list[0] + " " + film_data_list[1] + " is found in the folder" + '\n'
print(film_data_list[0] + " " + film_data_list[1] + " is found in the folder")
updated_film_list.append(throw_date(str(film_data_list[1])))
review_file.write(strin)
else:
strin = film_data_list[0] + " " + film_data_list[1] + " is not found in the folder. Look at " + film_data_list[
3] + '\n'
print(film_data_list[0] + " " + film_data_list[1] + " is not found in the folder. Look at " + film_data_list[3])
review_file.write(strin)
total_user_and_rate('Titanic')
print("time elapsed: {:.2f}s".format(time.time() - start_time))
And my question is what can be the reason for that? Is the function
("total_user_and_rate(film_name)")
problematic? Or can there be other problems in other parts? Or is it normal because of the file?
I see a couple of unnecessary things.
You call film_id(film_name) inside the loop for every line of the file, you really only need to call it once before the loop.
You don't need to read the file, then split it to iterate over it, just iterate over the lines of the file.
You split each line twice, just do it once
Refactored for these changes:
def total_user_and_rate(film_name):
    """Print the number of ratings and the average rating of ``film_name``.

    Reads 'u.data' line by line; each line is tab-separated with the film id
    in column 1 and the rating in column 2. Lines with fewer than three
    columns (such as a trailing blank line) are ignored. When the film has
    no ratings the average is reported as 0 instead of dividing by zero.
    """
    rate = 0
    user = 0
    # Look the id up once; doing it per line is what made the original slow.
    f_id = film_id(film_name)
    with open('u.data', 'r') as data_file:
        for line in data_file:
            fields = line.split('\t')
            if len(fields) > 2 and fields[1] == f_id:
                user += 1
                rate += int(fields[2])
    average = rate / user if user else 0
    print('Total User:' + str(user) + '\nTotal Rate: ' + str(average))
In your test you were probably testing with a much smaller u.item file. Or doing something else to ensure film_id was much quicker. (By quicker, I mean it probably ran on the nanosecond scale.)
The problem you have is that computers are so fast you didn't realise when you'd actually made a big mistake doing something that runs "slowly" in computer time.
If your if l.split('\t')[1] == film_id(film_name): line takes 1 millisecond, then when processing a 100,000 line u.data file, you could expect your total_user_and_rate function to take 100 seconds.
The problem is that film_id iterates over all your films to find the correct id for every single line in u.data. You'd be lucky if the film_id you're looking for is near the beginning of u_item_list, because then the function would return almost immediately. But as soon as you run your new function for a film near the end of u_item_list, you'll notice performance problems.
wwii has explained how to optimise the total_user_and_rate function. But you could also gain performance improvements by changing u_item_list to use a dictionary. This would improve the performance of functions like film_id from O(n) complexity to O(1). I.e. it would still run on the nanosecond scale no matter how many films are included.

Python Nested Loops - continue iterates first loop

Brand new to programming but very enjoyable challenge.
Here's a question which I suspect may be caused by a misunderstanding of python loops.
System info: Using notepad++ and IDLE python 3.4.3 on Win 7 32-bit
My solution is to open 1 database, use it to look for a correct master entry from database 2, pulls a index number (task_no), then write a 3rd file identical to the first database, this time with the correct index number.
My problem is that it performs 1st and 2nd loop correctly, then on the 2nd iteration of loop 1, tries to perform a block in loop 2 while iterating through the rows of loop 1, not the task_rows of loop 2.
footnote: Both files are quite large (several MB) so I'm not sure if storing them in memory is a good idea.
This was a relevant question that I found closest to this problem:
python nested loop using loops and files
What I got out of it was that I tried moving the file opening within the 1st loop, but the problem persists. Something to do with how I'm using CSV reader?
I also have a sinking suspicion that the root cause may lie in how I approached the problem, so I welcome suggestions for alternative ways to solve it.
Thanks in advance!
The gist:
for row in readerCurrentFile: #LOOP 1
# iterates through readerCurrentFile to define search variables
[...]
for task_row in readerTaskHeader: #LOOP 2
# searches each row iteratively through readerTaskHeader
# Match compid
#if no match, continue <<<- This is where it goes back to 1st loop
[...]
# Match task frequency
#if no match, continue
[...]
# once both of the above matches check out, will grab data (task_no from task_row[0]
task_no = ""
task_no = task_row[0]
if task_row:
break
[...]
# writes PM code
print("Successful write of PM schedule row")
print(compid + " " + dict_freq_names[str(pmfreqx) + str(pmfreq)] + ": " + pmid + " " + task_no)
The entire code:
import csv
import re
#Writes schedule
csvNewPMSchedule = open('new_pm_schedule.csv', 'a', newline='')
writerNewPMSchedule = csv.writer(csvNewPMSchedule)
# Dictionaries of PM Frequency
def re_compile_dict(d, f):
    """Replace every pattern string in ``d`` with its compiled regex, in place.

    ``f`` is passed to ``re.compile`` as the flags. Values that are already
    compiled patterns are left untouched, so calling the function twice on
    the same dict is harmless. Returns None.
    """
    for key, pattern in list(d.items()):
        # re.compile() rejects a compiled pattern combined with flags, so skip those.
        if hasattr(pattern, 'search'):
            continue
        d[key] = re.compile(pattern, flags=f)
# Script body: for every row of pm_schedule.csv, search taskheader.csv for a
# row whose description matches both the equipment id and the PM frequency,
# then write the schedule row with that task number to new_pm_schedule.csv.
# NOTE(review): indentation was lost in this listing, so the loop nesting is
# not authoritative.
# NOTE(review): a lookbehind placed after the word, e.g. 'Week(?<!Bi-)', tests
# the characters just matched ("eek"), so it can never reject "Bi-Week";
# confirm whether a lookbehind *before* the word was intended.
dict_month = {60:'Quin',36:'Trien',24:'Bi-An',12:'Annual(?<!Bi-)(?<!Semi-)',6:'Semi-An',3:'Quart',2:'Bi-Month',1:'Month(?<!Bi-)'}
dict_week = {2:'Bi-Week',1:'Week(?<!Bi-)'}
dict_freq_names = {'60Months':'Quintennial','36Months':'Triennial','24Months':'Bi-Annual','12Months':'Annual','6Months':'Semi-Annual','3Months':'Quarterly','2Months':'Bi-Monthly','1Months':'Monthly','2Weeks':'Bi-Weekly','1Weeks':'Weekly'}
re_compile_dict(dict_month,re.IGNORECASE)
re_compile_dict(dict_week, re.IGNORECASE)
# Unique Task Counter
task_num = 0
total_lines = 0
#Error catcher
error_in_row = []
#Blank out all rows
pmid = 0
compid = 0
comp_desc = 0
pmfreqx = 0
pmfreq = 0
pmfreqtype = 0
# PM Schedule Draft (as provided by eMaint)
currentFile = open('pm_schedule.csv', encoding='windows-1252')
readerCurrentFile = csv.reader(currentFile)
# Loop 1
for row in readerCurrentFile:
# Skips the header row.
if row[0] == "pmid":
continue
#defines row items
pmid = row[0]
compid = row[1]
comp_desc = row[2]
#quantity of pm frequency
pmfreqx_temp = row[3]
#unit of pm frequency, choices are: Months, Weeks
pmfreq = row[4]
#pmfreqtype is currently only static not sure what other options we have
pmfreqtype = row[5]
#pmnextdate is the next scheduled due date from this one. we probably need logic later that closes out any past due date
pmnextdate = row[6]
# Task Number This is what we want to change
# pass
# We want to change this to task header's task_desc
sched_task_desc = row[8]
#last done date
last_pm_date = row[9]
#
#determines frequency search criteria
#
try:
pmfreqx = int(pmfreqx_temp)
except (TypeError, ValueError):
print("Invalid PM frequency data, Skipping row " + pmid)
error_in_row.append(pmid)
continue
#
#defines frequency search variable
#
freq_search_var = ""
# NOTE(review): a frequency number missing from the dictionary raises KeyError
# here; the `if not freq_search_var` check below only catches an unknown unit.
if pmfreq == "Weeks":
freq_search_var = dict_week[pmfreqx]
elif pmfreq == "Months":
freq_search_var = dict_month[pmfreqx]
if not freq_search_var:
print("Error in assigning frequency" + compid + " " + str(pmfreqx) + " " + pmfreq)
error_in_row.append(pmid)
continue
#defines Equipment ID Search Variable
print(compid + " frequency found: " + str(pmfreqx) + " " + str(pmfreq))
# NOTE(review): compid is compiled as a regular expression without escaping.
compid_search_var = re.compile(compid,re.IGNORECASE)
#
# Matching function - search taskHeader for data
#
#PM Task Header Reference
# NOTE(review): the file is reopened here and no close() appears in the lines shown.
taskHeader = open('taskheader.csv', encoding='windows-1252')
readerTaskHeader = csv.reader(taskHeader)
for task_row in readerTaskHeader:
# task_row[0]: taskHeader pm number
# task_row[1]: "taskHeader task_desc
# task_row[2]: taskHeader_task_notes
#
# search for compid
compid_match = ""
compid_match = compid_search_var.search(task_row[1])
if not compid_match:
print(task_row[1] + " does not match ID for " + compid + ", trying next row.") #debug 2
continue # <<< STOPS ITERATING RIGHT OVER HERE
print("Found compid " + task_row[1]) # debug line
#
freq_match = ""
freq_match = freq_search_var.search(task_row[1])
if not freq_match:
print(task_row[1] + " does not match freq for " + compid + " " + dict_freq_names[str(pmfreqx) + str(pmfreq)] + ", trying next row.") #debug line
continue
print("Frequency Match: " + compid + " " + dict_freq_names[str(pmfreqx) + str(pmfreq)]) # freq debug line
#
# NOTE(review): task_no is only assigned after both matches succeed; if no task
# row matches, the check below presumably sees a value left over from an
# earlier schedule row (or no value at all) -- confirm.
task_no = ""
print("Assigning Task Number to " + task_row[0])
task_no = task_row[0]
if task_row:
break
#
#error check
#
if not task_no:
print("ERROR IN SEARCH " + compid + " " + pmid)
error_in_row.append(pmid)
continue
#
# Writes Rows
#
writerNewPMSchedule.writerow([pmid,compid,comp_desc,pmfreqx,pmfreq,pmfreqtype,pmnextdate,task_no,sched_task_desc,last_pm_date])
print("Successful write of PM schedule row")
print(compid + " " + dict_freq_names[str(pmfreqx) + str(pmfreq)] + ": " + pmid + " " + task_no)
print("==============")
# Error reporting lined out for now
# for row in error_in_row:
# writerNewPMSchedule.writerow(["Error in row:",str(error_in_row[row])])
# print("Error in row: " + str(error_in_row[row]))
print("Finished")

Is there some kind of limit to the amount of output Python 3.4 allows using the write() method at one time?

I put trailing print() methods right next to my write() method lines at the end of my code to test why my output files were incomplete. But, the print() output is "all the stuff" I expect; while the write() output is off by a confusing amount (only 150 out of 200 'things'). Reference Image of Output: IDLE versus external output file
FYI: Win 7 64 // Python 3.4.2
My modules take an SRT captions file ('test.srt') and returns a list object I create from it; in particular, one with 220 list entries of the form: [[(index), [time], string]]
times = open('times.txt', 'w')
### A portion of Riobard's SRT Parser: srt.py
import re
def tc2ms(tc):
    """Convert a timecode such as ``"01:02:03,004"`` to milliseconds.

    Hours, minutes, seconds and the fractional part are each optional, and a
    leading ``+`` or ``-`` sets the sign. The digits after ``,`` or ``.``
    are read as a plain integer number of milliseconds, so ``"1,5"`` is
    1005 ms. An empty timecode yields 0.
    """
    sign = 1
    # Compare against a tuple: `"" in "+-"` is True, which would misread an
    # empty timecode as signed.
    if tc[:1] in ("+", "-"):
        sign = -1 if tc[0] == "-" else 1
        tc = tc[1:]
    TIMECODE_RE = re.compile(r'(?:(?:(?:(\d?\d):)?(\d?\d):)?(\d?\d))?(?:[,.](\d?\d?\d))?')
    # Every part of the pattern is optional, so match() always succeeds
    # (possibly with an empty match); unmatched groups come back as None.
    match = TIMECODE_RE.match(tc)
    hh, mm, ss, ms = (0 if group is None else int(group) for group in match.groups())
    return ((hh * 3600 + mm * 60 + ss) * 1000 + ms) * sign
# my code
# Script body: split test.srt into blocks separated by blank lines, turn each
# block into [index, [time line], text], then write the gap between
# consecutive entries and each entry's text to the `times` file.
# NOTE(review): indentation was lost in this listing; the nesting is not
# authoritative. No times.close() appears in the lines shown.
with open('test.srt') as f:
file = f.read()
srt = []
# NOTE(review): `file` is a string here, so this loop runs once per character.
for line in file:
splitter = file.split("\n\n")
# SRT splitter
i = 0
j = len(splitter)
for items in splitter:
# Stops at j - 2, so the last element of `splitter` is not processed.
while i <= j - 2:
split_point_1 = splitter[i].index("\n")
split_point_2 = splitter[i].index("\n", split_point_1 + 1)
index = splitter[i][:split_point_1]
time = [splitter[i][split_point_1:split_point_2]]
time = time[0][1:]
string = splitter[i][split_point_2:]
string = string[1:]
# NOTE(review): the name `list` shadows the builtin.
list = [[(index), [time], string]]
srt += list
i += 1
# time info outputter
i = 0
j = 1
for line in srt:
if i != len(srt) - 1:
# Position of the " --> " separator inside the time line.
indexer = srt[i][1][0].index(" --> ")
timein = srt[i][1][0][:indexer]
# Takes the last `indexer` characters as the end time.
timeout = srt[i][1][0][-indexer:]
line_time = (tc2ms(timeout) - tc2ms(timein))/1000
space_time = ((tc2ms((srt[j][1][0][:indexer]))) - (tc2ms(srt[i][1][0][-indexer:])))/1000
out1 = "The space between Line " + str(i) + " and Line " + str(j) + " lasts " + str(space_time) + " seconds." + "\n"
out2 = "Line " + str(i) + ": " + str(srt[i][2]) + "\n\n"
times.write(out1)
times.write(out2)
print(out1, end="")
print(out2)
i += 1
j += 1
else:
indexer = srt[i][1][0].index(" --> ")
timein = srt[i][1][0][:indexer]
timeout = srt[i][1][0][-indexer:]
line_time = (tc2ms(timeout) - tc2ms(timein))/1000
outend = "Line " + str(i) + ": " + str(srt[i][2]) + "\n<End of File>"
times.write(outend)
print(outend)
My two write() method output files, respectively, only print out either ~150 or ~200 items of the 220 things it otherwise correctly prints to the screen.
You want to close your times file when done writing; operating systems use write buffers to speed up file I/O, collecting larger blocks of data to be written to disk in one go; closing the file flushes that buffer:
times.close()
Consider opening the file in a with block:
with open('times.txt', 'w') as times:
# all code that needs to write to times

Skyscanner API CSV file

I am new to Python and I am trying to run this code, which I found on GitHub, but it does not work. Is something wrong with the code, or is it my fault? I always get the
"no data found"
message.
skyscanner.py :
#!/usr/bin/python
"""The script obtains prices and flight information for a given
input (departure, arrival airports and date), outputs this
data to the console and writes it to a csv file."""
__author__ = "Ingvaras Merkys"
import json
import urllib2
import re
import sys
import time
# Global vars:
AUTOSUGGEST_URL = "http://www.skyscanner.net/dataservices/geo/v1.0/autosuggest/uk/en/"
# e. g. http://www.skyscanner.net/dataservices/geo/v1.0/autosuggest/uk/en/edinb
SKYSCANNER_URL = "http://www.skyscanner.net/flights/"
# e. g. http://www.skyscanner.net/flights/vno/edi/130419
ROUTEDATA_URL = "http://www.skyscanner.net/dataservices/routedate/v2.0/"
# e. g. http://www.skyscanner.net/dataservices/routedate/v2.0/a00765d2-7a39-404b-86c0-e8d79cc5f7e3
SUGGESTIONS_URL = "http://www.skyscanner.net/db.ashx?ucy=UK&lid=en&ccy=GBP"
# e. g. http://www.skyscanner.net/db.ashx?ucy=UK&lid=en&ccy=GBP&fp=KAUN&tp=EDIN&dd=20130410
def main(argv):
input_from = argv[0].replace(" ", "%20").replace("\"", "")
input_to = argv[1].replace(" ", "%20").replace("\"", "")
date = argv[2].replace("/", "")
place_id_from, place_id_to, name_from, name_to = get_codes(input_from, input_to)
# testjuly = map (lambda x: len(x) == 1 and '13070'+x or '1307'+x, [ str(i+1) for i in range(31) ])
# for date in testjuly:
session_key = get_session_key(place_id_from, place_id_to, date)
for attempt in range(3):
# if script is run repeatedly sometimes an empty html is returned
try:
response = urllib2.urlopen(ROUTEDATA_URL + session_key)
html = response.read()
data = json.loads(html)
except ValueError:
f = open("error.log", "a")
f.write(ROUTEDATA_URL + session_key + "\n")
f.write("Returned:\n" + html + "\n")
time.sleep(1)
else:
break
else:
sys.exit(1)
query = data['Query']
if data['Stats']['OutboundLegStats']['TotalCount'] == 0:
print "No flights found from", name_from, "to", name_to
return 0
#show_suggestions(query['OriginPlace'], query['DestinationPlace'], date)
#sys.exit(2)
stations = data['Stations']
quotes = data['Quotes']
carriers = data['Carriers']
cheapest_price = data['Stats']['ItineraryStats']['Total']['CheapestPrice']
print "Results for flight from", name_from, "to", name_to
print "Outbound date:", re.split('T', query['OutboundDate'])[0]
print "Cheapest Journey:", cheapest_price, "RMB"
return cheapest_price
# f = open(place_id_from + '-' + place_id_to + '-' + date + '.csv','w')
# for leg in data['OutboundItineraryLegs']:
# leg_price = get_leg_price(leg['PricingOptions'], quotes)
# depart_time = leg['DepartureDateTime'].replace("T", " ")
# arrive_time = leg['ArrivalDateTime'].replace("T", " ")
# duration = leg['Duration']
# carrier_names = get_carrier_names(leg['MarketingCarrierIds'], carriers)[1]
# print "\n\tPrice:", leg_price, "GBP"
# print "\tDeparting:", depart_time
# print "\tArriving:", arrive_time
# print "\tDuration:", duration/60, "h", duration%60, "min"
# print "\tCarriers:", carrier_names
# print "\t# of stops: ", leg['StopsCount']
# stop_ids = leg.get('StopIds', [])
# stop_ids_string = ", ".join([ get_station_name(stop_id, stations) for stop_id in stop_ids ])
# print "\t\t", stop_ids_string
# row = str(leg_price) + "\t" + depart_time + "\t" + arrive_time + "\t" + str(duration) + "\t" + carrier_names + "\t" + stop_ids_string
# f.write(row + "\n")
# Functions
def get_codes(input_from, input_to):
"""Returns place id codes and names, e. g. ("EDI", "KUN", "Edinburgh", "Kaunas")"""
try:
i = 0
autosuggest_json_from = json.load(urllib2.urlopen(AUTOSUGGEST_URL + input_from))
if len(autosuggest_json_from[0]['PlaceId']) == 4:
# for cases where the first result is abstract (e. g. Glasgow (Any))
i = 1
place_id_from = autosuggest_json_from[i]['PlaceId']
name_from = autosuggest_json_from[i]['PlaceName']
j = 0
autosuggest_json_to = json.load(urllib2.urlopen(AUTOSUGGEST_URL + input_to))
if len(autosuggest_json_to[0]['PlaceId']) == 4:
j = 1
place_id_to = autosuggest_json_to[j]['PlaceId']
name_to = autosuggest_json_to[j]['PlaceName']
except IndexError:
print "No code found for:"
print input_from, "AND/OR", input_to
sys.exit(3)
return (place_id_from, place_id_to, name_from, name_to)
def get_session_key(place_id_from, place_id_to, date):
"""Returns a session key for a given query, on failure exits
NB. distant or past dates cause failures"""
response = urllib2.urlopen(SKYSCANNER_URL + place_id_from + "/" + place_id_to + "/" + date)
html = response.read()
regex = ur'"SessionKey":"(.+?)"'
# e. g. "SessionKey":"a00765d2-7a39-404b-86c0-e8d79cc5f7e3"
try:
session_key = re.findall(regex, html)[0]
except IndexError:
print "No data found for this date"
sys.exit(4)
return session_key
def show_suggestions(from_id, to_id, date):
"""Prints alternative departure airports"""
suggest_places_string = ""
suggestions_json = json.load(urllib2.urlopen(SUGGESTIONS_URL + "&fp=" + from_id + "&tp=" + to_id + "&dd=20" + date))
try:
suggest_places = suggestions_json['rs']
for place in suggest_places:
if place['fpid'] != from_id:
suggest_places_string += place['fan'] + ", "
if suggest_places_string[:-2] != "":
print "Try airports: ", suggest_places_string[:-2]
except (KeyError, IndexError):
print "Suggestions unavailable"
def get_station_name(station_id, stations):
    """Returns the name of the (intermediate) station,
    e. g. "London Heathrow"

    ``stations`` is an iterable of dicts with 'Id' and 'Name'. Returns an
    empty string when no station has the given id.
    """
    for station in stations:
        if station['Id'] == station_id:
            return station['Name']
    return ""
def get_leg_price(pricing, quotes):
    """Returns lowest leg price

    Each pricing option's price is the sum of its quotes, looked up by
    'QuoteIds'. Raises ValueError when ``pricing`` is empty.
    """
    return min(get_quote_price(option['QuoteIds'], quotes) for option in pricing)
def get_quote_price(quote_ids, quotes):
    """Finds quotes by quote id and returns their price sum

    An id listed more than once is counted each time it appears; ids with no
    matching quote contribute nothing. Returns 0 when nothing matches.
    """
    return sum(
        quote['Price']
        for quote_id in quote_ids
        for quote in quotes
        if quote['Id'] == quote_id
    )
def get_carrier_names(carrier_ids, carriers):
    """Returns a tuple (list, string) with carrier names
    e.g. (["airBaltic", "KLM"], "airBaltic, KLM")"""
    carrier_names = [get_carrier_name(carrier_id, carriers) for carrier_id in carrier_ids]
    return (carrier_names, ", ".join(carrier_names))
def get_carrier_name(carrier_id, carriers):
    """Returns carrier name by id

    ``carriers`` is an iterable of dicts with 'Id' and 'Name'. Returns an
    empty string when no carrier has the given id.
    """
    for carrier in carriers:
        if carrier['Id'] == carrier_id:
            return carrier['Name']
    return ""
if __name__ == "__main__":
if len(sys.argv) == 4:
main(sys.argv[1:])
else:
print "Enter arguments in this way:\n"
print "python skyscanner.py {departure airport} {arrival airport} {departure date (yy/mm/dd)}\n\n"
print "e. g. python skyscanner.py \"glasgow prestwick\" kaunas 13/07/21\n"
sys.exit()
These endpoints are not supported as external APIs, they are used by the site itself. They can/do change without notice and some require a level of "state" to operate.
However, we do have an API that would allow you access to the same auto-suggest / flight data that the site is driven from. More details can be found at http://business.skyscanner.net

Categories