Skyscanner API CSV file - python

I am new to python and I am trying to run this code,which I found on github ,but it does not work, is something wrong with the code?Or is it my fault? I am always getting the
"no data found"
message.
skyscanner.py :
#!/usr/bin/python
"""The script obtains prices and flight information for a given
input (departure, arrival airports and date), outputs this
data to the console and writes it to a csv file."""
__author__ = "Ingvaras Merkys"
import json
import urllib2
import re
import sys
import time
# Global vars:
AUTOSUGGEST_URL = "http://www.skyscanner.net/dataservices/geo/v1.0/autosuggest/uk/en/"
# e. g. http://www.skyscanner.net/dataservices/geo/v1.0/autosuggest/uk/en/edinb
SKYSCANNER_URL = "http://www.skyscanner.net/flights/"
# e. g. http://www.skyscanner.net/flights/vno/edi/130419
ROUTEDATA_URL = "http://www.skyscanner.net/dataservices/routedate/v2.0/"
# e. g. http://www.skyscanner.net/dataservices/routedate/v2.0/a00765d2-7a39-404b-86c0-e8d79cc5f7e3
SUGGESTIONS_URL = "http://www.skyscanner.net/db.ashx?ucy=UK&lid=en&ccy=GBP"
# e. g. http://www.skyscanner.net/db.ashx?ucy=UK&lid=en&ccy=GBP&fp=KAUN&tp=EDIN&dd=20130410
def main(argv):
input_from = argv[0].replace(" ", "%20").replace("\"", "")
input_to = argv[1].replace(" ", "%20").replace("\"", "")
date = argv[2].replace("/", "")
place_id_from, place_id_to, name_from, name_to = get_codes(input_from, input_to)
# testjuly = map (lambda x: len(x) == 1 and '13070'+x or '1307'+x, [ str(i+1) for i in range(31) ])
# for date in testjuly:
session_key = get_session_key(place_id_from, place_id_to, date)
for attempt in range(3):
# if script is run repeatedly sometimes an empty html is returned
try:
response = urllib2.urlopen(ROUTEDATA_URL + session_key)
html = response.read()
data = json.loads(html)
except ValueError:
f = open("error.log", "a")
f.write(ROUTEDATA_URL + session_key + "\n")
f.write("Returned:\n" + html + "\n")
time.sleep(1)
else:
break
else:
sys.exit(1)
query = data['Query']
if data['Stats']['OutboundLegStats']['TotalCount'] == 0:
print "No flights found from", name_from, "to", name_to
return 0
#show_suggestions(query['OriginPlace'], query['DestinationPlace'], date)
#sys.exit(2)
stations = data['Stations']
quotes = data['Quotes']
carriers = data['Carriers']
cheapest_price = data['Stats']['ItineraryStats']['Total']['CheapestPrice']
print "Results for flight from", name_from, "to", name_to
print "Outbound date:", re.split('T', query['OutboundDate'])[0]
print "Cheapest Journey:", cheapest_price, "RMB"
return cheapest_price
# f = open(place_id_from + '-' + place_id_to + '-' + date + '.csv','w')
# for leg in data['OutboundItineraryLegs']:
# leg_price = get_leg_price(leg['PricingOptions'], quotes)
# depart_time = leg['DepartureDateTime'].replace("T", " ")
# arrive_time = leg['ArrivalDateTime'].replace("T", " ")
# duration = leg['Duration']
# carrier_names = get_carrier_names(leg['MarketingCarrierIds'], carriers)[1]
# print "\n\tPrice:", leg_price, "GBP"
# print "\tDeparting:", depart_time
# print "\tArriving:", arrive_time
# print "\tDuration:", duration/60, "h", duration%60, "min"
# print "\tCarriers:", carrier_names
# print "\t# of stops: ", leg['StopsCount']
# stop_ids = leg.get('StopIds', [])
# stop_ids_string = ", ".join([ get_station_name(stop_id, stations) for stop_id in stop_ids ])
# print "\t\t", stop_ids_string
# row = str(leg_price) + "\t" + depart_time + "\t" + arrive_time + "\t" + str(duration) + "\t" + carrier_names + "\t" + stop_ids_string
# f.write(row + "\n")
# Functions
def get_codes(input_from, input_to):
"""Returns place id codes and names, e. g. ("EDI", "KUN", "Edinburgh", "Kaunas")"""
try:
i = 0
autosuggest_json_from = json.load(urllib2.urlopen(AUTOSUGGEST_URL + input_from))
if len(autosuggest_json_from[0]['PlaceId']) == 4:
# for cases where the first result is abstract (e. g. Glasgow (Any))
i = 1
place_id_from = autosuggest_json_from[i]['PlaceId']
name_from = autosuggest_json_from[i]['PlaceName']
j = 0
autosuggest_json_to = json.load(urllib2.urlopen(AUTOSUGGEST_URL + input_to))
if len(autosuggest_json_to[0]['PlaceId']) == 4:
j = 1
place_id_to = autosuggest_json_to[j]['PlaceId']
name_to = autosuggest_json_to[j]['PlaceName']
except IndexError:
print "No code found for:"
print input_from, "AND/OR", input_to
sys.exit(3)
return (place_id_from, place_id_to, name_from, name_to)
def get_session_key(place_id_from, place_id_to, date):
"""Returns a session key for a given query, on failure exits
NB. distant or past dates cause failures"""
response = urllib2.urlopen(SKYSCANNER_URL + place_id_from + "/" + place_id_to + "/" + date)
html = response.read()
regex = ur'"SessionKey":"(.+?)"'
# e. g. "SessionKey":"a00765d2-7a39-404b-86c0-e8d79cc5f7e3"
try:
session_key = re.findall(regex, html)[0]
except IndexError:
print "No data found for this date"
sys.exit(4)
return session_key
def show_suggestions(from_id, to_id, date):
"""Prints alternative departure airports"""
suggest_places_string = ""
suggestions_json = json.load(urllib2.urlopen(SUGGESTIONS_URL + "&fp=" + from_id + "&tp=" + to_id + "&dd=20" + date))
try:
suggest_places = suggestions_json['rs']
for place in suggest_places:
if place['fpid'] != from_id:
suggest_places_string += place['fan'] + ", "
if suggest_places_string[:-2] != "":
print "Try airports: ", suggest_places_string[:-2]
except (KeyError, IndexError):
print "Suggestions unavailable"
def get_station_name(station_id, stations):
"""Returns the name of the (intermediate) station,
e. g. "London Heathrow" """
for station in stations:
if station['Id'] == station_id:
return station['Name']
return ""
def get_leg_price(pricing, quotes):
"""Returns lowest leg price"""
prices = []
for price in pricing:
prices.append(get_quote_price(price['QuoteIds'], quotes))
return min(prices)
def get_quote_price(quote_ids, quotes):
"""Finds quotes by quote id and returns their price sum"""
price = 0;
for quote_id in quote_ids:
for quote in quotes:
if quote['Id'] == quote_id:
price += quote['Price']
return price
def get_carrier_names(carrier_ids, carriers):
"""Returns a tuple (list, string) with carrier names
e.g. (["airBaltic", "KLM"], "airBaltic, KLM")"""
carrier_names = []
carrier_names_string = ""
for carrier_id in carrier_ids:
carrierName = get_carrier_name(carrier_id, carriers)
carrier_names.append(carrierName)
carrier_names_string += carrierName + ", "
return (carrier_names, carrier_names_string[:-2])
def get_carrier_name(carrier_id, carriers):
"""Returns carrier name by id"""
for carrier in carriers:
if carrier['Id'] == carrier_id:
return carrier['Name']
return ""
if __name__ == "__main__":
if len(sys.argv) == 4:
main(sys.argv[1:])
else:
print "Enter arguments in this way:\n"
print "python skyscanner.py {departure airport} {arrival airport} {departure date (yy/mm/dd)}\n\n"
print "e. g. python skyscanner.py \"glasgow prestwick\" kaunas 13/07/21\n"
sys.exit()

These endpoints are not supported as external APIs, they are used by the site itself. They can/do change without notice and some require a level of "state" to operate.
However, we do have an API that would allow you access to the same auto-suggest / flight data that the site is driven from. More details can be found at http://business.skyscanner.net

Related

How to make input correspond with a collection of values, and make a decision based on the input?

The goal here is to write a function called displayPerson​ that takes in an integer called id as its first parameter, and a dictionary as its second parameter, called personData.
The purpose of the function is to print the name and birthday of a given user identified by the input id. If there is no entry with the given id, then print “No user found with that id” instead.
The format should be “Person # id is name with a birthday of
date”, where id is the id # inputted, and name is the name of the person (from the file) and date is the birthday of the user (formatted as YYYY-­MM-­DD.
This is what I have so far
import argparse
import urllib.request
import datetime
import logging
#url https://s3.amazonaws.com/cuny-is211-spring2015/birthdays100.csv
#url = input('Insert URL here: ')
url = "https://s3.amazonaws.com/cuny-is211-spring2015/birthdays100.csv"
def downloadData(url):
response = urllib.request.urlopen(url)
data = response.read().decode('utf-8')
#print(data)
return data
def processData(file_content):
dictionary = {}
#print(file_content)
# [
# "1,Charles Paige,06/01/1963",
# "2,Andrew Bell,29/03/1972",
# ...
# "99,Alan Wilson,03/04/1960",
# "100,Austin Burgess,04/06/1979"
# ]
count = 0
data_items = file_content.splitlines()
logging.basicConfig(filename='error.log', filemode='w', level=logging.ERROR)
for line in data_items[1:]:
data_pieces = line
data_pieces = data_pieces.split(',')
# ["1", "Charles Paige", "06/01/1963"]
count = count + 1
#print(data_pieces)
# dictionary[data_pieces[0]] = (data_pieces[1]), datetime.datetime.strptime((data_pieces[2]), '%d/%m/%Y')
try:
dictionary[data_pieces[0]] = (data_pieces[1]), datetime.datetime.strptime((data_pieces[2]), '%d/%m/%Y')
except ValueError:
logging.error("Error processing line #: " + str(count) + " for ID #: " + str(data_pieces[0]))
return dictionary
def displayPerson(id, personData):
#print(personData)
#return
try:
id = input("ID:")
print("Person #" + id + "is" + dictionary[data_pieces[1]] + "with a birthday of" + datetime.datetime.strptime((data_pieces[2]), '%Y-%m-%d'))
except:
print("No user ID found")
def main():
downloadData(url)
file_content = downloadData(url)
values = processData(file_content)
#print(values)
displayPerson(id, values)
When I input an ID number, it raises the except every time. I'm not sure how to format the code to correspond the ID number with the values from the dictionary I created in processData.
Your code all seems to work OK except for the print line in your displayPerson function. Replace that line with this, and I think you'll get the behavior you're looking for:
print("Person #" + id + " is " + personData[id][0] + " with a birthday of " + personData[id][1].strftime('%d/%m/%Y'))
When I enter a value of "1", I get the following output:
Person #1 is Charles Paige with a birthday of 06/01/1963

python error could not convert string to float

We are getting this below error while migrating the data from slack channel to a file, when we execute the script for fetching the data for one day, it executing perfectly.
But when we execute the script for 2 months data, it gives 10 days data in separate file but getting throwing an error on particular date. It might be possible that the source data on slack is bit different from expected
Traceback (most recent call last):
File "C:\Users\Slack SCript\script.py", line 218, in <module>
main()
File "C:\Users\Slack SCript\script.py", line 201, in main
parse(message['text'])
File "C:\Users\Slack SCript\script.py", line 114, in parse
size = float(elements[1])
ValueError: could not convert string to float:
As per the source data we found that some value is 0 maybe the error we got because of this value. is there any way to skip or continue future.
from slackclient import SlackClient
import time
import os
import sys
import datetime
from dateutil.relativedelta import relativedelta
servers = ("fd2a", "ff1a", "hh3b", "kw1a", "kw1b", "lo8a", "os5a", "os5b", "sg2a", "sg2b", 'sy1a', 'va1a', 'va1b')
types = ("", "nfs", "cluster")
currser = "d"
currtype = ""
used = {}
total = {}
available = {}
ts = 0
dir_name = "data"
def savedata(dir_path, filename, data):
f = open(dir_path + filename, "w") # opens file with name of "test.txt"
print(dir_path + filename)
f.write(data)
f.close()
def reset_data():
print("datareset")
for i in range(0, len(servers)):
for j in range(0, len(types)):
used[servers[i] + types[j]] = 0
total[servers[i] + types[j]] = 0
available[servers[i] + types[j]] = 0
def write_data(ts):
datastr = ''
global used
global total
ttotaltotalsum = 0
for j in range(0, len(types)):
datastr += types[j] + '\n'
datastr += "Name\t" + "Region\t" + "total(TB)\t" + "used(TB)\t" + "available(TB)\t" + "Used(%)\n"
for i in range(0, len(servers)):
tused = used[servers[i] + types[j]]
ttotal = total[servers[i] + types[j]]
ttotaltotalsum += ttotal
if (ttotal != 0):
datastr += (
servers[i][0:len(servers[i]) - 1] + "\t\t" +
servers[i][len(servers[i]) - 1] + "\t\t" +
"{:.1f}".format(ttotal / 1024) + " \t\t" +
"{:.1f}".format(tused / 1024) + " \t\t" +
"{:.1f}".format((ttotal - tused) / 1024) +"\t\t"+
"{:.1f}".format(tused / ttotal * 100) + " \t\t" +
" \n")
print("..")
if (ttotaltotalsum > 0):
hour= datetime.datetime.fromtimestamp(int(ts)).hour
day= datetime.datetime.fromtimestamp(int(ts)).day
month= datetime.datetime.fromtimestamp(int(ts)).month
year=datetime.datetime.fromtimestamp(int(ts)).year
if hour < 12:
savedata("data/", "Storage-Update-M-" +
str(day) + "-" +
str(month) + "-" +
str(year) + ".txt", datastr)
else:
savedata("data/", "Storage-Update-E-" +
str(day) + "-" +
str(month) + "-" +
str(year) + ".txt", datastr)
def parse(text):
global currser
global currtype
global used
global total
global available
global ts
content = text.split("\n")
for line in content:
line = line[:len(line)]
if line.__contains__("Netapp Cluster"):
for server in servers:
if line.__contains__(server):
currser = server
for type in types:
if line.__contains__(type):
currtype = type
# print(line)
if line.__contains__("Total available capacity"):
# print(line)
# print ("contains","Total available capacity------")
elements = line.split(":")
# print (elements)
size = float(elements[1])
# print(size)
total[currser + currtype] += size
# print(size,"TOTAL capacity",total)
elif line.__contains__("size provisioned"):
# print(line)
# print("contains", "Total LUN size provisioned------- ")
elements = line.split(":")
# print(elements)
size = float(elements[1])
# print(size)
used[currser + currtype] += size
# print(size, "Used", used)
# print( currser)
# print( currtype)
# print( used)
# print(total)
# print(available)
return (used, total)
def make_dir(dir_name):
if not os.path.exists(dir_name):
os.makedirs(dir_name)
def main():
slack_token = ""
channel_name = ''
time_on_last_message = time.time()
channel_id = ""
ts = 0.000
threshmins = 20
channels_call = SlackClient(slack_token).api_call("channels.list")
print(channels_call)
print(channels_call.keys())
for channel in channels_call["channels"]:
if channel["name"] == channel_name:
channel_id = channel["id"]
print(channel)
make_dir(dir_name)
print(channel_id)
reset_data()
time_since_last_update = time.time() - time_on_last_message
print("Waiting for new data....", time.time() - time_on_last_message)
if time_since_last_update > threshmins * 60:
write_data(ts)
reset_data()
sc = SlackClient(slack_token)
date_after_month = datetime.datetime.now() + relativedelta(months=-6)
date_after_month=date_after_month.timestamp()
while True:
breakflag=0
data = sc.api_call(
"channels.history",
channel=channel_id,
oldest=date_after_month,
count=1000,
)
if (data['ok'] == True):
messages = data['messages']
for message in reversed(messages):
# print(message['ts'])
if float(message['ts']) > ts:
print("difference=", float(message['ts']) - ts)
if float(message['ts']) - ts > (threshmins * 60):
print("greater diffrrece>reset................")
write_data(ts)
print(ts)
reset_data()
time_on_last_message = time.time()
ts = float(message['ts'])
parse(message['text'])
if (data["has_more"] == True):
print("has more")
date_after_month=message['ts']
else:
breakflag=1
else:
print("No data returned or error")
time.sleep(1) # in Seconds
if(breakflag==1):
break
main()
Based on the error message, elements[1] is empty. And Python cannot convert an empty string to float:
>>> float("")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: could not convert string to float:
The elements[1] element is a string that can't be parsed to a float. The easiest way would be to attach a debugger and investigate what is being parsed. Then change your code to parse it better.
The second easiest way would be to binary search for the record that makes it fail and fix your code to parse it better.
The totally absolutely preferred way would be to, when you found what the case was that your code didn't support, you would write a test that proves that that case was added:
def test_parse_xyz():
assert [("blablabla", None)] == parse(["blablabla: -certainly_not_a_float"])
These tests can automatically be detected by e.g. pytest:
$ pytest parser.py

How to retrieve column values by column name in python whit cx_Oracle

I'm programming a script that connects to an Oracle database and get the results into a log file. I want to get a output like this:
FEC_INCLUSION = 2005-08-31 11:43:48,DEBITO_PENDIENTE = None,CAN_CUOTAS = 1.75e-05,COD_CUENTA = 67084,INT_TOTAL = None,CAN_CUOTAS_ANTERIOR = None,COD_INVERSION = 1,FEC_MODIFICACION = 10/04/2012 09:45:22,SAL_TOT_ANTERIOR = None,CUOTA_COMISION = None,FEC_ULT_CALCULO = None,MODIFICADO_POR = CTAPELA,SAL_TOTAL = 0.15,COD_TIPSALDO = 1,MONTO_COMISION = None,COD_EMPRESA = 1,SAL_INFORMATIVO = None,COD_OBJETIVO = 5,SAL_RESERVA = None,INCLUIDO_POR = PVOROPE,APORTE_PROM = 0.0,COSTO_PROM = None,CREDITO_PENDIENTE = None,SAL_PROM = 0.0,
FEC_INCLUSION = 2005-08-31 11:43:49,DEBITO_PENDIENTE = None,CAN_CUOTAS = 0.0,COD_CUENTA = 67086,INT_TOTAL = None,CAN_CUOTAS_ANTERIOR = None,COD_INVERSION = 9,FEC_MODIFICACION = 25/02/2011 04:38:52,SAL_TOT_ANTERIOR = None,CUOTA_COMISION = None,FEC_ULT_CALCULO = None,MODIFICADO_POR = OPEJAMO,SAL_TOTAL = 0.0,COD_TIPSALDO = 1,MONTO_COMISION = None,COD_EMPRESA = 1,SAL_INFORMATIVO = None,COD_OBJETIVO = 5,SAL_RESERVA = None,INCLUIDO_POR = PVOROPE,APORTE_PROM = 0.0,COSTO_PROM = None,CREDITO_PENDIENTE = None,SAL_PROM = 0.0,
I created a dictionary with the query results:
def DictFactory(description,data):
column_names = [col[0] for col in description]
results = []
for row in data:
results.append(dict(zip(column_names,row)))
return results
Then I created this function which finally save the results into my log:
def WriteLog(log_file,header,data):
file_exist = os.path.isfile(log_file)
log = open(log_file,'a')
if not file_exist:
print "File does not exist, writing new log file"
open(log_file,'w').close()
mydata = DictFactory(header,data)
checkpoint_name = ReadCheckpointName()
string = ''
for m in mydata:
for k,v in m.items():
string = string + k + ' = ' + str(v) + ','
if k == checkpoint_name:
#print "KEY FOUND"
cur_checkpoint = v
cur_checkpoint = str(cur_checkpoint)
#print string
string = string + '\n'
print cur_checkpoint
log.write(string + '\n')
WriteCheckpoint(cur_checkpoint,checkpoint_file)
log.close()
This is the main function:
def GetInfo():
mypool = PoolToDB()
con = mypool.acquire()
cursor = con.cursor()
GetLastCheckpoint()
sql = ReadQuery()
#print sql
cursor.execute(sql)
data = cursor.fetchall()
WriteLog(log_file,cursor.description,data)
#WriteCsvLog(log_file,cursor.description,data)
cursor.close()
But I realized that it works if I use a query that fetch few records, however if I try to fetch many records my script never ends.
This is my output when I executed a query with 5000 records. As you can see it takes too long.
jballesteros#SplunkPorvenir FO_TIPSALDOS_X_CUENTA]$ python db_execution.py
Starting connection: 5636
GetLastCheckpoint function took 0.073 ms
GetLastCheckpoint function took 0.025 ms
ReadQuery function took 0.084 ms
File does not exist, writing new log file
DictFactory function took 23.050 ms
ReadCheckpointName function took 0.079 ms
WriteCheckpoint function took 0.204 ms
WriteLog function took 45112.133 ms
GetInfo function took 46193.033 ms
I'm pretty sure you know a much better way to do what I am trying to do.
This is the complete code:
#!/usr/bin/env python
# encoding: utf-8
import re
import sys
try:
import cx_Oracle
except:
print "Error: Oracle module required to run this plugin."
sys.exit(0)
import datetime
import re
import commands
import os
from optparse import OptionParser
import csv
import time
#################################
#### Database Variables ####
#################################
Config = {
"host" : "",
"user" : "",
"password" : "",
"instance" : "",
"port" : "",
}
Query = {
"sql" : "",
"checkpoint_datetype" : "",
"checkpoint_name" : "",
}
dir = '/home/jballesteros/PENS2000/FO_TIPSALDOS_X_CUENTA/'
connection_dir = '/home/jballesteros/PENS2000/Connection'
checkpoint_file = dir + 'checkpoint.conf'
log_file = '/var/log/Pens2000/FO_TIPSALDOS_X_CUENTA.csv'
internal_log = '/var/log/Pens2000/internal.log'
query = dir + 'query'
sys.path.append(os.path.abspath(connection_dir))
from db_connect_pool import *
def Timing(f):
def wrap(*args):
time1 = time.time()
ret = f(*args)
time2 = time.time()
print "%s function took %0.3f ms" % (f.func_name,(time2- time1)*1000.0)
return ret
return wrap
#Timing
def InternalLogWriter(message):
now = datetime.datetime.now()
log = open(internal_log, 'a')
log.write("%s ==> %s" % (now.strftime("%Y-%m-%d %H:%M:%S"),message))
log.close()
return
#Timing
def GetLastCheckpoint():
global cur_checkpoint
conf = open(checkpoint_file, 'r')
cur_checkpoint = conf.readline()
cur_checkpoint = cur_checkpoint.rstrip('\n')
cur_checkpoint = cur_checkpoint.rstrip('\r')
conf.close()
#Timing
def ReadQuery():
global cur_checkpoint
GetLastCheckpoint()
qr = open(query, 'r')
line = qr.readline()
line = line.rstrip('\n')
line = line.rstrip('\r')
Query["sql"], Query["checkpoint_datetype"],Query["checkpoint_name"] = line.split(";")
sql = Query["sql"]
checkpoint_datetype = Query["checkpoint_datetype"]
checkpoint_name = Query["checkpoint_name"]
if (checkpoint_datetype == "DATETIME"):
sql = sql + " AND " + checkpoint_name + " >= " + "TO_DATE('%s','YYYY-MM-DD HH24:MI:SS') ORDER BY %s" % (cur_checkpoint,checkpoint_name)
if (checkpoint_datetype == "NUMBER"):
sql = sql + " AND " + checkpoint_name + " > " + "%s ORDER BY %s" % (cur_checkpoint,checkpoint_name)
qr.close()
return str(sql)
#Timing
def ReadCheckpointName():
qr = open(query, 'r')
line = qr.readline()
line = line.rstrip('\n')
line = line.rstrip('\r')
Query["sql"], Query["checkpoint_datetype"],Query["checkpoint_name"] = line.split(";")
checkpoint_name = Query["checkpoint_name"]
return str(checkpoint_name)
#Timing
def LocateCheckPoint(description):
description
checkpoint_name = ReadCheckpointName()
#print checkpoint_name
#print description
startcounter = 0
finalcounter = 0
flag = 0
for d in description:
prog = re.compile(checkpoint_name)
result = prog.match(d[0])
startcounter = startcounter + 1
if result:
finalcounter = startcounter - 1
counterstr = str(finalcounter)
print "Checkpoint found in the array position number: " + counterstr
flag = 1
if (flag == 0):
print "Checkpoint did not found"
return finalcounter
#Timing
def DictFactory(description,data):
column_names = [col[0] for col in description]
results = []
for row in data:
results.append(dict(zip(column_names,row)))
return results
#Timing
def WriteCsvLog(log_file,header,data):
checkpoint_index = LocateCheckPoint(header)
file_exists = os.path.isfile(log_file)
with open(log_file,'ab') as csv_file:
headers = [i[0] for i in header]
csv_writer = csv.writer(csv_file,delimiter='|')
if not file_exists:
print "File does not exist, writing new CSV file"
csv_writer.writerow(headers) # Writing headers once
for d in data:
csv_writer.writerow(d)
cur_checkpoint = d[checkpoint_index]
cur_checkpoint = str(cur_checkpoint)
WriteCheckpoint(cur_checkpoint,checkpoint_file)
csv_file.close()
#Timing
def WriteLog(log_file,header,data):
file_exist = os.path.isfile(log_file)
log = open(log_file,'a')
if not file_exist:
print "File does not exist, writing new log file"
open(log_file,'w').close()
mydata = DictFactory(header,data)
checkpoint_name = ReadCheckpointName()
#prin #string = ''
for m in mydata:
for k,v in m.items():
string = string + k + ' = ' + str(v) + ','
if k == checkpoint_name:
#print "KEY FOUND"
cur_checkpoint = v
cur_checkpoint = str(cur_checkpoint)
#print string
string = string + '\n'
print cur_checkpoint
log.write(string + '\n')
WriteCheckpoint(cur_checkpoint,checkpoint_file)
log.close()
#Timing
def WriteCheckpoint(cur_checkpoint,conf_file):
conf = open(conf_file,'w')
conf.write(cur_checkpoint)
conf.close()
#Timing
def GetInfo():
mypool = PoolToDB()
con = mypool.acquire()
cursor = con.cursor()
GetLastCheckpoint()
sql = ReadQuery()
#print sql
cursor.execute(sql)
#data = cursor.fetchall()
#WriteLog(log_file,cursor.description,data)
#WriteCsvLog(log_file,cursor.description,data)
cursor.close()
def __main__():
parser = OptionParser()
parser.add_option("-c","--change- password",dest="pass_to_change",help="Change the password for database connection",metavar="1")
(options, args) = parser.parse_args()
if (options.pass_to_change):
UpdatePassword()
else:
GetInfo()
__main__()
This is a query sample:
SELECT COD_EMPRESA, COD_TIPSALDO, COD_INVERSION, COD_CUENTA, COD_OBJETIVO, CAN_CUOTAS, SAL_TOTAL, INT_TOTAL, SAL_RESERVA, APORTE_PROM, SAL_PROM, COSTO_PROM, SAL_TOT_ANTERIOR, FEC_ULT_CALCULO, INCLUIDO_POR, FEC_INCLUSION, MODIFICADO_POR, TO_CHAR(FEC_MODIFICACION,'DD/MM/YYYY HH24:MI:SS') AS FEC_MODIFICACION, CUOTA_COMISION, MONTO_COMISION, SAL_INFORMATIVO, CREDITO_PENDIENTE, DEBITO_PENDIENTE, CAN_CUOTAS_ANTERIOR FROM FO.FO_TIPSALDOS_X_CUENTA WHERE ROWNUM <=100000 AND FEC_INCLUSION >= TO_DATE('2005-08-31 11:43:49','YYYY-MM-DD HH24:MI:SS') ORDER BY FEC_INCLUSION
PS: I've really been searching in google and this forum about my question but I haven't found anything similar.

The reading loop of QXmlReader for PyQt5 does not return the expected data

I'd like to make an QAbstractItemModel that gets its data from a series of Xml files, all situated in the same directory. Since PyQt5 no longer supports QDomDocument (or atleast i couldn't find a way to make it work), i've had to resort to a QXmlStreamReader. I'm putting the data itself in a giant python dictionary (well... not exactly giant by computer science standards) that contains other dictionaries under various keys to create a tree-like structure.
this is my code so far:
class DataModel(QtCore.QAbstractItemModel):
def __init__(self, settingsDirectory, parent = None):
super(DataModel, self).__init__(parent)
settingsDirectory.setNameFilters(["*.xml"])
files = settingsDirectory.entryList()
print(files)
self.data = {}
for i in range(len(files)):
filePath = str(files[i])
file = QtCore.QFile(settingsDirectory.absolutePath() + "/" + str(filePath))
fileOpens = file.open(file.ReadOnly | file.Text)
if fileOpens:
parser = QtCore.QXmlStreamReader(file)
print("--------Beginning parsing----------")
print("Reading file: "+str(filePath))
while not parser.atEnd():
parser.readNext()
token = parser.tokenType()
print("Reading tag: " + str(parser.name()))
print("Tag type is: " + str(token))
if token == parser.StartDocument:
self.data["XML Version"] = str(parser.documentVersion())
self.data["XML Encoding"] = str(parser.documentEncoding())
if token == parser.StartElement:
tokenName = parser.name()
if parser.tokenType() == parser.Characters:
tokenText = parser.text()
print("This tag has a text value: " + str(tokenText))
print("current data: " + str(self.data))
if token == parser.EndElement:
if tokenText != None:
self.data[tokenName] = tokenText
else:
self.data[tokenName] = {}
tokenName = None
tokenText = None
else:
print(self.tr("xml file did not open properly"))
print(self.data)
While this code doesn't crash or anything, it does have a few issues that i have no idea why they're happening or how to fix:
1.the tokenName never changes from None for some reason - solved
2.the structure of the self.data dictionary does not turn into a tree-like one, no idea why :|
example data:
<?xml version="1.0" encoding="UTF-8"?>
<tag>
<description>This is a text</description>
<types>
<typesAllowed></typesAllowed>
<typesEnabled></typesEnabled>
</types>
</tag>
yields the final result:
{'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'typesAllowed': '\n\t\t', None: '\n', 'typesEnabled': '\n\t\t', 'description': 'This is a text'}
instead of the wanted:
{'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'tag': {'description': 'this is a text', typesAllowed': '\n\t\t', 'typesEnabled': '\n\t\t'}}
I know these issues are most likely a result of my poor understanding of how a StreamReader works, so any and all tips would be welcome :)
edit 1:
the tokenName change was a silly positioning error, silly me. the code reflects the fix.
edit 2:
added an example and example output
This question is now solved; I took a different approach to the problem.
I basically took a list into which i appended tuples (name, {}) if the StartElement token had the attribute parseAs == "element" and put an evaluated string (parseText function) into the last tuple's dictionary. When it meets an EndElement token, it finds the tuple with name == tokenName, which is the name of the current token, puts it into the previous tuple's dictionary as an entry with key name.
There's a few more details as to how it works, but I'd probably just overly complicate my explanation if I included them (how it knows when to submit currData to self.data etc.)
class DataModel(QtCore.QAbstractItemModel):
def __init__(self, settingsDirectory, parent = None):
super(DataModel, self).__init__(parent)
settingsDirectory.setNameFilters(["*.xml"])
files = settingsDirectory.entryList()
print(files)
self.data = {}
self.parsingLog = {}
for i in range(len(files)):
filePath = str(files[i])
file = QtCore.QFile(settingsDirectory.absolutePath() + "/" + str(filePath))
fileOpens = file.open(file.ReadOnly | file.Text)
if fileOpens:
parser = QtCore.QXmlStreamReader(file)
currData = []
haveStartToken = False
print(self.tr("--------Beginning parsing--------"))
print(self.tr("Reading file: "+str(filePath)))
print(self.tr("---------------------------------"))
while not parser.atEnd():
if not parser.hasError():
parser.readNext()
token = parser.tokenType()
print(self.tr("--------------------"))
print(self.tr("Token type: " + str(self.printTokenType(token))))
if token == parser.StartElement:
tokenName = parser.name()
attributes = parser.attributes()
parseAs = attributes.value("parseAs")
print(self.tr("Reading StartElement: " + str(tokenName)))
print(self.tr("parseAs: " + str(parseAs)))
if parseAs == "text":
textValue = self.parseText(parser.readElementText())
print(self.tr("Text Value: " + str(textValue)))
if len(currData) != 0:
currData[len(currData)-1][1][tokenName] = textValue
else:
print(self.tr("*******Terminating application*******"))
print(self.tr("Reason: currData is empty"))
print(self.tr("*******Terminating application*******"))
sys.exit()
elif parseAs == "element":
currData.append((tokenName, {}))
else:
print(self.tr("******WARNING******"))
print(self.tr("parseAs attribute is not given correctly"))
print(self.tr("******WARNING******"))
print(self.tr("--------------------"))
elif token == parser.EndElement:
tokenName = parser.name()
print(self.tr("Reading EndElement: " + str(tokenName)))
print(self.tr("currData before: " + str(currData)))
if not haveStartToken:
startToken = currData[0][0]
haveStartToken = True
for i in currData:
if i[0] == tokenName:
print(self.tr("Closing token: " + str(tokenName)))
if i[0] != startToken:
currData[len(currData)-2][1][tokenName] = currData[len(currData)-1][1]
del currData[len(currData)-1]
print(self.tr("currData after: " + str(currData)))
print(self.tr("--------------------"))
elif i[0] == startToken:
print(self.tr("This is the final token, writing to self.data"), end = "")
self.data[startToken] = currData[0][1]
for i in range(5):
time.sleep(0.25)
print(self.tr("."), end = "")
print(self.tr("done."))
print(self.tr("--------------------"))
elif token == parser.Characters:
print(self.tr("Characters value: " + str(parser.text())))
print(self.tr("--------------------"))
elif token == parser.StartDocument:
self.parsingLog["File: "+str(filePath)] = {}
self.parsingLog["File: "+str(filePath)]["XML Version"] = str(parser.documentVersion())
self.parsingLog["File: "+str(filePath)]["XML Encoding"] = str(parser.documentEncoding())
print(self.tr("File Version: " + str(self.parsingLog["File: "+str(filePath)]["XML Version"])))
print(self.tr("File Encoding: " + str(self.parsingLog["File: "+str(filePath)]["XML Encoding"])))
elif token == parser.EndDocument:
print(self.tr("Cleaning up"), end = "")
for i in range(5):
time.sleep(0.25)
print(self.tr("."), end = "")
time.sleep(0.1)
print(self.tr("done."))
print(self.tr("self.data: " + str(self.data)))
print(self.tr("types of data: yesNo (should be str) - " +
str(type(self.data["building"]["specialSlot"]["yesNo"])) +
" - id - should be int - " + str(type(self.data["building"]["specialSlot"]["id"])) +
" - isItFloat - should be float - " + str(type(self.data["building"]["specialSlot"]["isItFloat"]))))
print(self.tr("--------------------"))
else:
print(self.tr("XML file is not well-formatted"))
else:
print(self.tr("xml file did not open properly"))
def parseText(self, text):
if isinstance(text, str):
if text == "":
return str(text)
for i in text:
if i not in ("0123456789."):
return str(text)
for j in text:
if j not in ("0123456789"):
return float(text)
return int(text)
else:
return ValueError
def printTokenType(self, token):
if token == QtCore.QXmlStreamReader.NoToken:
return "NoToken"
elif token == 1:
return "Invalid"
elif token == QtCore.QXmlStreamReader.StartDocument:
return "StartDocument"
elif token == QtCore.QXmlStreamReader.EndDocument:
return "EndDocument"
elif token == QtCore.QXmlStreamReader.StartElement:
return "StartElement"
elif token == QtCore.QXmlStreamReader.EndElement:
return "EndElement"
elif token == QtCore.QXmlStreamReader.Characters:
return "Characters"
elif token == QtCore.QXmlStreamReader.Comment:
return "Comment"
elif token == QtCore.QXmlStreamReader.DTD:
return "DTD"
elif token == QtCore.QXmlStreamReader.EntityReference:
return "EntityReference"
elif token == QtCore.QXmlStreamReader.ProcessingInstruction:
return "ProcessingInstruction"

Working with dictionaries

I have dictionary that takes data from a file and puts it in list. I want to make a search engine that when I type name or quantity or price of a component it will find all with that name and print info that it holds (price, quantity, category).
Input
I just can't make my script read info from lines in the file. The file's text looks like:
AMD A4-3300 2.5GHz 2-Core Fusion APU Box|5.179,00 din|58|opis|Procesor
AMD Athlon II X2 340 3.2GHz Box|4.299,00 din|8|opis|Procesor
INTEL Celeron G465 1.9GHz Box|3.339,00 din|46|opis|Procesor
INTEL Celeron Dual Core G550 2.6GHz Box|1.439,00 din|13|opis|Procesor
Output
Here is my code which should be a search engine for my components, I just don't know how I can take form list data and target that data full info for example I type key word like AMD and seach engine print all AMD components that have AMD in their name or price I put price range and I got all prices in that range. I tried some things but it wont work.Sorry for long time to respond.I translated my code, there may be some lines left out but I hope you get the picture.
def option_p_components():
option = 0
#component = []
components = []
while option == 0 :
option_comp = option_p_components_str()
option_k = int(raw_input("Chose option : ")
print "" \
""
if option_k != 1 and option_k != 2 :
error = "!!!Error!!!"
error_p = " you typed wrong command please try again ."
print "-" * 80
print error.center(80)
print error_p.center(80)
print "-" * 80
option = 0
if option_k == 1 :
option_p_d = 0
print "Components search "
print"-" * 80
cu = temp_comp(components)
print cu
print "X)Working with components(editing, deleting )"
print"-" * 80
print "1)Change components "
print "2)Editing components"
print "3)Delating componetns"
print "4)Components search "
print "5)Back"
print"-" * 80
option_p_d = int(raw_input("Chose option :"))
if Option_p_d == 2 :
option_d = 0
for I in range(5):
u_component_name = raw_input("Unesite naziv komponente :")
u_component_price= raw_input("Unestie cenu komponente:")
u_component_quantity = raw_input("Unesite kolicinu komponente :")
u_component_opis = raw_input("Unesite opis komponente :")
u_component_category = raw_input("Unesite kategoriju komponente:")
component = {"name_compo":u_komponenta_ime,
"price":u_komponenta_cena,
"quantity":u_komponenta_kolicina,
"opis":u_komponenta_opis,
"category":u_komponenta_kategorija}
upis_komponente = saving_components(component)
components.append(saving_components)
print"-" * 80
print "1)New component"
print "2)Back"
print"-" * 80
option_d = int(raw_input("Odaberite opciju :"))
if option_d == 1 :
option_k = 0
elif option_d == 2 :
option_p_komponenti()
elif option_k == 2 :
print "Back"
def saving_components(component):
final_komponenta = component["name_compo"] + "|" + component["price"] + "|" + componenta["quantity"] + "|"\
+ component["opis"] + "|" + component["category"]
file = open("Data/component.txt", "a")
file.write(final_component)
file.close
def reading_component(component):
file = open("Data/component.txt", "r")
for line in file :
name_comp, price, quantity, opis, category = line.split("|")
komponenta = {"name_compo": name_comp,
"price": price,
"quantity": quantity,
"opis" : opis,
"category": category}
# ovo izvlaci samo pojedinacne vrednosti iz recnika
compon_info = "Name: " + component["name_compo"] + "\n" + "price: " + component["Price"]+"\n" +\
"Quantity:" + component["quantity"] + "\n" + "Opis: " + komponenta["opis"] + \
"\n" + "category: " + component["category"] + "\n"
#print compon_info
component.append(component)
#print sortiranje(kompon_info)
#print sorted([compon_info])
#print compon_info.sort()
#Vrti koliko ima u fajlu for ...a to je 7
file.close()
return component
def temp_comp(components):
pretraga_po_opisu(komponente)
def pretraga_po_opisu(komponente):
kolicina = str(raw_input("Unesite kolicinu:"))
for komponenta in komponente:
if komponenta["kolicina"] == kolicina:
print komponenta["kolicina"]
return None
def pera(komponente, cena):
ulaz = input("Unesi")
list = komponente.pera("cena",cena)
All you need is csv.DictReader() together with a sequence of key names for each column:
with open(inputfilename, 'rb') as fileobj:
reader = csv.DictReader(fileobj,
('name_compon', 'price', 'quantity', 'something_else', 'category'),
delimiter='|')
for row in reader:
print row
where row is the dictionary you wanted.
If you want to look into using zip, you could always use it here:
component_dicts = []
components = ("name_compon", "price", "quanity", "category")
with open('/path/to/data') as f:
for line in f.readlines():
components_dicts.append(dict(zip(components, line.split("|")[:4])))
#slicing the first four elements because you never say which 4 out of 5 you wanted.
for c in components_dict:
print c
Here the line.split("|") method is creating a list of str's, dividing the string being read wherever the "|" character is found.
Then zip will return a list of tuples which you then feed into a dict:
# This is what it would look like after you zip the components tuple and the line.split("|") data
[(name_compon, 'AMD A4-3300 2.5GHz 2-Core Fusion APU Box'), (price, '5.179,00 din'), (quanity, 58), (type, opis)]

Categories