Python read data in as binary

Python read data in as binary - python

I want to read logData in as binary and then parse that binary data in the second for loop, the same way the loop currently parses string data. Is this possible?
# Decode a hex-text log file (path given in argv[1]) against a dictionary of
# format strings that is read from stdin.
#
# stdin lines:  MODULE_ID, LOG_LINE, ARG_COUNT, FILE_NAME, "DATA_STRING"
# log lines:    MODULE_ID LOG_LINE ARG_COUNT ARG...   (whitespace-separated hex)
with open(sys.argv[1]) as logFile:
    logData = logFile.readlines()
processedSources = sys.stdin.readlines()

# (moduleId, logLine) -> [moduleId, logLine, argCount, fileName, outputString]
stringDictionary = {}
sourcePattern = re.compile(r'(\d+),\s+(\d+),\s+(\d+),\s+(.*),\s+(".*")')
for line in processedSources:
    # Match data looking for MODULE_ID, LOG_LINE, ARG_COUNT, FILE_NAME, DATA_STRING
    match = sourcePattern.search(line)
    if match:
        moduleId = int(match.group(1))
        logLine = int(match.group(2))
        argCount = int(match.group(3))
        fileName = match.group(4)
        outputString = match.group(5)
        stringDictionary[(moduleId, logLine)] = [moduleId, logLine, argCount, fileName, outputString]
    else:
        print("Failed string dictionary on: " + line)

for line in logData:
    # Match data looking for MODULE_ID, LOG_LINE, ARG_COUNT, ARGUMENTS.
    # str.split() drops the trailing newline and yields [] for a blank line,
    # so the mismatch branch below is actually reachable.
    matchLogData = line.split()
    if len(matchLogData) >= 3:
        moduleId = int(matchLogData[0], 16)
        logLine = int(matchLogData[1], 16)
        argCount = int(matchLogData[2], 16)
        # Membership test instead of indexing: indexing a missing key raises
        # KeyError and would never reach the "not found" branch.
        if (moduleId, logLine) in stringDictionary:
            processedData = stringDictionary[(moduleId, logLine)]
            if argCount != processedData[2]:
                print("Argument count mismatch on : " + line)
                # NOTE(review): argCount comes from the log line and
                # processedData[2] from the dictionary; confirm which one is
                # meant to be "expected".
                print(" expected %d found %d" % (argCount, processedData[2]))
            else:
                logString = "%02x:%4d:%s:" + processedData[4]
                # Separate name so the list being iterated is not rebound.
                logArgs = (processedData[0], processedData[1], processedData[3])
                logArgs += tuple(int(matchLogData[3 + index], 16) for index in range(argCount))
                print(logString % logArgs)
        else:
            print("ModuleId:%d Line:%d, not found in source dictionary" % (moduleId, logLine))
            print(" Line data: " + line)
    else:
        print("Expected log input data mismatch MODULE_ID LOG_LINE ARG_COUNT ARGS")
        print("Line: " + line)

Related

Yaml To Json using Regex python

I just converted my Yaml file to Json with Python
/Stackoverflow telling me to put more lines of text so adding/
Yaml file here.
timetable:
subject1:
day: Пн
time: 08:20-09:50
room: 0
lesson: Физическая Культура
teacher: Трифонов
location: Онлайн
parity: False
subject2:
day: Пн
time: 10:00-11:30
room: 11210
lesson: Математика (Практические занятия)
teacher: Игоревич
location: д.9, лит.
parity: False
Here is my converting code:
# Convert the indentation-structured 'saturday.yaml' into brace-delimited text
# written to 'timetable.json'.
#
# Each line is emitted as  "key":value ; a deeper next line opens a "{" and a
# shallower next line closes one with "},".
# NOTE(review): as in the original, the last input line is only used as
# look-ahead for the indentation comparison and is never written itself, and
# the closing braces are hard-coded for one fixed nesting depth.
with open('saturday.yaml', 'r', encoding="utf-8") as inputfile:
    data = inputfile.readlines()
lines = len(data)
list1 = list()
stringo = list()
with open('timetable.json', 'w', encoding="utf-8") as outputfile:
    # Indentation width of the current line; compared with the next line's.
    start_k = len(data[0]) - len(data[0].lstrip())
    outputfile.write("{\n")
    for i in range(0, lines - 1):
        if data[i].lstrip()[0] == '-':
            # List item: strip the leading dash and emit the rest.
            list1.append(' "' + data[i].lstrip().lstrip('-'))
            outputfile.write(' "' + data[i].lstrip().lstrip('-'))
        else:
            # key: value pair; split on the first colon only so values such as
            # "08:20-09:50" stay intact.
            stringo = data[i].lstrip().split(':', maxsplit=1)
            outputfile.write(' "' + stringo[0] + '":' + stringo[1].lstrip())
        end_k = len(data[i + 1]) - len(data[i + 1].lstrip())
        if end_k < start_k:
            outputfile.write(" },"'\n')
        if end_k > start_k:
            outputfile.write('\n'" {"'\n')
        start_k = end_k
    outputfile.write(' }\n }\n}')
Now I need to convert the YAML file to JSON using regular expressions, and I am stuck there. Any suggestions?

python error could not convert string to float

We get the error below while exporting data from a Slack channel to a file. When we run the script to fetch one day of data, it works perfectly.
But when we run it for two months of data, it writes about 10 days of data to separate files and then throws an error on one particular date. It is possible that the source data in Slack for that date differs from what the script expects.
Traceback (most recent call last):
File "C:\Users\Slack SCript\script.py", line 218, in <module>
main()
File "C:\Users\Slack SCript\script.py", line 201, in main
parse(message['text'])
File "C:\Users\Slack SCript\script.py", line 114, in parse
size = float(elements[1])
ValueError: could not convert string to float:
Looking at the source data, we found that some values are 0, which may be what triggers the error. Is there any way to skip such a value and continue with the rest of the data?
from slackclient import SlackClient
import time
import os
import sys
import datetime
from dateutil.relativedelta import relativedelta
# Server identifiers that parse() searches for inside message lines.
servers = ("fd2a", "ff1a", "hh3b", "kw1a", "kw1b", "lo8a", "os5a", "os5b", "sg2a", "sg2b", 'sy1a', 'va1a', 'va1b')
# Type keywords that parse() searches for; "" is contained in every string,
# so it always matches.
types = ("", "nfs", "cluster")
# Server / type most recently matched by parse(). "d" is not one of `servers`
# (presumably a placeholder until the first match).
currser = "d"
currtype = ""
# Counters keyed by server + type; reset_data() fills in every combination.
used = {}
total = {}
# Only ever zeroed by reset_data() in the code shown here.
available = {}
# parse() declares this global but does not assign it; main() keeps its own local ts.
ts = 0
# Directory that main() creates through make_dir().
dir_name = "data"
def savedata(dir_path, filename, data):
    """Write *data* to the file ``dir_path + filename``, replacing any old content.

    The two path parts are concatenated as-is, so *dir_path* must already end
    with a path separator. The resulting path is printed for progress output.

    Raises:
        OSError: if the file cannot be opened or written.
    """
    # The context manager closes the file even if the write fails.
    with open(dir_path + filename, "w") as f:
        print(dir_path + filename)
        f.write(data)
def reset_data():
    """Zero the used/total/available counters for every server/type pair."""
    print("datareset")
    for server in servers:
        for kind in types:
            key = server + kind
            used[key] = 0
            total[key] = 0
            available[key] = 0
# Build a tab-separated usage report from the module-level `used` / `total`
# counters and, if any total is non-zero, save it through savedata().
# `ts` is a Unix timestamp; it decides the date and the M/E part of the file name.
# NOTE(review): the original indentation was lost, so the nesting of the
# statements below has to be inferred when restoring it.
def write_data(ts):
datastr = ''
global used
global total
# Sum of every total seen; used below to decide whether there is anything to save.
ttotaltotalsum = 0
# One section per type: the type name, a header row, then one row per server.
for j in range(0, len(types)):
datastr += types[j] + '\n'
datastr += "Name\t" + "Region\t" + "total(TB)\t" + "used(TB)\t" + "available(TB)\t" + "Used(%)\n"
for i in range(0, len(servers)):
tused = used[servers[i] + types[j]]
ttotal = total[servers[i] + types[j]]
ttotaltotalsum += ttotal
# Servers with no recorded total are left out of the report (this also
# avoids dividing by zero in the percentage column).
if (ttotal != 0):
# Name column = server id without its last character, Region column = that
# last character. Sizes are divided by 1024 before printing
# (presumably GB -> TB, matching the header - confirm).
datastr += (
servers[i][0:len(servers[i]) - 1] + "\t\t" +
servers[i][len(servers[i]) - 1] + "\t\t" +
"{:.1f}".format(ttotal / 1024) + " \t\t" +
"{:.1f}".format(tused / 1024) + " \t\t" +
"{:.1f}".format((ttotal - tused) / 1024) +"\t\t"+
"{:.1f}".format(tused / ttotal * 100) + " \t\t" +
" \n")
print("..")
if (ttotaltotalsum > 0):
hour= datetime.datetime.fromtimestamp(int(ts)).hour
day= datetime.datetime.fromtimestamp(int(ts)).day
month= datetime.datetime.fromtimestamp(int(ts)).month
year=datetime.datetime.fromtimestamp(int(ts)).year
# File name carries "M" when the hour is before 12 and "E" otherwise,
# followed by day-month-year.
if hour < 12:
savedata("data/", "Storage-Update-M-" +
str(day) + "-" +
str(month) + "-" +
str(year) + ".txt", datastr)
else:
savedata("data/", "Storage-Update-E-" +
str(day) + "-" +
str(month) + "-" +
str(year) + ".txt", datastr)
def parse(text):
    """Accumulate capacity figures from one Slack message into the counters.

    The message is processed line by line:

    * a line containing "Netapp Cluster" selects the current server and type
      (the last matching entry of ``servers`` / ``types`` wins);
    * a line containing "Total available capacity" adds the number after the
      first ":" to ``total`` for the current server/type;
    * a line containing "size provisioned" adds it to ``used``.

    A capacity line whose value is missing or not numeric (for example
    ``"Total available capacity:"`` with nothing after the colon) is reported
    and skipped instead of aborting the whole run with ``ValueError``.

    Args:
        text: message body, lines separated by "\\n".

    Returns:
        The tuple ``(used, total)`` of the module-level counter dicts.

    Raises:
        KeyError: if a capacity line appears while the current server/type
            pair has no counter entry.
    """
    global currser
    global currtype
    for line in text.split("\n"):
        if "Netapp Cluster" in line:
            for server in servers:
                if server in line:
                    currser = server
            for kind in types:
                if kind in line:
                    currtype = kind
        if "Total available capacity" in line:
            counters = total
        elif "size provisioned" in line:
            counters = used
        else:
            continue
        elements = line.split(":")
        try:
            size = float(elements[1])
        except (IndexError, ValueError):
            # No colon, or an empty / non-numeric value after it.
            print("Skipping line with unparseable size:", repr(line))
            continue
        counters[currser + currtype] += size
    return (used, total)
def make_dir(dir_name):
    """Create the directory *dir_name*, with parents, if it does not exist yet.

    Raises:
        FileExistsError: if *dir_name* exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(dir_name, exist_ok=True)
# Fetch the history of one Slack channel, feed every message to parse(), and
# call write_data() whenever two consecutive messages are more than
# `threshmins` minutes apart.
# NOTE(review): the original indentation was lost, so the nesting of the
# statements below has to be inferred when restoring it.
def main():
# Token and channel name are blank in the posted snippet.
slack_token = ""
channel_name = ''
time_on_last_message = time.time()
channel_id = ""
# Timestamp of the newest message handled so far (local; not the module-level ts).
ts = 0.000
threshmins = 20
# Resolve the channel name to its id.
channels_call = SlackClient(slack_token).api_call("channels.list")
print(channels_call)
print(channels_call.keys())
for channel in channels_call["channels"]:
if channel["name"] == channel_name:
channel_id = channel["id"]
print(channel)
make_dir(dir_name)
print(channel_id)
reset_data()
# time_on_last_message was set a few statements earlier, so this difference
# is presumably close to zero here.
time_since_last_update = time.time() - time_on_last_message
print("Waiting for new data....", time.time() - time_on_last_message)
if time_since_last_update > threshmins * 60:
write_data(ts)
reset_data()
sc = SlackClient(slack_token)
# Oldest timestamp to fetch: six months before now (despite the variable name).
date_after_month = datetime.datetime.now() + relativedelta(months=-6)
date_after_month=date_after_month.timestamp()
# Page through the history until the API reports no more messages.
while True:
breakflag=0
data = sc.api_call(
"channels.history",
channel=channel_id,
oldest=date_after_month,
count=1000,
)
if (data['ok'] == True):
messages = data['messages']
# Iterated in reverse (presumably the API returns newest first - confirm).
for message in reversed(messages):
# print(message['ts'])
# Only handle messages newer than the last one processed.
if float(message['ts']) > ts:
print("difference=", float(message['ts']) - ts)
# A gap longer than the threshold ends the current batch: write what was
# accumulated so far, then start counting from zero again.
if float(message['ts']) - ts > (threshmins * 60):
print("greater diffrrece>reset................")
write_data(ts)
print(ts)
reset_data()
time_on_last_message = time.time()
ts = float(message['ts'])
parse(message['text'])
if (data["has_more"] == True):
print("has more")
# Next page starts at the timestamp of the message the loop above ended on.
date_after_month=message['ts']
else:
breakflag=1
else:
print("No data returned or error")
time.sleep(1) # in Seconds
# NOTE(review): no write_data() call follows the loop, so the batch accumulated
# after the last gap does not appear to be written - confirm.
if(breakflag==1):
break
main()

Based on the error message, elements[1] is empty. And Python cannot convert an empty string to float:
>>> float("")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: could not convert string to float:

The elements[1] element is a string that can't be parsed to a float. The easiest way would be to attach a debugger and investigate what is being parsed. Then change your code to parse it better.
The second easiest way would be to binary search for the record that makes it fail and fix your code to parse it better.
The most preferable way: once you have found the case your code does not support, write a test that proves the case is now handled:
# Example regression test: for an entry whose value is not a float, parse is
# expected to return None as the value instead of raising.
def test_parse_xyz():
assert [("blablabla", None)] == parse(["blablabla: -certainly_not_a_float"])
These tests can automatically be detected by e.g. pytest:
$ pytest parser.py

Getting an error because of the " \ " characters

I'm getting:
"unexpected character after line continuation character"
How should I write the line `line = line.strip("\xef\xbb\n\xbf")` without getting that error?
# Apply the balance changes listed in "update" to the card records in
# "data.txt" and write the merged, sorted records to "newdata".
#
# data.txt line:  <cardNum> <name words...> <date> <sum>
# update file:    first line is the update date, every later line is
#                 <cardNum> <name words...> <signed amount, e.g. +50 or -20>
dataMatrix = []
updateMatrix = []
cardList = []

with open("data.txt", "r") as dataFile:
    for line in dataFile:
        # Strip the newline and any UTF-8 byte-order-mark characters.
        line = line.strip("\xef\xbb\n\xbf")
        if not line:
            continue  # blank line: nothing to parse
        tmp = line.split(" ")
        cardNum = tmp[0]
        cardName = " ".join(tmp[1:-2])
        cardDate = tmp[-2]
        cardSum = tmp[-1]
        dataMatrix.append([cardNum, cardName, cardDate, cardSum])
        cardList.append(cardNum)

updateDate = ""
with open("update", "r") as updateFile:
    for lineNumber, line in enumerate(updateFile):
        line = line.strip("\xef\xbb\n\xbf")
        if lineNumber == 0:
            # The first line of the update file carries the date of the update.
            updateDate = line
            continue
        if not line:
            continue
        tmp = line.split(" ")
        upNum = tmp[0]
        upName = " ".join(tmp[1:-1])
        upSum = tmp[-1]
        updateMatrix.append([upNum, upName, upSum])

for row in updateMatrix:
    if row[0] in cardList:
        index = cardList.index(row[0])
        # int() understands a leading "+" or "-", and also a bare number;
        # slicing off the first character treated unsigned values wrongly.
        plus = int(row[2])
        curSum = int(dataMatrix[index][3])
        dataMatrix[index][3] = curSum + plus
        dataMatrix[index][2] = updateDate
    else:
        # Unknown card: add it, storing the amount without its sign character
        # (as the original did).
        dataMatrix.append([row[0], row[1], updateDate, row[2][1:]])

dataMatrix.sort(key=lambda row: row[0])
with open("newdata", "w") as newFile:
    for row in dataMatrix:
        print(row)
        newFile.write(" ".join(str(a) for a in row) + "\n")

python: read file and split the data

I have a file config and the contents are separated by space " "
cat config
/home/user1 *.log,*.txt 30
/home/user2 *.trm,*.doc,*.jpeg 10
I want to read this file,parse each line and print each field from the each line.
Ex:-
Dir = /home/user1
Fileext = *.log,*.txt
days=30
I couldn't go further than the below..
def dir():
    """Print the raw lines of the 'config' file, then split each on single spaces."""
    handle = open('config', 'r+')
    raw_lines = handle.readlines()
    print("file contents are %s" % raw_lines)
    for entry in raw_lines:
        # The split result is not used yet; per-field handling would go here.
        fields = entry.split(' ')
dir()
Any pointers how to move further?

Your code is fine; you are just missing the last step, processing each element of the split string. Try this:
def dir():
    """Print every record of the whitespace-separated 'config' file.

    Each line is expected to hold three fields: directory, comma-separated
    file extensions, and a number of days. Lines with fewer than three fields
    (for example blank lines) are skipped.
    """
    # Read-only access is enough, and the context manager closes the file.
    with open('config', 'r') as file:
        cont = file.readlines()
    print("file contents are %s" % cont + '\n')
    elements = []
    for row in cont:
        # split() without arguments also drops the trailing newline, so the
        # 'days' field no longer carries a line break.
        rowElems = row.split()
        if len(rowElems) < 3:
            continue
        elements.append({'dir': rowElems[0], 'ext': rowElems[1], 'days': rowElems[2]})
    for e in elements:
        print("Dir = " + e['dir'])
        print("Fileext = " + e['ext'])
        print("days = " + e['days'])
dir()
At the end of this code, you will have all the rows processed and stored in an array of dictionaries you can easily access later.

You can write a custom function to parse each line, and then use the map function to apply that function against each line in file.readlines():
def parseLine(line):
    """Format the first three whitespace-separated fields of *line*.

    Returns a three-line string ``Dir = ...``, ``Fileext = ...``, ``Days = ...``.
    Splitting on any whitespace also removes the trailing newline that lines
    read from a file carry, so no blank line follows the ``Days`` value.

    Raises:
        ValueError: if *line* has fewer than three fields.
    """
    Dir, FileExt, Days = line.split()[:3]
    return 'Dir = {}\nFileext = {}\nDays = {}'.format(Dir, FileExt, Days)
def dir():
    """Print the 'config' file with every line formatted by parseLine."""
    # The file is only read, so open it read-only rather than 'r+'.
    with open('config', 'r') as file:
        print('file contents are\n' + '\n'.join(parseLine(line) for line in file))
Results:
>>> dir()
file contents are
Dir = /home/user1
Fileext = *.log,*.txt
Days = 30
Dir = /home/user2
Fileext = *.trm,*.doc,*.jpeg
Days = 10

Skyscanner API CSV file

I am new to Python and I am trying to run this code, which I found on GitHub, but it does not work. Is something wrong with the code, or is it my fault? I always get the
"no data found"
message.
skyscanner.py :
#!/usr/bin/python
"""The script obtains prices and flight information for a given
input (departure, arrival airports and date), outputs this
data to the console and writes it to a csv file."""
__author__ = "Ingvaras Merkys"
import json
import urllib2
import re
import sys
import time
# Global vars:
AUTOSUGGEST_URL = "http://www.skyscanner.net/dataservices/geo/v1.0/autosuggest/uk/en/"
# e. g. http://www.skyscanner.net/dataservices/geo/v1.0/autosuggest/uk/en/edinb
SKYSCANNER_URL = "http://www.skyscanner.net/flights/"
# e. g. http://www.skyscanner.net/flights/vno/edi/130419
ROUTEDATA_URL = "http://www.skyscanner.net/dataservices/routedate/v2.0/"
# e. g. http://www.skyscanner.net/dataservices/routedate/v2.0/a00765d2-7a39-404b-86c0-e8d79cc5f7e3
SUGGESTIONS_URL = "http://www.skyscanner.net/db.ashx?ucy=UK&lid=en&ccy=GBP"
# e. g. http://www.skyscanner.net/db.ashx?ucy=UK&lid=en&ccy=GBP&fp=KAUN&tp=EDIN&dd=20130410
# Look up the cheapest journey for argv = [departure, arrival, date], print a
# short summary and return the cheapest price (or 0 when no flights are found).
# Python 2 code (urllib2, print statements).
def main(argv):
# URL-encode spaces and drop double quotes from the place names; the date
# loses its "/" separators.
input_from = argv[0].replace(" ", "%20").replace("\"", "")
input_to = argv[1].replace(" ", "%20").replace("\"", "")
date = argv[2].replace("/", "")
place_id_from, place_id_to, name_from, name_to = get_codes(input_from, input_to)
# testjuly = map (lambda x: len(x) == 1 and '13070'+x or '1307'+x, [ str(i+1) for i in range(31) ])
# for date in testjuly:
session_key = get_session_key(place_id_from, place_id_to, date)
# Up to three attempts to download and decode the route data.
for attempt in range(3):
# if script is run repeatedly sometimes an empty html is returned
try:
response = urllib2.urlopen(ROUTEDATA_URL + session_key)
html = response.read()
data = json.loads(html)
except ValueError:
# The response was not valid JSON: log the URL and the body, wait, retry.
# NOTE(review): this file handle is never closed.
f = open("error.log", "a")
f.write(ROUTEDATA_URL + session_key + "\n")
f.write("Returned:\n" + html + "\n")
time.sleep(1)
else:
break
# This else belongs to the for loop: it runs only when no attempt hit `break`.
else:
sys.exit(1)
query = data['Query']
if data['Stats']['OutboundLegStats']['TotalCount'] == 0:
print "No flights found from", name_from, "to", name_to
return 0
#show_suggestions(query['OriginPlace'], query['DestinationPlace'], date)
#sys.exit(2)
stations = data['Stations']
quotes = data['Quotes']
carriers = data['Carriers']
cheapest_price = data['Stats']['ItineraryStats']['Total']['CheapestPrice']
print "Results for flight from", name_from, "to", name_to
print "Outbound date:", re.split('T', query['OutboundDate'])[0]
# NOTE(review): the label says RMB while the URL constants in this file use
# ccy=GBP - confirm the currency.
print "Cheapest Journey:", cheapest_price, "RMB"
return cheapest_price
# f = open(place_id_from + '-' + place_id_to + '-' + date + '.csv','w')
# for leg in data['OutboundItineraryLegs']:
# leg_price = get_leg_price(leg['PricingOptions'], quotes)
# depart_time = leg['DepartureDateTime'].replace("T", " ")
# arrive_time = leg['ArrivalDateTime'].replace("T", " ")
# duration = leg['Duration']
# carrier_names = get_carrier_names(leg['MarketingCarrierIds'], carriers)[1]
# print "\n\tPrice:", leg_price, "GBP"
# print "\tDeparting:", depart_time
# print "\tArriving:", arrive_time
# print "\tDuration:", duration/60, "h", duration%60, "min"
# print "\tCarriers:", carrier_names
# print "\t# of stops: ", leg['StopsCount']
# stop_ids = leg.get('StopIds', [])
# stop_ids_string = ", ".join([ get_station_name(stop_id, stations) for stop_id in stop_ids ])
# print "\t\t", stop_ids_string
# row = str(leg_price) + "\t" + depart_time + "\t" + arrive_time + "\t" + str(duration) + "\t" + carrier_names + "\t" + stop_ids_string
# f.write(row + "\n")
# Functions
# Resolve both user-supplied place strings through the autosuggest endpoint.
# Exits the process with status 3 when a needed suggestion entry is missing.
def get_codes(input_from, input_to):
"""Returns place id codes and names, e. g. ("EDI", "KUN", "Edinburgh", "Kaunas")"""
try:
# Index of the suggestion to use for the departure place.
i = 0
autosuggest_json_from = json.load(urllib2.urlopen(AUTOSUGGEST_URL + input_from))
if len(autosuggest_json_from[0]['PlaceId']) == 4:
# for cases where the first result is abstract (e. g. Glasgow (Any))
i = 1
place_id_from = autosuggest_json_from[i]['PlaceId']
name_from = autosuggest_json_from[i]['PlaceName']
# Same selection rule for the arrival place.
j = 0
autosuggest_json_to = json.load(urllib2.urlopen(AUTOSUGGEST_URL + input_to))
if len(autosuggest_json_to[0]['PlaceId']) == 4:
j = 1
place_id_to = autosuggest_json_to[j]['PlaceId']
name_to = autosuggest_json_to[j]['PlaceName']
# Raised when a suggestion list has no entry at the index used above.
except IndexError:
print "No code found for:"
print input_from, "AND/OR", input_to
sys.exit(3)
return (place_id_from, place_id_to, name_from, name_to)
# Download the flights page for the route/date and pull the session key out
# of its HTML. Exits the process with status 4 when no key is present.
def get_session_key(place_id_from, place_id_to, date):
"""Returns a session key for a given query, on failure exits
NB. distant or past dates cause failures"""
response = urllib2.urlopen(SKYSCANNER_URL + place_id_from + "/" + place_id_to + "/" + date)
html = response.read()
# The ur'' prefix is Python 2 only syntax.
regex = ur'"SessionKey":"(.+?)"'
# e. g. "SessionKey":"a00765d2-7a39-404b-86c0-e8d79cc5f7e3"
try:
session_key = re.findall(regex, html)[0]
# No "SessionKey":"..." pair anywhere in the page. This is the branch that
# prints the message starting "No data found".
except IndexError:
print "No data found for this date"
sys.exit(4)
return session_key
# Query the suggestions endpoint and print the 'fan' value of every returned
# place whose 'fpid' differs from from_id. Nothing is returned.
def show_suggestions(from_id, to_id, date):
"""Prints alternative departure airports"""
suggest_places_string = ""
# "20" is prepended to the date (the usage text gives dates as yy/mm/dd).
suggestions_json = json.load(urllib2.urlopen(SUGGESTIONS_URL + "&fp=" + from_id + "&tp=" + to_id + "&dd=20" + date))
try:
suggest_places = suggestions_json['rs']
for place in suggest_places:
if place['fpid'] != from_id:
suggest_places_string += place['fan'] + ", "
# [:-2] drops the trailing ", " separator.
if suggest_places_string[:-2] != "":
print "Try airports: ", suggest_places_string[:-2]
except (KeyError, IndexError):
print "Suggestions unavailable"
def get_station_name(station_id, stations):
    """Look up the name of an (intermediate) station, e. g. "London Heathrow".

    Returns the 'Name' of the first entry in *stations* whose 'Id' equals
    *station_id*, or an empty string when no entry matches.
    """
    matching_names = (entry['Name'] for entry in stations if entry['Id'] == station_id)
    return next(matching_names, "")
def get_leg_price(pricing, quotes):
    """Return the lowest price among a leg's pricing options.

    Each option's price is the sum of the quotes named in its 'QuoteIds'.
    Raises ValueError when *pricing* is empty.
    """
    option_totals = [get_quote_price(option['QuoteIds'], quotes) for option in pricing]
    return min(option_totals)
def get_quote_price(quote_ids, quotes):
    """Sum the 'Price' of every quote whose 'Id' appears in *quote_ids*.

    Ids are matched in the order given, and an id listed twice is counted
    twice. Returns 0 when nothing matches.
    """
    return sum(
        candidate['Price']
        for wanted_id in quote_ids
        for candidate in quotes
        if candidate['Id'] == wanted_id
    )
def get_carrier_names(carrier_ids, carriers):
    """Return the carrier names for *carrier_ids* as a list and as one string.

    The result is a tuple ``(names, joined)``, where ``joined`` is the names
    separated by ", ", e.g. ``(["airBaltic", "KLM"], "airBaltic, KLM")``.
    An id without a matching carrier contributes an empty name.
    """
    carrier_names = [get_carrier_name(carrier_id, carriers) for carrier_id in carrier_ids]
    # join() replaces the manual "+= name + ', '" followed by slicing off the
    # trailing separator.
    return (carrier_names, ", ".join(carrier_names))
def get_carrier_name(carrier_id, carriers):
    """Return the 'Name' of the first carrier whose 'Id' is *carrier_id*.

    Falls back to an empty string when no carrier matches.
    """
    return next((item['Name'] for item in carriers if item['Id'] == carrier_id), "")
# Script entry point: exactly three command-line arguments are required
# (departure, arrival, date); otherwise print the usage text and exit.
if __name__ == "__main__":
if len(sys.argv) == 4:
main(sys.argv[1:])
else:
print "Enter arguments in this way:\n"
print "python skyscanner.py {departure airport} {arrival airport} {departure date (yy/mm/dd)}\n\n"
print "e. g. python skyscanner.py \"glasgow prestwick\" kaunas 13/07/21\n"
sys.exit()

These endpoints are not supported as external APIs, they are used by the site itself. They can/do change without notice and some require a level of "state" to operate.
However, we do have an API that would allow you access to the same auto-suggest / flight data that the site is driven from. More details can be found at http://business.skyscanner.net

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python read data in as binary - python

Related

Yaml To Json using Regex python

python error could not convert string to float

Getting an error because of the " \ " characters

python: read file and split the data

Skyscanner API CSV file

Categories

Resources