From 2.7 to 3.4 - python

I wrote this program on IDLE 2.7 by mistake (I am a beginner).
Now I am trying to run it in 3.4 I get errors, I went on an made changes but I am still not able to run it. any help?
Yes the code might not even be great but I am still working on it. So any help will be greatly appreciated.
For me I thought only parentheses were the major difference between both updates.
# Convert a decimal to a hex as a string
def decimalToHex(decimalValue):
hex = ""
while decimalValue != 0:
hexValue = int(decimalValue) % 16
hex = toHexChar(hexValue) + hex
decimalValue = int(decimalValue) // 16
return hex
def printRect (row_count, col_count):
row = []
column = []
for r in range(row_count):
row = []
column = []
end_row_flag = 'False'
for c in range(col_count):
if r % (row_count) == 0:
if c % (col_count-1) == 0:
row.append('+')
else:
row.append('-')
end_row_flag = 'True'
if end_row_flag == 'True':
end_row = row
if c % (col_count-1) == 0:
column.append('|')
else:
column.append(' ')
if row:
print (row)
print (column)
print (end_row)
def charASCII(letter):
return (ord(letter))
# Convert an integer to a single hex digit in a character
def toHexChar(hexValue):
if 0 <= hexValue <= 9:
return chr(hexValue + ord('0'))
else: # 10 <= hexValue <= 15
return chr(hexValue - 10 + ord('A'))
def main():
# Prompt the user to enter a decimal integer
data_file = []
char_file = []
ascii_file = []
hex_key = []
decimal_key = []
nonkey_val = 32
data_file.append(' Dec Hex Char ')
data_file.append('+---------------+')
for i in range(nonkey_val):
a_char = chr(i)
hex_convert = decimalToHex(i)
if i < 10:
decimal_key = '0%s' % i
else:
decimal_key = '%s' % i
if i <= 15:
hex_key = '0%s' % hex_convert
else:
hex_key = hex_convert
data_file.append('| %s %s %s |' % (decimal_key.strip(), hex_key.strip(), a_char))
# data_file.append('%s' % (a_char))
with open ('sample_file.txt', 'r') as f:
data = f.readlines()
for character in data:
print ('character is %s' % character)
decimalValue = charASCII(character[0])
hex_convert = decimalToHex(decimalValue)
print ('decimalValue is %s' % decimalValue)
print ('The hex number for decimal %s is %s' % (decimalValue, hex_convert)
data_file.append('| %s %s %s |' % (decimalValue, hex_convert.strip(), character.strip())))
data_file.append('+---------------+')
print data_file
f.close()
with open ('output_file.txt', 'w+') as o:
for line in data_file:
o.write('%s\n'% line)
o.close
main() # Call the main function
rows = input("Enter the numer of rows: ")
columns = input("Enter the number of columns: ")
printRect (rows, columns)

You had three typos which if you provided the traceback and the relevant piece of code it happened in would have made it more obvious.
with open ('sample_file.txt', 'r') as f:
data = f.readlines()
for character in data:
print ('character is %s' % character)
decimalValue = charASCII(character[0])
hex_convert = decimalToHex(decimalValue)
print ('decimalValue is %s' % decimalValue)
print ('The hex number for decimal %s is %s' % (decimalValue, hex_convert) )# missing paren here
data_file.append('| %s %s %s |' % (decimalValue, hex_convert.strip(), character.strip())) # had extra paren here
data_file.append('+---------------+')
print(data_file ) # missing parens
with closes your files so you don't need to manually but if you were it would be o.close() not o.close
I would also you an actual boolean not a string "True":
end_row_flag = True
if end_row_flag:

Related

Retrieve amino acids from fasta file and include its overall percentage for only the top 5 results

I want to display the amino acid, count and percentage for the top 5 but I can only manage the count and the amino acid with the script described below. How could I update this?
L: 139002 (10.7%)
A: 123885 (9.6%)
G: 95475 (7.4%)
V: 91683 (7.1%)
I: 77836 (6.0%)
#!/usr/bin/env python3
def FASTA(filename):
try:
f = open(filename)
except IOError:
print ("The file, %s, does not exist" % filename)
return
order = []
sequences = {}
counts = {}
for line in f:
if line.startswith('>'):
name = line[1:].rstrip('\n')
# name = name.replace('_', ' ')
order.append(name)
sequences[name] = ''
else:
sequences[name] += line.rstrip('\n').rstrip('*')
for aa in sequences[name]:
if aa in counts:
counts[aa] = counts[aa] + 1
else:
counts[aa] = 1
print ("%d sequences found" % len(order))
print (counts)
return (order, sequences)
x, y = FASTA("/filepath")

Python read data in as binary

I am wanting to read in the logData as binary and then parse the binary output in the second for loop as it is for a string but for binary. Is this possible?
logData = open(sys.argv[1]).readlines()
processedSources = sys.stdin.readlines()
stringDictionary = {}
for line in processedSources:
# Match data looking for MODULE_ID, LOG_LINE, ARG_COUNT, FILE_NAME, DATA_STRING
match = re.search("(\d+),\s+(\d+),\s+(\d+),\s+(.*),\s+(\".*\")", line)
if match:
moduleId = int(match.group(1))
logLine = int(match.group(2))
argCount = int(match.group(3))
fileName = match.group(4)
outputString = match.group(5)
stringDictionary[(moduleId, logLine)] = [ moduleId, logLine, argCount, fileName, outputString ]
else:
print "Failed string dictionary on: " + line
for line in logData:
# Match data looking for MODULE_ID, LOG_LINE, ARG_COUNT, ARGUMENTS
matchLogData = re.split("\s+", line)
if matchLogData:
moduleId = int(matchLogData[0], 16)
logLine = int(matchLogData[1], 16)
argCount = int(matchLogData[2], 16)
if stringDictionary[(moduleId, logLine)]:
processedData = stringDictionary[(moduleId, logLine)]
if argCount != processedData[2]:
print "Argument count mismatch on : " + line
print " expected %d found %d" % (argCount, processedData[2])
else:
index = 0
logString = "%02x:%4d:%s:" + processedData[4]
logData = (processedData[0], processedData[1], processedData[3])
while index < argCount:
logData = logData + (int(matchLogData[index+3], 16),)
index = index + 1
print logString % logData
else:
print "ModuleId:%d Line:%d, not found in source dictionary" % (moduleId, logLine)
print " Line data: " + line
else:
print "Expected log input data mismatch MODULE_ID LOG_LINE ARG_COUNT ARGS"
print "Line: " + line

Parsing a big text file, extract data & store it in a CSV file.. Too Slow

I have a big log file (say 1-3 Gb) which I need to parse, extract data & save it in a CSV file.
Text File Data
* D:40035FC8 wr-long 00000008 \\core0\Global\u4TimeHiCnt 1.000us
* D:40027C5C rd-byte 00 *core0\Global\Ypf_OILL_OilLvlOn 20.342us
* D:40010044 rd-word 0FE2 *l\u2SAD_OILLVS_RecoveryCounter 0.160us
* D:40010044 wr-word 0FE1 *l\u2SAD_OILLVS_RecoveryCounter 0.040us
* D:40035FC8 wr-long 00000008 \\core0\Global\u4TimeHiCnt 1.000us
I have to extract the variable name which is after the last \ and then the number of Read & Write along with the datatype & store it in a CSV file.
CSV File Result
Variable Datatype CORE 0 CORE 1 CORE X
Read Write Read Write Read Write
OS_inKernel byte 0 0 111768 111878 0 0
OS_globalIntLevel long 0 0 281604 237901 0 0
The problem is it takes too much time. Can you pls look in to the attached code & suggest ways to make it faster.
import string
import sys
import time
MyFile = open("C:\\Users\\AEC_FULL\\Saravanan\\Workspace\\Trace32Log_Parser\\core1_sram_ReadWrite.txt")#core0_sram_ReadWrite_rawdata
GeneratedFile = open(str(("C:\\Users\\AEC_FULL\\Saravanan\\Workspace\\Trace32Log_Parser\\")+'ParsedOutput.csv'),'w')
try:
MyVariableList = []
TimeStartTest = time.time() #Starting Time
GeneratedFile.write('\nVariable')
GeneratedFile.write(', Datatype')
GeneratedFile.write(', CORE 0')
GeneratedFile.write(',, CORE 1')
GeneratedFile.write(',, CORE X')
GeneratedFile.write('\n,, Read ')
GeneratedFile.write(', Write ')
GeneratedFile.write(', Read ')
GeneratedFile.write(', Write ')
GeneratedFile.write(', Read ')
GeneratedFile.write(', Write ')
GeneratedFile.write('\n')
for CurrentLine in MyFile:
NoofSpaces = 0
if CurrentLine.find('\\') != -1:
MyVariable = CurrentLine[CurrentLine.rfind('\\')+1:].split(' ')[0]
elif CurrentLine.find('*\\') != -1:
MyVariable = CurrentLine[CurrentLine.rfind('*\\')+1:].split(' ')[0]
elif CurrentLine.find('*') != -1:
MyVariable = CurrentLine[CurrentLine.rfind('*')+1:].split(' ')[0]
VariableFound = 0
MyVariableList.sort()
Lowerbound = 0
Upperbound = len(MyVariableList)-1
while Lowerbound <= Upperbound and VariableFound == 0:
middle_pos = (Lowerbound+Upperbound) // 2
if MyVariableList[middle_pos] < MyVariable:
Lowerbound = middle_pos + 1
elif MyVariableList[middle_pos] > MyVariable:
Upperbound = middle_pos - 1
else:
VariableFound = 1
if VariableFound == 0:
MyVariableList.append(MyVariable)
try:
MyFile1 = open("C:\\Users\\AEC_FULL\\Saravanan\\Workspace\\Trace32Log_Parser\\core1_sram_ReadWrite.txt")#core0_sram_ReadWrite_rawdata
Core0_ReadCount = 0
Core0_WriteCount = 0
Core1_ReadCount = 0
Core1_WriteCount = 0
CoreX_ReadCount = 0
CoreX_WriteCount = 0
for CurrentLine1 in MyFile1:
if CurrentLine1.find(MyVariable) != -1:
## CORE 0 ##
if CurrentLine1.find("0\\Global") != -1:
DataType = CurrentLine1.split(' ')[0].split('-')[1]
DataOperation = CurrentLine1.split(' ')[0].split('-')[0].split(' ')[-1]
if DataOperation == 'rd':
Core0_ReadCount = Core0_ReadCount + 1
elif DataOperation == 'wr':
Core0_WriteCount = Core0_WriteCount + 1
## CORE 1 ##
elif CurrentLine1.find("1\\Global") != -1:
DataType = CurrentLine1.split(' ')[0].split('-')[1]
DataOperation = CurrentLine1.split(' ')[0].split('-')[0].split(' ')[-1]
if DataOperation == 'rd':
Core1_ReadCount = Core1_ReadCount + 1
elif DataOperation == 'wr':
Core1_WriteCount = Core1_WriteCount + 1
## CORE X ##
else:
DataType = CurrentLine1.split(' ')[0].split('-')[1]
DataOperation = CurrentLine1.split(' ')[0].split('-')[0].split(' ')[-1]
if DataOperation == 'rd':
CoreX_ReadCount = CoreX_ReadCount + 1
elif DataOperation == 'wr':
CoreX_WriteCount = CoreX_WriteCount + 1
GeneratedFile.write('\n %s' %MyVariable)
GeneratedFile.write(', %s' %DataType)
GeneratedFile.write(', %d' %Core0_ReadCount)
GeneratedFile.write(', %d' %Core0_WriteCount)
GeneratedFile.write(', %d' %Core1_ReadCount)
GeneratedFile.write(', %d' %Core1_WriteCount)
GeneratedFile.write(', %d' %CoreX_ReadCount)
GeneratedFile.write(', %d' %CoreX_WriteCount)
GeneratedFile.write('\n')
finally:
MyFile1.close()
except:
print sys.exc_info()
finally:
GeneratedFile.close()
MyFile.close()
TimeStopTest = time.time()
print str(int((TimeStopTest - TimeStartTest)/60))
You'd better use with statement, like this:
# if this file is line based
with open('test.txt') as f:
for line in f:
# process line, do something with line

Python image processing

I get that there isnt enough arguments to format string, any suggestions? It is in the else f.write line (52). This is taking the average pixel value and replacing it with a second image. As you can see the file reads both images and writes them. During the write process I get an error saying that there isn't enough arguments to format the string.
def main():
f = open("BoardHappy.ppm","r")
lines = f.readlines()
kinds = lines[0]
size=lines[1].split()
width=int(size[0])
height=int(size[1])
depth = int(lines[2])
red=[]
green=[]
blue=[]
for i in range(3, len(lines)):
temp = lines[i].split()
for j in range(0, len(temp) ,3):
red.append(int(temp[j]))
green.append(int(temp[j+1]))
blue.append(int(temp[j+2]))
f.close()
f = open("Godzilla.ppm","r")
lines = f.readlines()
kinds = lines[0]
size=lines[1].split()
width=int(size[0])
height=int(size[1])
depth = int(lines[2])
red1=[]
green1=[]
blue1=[]
for i in range(3, len(lines)):
temp = lines[i].split()
for j in range(0, len(temp), 3):
red1.append(int(temp[j]))
green1.append(int(temp[j+1]))
blue1.append(int(temp[j+2]))
f.close()
f = open("lab5.ppm","w")
f.write("P3 \n")
width=720
height=540
f.write("%d %d \n"%(width,height))
f.write("255 \n")
for i in range(width*height):
new = (red[i]+green[i]+blue[i])/3.0
if new >=100:
f.write("%d %d %d/n"% red[i], green[i],blue[i])
else:
f.write("%d %d %d/n"% red1[i], green1[i], blue1[i])
f.close()
main()
You need brackets:
f.write("%d %d %d/n"% (red[i], green[i],blue[i]))

Python library for converting plain text (ASCII) into GSM 7-bit character set?

Is there a python library for encoding ascii data to 7-bit GSM character set (for sending SMS)?
There is now :)
Thanks to Chad for pointing out that this wasn't quite right
Python2 version
# -*- coding: utf8 -*-
gsm = (u"#£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>"
u"?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà")
ext = (u"````````````````````^```````````````````{}`````\\````````````[~]`"
u"|````````````````````````````````````€``````````````````````````")
def gsm_encode(plaintext):
res = ""
for c in plaintext:
idx = gsm.find(c)
if idx != -1:
res += chr(idx)
continue
idx = ext.find(c)
if idx != -1:
res += chr(27) + chr(idx)
return res.encode('hex')
print gsm_encode(u"Hello World")
The output is hex. Obviously you can skip that if you want the binary stream
Python3 version
# -*- coding: utf8 -*-
import binascii
gsm = ("#£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>?"
"¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà")
ext = ("````````````````````^```````````````````{}`````\\````````````[~]`"
"|````````````````````````````````````€``````````````````````````")
def gsm_encode(plaintext):
res = ""
for c in plaintext:
idx = gsm.find(c);
if idx != -1:
res += chr(idx)
continue
idx = ext.find(c)
if idx != -1:
res += chr(27) + chr(idx)
return binascii.b2a_hex(res.encode('utf-8'))
print(gsm_encode("Hello World"))
I got tips from gnibbler's answer. Here is a script I somehow made up after looking at an online converter: http://smstools3.kekekasvi.com/topic.php?id=288, and it works correctly for me. Both encoding and decoding.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
gsm = (u"#£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>"
u"?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑÜ`¿abcdefghijklmnopqrstuvwxyzäöñüà")
ext = (u"````````````````````^```````````````````{}`````\\````````````[~]`"
u"|````````````````````````````````````€``````````````````````````")
def get_encode(currentByte, index, bitRightCount, position, nextPosition, leftShiftCount, bytesLength, bytes):
if index < 8:
byte = currentByte >> bitRightCount
if nextPosition < bytesLength:
idx2 = bytes[nextPosition]
byte = byte | ((idx2) << leftShiftCount)
byte = byte & 0x000000FF
else:
byte = byte & 0x000000FF
return chr(byte).encode('hex').upper()
return ''
def getBytes(plaintext):
if type(plaintext) != str:
plaintext = str(plaintext)
bytes = []
for c in plaintext.decode('utf-8'):
idx = gsm.find(c)
if idx != -1:
bytes.append(idx)
else:
idx = ext.find(c)
if idx != -1:
bytes.append(27)
bytes.append(idx)
return bytes
def gsm_encode(plaintext):
res = ""
f = -1
t = 0
bytes = getBytes(plaintext)
bytesLength = len(bytes)
for b in bytes:
f = f+1
t = (f%8)+1
res += get_encode(b, t, t-1, f, f+1, 8-t, bytesLength, bytes)
return res
def chunks(l, n):
if n < 1:
n = 1
return [l[i:i + n] for i in range(0, len(l), n)]
def gsm_decode(codedtext):
hexparts = chunks(codedtext, 2)
number = 0
bitcount = 0
output = ''
found_external = False
for byte in hexparts:
byte = int(byte, 16);
# add data on to the end
number = number + (byte << bitcount)
# increase the counter
bitcount = bitcount + 1
# output the first 7 bits
if number % 128 == 27:
'''skip'''
found_external = True
else:
if found_external == True:
character = ext[number % 128]
found_external = False
else:
character = gsm[number % 128]
output = output + character
# then throw them away
number = number >> 7
# every 7th letter you have an extra one in the buffer
if bitcount == 7:
if number % 128 == 27:
'''skip'''
found_external = True
else:
if found_external == True:
character = ext[number % 128]
found_external = False
else:
character = gsm[number % 128]
output = output + character
bitcount = 0
number = 0
return output
I could not find any library. But I think this should not need a library. Its somewhat easy to do.
Here is Jon Skeet himself on the same topic.
Example:
s = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
def ascii_to_gsm(ch):
return bin(65 + s.index(ch))
print ascii_to_gsm('A')
print '--'
binary_stream = ''.join([str(ascii_to_gsm(ch))[2:] for ch in s])
print binary_stream
You can also use dict to store mapping between ASCII and GSM 7-bit character set.
I faced a similar issue recently where we were getting gsm7bit decoded text messages, mostly for Verizon carrier with Spanish characters, from the aggregator and we were not able to decode it successfully.
Here is the one I created with the help of other answers in the forum. This is for Python 2.7.x.
def gsm7bitdecode(text):
gsm = (u"#£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>"
u"?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑÜ`¿abcdefghijklmnopqrstuvwxyzäöñüà")
ext = (u"````````````````````^```````````````````{}`````\\````````````[~]`"
u"|````````````````````````````````````€``````````````````````````")
text = ''.join(["{0:08b}".format(int(text[i:i+2], 16)) for i in range(0, len(text), 2)][::-1])
text = [(int(text[::-1][i:i+7][::-1], 2)) for i in range(0, len(text), 7)]
text = text[:len(text)-1] if text[-1] == 0 else text
text =iter(text)
result = []
for i in text:
if i == 27:
i = next(text)
result.append(ext[i])
else:
result.append(gsm[i])
return "".join(result).rstrip()
All the above solutions are not correct. A GSM 03.38 encoding is using only 7 bits for a character and all above solutions are using byte aligned output, which is identical to ASCII in most cases as the result. Here is a proper solution using a bit string.
I'm using the Python the additional module:
pip3 install gsm0338
gsmencode.py:
import sys
import gsm0338
def __create_septets__(octets: bytes) -> (bytes, int):
num_bits = 0
data = 0
septets = bytearray()
for i in range(len(octets)):
gsm_char = octets[i]
data |= (gsm_char << num_bits)
num_bits += 7
while num_bits >= 8:
septets.append(data & 0xff)
data >>= 8
num_bits -= 8
if num_bits > 0:
septets.append(data & 0xff)
return bytes(septets), len(octets) % 8
if __name__ == '__main__':
octets = sys.argv[1].encode('gsm03.38')
septets, sparse = __create_septets__(octets)
print("sparse bits: %d" % sparse)
print("encoded (hex): %s" % septets.hex())
python3 gsmencode.py Sample
Output:
sparse bits: 6
encoded (hex): d3701bce2e03

Categories