Read multiple files and compare each with a fixed file - Python

I have 50 files in a directory that are supposed to be compared with one file, e.g., original.txt. I have the following code. It works well when I give the file names one by one, manually. I want to automate it, and for this I used glob.glob:
folder = "files/"
path = '*.rbd'
path = folder + path
files=sorted(glob.glob(path))
Here is the complete code:
import glob
from itertools import islice
import linecache

num_lines_nonbram = 1891427
bits_perline = 32
total_bit_flips = 0
num_bit_diff_flip_zero = 0
num_bit_diff_flip_ones = 0

folder = "files/"
path = '*.rbd'
path = folder + path
files = sorted(glob.glob(path))

original = open('files/mull-original-readback.rbd', 'r')
#source1 = open(file1, "r")

for filename in files:
    del_lines = 101
    with open(filename, 'r') as f:
        i = 1
        while i <= del_lines:
            line1 = f.readline()
            lineoriginal = original.readline()
            i += 1
        i = 0
        num_bit_diff_flip_zero = 0
        num_bit_diff_flip_ones = 0
        num_lines_diff = 0
        i = 0
        j = 0
        k = 0
        a_write2 = ""
        while i < (num_lines_nonbram - del_lines):
            line1 = f.readline()
            lineoriginal = original.readline()
            while k < bits_perline:
                if ((lineoriginal[k] == line1[k])):
                    a_write2 += " "
                else:
                    if (lineoriginal[k] == "0"):
                        #if ((line1[k]=="0" and line1[k]=="1")):
                        num_bit_diff_flip_zero += 1
                    if (lineoriginal[k] == "1"):
                        #if ((line1[k]=="0" and line1[k]=="1")):
                        num_bit_diff_flip_ones += 1
                    #if ((line1[k]==1 and line1[k]==0)):
                        #a_write_file2 = str(i+1) + " " + str(31-k) + "\n" + a_write_file2
                        #a_write2 += "^"
                        #num_bit_diff_flip_one += 1
                    # else:
                    #     a_write2 += " "
                k += 1
            total_bit_flips = num_bit_diff_flip_zero + num_bit_diff_flip_ones
            i += 1
            k = 0
        i = 0
        print files
        print "Number of bits flip zero= %d" % num_bit_diff_flip_zero + "\n" + "Number of bits flip one= %d" % num_bit_diff_flip_ones + "\n" "Total bit flips = %d " % total_bit_flips
    f.close()
original.close()
I got this error:
Traceback (most recent call last):
  File "random-ones-zeros.py", line 65, in <module>
    if ((lineoriginal[k] == line1[k])):
IndexError: string index out of range
I guess there is some issue with reading the files automatically instead of giving the names manually, but I wasn't able to find the solution.

The string index goes out of range because original is opened once, outside the for filename in files: loop, so its read position is never rewound: after the first .rbd file has been compared, the whole original file has already been consumed, original.readline() starts returning empty strings, and indexing lineoriginal[k] then fails. Rewind the original file with original.seek(0) (or simply reopen it) at the start of each iteration of the loop.
Hope this helps, but I can't access Python right now so I can't test it out :-)
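A minimal sketch of that per-file rewind, assuming the comparison logic inside the loop stays exactly as in the question:
import glob

folder = "files/"
files = sorted(glob.glob(folder + '*.rbd'))

with open('files/mull-original-readback.rbd', 'r') as original:
    for filename in files:
        original.seek(0)                 # rewind the reference file for each new .rbd
        with open(filename, 'r') as f:
            # skip the first del_lines lines of both files, then compare the
            # remaining lines bit by bit exactly as in the question's loops
            pass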

Related

Python Error: IndexError wordLength = len(wordList[stringInc])

Writing a Python script to parse an incoming string from a text file and output it word by word to serial (serial parts commented out).
Getting the following error:
Traceback (most recent call last):
  File "/Users/di/Desktop/RBDS/rbdsScroll.py", line 23, in <module>
    wordLength = len(wordList[stringInc])
IndexError: list index out of range
I know it has to do with the new list (when the contents of the text file have far fewer words than the previous list) not having a high enough index number to work. I'm not quite sure how to go about remedying this. Any help would be appreciated. Full code below:
import time
#import serial
from config import dataPath
from config import scrollTime
from config import preText
from config import postText
from config import port

stringInc = wordFirst = wordLast = wordList = wordLength = word2Length = 0

while True:
    f = open(dataPath, 'r')
    file_contents = f.read()
    f.close()
    wordList = file_contents.split()
    maxLength = len(wordList)
    wordLength = len(wordList[stringInc])
    if stringInc < maxLength - 1:
        word2Length = len(wordList[stringInc + 1])
    wordFirst = 0
    wordLast = 8
    if wordLength > 8:
        longString = (wordList[stringInc])
        while wordLength + 1 > wordLast:
            # ser = serial.Serial(port)
            # ser.write(preText + longString[wordFirst:wordLast] + postText)
            # ser.close()
            print(preText + longString[wordFirst:wordLast] + postText)
            wordFirst = wordFirst + 1
            wordLast = wordLast + 1
            time.sleep(scrollTime / 1000)
    elif (wordLength + word2Length < 8) and (stringInc + 1 < maxLength):
        # ser = serial.Serial(port)
        # ser.write(preText + wordList[stringInc] + " " + wordList[stringInc + 1] + postText)
        # ser.close()
        print(preText + wordList[stringInc] + " " + wordList[stringInc + 1] + postText)
        stringInc = stringInc + 1
        time.sleep(scrollTime / 1000)
    else:
        # ser = serial.Serial(port)
        # ser.write(preText + wordList[stringInc] + postText)
        # ser.close()
        print(preText + wordList[stringInc] + postText)
        time.sleep(scrollTime / 1000)
    stringInc = stringInc + 1
    if stringInc == maxLength:
        stringInc = 0
If the file content is fixed, you should read it only once and move the while just before setting wordLength; that sends all the read words one after the other.
In case the file content changes, which would explain the error: if the new content is shorter than the current value of stringInc, you lower maxLength (just after reading the file) but never force stringInc back below this new value, hence the error message.
Adding a check like if stringInc >= maxLength: right after changing maxLength should correct the code (in that case set stringInc to 0, I guess).
But the behavior is still strange, and I would rather send all the words of the file after reading it once (not reading it inside the while), and only read the file again later, to re-send it only if it was modified.
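A minimal sketch of where that check could go, assuming the same dataPath and scrollTime values imported from the question's config module; the scrolling logic itself is left out:
import time
from config import dataPath, scrollTime  # same config module as in the question

stringInc = 0
while True:
    with open(dataPath, 'r') as f:
        wordList = f.read().split()
    maxLength = len(wordList)
    if maxLength == 0:          # empty file: nothing to scroll yet
        time.sleep(scrollTime / 1000)
        continue
    if stringInc >= maxLength:  # file shrank since the last pass: wrap around
        stringInc = 0
    wordLength = len(wordList[stringInc])
    # ... scrolling / printing logic from the question goes here ...
    stringInc = stringInc + 1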

Python for-in-for loop: FileExistsError

I am writing a program that encrypts files and decrypts them with a key. Code:
import random, os, time

#Collecting all files
pad = r'C:\Users\Hugo\Desktop\Malware\Test'
bestand_list = []
for files in next(os.walk(pad))[2]:
    bestand_list.append(pad + '\\' + files)

#Random 10 characters making encryption
s = 'abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!##$%^&'
sleutel = ''
for i in range(10):
    sleutel += random.choice(s)
print(sleutel)
print('The computer is being encrypted')
time.sleep(1)

def Decrypt():
    x = 0
    decryptie = input('Enter your decryption code: \n')
    if (decryptie == sleutel):
        for bestand in bestand_list:
            for bestand_naam in bestandnaam_list:
                locatie = bestand_list[x]
                os.rename(bestand_naam, locatie)
                x += 1
        print ('Your file will now be decrypted')
    else:
        print ('Unfortunately your file was not decrypted')

def Encrypt():
    x = 0
    global bestandnaam_list
    bestandnaam_list = []
    for bestand in bestand_list:
        bestand_naam = ''
        locatie = bestand_list[x]
        for i in range(10):
            bestand_naam += random.choice(s)
        bestandnaam_list.append(pad + '\\' + bestand_naam)
        os.rename(locatie, pad + '\\' + bestand_naam)
        x += 1
    print('Your files are now encrypted')
    Decrypt()

Encrypt()
The problem I am having is that, during decryption, the second file gets renamed to the first file. This results in a FileExistsError. There is probably something wrong with the
x += 1
in def Decrypt().
How can I fix this issue?
Edit: Full traceback:
Traceback (most recent call last):
  File "crypto.py", line 29, in Decrypt
    os.rename(bestand_naam, locatie)
FileExistsError: [WinError 183] Cannot make a file that already exists.
You had multiple problems in your code.
def Decrypt():
    x = 0
    decryptie = input('Enter your decryption code: \n')
    if (decryptie == sleutel):
        for bestand in bestand_list:
            os.rename(bestandnaam_list[x], bestand)
            x += 1
        print ('Your file will now be decrypted')
    else:
        print ('Unfortunately your file was not decrypted')
This should do the trick.
Btw, when you submit your code, please DO PRINT what you are doing, for you and for us, and DO use English variable names, since I have no clue what sleutel and the other variable names mean.
You were doing 2 for-loops, which passed wrong and multiple filenames/paths to os.rename.
Forgot to add this: in def Encrypt you call Decrypt directly; move that call to the end of the file.
Like so:
Encrypt()
Decrypt()
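For what it's worth, here is a minimal alternative sketch (not the question's code; the function names are made up) that keeps a dict from scrambled name to original name, so no manual x index is needed and each rename has exactly one target:
import os
import random
import string

def encrypt_names(paths):
    """Rename each file to a random 10-character name and remember the mapping."""
    mapping = {}
    for original in paths:
        scrambled = os.path.join(
            os.path.dirname(original),
            ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)))
        os.rename(original, scrambled)
        mapping[scrambled] = original
    return mapping

def decrypt_names(mapping):
    """Undo the renames using the stored mapping, one pair at a time."""
    for scrambled, original in mapping.items():
        os.rename(scrambled, original)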

Python coding for opening and saving data to a file

I am having an issue getting the train function to work correctly in Python. I cannot modify the def functions. I am at the point where I need the second file, PosList, to be read one line at a time, and I need to match the value of movieWordCount[z] against it. If the word is there, I need to increment column 2 of that line (segmented by a space) by one. If it is not, I need the else branch to append it to the end of the file. It does not work: it does not append the values when they are missing, and I am not sure it will find the value when it is there. I have been stuck getting this to work for two days.
Here is my code segment I am working with:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
#Now use tokenize to split it apart by space and set to new array for me to call column2
else:
print "not found"
lines.append(movieWordCount[z] + " 1" + "\n")
Here is my full code:
#!/usr/bin/python
#Import Counter
import collections
from collections import Counter
#Was already here but pickle is used for data input and export
import math, os, pickle, re

class Bayes_Classifier:
    def __init__(self, trainDirectory = "movie_reviews/"):
        #If file listing exists skip to train
        if os.path.isfile('iFileList'):
            print "file found"
            self.train()
            #self.classify()
        #If file listing does not exist skip to train
        if not os.path.isfile('iFileList'):
            print "no file"
            newfile = 'iFileList'
            tempList = set()
            subDir = './movie_reviews'
            for filenames in os.listdir(subDir):
                my_sub_path = os.path.join(os.sep, subDir, filenames)
                tempList.add(filenames)
            self.save("filenames", "try3")
            f = []
            for fFileObj in os.walk("movie_reviews/"):
                f.extend(fFileObj)
                break
            pickle.dump(f, open("save.p", "wb"))
            self.save(f, "try4")
            with open(newfile, 'wb') as fi:
                pickle.dump(tempList, fi)
            #print tempList
            self.train()
            #self.classify()

    def train(self):
        '''Trains the Naive Bayes Sentiment Classifier.'''
        print "File ready for training"
        #Open iFileList to use as input for opening movie files
        x = 0
        OpenIFileList = open('iFileList', 'r')
        print "iFileList now Open"
        #Loop through the file
        for line in OpenIFileList:
            #print "Ready to read lines"
            #print "reading line " + line
            if x > 4:
                if x % 2 == 0:
                    #print line
                    s = line
                    if '-' in s:
                        comp = s.split("'")
                        #print comp[2]
                        print comp[1] #This is what you need for the movie file
                        compValue1 = comp[1]
                        #Determine Positive/Negative.
                        #compType is the variable I am storing it to.
                        compType = compValue1.split("-", 2)[1]
                        #print compType #Prints that middle value like 5 or 1
                        #This will do the work based on the value.
                        if compType == '5':
                            #print "you have a five" #Confirms the loop I am in.
                            #If file does not exist create it
                            if not os.path.exists('PosList'):
                                print "no file"
                                file('PosList', 'w').close()
                            #Open file that needs to be reviewed for word count
                            compValue2 = "movie_reviews/" + compValue1
                            print compValue2 #Prints the directory and file path
                            OpenMovieList = open(compValue2, 'r')
                            for commentLine in OpenMovieList:
                                commentPositive = commentLine.split(" ")
                                commentPositiveCounter = Counter(commentPositive)
                                #print commentPositiveCounter # "Comment Pos goes here"
                                #if commentLine != '' or commentLine != ' ':
                                #Get first word, second word, ....
                                if commentLine and (not commentLine.isspace()):
                                    movieWordCount = self.tokenize(commentLine)
                                    y = len(movieWordCount) #determines length of string
                                    print y
                                    z = 0
                                    #print movieWordCount[0] # Shows the zero position in the file.
                                    while z < y:
                                        print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id
                                        with open("PosList") as OpenPos:
                                            lines = OpenPos.readlines()
                                            print lines
                                            if movieWordCount[z] in lines:
                                                print "found"
                                            else:
                                                print "not found"
                                                lines.append(movieWordCount)
                                        z = z + 1
                            #Close the files
                            OpenMovieList.close()
                            OpenPos.close()
            x += 1
            #for line2 in OpenIFileList.readlines():
            #for line in open('myfile','r').readlines():
            #    do_something(line)
        #Save results
        #Close the File List
        OpenIFileList.close()

    def loadFile(self, sFilename):
        '''Given a file name, return the contents of the file as a string.'''
        f = open(sFilename, "r")
        sTxt = f.read()
        f.close()
        return sTxt

    def save(self, dObj, sFilename):
        '''Given an object and a file name, write the object to the file using pickle.'''
        f = open(sFilename, "w")
        p = pickle.Pickler(f)
        p.dump(dObj)
        f.close()

    def load(self, sFilename):
        '''Given a file name, load and return the object stored in the file.'''
        f = open(sFilename, "r")
        u = pickle.Unpickler(f)
        dObj = u.load()
        f.close()
        return dObj

    def tokenize(self, sText):
        '''Given a string of text sText, returns a list of the individual tokens that
        occur in that string (in order).'''
        lTokens = []
        sToken = ""
        for c in sText:
            if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-':
                sToken += c
            else:
                if sToken != "":
                    lTokens.append(sToken)
                    sToken = ""
                if c.strip() != "":
                    lTokens.append(str(c.strip()))
        if sToken != "":
            lTokens.append(sToken)
        return lTokens
To open a file for writing, you can use
with open('PosList', 'w') as OpenPos:
As you are using the with form, you do not need to close the file; Python will do that for you at the end of the with-block.
So, assuming that the way you add data to the lines variable is correct, you could remove the superfluous OpenPos.close() and append two lines to your code:
with open("PosList") as OpenPos:
    lines = OpenPos.readlines()
    print lines
    if movieWordCount[z] in lines:
        print "found"
    else:
        print "not found"
        lines.append(movieWordCount)
with open("PosList", "w") as OpenPos:
    OpenPos.writelines(lines)
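If it helps, here is a minimal sketch of the find-or-increment step the question describes, assuming PosList holds one "word count" pair per line; the helper name update_pos_list is made up for illustration:
def update_pos_list(word, path="PosList"):
    """Increment the count stored next to word in path, or append 'word 1' if absent."""
    with open(path) as f:
        lines = f.readlines()
    found = False
    for i, line in enumerate(lines):
        parts = line.split()
        if parts and parts[0] == word:
            lines[i] = "%s %d\n" % (word, int(parts[1]) + 1)
            found = True
            break
    if not found:
        lines.append("%s 1\n" % word)
    with open(path, "w") as f:
        f.writelines(lines)
Calling update_pos_list(movieWordCount[z]) inside the while z < y loop would then replace the whole with open("PosList") block.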

Check which label or name a part (from krn) has in music21

I want to extract 2 parts from a 4-voice krn score and save them as a MIDI file.
I can load the files:
s = converter.parse('/something.krn')
I can get some basic info like this:
s.metadata.title
In v2, I want to store the part of s that has the label "Cantus". Any idea how to check for a label? The parts have a label in the krn file.
Once I have the number of the part, I can get it with
s.parts[i]
The krn file is defined like this:
**kern **kern **kern **kern **kern
*Ibass *Itenor *Itenor *Icalto *Icant
!Bassus !Tenor 2 !Tenor 1 !Altus !Cantus
I am guessing "label" is not the correct term, as I can't find it in the music21 documentation; perhaps it is the name of the part? Either way, I can't seem to find the right property in the music21 documentation.
I was finally able to do it this way:
import sys
from music21 import *
import os

# input("Please make sure that you have placed all the krn files in a subdirectory called data. Press enter to continue")
for filename in os.listdir('./data'):
    s = converter.parse('./data/' + filename)
    sys.stdout.write('Processing ' + filename + '... ')
    numcant = -1
    nums = list()
    try:
        length = len(s.parts)
    except:
        length = 0
    if (length > 0):
        for num in range(0, length):
            # sys.stdout.write(s.parts[num].flat.getElementsByClass('SpineComment')[0].comment + ' - ')
            if (s.parts[num].flat.getElementsByClass('SpineComment')[0].comment == "Cantus"):
                numcant = num
                # print "cant "
                # print numcant
            else:
                # print "nums"
                nums.append(num)
                # print num
    else:
        # sys.stdout.write(' - no parts present.')
        sys.stdout.write('\n')
    try:
        length = len(nums)
    except:
        length = 0
    if (length > 0):
        sys.stdout.write('\n')
        if (numcant != -1):
            for num in nums:
                sys.stdout.write(' - ' + filename[:-4] + '_' + str(num) + '.mid written.\n')
                # print "cantus present"
                s2 = stream.Stream()
                s2.insert(0, s.parts[num])
                s2.insert(0, s.parts[numcant])
                # write the midi file
                s2.write('midi', './midi/' + filename[:-4] + '_' + str(num) + '.mid')
                # sys.stdout.write('I')
        else:
            sys.stdout.write(' - no cantus specified for this file.\n')
    else:
        sys.stdout.write(' - not enough parts in this file.\n')
    sys.stdout.write('\n')
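For reference, the label check on its own boils down to something like this (a sketch based on the same SpineComment lookup used in the script above; it assumes the first spine comment of each part carries the voice name, and the path is a placeholder):
from music21 import converter

def find_part_index(score, label="Cantus"):
    """Return the index of the part whose first spine comment equals label, or -1."""
    for i, part in enumerate(score.parts):
        comments = part.flat.getElementsByClass('SpineComment')
        if len(comments) > 0 and comments[0].comment == label:
            return i
    return -1

s = converter.parse('./data/something.krn')  # placeholder path
print(find_part_index(s, "Cantus"))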

Cutting character values according to value from file

This is what I am doing:
import csv

output = open('output.txt', 'wb')

# this functions return the min for num.txt
def get_min(num):
    return int(open('%s.txt' % num, 'r+').readlines()[0])

# temporary variables
last_line = ''
input_list = []

#iterate over input.txt in sort the input in a list of tuples
for i, line in enumerate(open('input.txt', 'r+').readlines()):
    if i % 2 == 0:
        last_line = line
    else:
        input_list.append((last_line, line))

filtered = [(header, data[:get_min(header[-2])] + '\n') for (header, data) in input_list]
[output.write(''.join(data)) for data in filtered]
output.close()
In this code input.txt is something like this
>012|013|0|3|M
AFDSFASDFASDFA
>005|5|67|0|6
ACCTCTGACC
>029|032|4|5|S
GGCAGGGAGCAGGCCTGTA
and num.txt is something like this
M 4
P 10
I want to check, for each record in the input.txt above, the value from num.txt by looking at the record's last column (which is the same letter as in num.txt), and cut its characters according to that value.
I think the problem with my code is that it only accepts a text file containing integers, whereas it should also accept a file that contains letters.
The totally revised version, after a long chat with the OP;
import os
import re

# Fetch all hashes and counts
file_c = open('num.txt')
file_c = file_c.read()
lines = re.findall(r'\w+\.txt \d+', file_c)

numbers = {}
for line in lines:
    line_split = line.split('.txt ')
    hash_name = line_split[0]
    count = line_split[1]
    numbers[hash_name] = count
#print(numbers)

# The input file
file_i = open('input.txt')
file_i = file_i.read()

for hash_name, count in numbers.iteritems():
    regex = '(' + hash_name.strip() + ')'
    result = re.findall(r'>.*\|(' + regex + ')(.*?)>', file_i, re.S)
    if len(result) > 0:
        data_original = result[0][2]
        stripped_data = result[0][2][int(count):]
        file_i = file_i.replace(data_original, '\n' + stripped_data)
        #print(data_original)
        #print(stripped_data)
#print(file_i)

# Write the input file to new input_new.txt
f = open('input_new.txt', 'wt')
f.write(file_i)
You can do it like so;
import re

min_count = 4  # this variable will contain that count integer from where to start removing
str_to_match = 'EOG6CC67M'  # this variable will contain the filename you read
input = ''  # The file input (input.txt) will go in here
counter = 0

def callback_f(e):
    global min_count
    global counter
    counter += 1
    # Check your input
    print(str(counter) + ' >>> ' + e.group())
    # Only replace the value with nothing (remove it) after a certain count
    if counter > min_count:
        return ''  # replace with nothing
    return e.group()  # otherwise keep the match unchanged (re.sub needs a string back)

result = re.sub(r'' + str_to_match, callback_f, input)
With this tactic you can keep count with a global counter, and there is no need for hard line-loops with complex structures.
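As a quick self-contained illustration of that callback idea (made-up sample text, not the question's data):
import re

counter = 0
min_count = 2  # keep the first two matches, remove the rest

def drop_after(match):
    global counter
    counter += 1
    # Return the match untouched until min_count is reached, then drop it.
    return match.group() if counter <= min_count else ''

text = "AB AB AB AB"
print(re.sub(r'AB', drop_after, text))  # prints "AB AB  " (last two removed)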
Update
More detailed version with file access;
import os
import re

def callback_f(e):
    global counter
    counter += 1
    # Check your input
    print(str(counter) + ' >>> ' + e.group())
    return e.group()  # keep the match here; replace/remove as in the snippet above

# Fetch all hash-file names and their content (count)
num_files = os.listdir('./num_files')
numbers = {}
for file in num_files:
    if file[0] != '.':
        file_c = open('./num_files/' + file)
        file_c = file_c.read()
        numbers[file.split('.')[0]] = file_c

# Now the CSV files
csv_files = os.listdir('./csv_files')
for file in csv_files:
    if file[0] != '.':
        for hash_name, min_count in numbers.iteritems():
            file_c = open('./csv_files/' + file)
            file_c = file_c.read()
            counter = 0
            result = re.sub(r'' + hash_name, callback_f, file_c)
            # Write the replaced content back to the file here
Considered directory/file structure;
+ Projects
  + Project_folder
    + csv_files
      - input1.csv
      - input2.csv
      ~ etc.
    + num_files
      - EOG6CC67M.txt
      - EOG62JQZP.txt
      ~ etc.
    - python_file.py
The CSV files contain the big chunks of text you state in your original question.
The num files are the hash files, each containing an integer.
What happens in this script:
Collect all hash files (in a dictionary) and each one's inner count number
Loop through all CSV files
Sub-loop through the collected numbers for each CSV file
Replace/remove (based on what you do in callback_f()) hashes after a certain count
Write the output back (it's the last comment in the script; it would contain the file.write() functionality)
