I've written a program to read in data files and do a bunch of nonsense with them that is irrelevant to this question. It will automatically skip the header when reading after the user inputs how many lines the header occupies.
I have built in some functionality to display the header in the terminal, if requested. Here is the functional yet idiotic looking snippet of code I've used to do this:
# Python 2 snippet (raw_input, print statement): pre-reads the first 20 lines
# of the chosen file and prints them numbered when the user answers ``head``.
filename = (raw_input("Which file are we loading? "))
with open(filename) as myfile:
# NOTE(review): Python 2 input() evaluates what the user types, so typing
# head at the prompt below presumably resolves to this variable - confirm.
head = 'head'
# One readline() call per line, each stored in its own variable.
aline = myfile.readline()
bline = myfile.readline()
cline = myfile.readline()
dline = myfile.readline()
eline = myfile.readline()
fline = myfile.readline()
gline = myfile.readline()
hline = myfile.readline()
iline = myfile.readline()
jline = myfile.readline()
kline = myfile.readline()
lline = myfile.readline()
mline = myfile.readline()
nline = myfile.readline()
oline = myfile.readline()
pline = myfile.readline()
qline = myfile.readline()
rline = myfile.readline()
sline = myfile.readline()
tline = myfile.readline()
header = input("How many header lines? (Type ``head`` to see the first 20 lines) ")
if header == head:
# The trailing comma suppresses print's own newline; each line read with
# readline() still carries its own line ending.
print ' 1 | ' + aline,
print ' 2 | ' + bline,
print ' 3 | ' + cline,
print ' 4 | ' + dline,
print ' 5 | ' + eline,
print ' 6 | ' + fline,
print ' 7 | ' + gline,
print ' 8 | ' + hline,
print ' 9 | ' + iline,
print '10 | ' + jline,
print '11 | ' + kline,
print '12 | ' + lline,
print '13 | ' + mline,
print '14 | ' + nline,
print '15 | ' + oline,
print '16 | ' + pline,
print '17 | ' + qline,
print '18 | ' + rline,
print '19 | ' + sline,
print '20 | ' + tline,
# Ask again once the numbered lines have been shown.
header = input("How many header lines? ")
Which appropriately gives:
How many header lines? (Type ``head`` to see the first 20 lines) head
1 | ------------------------------------------------------------------------------------------------------------------------------------------------
2 | K-KIDS GOLD LIST
3 | ------------------------------------------------------------------------------------------------------------------------------------------------
4 |
5 | N = 1048 K dwarfs within 50 parsecs
6 |
...
...
...
20 | stuff
Is there a more efficient and "Pythonic" way to go about this? Or is mine as good as it's going to get?
Cheers!
Not sure on the head and header logic but you can use itertools.islice to pull the first header_length lines and str.join to join the output:
from itertools import islice

# Python 2 script: read the first N lines of a file and, on request, print
# them prefixed with their 1-based line number.
filename = raw_input("Which file are we loading? ")
# ask user how many header lines
header_length = int(raw_input("Enter amount of header lines"))
with open(filename) as myfile:
    # islice stops after header_length lines, so the rest of the file is
    # never read into memory
    head = list(islice(myfile, header_length))
header = raw_input("How many header lines? (Type ``head`` to see the header lines)")
# if user types head
if header == "head":
    # enumerate(..., start=1) supplies the line numbers; every line keeps its
    # own trailing newline, so the pieces are joined on the empty string
    print("".join("{} | {}".format(i, line) for i, line in enumerate(head, start=1)))
I believe that this is the functionality you're looking for:
# Python 2 script: print the first lines of a file with 1-based line numbers.
filename = raw_input("Which file are we loading? ")
with open(filename) as myfile:
    file_lines = myfile.readlines()  # save all lines from file into memory
header = raw_input("How many header lines? (Type ``head`` to see the first 20 lines) ")
# 'head' means "show the first 20 lines"; anything else must be an integer
num_to_print = 20 if header == 'head' else int(header)
# start=1 so the displayed numbers match the file's line numbers
for i, line in enumerate(file_lines[:num_to_print], start=1):
    # drop the line's own newline; print() adds one
    print("{:02}|{}".format(i, line.rstrip("\n")))
Related
I am trying to get this code to split one at a time, but it is not functioning as expected:
# Fill acronyms_dict from text_line: the first field of each line is the
# acronym, the remainder is its meaning. Both names are defined outside this
# snippet.
for line in text_line:
# Splits at most once, and only on a literal space character.
one_line = line.split(' ',1)
# Lines that did not split into two fields are skipped.
if len(one_line) > 1:
acro = one_line[0].strip()
meaning = one_line[1].strip()
# A repeated acronym gets the new meaning appended after ', '.
if acro in acronyms_dict:
acronyms_dict[acro] = acronyms_dict[acro] + ', ' + meaning
else:
acronyms_dict[acro] = meaning
Remove the ' ' from the str.split. The file is using tabs to delimit the acronyms:
import requests

# Download the acronym list and map each acronym to its meaning(s).
data_site = requests.get(
    "https://raw.githubusercontent.com/priscian/nlp/master/OpenNLP/models/coref/acronyms.txt"
)
text_line = data_site.text.split("\n")

acronyms_dict = {}
for entry in text_line:
    # No separator argument: split once on any run of whitespace, which
    # covers the tabs used by the file  # <-- remove the ' '
    fields = entry.split(maxsplit=1)
    if len(fields) <= 1:
        continue
    acro, meaning = (field.strip() for field in fields)
    if acro not in acronyms_dict:
        acronyms_dict[acro] = meaning
    else:
        # Repeated acronym: append the extra meaning after a comma.
        acronyms_dict[acro] = acronyms_dict[acro] + ", " + meaning

print(acronyms_dict)
Prints:
{
'24KHGE': '24 Karat Heavy Gold Electroplate',
'2B1Q': '2 Binary 1 Quaternary',
'2D': '2-Dimensional',
...
I have two text files that contain data like this.
I want this to be done in Hadoop. Can anyone suggest a way?
textfile1 --> 1 goerge hyder
2 ganesh singapore
textfile2 --> 1 goergy hydel
2 ganest singapore
It has to do the comparison column by column and character by character, so after the comparison it should give a report like this:
column_name source destiny mismatch
xxx george georgy y
ganesh ganest h
hyder hydel r
Please help me in this.
# Compare two text files line by line (ignoring trailing whitespace) and
# record the verdict both on the console and in report.txt.

# Files must be opened for reading ('r'); append mode cannot be read from.
with open('textfile1.txt', 'r') as first:
    text1 = [line.rstrip() for line in first]
with open('textfile2.txt', 'r') as second:
    text2 = [line.rstrip() for line in second]

# The report has to be opened for writing; the with-block also closes it.
with open('report.txt', 'w') as report:
    if text1 == text2:
        print("It Is the Same Thing")
        report.write('It is The Same Thing with the text 1 and 2')
    else:
        print("it Is Not The Same Thing")
        report.write('It is Not The Same Thign With the text 1 and 2')
    report.write('\n')
# Print every differing character between corresponding words of two files.
# textfile1 / textfile2 are path variables defined outside this snippet.
with open(textfile1, "r") as f1, open(textfile2, "r") as f2:
    # split() with no argument splits on any whitespace, so line breaks
    # separate words as well as spaces do
    words1 = f1.read().split()
    words2 = f2.read().split()

# considering f1 and f2 have the same number of words
for word1, word2 in zip(words1, words2):
    if word1 == word2:
        continue
    # zip stops at the shorter word, so words of unequal length cannot
    # raise IndexError
    for char1, char2 in zip(word1, word2):
        if char1 != char2:
            print(word1, word2, char2)
As Seer.The mentioned above, you can use difflib.
import difflib


def _read_rows(path):
    """Return the lines of *path*, each split on single spaces into columns."""
    with open(path, 'r') as handle:
        return [line.rstrip().split(" ") for line in handle]


# Read the files
list1 = _read_rows('textfile1.txt')
list2 = _read_rows('textfile2.txt')

# Get the output
for row1, row2 in zip(list1, list2):
    for word1, word2 in zip(row1, row2):
        # ndiff marks characters only in the first word with "- " and
        # characters only in the second with "+ ". Testing the prefix,
        # not "-" anywhere in the entry, keeps a literal hyphen inside a
        # word from being reported, and including "+ " catches words that
        # only gained characters.
        output_list = [li[-1]
                       for li in difflib.ndiff(word1, word2)
                       if li.startswith(("- ", "+ "))]
        if not output_list:
            output_list = ["no difference"]
        print("{} {} {}".format(word1, word2, output_list[0]))
The output should look like:
goerge goergy e
hyder hydel r
ganesh ganest h
singapore singapore no difference
I have a file looking this way:
;1;108/1;4, 109
;1;51;4, 5
;2;109/2;4, 5
;2;108/2;4, 109
;3;108/2;4, 109
;3;51;4, 5
;4;109/2;4, 5
;4;51;4, 5
;5;109/2;4, 5
;5;40/6;5, 6, 7
where
;id1;id2;position_on_shelf_id2
;id1;id3;position_on_shelf_id3
as a result, i want to get:
id1;id2-id3;x
where x are common shelf positions for both id2 and id3, it should look like this
1;108/1-51;4
2;109/2-108/2;4
3;108/2-51;4
4;109/2-51;4, 5
5;109/2-40/6;5
My script works fine up to the point where I need to output the common shelf positions. I tried using .intersection, but it does not work properly when positions consist of more than one character (e.g. pos: 144 - result: 14; pos: 551 - result: 51; pos: 2222 - result: 2).
result = id2_chars.intersection(id3_chars)
any fix for intersection? or maybe some better method on your mind?
code so far:
part1 - merge every 2nd line together
# Part 1: concatenate the stripped lines of dane.txt and write the
# accumulated string to output.txt on every second line.
exp = open('output.txt', 'w')
with open("dane.txt") as f:
content = f.readlines()
strng = ""
# i is 1-based so that "i % 2 == 0" is true on every second line.
for i in range(1,len(content)+1):
strng += content[i-1].strip()
if i % 2 == 0:
exp.writelines(strng + '\n')
# NOTE(review): presumably reset only after a pair has been written;
# indentation was lost in this copy - confirm.
strng = ""
exp.close()
part2 - intersection:
# Part 2: for each merged line of output.txt, write the two ids and the
# intersection of their position strings to output2.txt.
exp = open('output2.txt', 'w')
imp = open('output.txt')
for line in imp:
# Each merged line is expected to split into exactly seven ';' fields.
none, lp1, dz1, poz1, lp2, dz2, poz2 = line.split(';')
s1 = poz1.lower()
s2 = poz2.lower()
# set() over a string yields its individual characters, so the
# intersection below compares single characters, not whole positions.
s1_chars = set(s1)
s2_chars = set(s2)
result = s1_chars.intersection(s2_chars)
result = str(result)
exp.writelines(lp1 + ';' + dz1 + '-' + dz2 + ';' + result + '\n')
exp.close()
** I have not filtered the result for my needs yet (it is in "list" form), but that won't be a problem once I get the right intersection result.
Your main problem is that you try to intersect 2 sets of characters while you should intersect positions. So you should at least use:
...
s1 = poz1.lower()
s2 = poz2.lower()
# Split on commas so that each position is compared as a whole token
# instead of character by character.
s1_poz = set(x.strip() for x in s1.split(','))
s2_poz = set(x.strip() for x in s2.split(','))  # second line's positions
result = s1_poz.intersection(s2_poz)
# sorted() makes the output order deterministic (plain string order)
result = ', '.join(sorted(result))
...
But in fact, you could easily do the whole processing in one single pass:
# Single pass over dane.txt: lines come in pairs sharing the same first id;
# for each pair write "id1;id2-id3;common positions" to output.txt.
with open("dane.txt") as f, open('output.txt', 'w') as exp:
    old = None
    for line in f:                 # one line at a time is enough
        line = line.strip()
        if not line:               # ignore blank lines (e.g. at end of file)
            continue
        if old is None:            # first line of a block, just store it
            old = line
            continue
        # second line of a block, process both
        _, lp1, dz1, poz1 = old.split(';')
        _, _, dz2, poz2 = line.split(';')
        # compare whole comma-separated positions, not single characters
        poz1x = set(x.strip() for x in poz1.lower().split(','))
        poz2x = set(x.strip() for x in poz2.lower().split(','))
        # sorted() makes the output order deterministic (plain string order)
        result = ', '.join(sorted(poz1x.intersection(poz2x)))
        exp.write(lp1 + ';' + dz1 + '-' + dz2 + ';' + result + '\n')
        old = None
I have a text file that stores order info in the following format. I am trying to search for an order by the first line of its block, which is the ID, and print the next 7 lines. But my code either checks just the first line or prints all lines that contain the input number. Could somebody help me?
4735
['Total price: ', 1425.0]
['Type of menu: ', 'BBQ']
['Type of service: ', ' ']
['Amount of customers: ', 25.0]
['Discount: ', '5%', '= RM', 75.0]
['Time: ', '2017-01-08 21:39:19']
3647
['Total price: ', 2000.0]
['Type of menu: ', ' ']
['Type of service: ', 'Tent ']
['Amount of customers: ', 0]
.......
I use the following code to search in text file.
# Python 2 snippet: look up an order ID in Bills.txt and print the lines that
# follow it. ReportModule is defined outside this snippet.
try:
# Opened and closed immediately: only checks that the file can be opened.
f = open('Bills.txt', 'r')
f.close()
except IOError:
absent_input = (raw_input("|----File was not founded----|\n|----Press 'Enter' to continue...----|\n"))
report_module = ReportModule()
report_module.show_report()
Id_input = (raw_input("Enter ID of order\n"))
with open("Bills.txt", "r") as f:
searchlines = f.readlines()
# Index of the last line; used below as the upper bound of the slice.
j = len(searchlines) - 1
for i, line in enumerate(searchlines):
# Substring test: true for any line that contains the typed text anywhere.
if Id_input in str(line): # I also try to check in this way (Id_input == str(line)), but it didn't work
k = min(i + 7, j)
# Prints the slice starting at the matching line itself.
for l in searchlines[i:k]: print l,
print
# NOTE(review): indentation was lost in this copy, so it is unclear whether
# this else pairs with the if or with the for loop - confirm.
else:
absent_input = (raw_input("|----Order was not founded----|\n|----Press 'Enter' to continue...----|\n"))
report_module = ReportModule()
report_module.show_report()
Check the following code.
# Python 2 script: print the bracketed detail lines that follow the order ID
# typed by the user. ReportModule is defined outside this snippet.
Id_input = (raw_input("Enter ID of order\n")).strip()
try:
    # the with-block closes the file even when the loop exits early
    with open("Bills.txt", "r") as f:
        print_rows = False
        for idline in f:
            # exact match on the whole (stripped) line, not a substring test
            if idline.strip() == Id_input:
                print_rows = True
                continue
            if print_rows:
                if idline.startswith("["):
                    # the line carries its own newline; strip it so print
                    # does not emit a blank line after every row
                    print(idline.rstrip("\n"))
                else:
                    # first non-detail line after the order: block is over
                    break
except IOError:
    absent_input = (raw_input("|----File was not founded----|\n|---- Press 'Enter' to continue...----|\n"))
    report_module = ReportModule()
    report_module.show_report()
else:
    # reached only when the file was read without an IOError
    if not print_rows:
        absent_input = (raw_input("|----Order was not founded----|\n|---- Press 'Enter' to continue...----|\n"))
        report_module = ReportModule()
        report_module.show_report()
With the file cropdata.txt containing this:
Lettuce 1 2 3
Tomato 4 5 6
When I run the code and input Tomato and 9, instead of removing 6 and inserting 9 after Tomato as it should, it replaces the whole contents of the file with 9, so that it looks like this:
9
I'm not sure why it does this and how to fix it.
# Inserts a new quantity for a crop in cropdata.txt and rewrites the file
# from the collected `lines` list.
crop = input('Which crop? ')
quantity = input('How much? ')
file = ('cropdata.txt')
# Substring test against the whole file contents.
if crop in open(file).read():
with open(file, 'r') as file_read:
lines = []
for line in file_read:
# Substring test against the current line.
if crop in line:
line = str(line.rstrip("\n"))
# Splits on single space characters only.
line_parts = line.split(" ")
print (len(line_parts))
if len (line_parts) > 4:
print('len greater')
# list.remove() deletes the first element equal to line_parts[3].
line_parts.remove (line_parts[3])
line_parts.insert (1, quantity)
# NOTE(review): the parts are concatenated with no separator, and '/n' is a
# slash followed by n, not the newline escape.
line = str(line_parts[0]+ line_parts[1] +
line_parts[2]+ line_parts[3] + ' ' + '/n')
else:
print('len less than')
# This branch replaces the line with the quantity alone.
line = str(quantity + " " + "\n")
# NOTE(review): indentation was lost in this copy; which lines reach this
# append decides what the rewritten file contains - confirm.
lines.append(line)
with open(file, 'w') as file_rewrite:
file_rewrite.writelines(lines)
else:
print('crop not found')
At least your indentation is wrong in two places; try this to get all lines:
# Insert a new quantity right after the crop name in cropdata.txt, keeping at
# most three quantities per crop (newest first). Every other line of the file
# is written back unchanged.
crop = input('Which crop? ')
quantity = input('How much? ')
file = 'cropdata.txt'

with open(file, 'r') as file_read:
    lines = file_read.readlines()

found = False
for index, line in enumerate(lines):
    # split() with no argument ignores repeated and trailing whitespace
    line_parts = line.split()
    # match the crop name exactly, not as a substring of the line
    if not line_parts or line_parts[0] != crop:
        continue
    found = True
    print(len(line_parts))
    if len(line_parts) > 3:
        print('len greater')
        # name + three quantities already: drop the oldest by position
        # (list.remove would delete the first equal value instead)
        del line_parts[3:]
    else:
        print('len less than')
    line_parts.insert(1, quantity)
    lines[index] = ' '.join(line_parts) + '\n'

if found:
    # rewrite only when something changed
    with open(file, 'w') as file_rewrite:
        file_rewrite.writelines(lines)
else:
    print('crop not found')