Storing lines into an array and printing line by line - Python
Current code:
filepath = "C:/Bg_Log/KLBG04.txt"
with open(filepath) as fp:
    lines = fp.read().splitlines()
with open(filepath, "w") as fp:
    for line in lines:
        print("KLBG04", line, line[18], file=fp)
output:
KLBG04 20/01/03 08:09:13 G0001 G
I need the flexibility to move the columns around and also to manipulate the date, as shown below, using an array or list:
KLBG04 03/01/20 G0001 G 08:09:13
You didn't provide sample data, but I think this may work:
filepath = "C:/Bg_Log/KLBG04.txt"
with open(filepath) as fp:
    lines = fp.read().splitlines()
with open(filepath, "w") as fp:
    for line in lines:
        ln = "KLBG04 " + line + " " + line[18]   # current column order
        sp = ln.split()                          # split at spaces
        dt = '/'.join(sp[1].split('/')[::-1])    # reverse date
        print(sp[0], dt, sp[3], sp[-1], sp[-2])  # new column order
        # print("KLBG04", line, line[18], file=fp)
Try to split() the line first, then print the list in your desired order
from datetime import datetime  # use the datetime module to manipulate the date

filepath = "C:/Bg_Log/KLBG04.txt"
with open(filepath) as fp:
    lines = fp.read().splitlines()
with open(filepath, "w") as fp:
    for line in lines:
        date, time, venue = line.split(" ")  # split the line up
        date = datetime.strptime(date, '%y/%m/%d').strftime('%d/%m/%y')  # format your date
        print("KLBG04", date, venue, venue[0], time, file=fp)  # print in your desired order
Why not store the output as a string, use split() to break it at each space, and then call split() again on index 1 (the element that contains the date), this time splitting on /? That lets you rearrange the date parts however you want:
for line in lines:
    output = "KLBG04 " + line + " " + line[18]  # store the output in a string rather than printing it
    x = output.split(" ")
    date_output = x[1].split("/")
    # now you can rearrange the pieces and print them in whatever order you want
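A minimal runnable sketch of that idea, assuming lines holds the raw file lines from the question:

for line in lines:
    output = "KLBG04 " + line + " " + line[18]
    parts = output.split(" ")         # e.g. ['KLBG04', '20/01/03', '08:09:13', 'G0001', 'G']
    yy, mm, dd = parts[1].split("/")  # break the date apart
    print(parts[0], dd + "/" + mm + "/" + yy, parts[3], parts[4], parts[2])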
Try this
for line in lines:
    words = line.split()               # split every word
    date_values = words[0].split('/')  # split the word that contains the date
    # create a dictionary as follows
    date_format = ['YY', 'DD', 'MM']
    date_dict = dict(zip(date_format, date_values))
    # now create a new variable with the changed format
    new_date_format = date_dict['MM'] + '/' + date_dict['DD'] + '/' + date_dict['YY']
    print(new_date_format)
    # replace the first word (index 0 holds the date) with the new date format
    words[0] = new_date_format
    # join all the words to form a new line
    new_line = ' '.join(words)
    print("KLBG04", new_line, line[18])
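To see what the dict(zip(...)) step produces, here is a quick check with the sample date from the question (illustration only):

date_values = "20/01/03".split('/')  # ['20', '01', '03']
date_dict = dict(zip(['YY', 'DD', 'MM'], date_values))
print(date_dict)  # {'YY': '20', 'DD': '01', 'MM': '03'}
print(date_dict['MM'] + '/' + date_dict['DD'] + '/' + date_dict['YY'])  # 03/01/20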
Related
How can I append a text file to order the contents
I have a text file with about 2000 numbers written to it in a random order... how can I order them from within Python? Any help is appreciated.

file = open('file.txt', 'w', newline='')
s = (f'{item["Num"]}')
file.write(s + '\n')
file.close()
read = open('file.txt', 'a')
sorted(read)
You need to:

read the contents of the file: open('file.txt', 'r').read()
split the content using a separator: separator.split(contents)
convert each item to a number, otherwise you won't be able to sort numerically: int(item)
sort the numbers: sorted(list_of_numbers)

Here is a code example, assuming the file is space separated and that the numbers are integers:

import re

file_contents = open("file.txt", "r").read()  # read the contents
separator = re.compile(r'\s+', re.MULTILINE)  # create a regex separator
numbers = []
for i in separator.split(file_contents):      # use the separator
    try:
        numbers.append(int(i))                # convert to integers and append
    except ValueError:                        # if the item is not an integer, skip it
        pass
sorted_numbers = sorted(numbers)

You can now append the sorted content to another file:

with open("toappend.txt", "a") as appendable:
    appendable.write(" ".join(str(n) for n in sorted_numbers))
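If the file is simply whitespace separated and every token really is an integer, a more compact variant (a sketch, not the answer's exact code) would be:

# compact variant, assuming every whitespace-separated token is an integer
with open("file.txt") as f:
    numbers = sorted(int(tok) for tok in f.read().split())

with open("toappend.txt", "a") as out:
    out.write(" ".join(str(n) for n in numbers))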
Data comes out shifted using python
What this code is supposed to do is transfer weird looking .csv files written on one line into a multi-line csv:

import csv
import re

filenmi = "original.csv"
filenmo = "data-out.csv"

infile = open(filenmi, 'r')
outfile = open(filenmo, 'w+')

for line in infile:
    print('read data :', line)
    line2 = re.sub('[^0-9|^,^.]', '', line)
    line2 = re.sub(',,', ',', line2)
    print('clean data: ', line2)
    wordlist = line2.split(",")
    n = (len(wordlist)) / 2
    print('num data pairs: ', n)
    i = 0
    print('data paired :')
    while i < n * 2:
        pairstr = ','.join(pairlst)
        print(' ', i / 2 + 1, ' ', pairstr)
        pairstr = pairstr + '\n'
        outfile.write(pairstr)
        i = i + 2

infile.close()
outfile.close()

What I want this code to do is change a messed up .txt file

L,39,100,50.5,83,L,50.5,83

into a normally formatted csv file like the example below

39,100
50.5,83
50.5,83

but my data comes out like this

,39
100,50.5
83,50.5
83,

I'm not sure what went wrong or how to fix this, so it would be great if someone could help.

::Data Set::

L,39,100,50.5,83,L,50.5,83,57.5,76,L,57.5,76,67,67.5,L,67,67.5,89,54,L,89,54,100.5,49,L,100.5,49,111.5,45.5,L,111.5,45.5,134,42,L,134,42,152.5,44,L,152.5,44,160,46.5,L,160,46.5,168,52,L,168,52,170,56.5,L,170,56.5,162,64.5,L,162,64.5,152.5,70,L,152.5,70,126,85.5,L,126,85.5,113.5,94,L,113.5,94,98,105.5,L,98,105.5,72.5,132,L,72.5,132,64.5,145,L,64.5,145,57.5,165.5,L,57.5,165.5,57,176,L,57,176,63.5,199.5,L,63.5,199.5,69,209,L,69,209,76,216.5,L,76,216.5,83.5,222,L,83.5,222,90.5,224.5,L,90.5,224.5,98,225.5,L,98,225.5,105.5,225,L,105.5,225,115,223,L,115,223,124.5,220,L,124.5,220,133.5,216.5,L,133.5,216.5,142,212,L,142,212,149,207,L,149,207,156.5,201.5,L,156.5,201.5,163.5,195.5,L,163.5,195.5,172.5,185.5,L,172.5,185.5,175,180.5,L,175,180.5,177,173,L,177,173,177.5,154,L,177.5,154,174.5,142.5,L,174.5,142.5,168.5,133.5,L,168.5,133.5,150,131.5,L,150,131.5,135,136.5,L,135,136.5,120.5,144.5,L,120.5,144.5,110.5,154,L,110.5,154,104,161.5,L,104,161.5,99.5,168.5,L,99.5,168.5,98,173,L,98,173,97.5,176,L,97.5,176,99.5,178,L,99.5,178,105,179.5,L,105,179.5,112.5,179,L,112.5,179,132,175.5,L,132,175.5,140.5,175,L,140.5,175,149.5,175,L,149.5,175,157,176.5,L,157,176.5,169.5,181.5,L,169.5,181.5,174,185.5,L,174,185.5,178,206,L,178,206,176.5,214.5,L,176.5,214.5,161,240.5,L,161,240.5,144.5,251,L,144.5,251,134.5,254,L,134.5,254,111.5,254.5,L,111.5,254.5,98,253,L,98,253,71.5,248,L,71.5,248,56,246,
Your code fails because when you run line2 = re.sub('[^0-9|^,^.]','',line), it outputs ,39,100,50.5,83,,50.5,83. In that line you are using re to replace any char that isn't a number, dot or comma with nothing (''). This removes the L in your input, but the second char, which is a comma, stays. I've just fixed that and made a little modification to how you create the csv list. The code below works.

import csv
import re

filenmi = "original.csv"
filenmo = "data-out.csv"

with open(filenmi, 'r') as infile:
    # get a list of words that must be split
    for line in infile:
        # remove any char which isn't a number, dot, or comma
        line2 = re.sub('[^0-9|^,^.]', '', line)
        # replace ",," with ","
        line2 = re.sub(',,', ',', line2)
        # remove the first char, which is a ","
        line2 = line2[1:]
        # get a list of individual values, separated by ","
        wordlist = line2.split(",")
        parsed = []
        for i, val in enumerate(wordlist):
            # for every even index, get the word pair
            try:
                if i % 2 == 0:
                    parstr = wordlist[i] + "," + wordlist[i+1] + '\n'
                    parsed.append(parstr)
            except:
                print("Data set needs cleanup\n")

with open(filenmo, 'w+') as f:
    for item in parsed:
        f.write(item)
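As an aside, pairing adjacent values can also be written with zip, which avoids the manual index arithmetic (a sketch, assuming wordlist already holds the cleaned values):

wordlist = "39,100,50.5,83,50.5,83".split(",")  # hypothetical cleaned values
pairs = [a + "," + b + "\n" for a, b in zip(wordlist[0::2], wordlist[1::2])]
print(pairs)  # ['39,100\n', '50.5,83\n', '50.5,83\n']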
Changing a text file and making a bigger text file in python
I have a tab separated text file like this example:

infile:

chr1    +    1071396    1271396    LOC
chr12   +    1101483    1121483    MIR200B

I want to divide the difference between columns 3 and 4 in infile into 100 parts, make 100 rows per row of infile, and write a new file named newfile: the final tab separated file with 6 columns. The first 5 columns would be like infile; the 6th column would be (5th column)_part number (number is 1 to 100). This is the expected output file:

expected output:

chr1    +    1071396    1073396    LOC    LOC_part1
chr1    +    1073396    1075396    LOC    LOC_part2
.
.
.
chr1    +    1269396    1271396    LOC    LOC_part100
chr12   +    1101483    1101683    MIR200B    MIR200B_part1
chr12   +    1101683    1101883    MIR200B    MIR200B_part2
.
.
.
chr12   +    1121283    1121483    MIR200B    MIR200B_part100

I wrote the following code to get the expected output but it does not return what I expect.

file = open('infile.txt', 'rb')
cont = []
for line in file:
    cont.append(line)

newfile = []
for i in cont:
    percent = (i[3]-i[2])/100
    for j in percent:
        newfile.append(i[0], i[1], i[2], i[2]+percent, i[4], i[4]_'part'percent[j])

with open('output.txt', 'w') as f:
    for i in newfile:
        for j in i:
            f.write(i + '\n')

Do you know how to fix the problem?
Try this:

file = open('infile.txt', 'r')
cont = []
for line in file:
    cont.append(list(filter(lambda x: not x.isspace(), line.split())))

newfile = []
for i in cont:
    diff = (int(i[3]) - int(i[2])) / 100
    left = int(i[2])
    right = left + diff
    for j in range(100):
        newfile.append([i[0], i[1], left, right, i[4], i[4] + '_part' + str(j + 1)])
        left = right
        right = right + diff

with open('output.txt', 'w') as f:
    for i in newfile:
        f.write('\t'.join(str(j) for j in i) + '\n')

In your code, for i in cont iterates over strings, so i[2] and i[3] are single characters rather than fields. To fix that I split the line and remove the spaces.
Here are some suggestions. When you open the file, open it as a text file, not a binary file: open('infile.txt', 'r'). When you read it line by line, strip the newline character at the end by using strip(). Then split your input line by tabs into a list of strings, rather than keeping one long string, by using split('\t'): line.strip().split('\t'). Now you have:

file = open('infile.txt', 'r')
cont = []
for line in file:
    cont.append(line.strip().split('\t'))

Now cont is a list of lists, where each inner list contains your tab separated data, i.e. cont[1][0] will be 'chr12'. You will probably be able to take it from here.
Others have answered your question with respect to your own code; I thought I would leave my attempt at solving your problem here.

import os

directory = "C:/Users/DELL/Desktop/"
filename = "infile.txt"
path = os.path.join(directory, filename)

with open(path, "r") as f_in, open(directory + "outfile.txt", "w") as f_out:  # open input and output files
    for line in f_in:
        contents = line.rstrip().split("\t")  # split the line into fields stored in the list 'contents'
        diff = (int(contents[3]) - int(contents[2])) / 100
        for i in range(100):
            temp = (f"{contents[0]}\t+\t{int(int(contents[2]) + diff*i)}\t{int(int(contents[2]) + diff*(i+1))}\t{contents[4]}\t{contents[4]}_part{i+1}")
            f_out.write(temp + "\n")

This code doesn't follow Python style conventions well (excessively long lines, for example) but it works. The line temp = ... uses f-strings to format the output string conveniently, which you can read more about here.
How to insert a string into another string after the first (and only the first) occurrence of a character
Using Python 3, I need to insert the string BA| on each line of a file like this one:

AZ|C|DTD
E|GS|H

But only after the first occurrence of |. So, after inserting the string, the file should look like this:

AZ|BA|C|DTD
E|BA|GS|H

I could try inserting a string into another at a certain position, but the length of the words is not regular. I could insert the string after finding |, but that could result in inserting the string more than once on the same line.
Use str.replace() with the count argument set to 1.

for line in file:
    line = line.replace("|", "|BA|", 1)
    # print line, write it to a new file, etc.
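A complete file-to-file version of that idea might look like this (a sketch; input.txt and output.txt are placeholder names):

with open("input.txt") as src, open("output.txt", "w") as dst:
    for line in src:
        dst.write(line.replace("|", "|BA|", 1))  # only the first "|" is replaced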
You don't need regex for this. Using str.partition:

In [87]: str_ = 'AZ|C|DTD'

# Gets the partitions split on the first `|`
# generates a 3 element tuple: `('AZ', '|', 'C|DTD')`
In [88]: parts = str_.partition('|')

# Concatenate the first two elements joined with an
# empty string, `BA|`, and the last element
In [89]: ''.join(parts[:2]) + 'BA|' + parts[2]
Out[89]: 'AZ|BA|C|DTD'

For a file:

with open('file.txt') as f:
    for line in f:
        parts = line.strip().partition('|')
        new_line = ''.join(parts[:2]) + 'BA|' + parts[2]
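The loop above only builds new_line; writing the result back out could look something like this (a sketch, with out.txt as a placeholder output file):

with open('file.txt') as f, open('out.txt', 'w') as out:
    for line in f:
        parts = line.rstrip('\n').partition('|')
        out.write(''.join(parts[:2]) + 'BA|' + parts[2] + '\n')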
line.index('|') gives the first occurrence:

outfile = open('outfile.txt', 'w')
for line in open('inputfile.txt'):
    split_index = line.index('|')
    line = line[:split_index] + '|BA' + line[split_index:]
    outfile.write(line)

or do it with split:

outfile = open('outfile.txt', 'w')
for line in open('inputfile.txt'):
    line = line.split('|')
    line.insert(1, 'BA')
    line = '|'.join(line)
    outfile.write(line)
Creating a list of lists using append function
I have two functions I have created as follows:

def load_dates(stations):
    f = open(stations[0] + '.txt', 'r')
    dates = []
    for line in f:
        dates.append(line.split()[0])
    f.close()
    return dates

stations = load_stations("stations.txt")
dates = load_dates(stations)

and

def load_station_data(station):
    f = open(stations[0] + '.txt', 'r')
    temp = []
    for line in f:
        x = (line.split()[1])
        x = x.strip()
        temp.append(x)
    f.close()
    return temp

The first function retrieves the dates from a separate file (hence the open-file call), which can be seen to be the first column, and the second retrieves the temperatures while eliminating the spaces. The second function, however, gets the temperatures from a specific file (station).

Dates       Temp
19600101    29.2
19600102    29.4
19600103    29.5

The question I have now is how I could make my new function put the temperature data into a corresponding list for each different station file, so that there is a list of temperatures belonging to every station (city). I know what I have to do is create an empty list, keep iterating through the stations using a for loop, and then add what I get from each iteration to the empty list using the append function. I am new to Python and so am struggling with the part described above.
Instead of using lists, it's better to use dictionaries here.

# "= {}" creates a dictionary
cities = {}
# put the files you want to parse in this list
file_list = ("city1.txt", "city2.txt")

for file_name in file_list:
    file = open(file_name, 'r')
    # we don't want the .txt in the name, so we'll cut it off
    # split breaks file_name into a list at every dot
    city_name = file_name.split('.')[0]
    # "= []" creates an empty list
    cities[city_name] = []
    for line in file:
        # strip removes the newline, split breaks the line into its fields
        values = line.strip().split(' ')
        # verify the length, we don't want to use empty lines
        if len(values) == 2:
            cities[city_name].append(values[1])
    file.close()

I hope this will do what you want.

Edit: All the cities and the values are now in the dictionary 'cities'. If you want to access a specific city's temps, you can do it like that:

print(cities["cityname"])

and if you want to read all the data, you can print the whole dict:

for key, temperatures in cities.items():
    print("City: " + key)
    for temperature in temperatures:
        print("\t" + temperature)
I agree with @Morb that a dict sounds more sensible, but in answer to your original question, you can certainly append a list to a list (as opposed to extend). So, say each line in your file looked like:

19600101 29.2 28.4 25.6 30.2
19600102 26.2 24.4 21.6 30.5

you could do

temp.append(line.split()[1:])

and end up with a list of lists:

[['29.2', '28.4', '25.6', '30.2'], ['26.2', '24.4', '21.6', '30.5']]
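The append versus extend distinction matters here; a quick illustration (just a sketch with made-up values):

rows = []
rows.append(['29.2', '28.4'])  # append keeps the inner list intact -> [['29.2', '28.4']]
rows.extend(['29.2', '28.4'])  # extend flattens it in -> [['29.2', '28.4'], '29.2', '28.4']
print(rows)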
I am not sure I get the problem either, but maybe you should:

Do only one loop to get both temperature and date:

def load_all_data(stations):
    f = open(stations[0] + '.txt')
    dates, temps = [], []
    for line in f.readlines():
        dates.append(line.split()[0])
        temps.append(line.split()[1].strip())
    f.close()
    return dates, temps

Use a list comprehension:

def load_all_data(stations):
    f = open(stations[0] + '.txt')
    lines = f.readlines()
    dates = [line.split()[0] for line in lines]
    temps = [line.split()[1].strip() for line in lines]
    f.close()
    return dates, temps

Use a context manager for open, as suggested by cool_jesus:

def load_data_all(stations):
    with open(stations[0] + '.txt') as f:
        lines = f.readlines()
        dates = [line.split()[0] for line in lines]
        temps = [line.split()[1].strip() for line in lines]
    return dates, temps

Do a loop over the stations:

def load_data_all(stations):
    data_stations = []
    for station in stations:
        with open(station + '.txt') as f:
            lines = f.readlines()
            dates = [line.split()[0] for line in lines]
            temps = [line.split()[1].strip() for line in lines]
        data_stations.append((temps, dates))
    return data_stations
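As a usage sketch, the last version could be consumed like this (the station names here are hypothetical placeholders for files such as city1.txt and city2.txt):

stations = ["city1", "city2"]  # hypothetical station names
for station, (temps, dates) in zip(stations, load_data_all(stations)):
    print(station, dates[0], temps[0])  # first date and temperature for each station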