I am having a problem with a loop in Python to get the desired result. Here is my issue.
First, I have one text file: urls.txt. This file has multiple URLs.
Second, I have multiple JSON files. Let's say there are 5 JSON files.
I want to process the first n lines of urls.txt with 1.json, then the next n lines with 2.json, and so on. After all 5 JSON files are used, I want to start from 1.json again and repeat the process until all the lines in urls.txt are processed.
In my case I want to rotate the JSON files after every 100 lines of urls.txt.
I have written some code to do that but, unfortunately, I cannot figure out how to repeat the operation once all the JSON files are used.
import glob
import time
from itertools import count

batch_size = 100
JSON_KEY_FILE_PATH = "json_files/"
JSON_FILENAME = '*.json'
json_file_list = glob.glob(JSON_KEY_FILE_PATH + JSON_FILENAME, recursive=True)
itr_length = len(json_file_list)

def UrlCall(URL_FILE):
    with open(URL_FILE, mode='r') as urllist:
        for j in range(0, itr_length):
            for i in count():
                line = urllist.readlines(20)
                print('===>' + str(i) + '===>' + str(line))
                if (i/batch_size).is_integer() and line != '' and i != 0 and j != itr_length:
                    # define the json message
                    print("Value of J" + str(j))
                    print("JSON FILE IN USE:" + str(json_file_list[j]))
                    if j == itr_length-1:
                        print("====>RESTARTING JSON EXECUTION")
                        time.sleep(10)
                        print("Value of J" + str(j))
                        print('===>' + str(i) + '===>' + str(line))
                        print("JSON FILE IN USE:" + str(json_file_list[j]))
                        return
                    break
This code exits after all the JSON files are used, but I want to start over from the beginning of the JSON file list and process the next n lines of urls.txt.
You can use itertools.cycle(json_file_list) to loop through the list repeatedly.
You can use one of the techniques in "What is the most 'pythonic' way to iterate over a list in chunks?" to iterate over the file in groups of N lines.
Then you can zip them to process them together.
import json
from itertools import cycle, zip_longest

batch_size = 100

def grouper(iterable, n, fillvalue=None):
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)

def url_call(url_file, json_file_list):
    with open(url_file) as f:
        for json_file, lines in zip(cycle(json_file_list), grouper(f, batch_size)):
            with open(json_file) as jf:
                json_data = json.load(jf)
            for line in lines:
                if line:  # skip the fill values padding the last batch
                    pass  # do something with line and json_data
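Because cycle(json_file_list) never runs out, the zip stops only when grouper exhausts the file, so the JSON files wrap around automatically. A hypothetical call, wiring this up to the paths from the question (urls.txt and the json_files/ directory):

import glob

json_file_list = glob.glob("json_files/*.json")
url_call("urls.txt", json_file_list)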
I have 5 input files. I'm reading the target and the array from the 2nd and 3rd lines of each. I have a twoSum function. I need to take the function's returned value(s) and write them to 5 output files.
I know my function is correct; I can print its result just fine.
I know I'm reading the 5 input files and creating and writing to the 5 output files.
What I can't figure out is how to write the return value(s) from the twoSum function INTO the output files.
def twoSum(arr, target):
    for i in range(len(arr)):
        for j in range(i, len(arr)):
            curr = arr[i] + arr[j]
            if arr[i]*2 == target:
                return [i, i]
            if curr == target:
                return [i, j]
Read 5 files:

inPrefix = "in"
outPrefix = "out"
for i in range(1, 6):
    inFile = inPrefix + str(i) + ".txt"
    with open(inFile, 'r') as f:
        fileLines = f.readlines()
        target = fileLines[1]
        arr = fileLines[2]

    # how do I write twoSum's value to the output file?
    # ???????

    # Output to 5 files
    outFile = outPrefix + str(i) + ".txt"
    with open(outFile, 'a') as f:
        f.write(target)  # just a test to make sure I'm writing successfully
        f.write(arr)     # just a test to make sure I'm writing successfully
Two things come to mind:
Open the file in wt mode (write text).
processedOutput appears to be a list. When you write to a file, you need to write a string. Are you wanting CSV-style output of those values, or just a line of space-separated values? The simplest way here would be something like " ".join(processedOutput). That will separate all the items in your processedOutput list with spaces.
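Putting that together, a minimal sketch. It assumes, as in the question, that the 2nd line of each input file holds the target and the 3rd line holds the array; the exact parsing (integers, space-separated) is an assumption:

inPrefix = "in"
outPrefix = "out"
for i in range(1, 6):
    with open(inPrefix + str(i) + ".txt", "r") as f:
        fileLines = f.readlines()
    target = int(fileLines[1])                    # assumed: one integer on line 2
    arr = [int(x) for x in fileLines[2].split()]  # assumed: space-separated ints on line 3
    result = twoSum(arr, target)
    with open(outPrefix + str(i) + ".txt", "w") as f:
        f.write(" ".join(str(v) for v in result) + "\n")  # e.g. "0 3"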
I have 1000 files in a folder, named md_1.mdp, md_2.mdp, ..., md_1000.mdp, and the 186th line of every file reads:
gen_seed = 35086
This value is different in every file and it is what I want to extract and print as the output.
I have written the following code but it is not displaying any output.
import numpy as np

idx = np.arange(1, 1000)
for i in idx:
    f = open('/home/abc/xyz/mdp_200/md_' + str(i) + '.mdp', 'r')
    l = f.readlines()
    l = l[185].split(" ")
    flag = 0
    for k in l:
        if flag == 1:
            if k != '':
                print(k)
                flag = 0
        if k == "t=":
            flag = 1
    f.close()
What should I add to this program so that it prints the required value for each file one by one in the order of md_1.mdp, md_2.mdp and so on?
You can use:
for i in range(1, 1001):
    with open('/home/abc/xyz/mdp_200/md_' + str(i) + '.mdp') as fp:
        l = fp.readlines()
    print(l[185].split('=')[-1].strip())
Or you can use linecache.getline:

import linecache

for i in range(1, 1001):
    file = f'/home/abc/xyz/mdp_200/md_{i}.mdp'
    line = linecache.getline(file, 186)  # linecache counts from 1, so the 186th line is 186
    print(line.split('=')[-1].strip())

After you get your line, the split is done on the = character.
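For instance, applying that split to the sample line from the question:

line = 'gen_seed = 35086'
print(line.split('=')[-1].strip())  # prints: 35086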
I know it's not completely finished, but I'm very confused as to how to format the save_inventory function so it prints like the original text file. During the add_item function, it shows that the item has been added to the lists, but when I go to write, nothing is there or updated.
Example of how the text file needs to look
def save_inventory(inventoryFile, descriptionArray, quantityArray, priceArray, intrecords):
    outfile = open(inventoryFile, "w")
    with open('inventory1.txt', 'r') as f:
        count = -1
        for line in f:
            count += 1
            if count % 3 == 0:  # this is the remainder operator
                outfile.write(descriptionArray)
                print(descriptionArray)
    with open('inventory1.txt', 'r') as f:
        count = -2
        for line in f:
            count += 1
            if count % 3 == 0:  # this is the remainder operator
                outfile.write(str(quantityArray))
                print(quantityArray)
    with open('inventory1.txt', 'r') as f:
        count = -3
        for line in f:
            count += 1
            if count % 3 == 0:  # this is the remainder operator
                outfile.write(str(priceArray))
                print(priceArray)
    outfile.close()
You are only writing to the file when you have read a line, so if your text file is empty you will never write anything.
What I would do is zip the lists together and loop through them, writing three lines to the file on each pass through the loop. You can write a newline with '\n'.
with open(inventoryFile, 'w') as f:
    for d, q, p in zip(descriptionArray, quantityArray, priceArray):
        f.write('%s\n%s\n%s\n' % (d, q, p))
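For example, with some hypothetical sample data (the item values here are made up):

descriptionArray = ['hammer', 'nails']
quantityArray = [12, 500]
priceArray = [9.99, 3.49]

with open('inventory1.txt', 'w') as f:
    for d, q, p in zip(descriptionArray, quantityArray, priceArray):
        f.write('%s\n%s\n%s\n' % (d, q, p))

# inventory1.txt now holds one value per line:
# hammer, 12, 9.99, nails, 500, 3.49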
I'm trying to split up a very large text file into multiple smaller ones. When I run the code below, the first created file is correct; everything after that contains only the 'INSERT INTO ...' string and nothing else. Thanks in advance.
import math

interval = 100000

with open('my-big-file', 'r') as c:
    for i, l in enumerate(c):
        pass
length = i + 1

numOfFiles = int(math.ceil(length / interval))

with open('my-big-file', 'r') as c:
    for j in range(0, numOfFiles):
        with open('my-smaller-file_{}.sql'.format(j), 'w') as n:
            print >> n, 'INSERT INTO codes (code, some-field, some-other-field) VALUES'
            for i, line in enumerate(c):
                if i >= j * interval and i < (j + 1) * interval:
                    line = line.rstrip()
                    if not line: continue
                    print >> n, "('%s','something','something else')," % (line)
                else:
                    break
You don't need to count the number of lines before iterating the file; you can write directly to a new file whenever you reach the given number of lines:
#!/usr/bin/env python
import os
import sys

def split(fn, num=1000, suffix="_%03d"):
    full, ext = os.path.splitext(fn)
    with open(fn, 'r') as f:
        for i, l in enumerate(f):
            if i % num == 0:
                try:
                    out.close()
                except UnboundLocalError:
                    pass  # first chunk: no output file open yet
                out = open(full + suffix % (i // num) + ext, 'w')
            out.write(l)
        else:
            out.close()

if __name__ == '__main__':
    split(sys.argv[1])
You can run this from the command line, though the split command is probably more useful, since it supports a multitude of options (e.g. split -l 100000 my-big-file).
It's also possible to rewrite this code to use with for the file(s) being written to, as in the sketch below.
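A minimal sketch of that rewrite (the name split_with is hypothetical); itertools.islice does the chunking, so each output file can live inside its own with block:

import os
from itertools import islice

def split_with(fn, num=1000, suffix="_%03d"):
    full, ext = os.path.splitext(fn)
    with open(fn) as f:
        chunk = 0
        while True:
            lines = list(islice(f, num))  # next block of up to num lines
            if not lines:
                break
            with open(full + suffix % chunk + ext, 'w') as out:
                out.writelines(lines)
            chunk += 1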
I need to take a certain part of my file, write it to a new file, and keep the rest in another new file, so I will have 3 files: 1) the original file, 2) the selected lines, 3) the rest. I have code that works for taking the first selection, but I'm having trouble getting the next selection and so on. Here's my code:
counter = 0
with open('1', 'r') as file1:  # open raw data
    with open('2', 'w') as file3:
        with open('3', 'w') as file_out:
            for i in file1:
                if counter < 10:  # next I need to get lines 10 to 20, followed by 20 to 30
                    file_out.write(i)
                else:
                    file3.write(i)
                counter += 1
How can I change my code so that I can get the next selection?
Does this do what you want?
def split_on_crosses(infile, chunk_size):
    head_num = 1                         # counter for chunks
    head_file = open('1-head.txt', 'w')  # output for the first head file
    tails = []                           # outputs to tail files
    with open(infile, 'r') as inport:    # open raw data
        for i, line in enumerate(inport, start=1):
            head_file.write(line)
            for t in tails:              # write to all tail files
                t.write(line)
            if i % chunk_size == 0:      # boundary of a chunk is reached
                tails.append(open('%s-tail.txt' % head_num, 'w'))  # add one tail file
                head_num += 1
                head_file.close()        # flush the finished head file
                head_file = open('%s-head.txt' % head_num, 'w')    # switch to the next head file

split_on_crosses('infile.txt', 10)
This should do what you want, written in Python 3.x.

# read file1, get the lines as an array and its length, then close it
alpha = open('alpha.txt', 'r')
alphaLine = alpha.readlines()
alphaLength = len(alphaLine)
alpha.close()

# lines above 10 and below 20 are sent to beta, while 10 to 20 are sent to gamma
beta = open('beta.txt', 'w')
gamma = open('gamma.txt', 'w')
for i in range(alphaLength):
    if i < 9:
        beta.write(alphaLine[i])
    elif i < 20:
        gamma.write(alphaLine[i])
    else:
        beta.write(alphaLine[i])
beta.close()
gamma.close()
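The same logic can be written with context managers so all three files are closed automatically; a brief equivalent sketch:

with open('alpha.txt') as alpha, \
     open('beta.txt', 'w') as beta, \
     open('gamma.txt', 'w') as gamma:
    for i, line in enumerate(alpha):
        if 9 <= i < 20:  # the 10th through 20th lines
            gamma.write(line)
        else:
            beta.write(line)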
For speed, I will assume the file is small enough to hold in memory (rather than re-reading the file each time):
from itertools import islice

BLOCKSZ = 10  # lines per chunk

# file names
INPUT = "raw_data.txt"
OUTPUT_LINES  = lambda a, b: "data_lines_{}_to_{}.txt".format(a, b-1)
OUTPUT_EXCEPT = lambda a, b: "data_except_{}_to_{}.txt".format(a, b-1)

def main():
    # read file as a list of lines
    with open(INPUT) as inf:
        data = list(inf)
    num_blocks = (len(data) + BLOCKSZ - 1) // BLOCKSZ
    for block in range(num_blocks):
        # calculate start and end lines for this chunk
        start = block * BLOCKSZ
        end = (block + 1) * BLOCKSZ
        # write out [start:end]
        with open(OUTPUT_LINES(start, end), "w") as outf:
            for line in islice(data, start, end):
                outf.write(line)
        # write out [:start] + [end:]
        with open(OUTPUT_EXCEPT(start, end), "w") as outf:
            for line in islice(data, start):
                outf.write(line)
            for line in islice(data, end, None):
                outf.write(line)

if __name__ == "__main__":
    main()
Edit: I just realized I had made a mistake in my line-slicing for OUTPUT_EXCEPT (thinking of islice offsets as absolute rather than relative); this is now fixed.