I have to create a module that reads in an RNA-seq counts table (counts_table.csv) line by line, computes the average number of counts across each gene_ID, and writes the original table contents, in addition to the average counts in a new column, to a new file. My code cannot contain any import statements. This is what I have so far for my function:
def count_average(input_file, output_file):
    """Read an RNA-seq counts table (CSV) and write it back out with an
    extra "average" column holding the mean count across samples for each
    gene row.

    The first line is assumed to be a header row (gene_ID plus one column
    per sample) -- TODO confirm against the real counts_table.csv.

    Returns -1 if either file cannot be opened.  Uses no import
    statements, per the assignment constraints.
    """
    try:
        # "r", not "w": opening the input in "w" mode truncates it, and a
        # write-mode handle cannot be read.  Also, a file object has no
        # len() -- iterate it line by line instead of range(len(fin)).
        fin = open(input_file, "r")
        fout = open(output_file, "w")
    except OSError:
        return -1
    with fin, fout:
        # Copy the header across, appending the new column name.
        header = fin.readline().rstrip("\n")
        fout.write(header + ",average\n")
        for line in fin:
            line = line.rstrip("\n")
            if not line:
                continue  # skip blank lines (e.g. a trailing newline)
            fields = line.split(",")
            counts = [float(v) for v in fields[1:]]  # fields[0] is gene_ID
            average = sum(counts) / len(counts) if counts else 0.0
            # Original row followed by its average in the new column.
            fout.write(line + "," + str(average) + "\n")
I know I am successfully opening the files and I can append all of the information of the infile to the outfile however I have two things I am stuck on.
I keep getting TypeError: object of type '_io.TextIOWrapper' has no len()
I am not sure if my code will properly append the averages and I don't know how to make a new average header on top of those
Ultimately I want the final result to look something like this:
Input_to_Output
The problem in your code is that you are opening the file in the wrong way. Instead of writing this:
fin = open(input_file, "w")
Write this:
fin = open(input_file, "r")
It would be much easier to provide a working example if you had posted your actual data.
The problem is this line:
for i in range(len(fin)):
and this line:
for n in range(len(fin[i])):
To iterate over every line/row in your file, use something like this instead:
for row in fin:
and then:
for cell in row:
EDIT:
To still get an index use for index, row in enumerate(fin):
I'm trying to merge both JSON files, appending the timestamp from file2 to the corresponding frame number in file1. Please guide me.
JSON_FILE1
{"frameNumber":1,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classifications":[]}]}
JSON_FILE2
{
"frame1": "0:0:0:66",
"frame2": "0:0:0:100",
"frame3": "0:0:0:133",
"frame4": "0:0:0:166",
"frame5": "0:0:0:200"
}
expected output:
{"frameNumber":1,"frame1": "0:0:0:66",,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":true,"bbox":{"top":157,"left":581,"height":390,"width":297},"classifications":[]}]}
{"frameNumber":2, "frame2": "0:0:0:10,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.36,"width":297.16},"classifications":[]}]}
{"frameNumber":3,"frame3": "0:0:0:133,"classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":390.72,"width":297.32},"classifications":[]}]}
{"frameNumber":4,"frame4": "0:0:0:166","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.08,"width":297.48},"classifications":[]}]}
{"frameNumber":5,"frame5": "0:0:0:200","classifications":[],"objects":[{"featureId":"ckotybs4v00033b68edh8a6o5","schemaId":"ckoto8fzm16gj0y7uesrd0nzt","title":"Person 1","value":"person_1","color":"#1CE6FF","keyframe":false,"bbox":{"top":157,"left":581,"height":391.44,"width":297.64},"classification
I tried this approach, but I was unable to achieve the result.
import json
import glob

# Collect every *.json file in the working directory into one JSON array.
# NOTE(review): the output file also matches *.json, so a re-run would pick
# up merged_file.json itself -- consider a different extension or directory.
result = []
for f in glob.glob("*.json"):
    # Text mode, not "rb"/"wb": in Python 3, json.dump writes str, so a
    # binary-mode output handle raises TypeError.
    with open(f, "r") as infile:
        result.append(json.load(infile))

with open("merged_file.json", "w") as outfile:
    json.dump(result, outfile)
A correct .json file needs a surrounding pair of [], and then you could json.load it, iterate over every entry and do the same as below, but anyway:
The easiest solution is turn every line in a dict, if the framenumber matches add the timestamp and write it back.
def fuse(file1, file2, nTargetPath):
    """Merge per-frame timestamps from *file2* into the ndjson records of
    *file1*, writing one enriched JSON object per line to *nTargetPath*.

    file1: ndjson -- one {"frameNumber": N, ...} object per line.
    file2: a single JSON object mapping "frameN" -> timestamp string.
    """
    import json  # local import so this snippet is self-contained

    # file2 is ONE pretty-printed JSON object spanning several lines, so it
    # cannot be parsed line by line -- load it whole, once, outside the loop.
    with open(file2, "r") as tSourceFileB:
        tTimestamps = json.load(tSourceFileB)

    # Text mode ("w"/"r"), not binary: we read and write str, not bytes.
    with open(nTargetPath, "w") as tTargetFile, open(file1, "r") as tSourceFileA:
        for tLineA in tSourceFileA:
            tDictA = json.loads(tLineA)
            # frameNumber is an int, so str() is required before concatenation.
            tKey = "frame" + str(tDictA["frameNumber"])
            if tKey in tTimestamps:
                tDictA[tKey] = tTimestamps[tKey]
            tTargetFile.write(json.dumps(tDictA) + "\n")
This code can easily be improved by streamlining the file access, for example when you know the key for the timestamp in file2 is always in the same row as in file1, and so on.
As was pointed out, one file is ndjson and the other file is json. You need to implement some logic to add the json to the ndjson
# https://pypi.org/project/ndjson/
# pip install ndjson
import ndjson
import json

# Load the ndjson records (list of dicts) and the timestamp mapping.
with open('path/to/file/im_a_ndjson.ndjson') as infile:
    ndjson_object = ndjson.load(infile)

with open('path/to/file/json_file2.json') as infile:
    dict_object = json.load(infile)

# Index the records by frameNumber once, instead of scanning the whole
# list for every key (the original inner loop was O(n*m)).
frames_by_number = {record['frameNumber']: record for record in ndjson_object}

for key, timestamp in dict_object.items():
    # Slice off the literal prefix.  NOT key.strip('frame'): strip()
    # removes ANY of the characters f/r/a/m/e from both ends, which only
    # works here by accident because the suffix is purely numeric.
    framenumber = int(key[len('frame'):])
    record = frames_by_number.get(framenumber)
    if record is not None:
        # add the key/value pair to the matching ndjson object
        record[key] = timestamp

with open('path/to/file/new_ndjson.ndjson', 'w') as outfile:
    ndjson.dump(ndjson_object, outfile)
I have a large (50,000+ lines) file that is a collection of json outputs from another application that i would like to read in as json and perform some analysis on. The issue is that while a single entry is valid json, I can't read the entire file in as json because each entry isn't connected.
Snippet:
{"action":"Iops","idg":"2214472975167211","idx":537994,"system":"Qos","utc":"2019-07-02T11:45:09.606765Z","ver":"1.1","xQosIops":{"ActualReadOps":{"avg":0,"ct":60,"max":0,"min":0,"std":0,"tmax":29880,"tmin":29880}}}
{"action":"Latency","idg":"2214472975167211","idx":537995,"system":"Qos","utc":"2019-07-02T11:45:09.606829Z","ver":"1.1","xQosLatency":{"AverageLocalWriteLatencyUS":{"avg":0,"ct":60,"max":0,"min":0,"std":0,"tmax":29880,"tmin":29880}}}
Individually they are both valid, but what I would like to achieve is dynamically connect all of these into a single json object. It is important to note that these json responses could span multiple lines, so I can't just read in line by line. Any help would be appreciated.
You can load the contents of the file with vanilla Python (not using the json package), then use json to parse each individual line.
Example:
import json

data_fp = "/path/to/data.txt"

# Read the raw text with vanilla Python, then parse each line as JSON.
with open(data_fp, "r") as f:
    lines = f.readlines()

# "lines", not "line": the original iterated an undefined name, which
# raised NameError (or silently iterated the wrong thing).
json_object = [json.loads(l) for l in lines]

# optional: dump the combined list as a single JSON file
with open("/path/to/output.json", "w") as f:
    json.dump(json_object, f)
Edit: if each dictionary is not necessarily limited to a single line, you could try parsing JSON for a variable number of lines until it succeeds (continuing from above example):
# Greedily grow a window of lines until it parses as one JSON document,
# then start the next window.  Handles objects that span multiple lines.
start_line = 0
end_line = 1
json_object = []
while end_line <= len(lines):
    try:
        data = json.loads("".join(lines[start_line:end_line]))
    except json.JSONDecodeError:
        # Not a complete JSON value yet -- take in one more line and retry.
        # (Narrow except: a bare "except:" would also hide real bugs such
        # as NameError.)
        end_line += 1
    else:
        json_object.append(data)
        start_line = end_line
        end_line = start_line + 1
If each line is valid JSON, you could wrap this in a script that read them in individually, and appended them to a list. Something like:
import json

# One JSON document per line: parse each line independently into a list.
data = []
with open("fakejson.txt") as data_f:
    for line in data_f:
        data.append(json.loads(line))  # the original was missing this closing parenthesis
You can create a function that recognizes a json, by looking for pairs of open-close {}. See below:
def isjson(t):
    """Scan *t* for its first brace-balanced substring.

    Returns a tuple ``(chunk, stop)`` where ``chunk`` runs from the first
    ``{`` to its matching ``}`` (inclusive) and ``stop`` is the index one
    past that closing brace.  Returns None when *t* contains no ``{``.
    Raises IndexError if the braces are unbalanced.
    """
    start = t.find('{')
    if start == -1:
        return None
    depth = 0
    pos = start
    while True:
        ch = t[pos]
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
        pos += 1
        if depth == 0:
            return (t[start:pos], pos)
You can now load your entire file as text with the following:
# Load the whole file into one string; isjson() scans it for {...} chunks.
with open('yourfile.txt') as f:
    t=f.read()
And extact all jsons, using the above function:
# Repeatedly peel the first balanced {...} chunk off the front of t,
# storing each chunk (still as a string) under an incrementing key.
d = {}
n = 1
while '{' in t:
    # Call isjson() once per iteration -- the original called it twice,
    # rescanning the same text for both the chunk and the stop index.
    chunk, stop = isjson(t)
    d[n] = chunk
    t = t[stop + 1:]
    n += 1
My Script is reading data from another file.
I require the data as float, not as string and I am searching for an elegant/pythonic way to combine float() with the last line instead of iterating over the entire list to change the data or changing it when I need it:
# Read each row of bracketed numbers as a list of floats.
data = []
with open(os.path.join(path, file), "r") as f:
    # Iterate the file directly: readlines() + enumerate added nothing,
    # since the index i was never used.
    for line in f:
        # Replace the brackets with spaces, split on whitespace, and
        # convert every token to float right here (the asker's goal),
        # instead of storing strings and converting later.
        data.append([float(v) for v in line.replace('[', ' ').replace(']', ' ').split()])
So far this will save the data from the file in a list in a list as string.
How to combine the last line with float()?
Here is an example of the data before reading it:
[[ 563.15 1673.97 3078.41]
[ 563.15 1066.4 26617.7]
[ 563.212 778.931 59356.1]
Use map
Ex:
data.append(map(float, line.strip('[]').split()))
If python3
data.append(list(map(float, line.strip('[]').split())))
Do you have numpy installed?
Because in that case you can do:
import numpy as np

with open(os.path.join(path, file), "r") as f:
    # strip() the whitespace first: each line ends in "\n", which stops
    # strip('[]') from removing the trailing "]", leaving "]" glued to the
    # last number so the float conversion would fail.
    # Assumes every line has the same number of values (rectangular data).
    data = np.array([line.strip().strip('[]').split() for line in f], dtype=float)
it gives you a matrix in float format. Of course, this assumes that each line has the same number of values in it
I have a problem that I can't solve with python, it is probably very stupid but I didn't manage to find the solution by myself.
I have a .json file where the results of a simulation are stored. The result is stored as a series of dictionaries like
{"F_t_in_max": 709.1800264942982, "F_t_out_max": 3333.1574129603068, "P_elec_max": 0.87088836042046958, "beta_max": 0.38091242406098391, "r0_max": 187.55175182942901, "r1_max": 1354.8636763521174, " speed ": 8}
{"F_t_in_max": 525.61428305710433, "F_t_out_max": 2965.0538075438467, "P_elec_max": 0.80977406754203796, "beta_max": 0.59471606595464666, "r0_max": 241.25371753877008, "r1_max": 688.61786996066826, " speed ": 9}
{"F_t_in_max": 453.71124051199763, "F_t_out_max": 2630.1763649193008, "P_elec_max": 0.64268078173342935, "beta_max": 1.0352896471221695, "r0_max": 249.32706230502498, "r1_max": 709.11415981343885, " speed ": 10}
I would like to open the file and and access the values like to plot "r0_max" as function of "speed" but I can't open unless there is only one dictionary.
I use
# This fails with "ValueError: Extra data" because json.load expects the
# file to contain exactly ONE JSON document, but the file holds several
# objects back to back (one per line).
with open('./results/rigid_wing_opt.json') as data_file:
    data = json.load(data_file)
but When the file contains more than one dictionary I get the error
ValueError: Extra data: line 5 column 1 - line 6 column 1 (char 217 - 431)
If your input data is exactly as provided then you should be able to interpret each individual dictionary using json.load. If each dictionary is on its own line then this should be sufficient:
# One JSON document per line (ndjson): decode every line independently.
with open('filename', 'r') as handle:
    json_data = list(map(json.loads, handle))
I would recommend reading the file line-by-line and convert each line independently to a dictionary.
You can place each line into a list with the following code:
import ast

# Read all lines into a list
with open(fname) as f:
    content = f.readlines()

# Convert each list item to a dict.
# NOTE(review): ast.literal_eval parses Python literals, not JSON -- it
# will fail on JSON's true/false/null tokens.  It works for the purely
# numeric/string data shown in the question; for general JSON prefer
# json.loads.
content = [ ast.literal_eval( line ) for line in content ]
Or an even shorter version performing the list comprehension on the same line:
import ast

# Parse every line in a single comprehension.  Iterating the handle reads
# the file lazily, so the explicit readlines() call is unnecessary.
with open(fname) as f:
    content = [ast.literal_eval(line) for line in f]
{...} {...} is not proper json. It is two json objects separated by a space. Unless you can change the format of the input file to correct this, I'd suggest you try something a little different. If the data is a simple as in your example, then you could do something like this:
import json
import re

# Turn back-to-back JSON objects into one valid JSON array: wrap the text
# in [] and insert commas at the object boundaries.
with open('filename', 'r') as handle:
    text_data = handle.read()

# \s* (not \s) also matches "}{"  with no separator and boundaries split by
# several whitespace characters / blank lines; the original pattern required
# exactly one whitespace character between objects.
# NOTE(review): a literal "} {" inside a string value would be falsely
# rewritten -- acceptable for simple data, but not a general JSON parser.
text_data = '[' + re.sub(r'\}\s*\{', '},{', text_data) + ']'
json_data = json.loads(text_data)
This should work even if your dictionaries are not on separate lines.
That is not valid JSON. You can't have multiple objects at the top level without surrounding them with a list and inserting commas between them.