Related
With the binomial tree, I am trying to make a nested list by level.
Here is what I've done:
arrows = [("modifier","adverb1"),("noun","modifier"),("verb","noun"),("verb","adverb2"),("root","verb")]
dom_list = list(dict.fromkeys([a[0] for a in arrows]))
dom_list.pop()
depth = []
for a in dom_list:
MC = [v[1] for v in arrows if v[0] == a]
depth.append(MC)
con = []
for i in range(len(dom_list)):
c = [dom_list[i], depth[i]]
con.append(c)
The code lines result in:
[['modifier', ['adverb1']],
['noun', ['modifier']],
['verb', ['noun', 'adverb2']]]
What I want to make with this input is a nested list by level like:
['root', ['verb', ['noun', ['modifier', ['adverb1']], 'adverb2']]]
Is there any way to do this? Any help would be greatly appreciated.
Certainly not my most optimized code but it works on the data you provided as I understood your problem. Maybe this helps you in figuring out a more elegant solution:
def flat_gen(x):
# Flattens a nested list, taken from here but forgot the post
def iselement(e):
return not(isinstance(e, collections.Iterable) and not isinstance(e, str))
for el in x:
if iselement(el):
yield el
else:
yield from flat_gen(el)
tmp = [list(x) for x in arrows]
tmp_new = []
for a in tmp:
for b in tmp:
if b[1] == a[0]:
b[1] = a
tmp_new.append(b)
else:
tmp_new.append(b)
# Find longest chain
idx_longest = [len(list(flat_gen(e))) for e in tmp_new].index(max([len(list(flat_gen(e))) for e in tmp_new]))
longest = tmp_new[idx_longest]
# Identify missing elements in longest chain
missing = set(flat_gen(arrows)).difference(list(set(flat_gen(longest))))
for m in missing:
for a in arrows:
if m in a:
outer = [""]
i = 0
outer[i] = longest
found = False
while not found:
print(a[0])
print(outer[i])
inner = outer[i][1]
if outer[i][0] == a[0]:
# Found position where to insert
outer[i].append(a[1])
found = True
else:
# Continue searching
outer.append(inner)
i += 1
a = list(a)
a[1] = outer[i]
print(a)
Returns:
['root', ['verb', ['noun', ['modifier', 'adverb1']], 'adverb2']]
I have an input list like [1,2,2,1,6] the task in hand is to sort by the frequency. I have solved this question and am getting the output as [1,2,6].
But the caveat is that if two of the numbers have the same count like count(1) == count(2). So the desired output is [2,1,6]
then in the output array, 2 must come before 1 as 2 > 1.
So for the input [1,1,2,2,3,3] the output should be [3,2,1]. The counts are the same so they got sorted by their actual values.
This is what I did
input format:
number of Test cases
The list input.
def fun(l):
d = {}
for i in l:
if i in d:
d[i] += 1
else:
d[i] = 1
d1 = sorted(d,key = lambda k: d[k], reverse=True)
return d1
try:
test = int(input())
ans = []
while test:
l = [int(x) for x in input().split()]
ans.append(fun(l))
test -= 1
for i in ans:
for j in i:
print(j, end = " ")
print()
except:
pass
I think that this can help you. I added reverse parameter that is setting by default to True, because that gives the solution, but I wrote in the code where you can edit this as you may.
Here is the code:
from collections import defaultdict # To use a dictionary, but initialized with a default value
def fun(l, reverse = True):
d = defaultdict(int)
# Add count
for i in l:
d[i] += 1
# Create a dictionary where keys are values
new_d = defaultdict(list)
for key,value in d.items():
new_d[value].append(key)
# Get frequencies
list_freq = list(new_d.keys())
list_freq.sort(reverse = reverse) #YOU CAN CHANGE THIS
list_freq
# Add numbers in decreasing order by frequency
# If two integers have the same frequency, the greater number goes first
ordered_list = []
for number in list_freq:
values_number = new_d[number]
values_number.sort(reverse = reverse) # YOU CAN CHANGE THIS
ordered_list.extend(values_number)
return ordered_list
Examples:
l = [1,2,2,1,6]
fun(l)
#Output [2,1,6]
I hope this can help you!
I have a list like
mylist = [75,75,76,77,78,79,154,155,154,156,260,262,263,550,551,551,552]
i need to remove numbers are close to each other by maxumim four number like:
num-4 <= x <= num +4
the list i need at the end should be like :
list = [75,154,260,550]
or
list = [76,156,263,551]
doesn't really matter which number to stay in the list , only one of those which are close.
i tried this which gave me :
for i in range(len(l)):
for j in range(len(l)):
if i==j or i==j+1 or i==j+2 or i == j+3:
pp= l.pop(j)
print(pp)
print(l)
IndexError: pop index out of range
and this one which doesn't work the way i need:
for q in li:
for w in li:
print(q,'////',w)
if q == w or q ==w+1 or q==w+2 or q==w+3:
rem = li.remove(w)
thanks
The below uses groupby to identify runs from the iterable that start with a value start and contain values that differ from start by no more than 4. We then collect all of those start values into a list.
from itertools import groupby
def runs(difference=4):
start = None
def inner(n):
nonlocal start
if start is None:
start = n
elif abs(start-n) > difference:
start = n
return start
return inner
print([next(g) for k, g in groupby(mylist, runs())])
# [75, 154, 260, 550]
This assumes that the input data is already sorted. If it's not, you'll have to sort it: groupby(sorted(mylist), runs()).
You can accomplish this using a set or list, you don't need a dict.
usedValues = set()
newList = []
for v in myList:
if v not in usedValues:
newList.append(v)
for lv in range(v - 4, v + 5):
usedValues.add(lv)
print(newList)
This method stores all values within 4 of every value you've seen so far. When you look at a new value from myList, you only need to check if you've seen something in it's ballpark before by checking usedValues.
I have a nested list comprehension which has created a list of six lists of ~29,000 items. I'm trying to parse this list of final data, and create six separate dictionaries from it. Right now the code is very unpythonic, I need the right statement to properly accomplish the following:
1.) Create six dictionaries from a single statement.
2.) Scale to any length list, i.e., not hardcoding a counter shown as is.
I've run into multiple issues, and have tried the following:
1.) Using while loops
2.) Using break statements, will break out of the inner most loop, but then does not properly create other dictionaries. Also break statements set by a binary switch.
3.) if, else conditions for n number of indices, indices iterate from 1-29,000, then repeat.
Note the ellipses designate code omitted for brevity.
# Parse csv files for samples, creating a dictionary of key, value pairs and multiple lists.
with open('genes_1') as f:
cread_1 = list(csv.reader(f, delimiter = '\t'))
sample_1_values = [j for i, j in (sorted([x for x in {i: float(j)
for i, j in cread_1}.items()], key = lambda v: v[1]))]
sample_1_genes = [i for i, j in (sorted([x for x in {i: float(j)
for i, j in cread_1}.items()], key = lambda v: v[1]))]
...
# Compute row means.
mean_values = []
for i, (a, b, c, d, e, f) in enumerate(zip(sample_1_values, sample_2_values, sample_3_values, sample_4_values, sample_5_values, sample_6_values)):
mean_values.append((a + b + c + d + e + f)/6)
# Provide proper gene names for mean values and replace original data values by corresponding means.
sample_genes_list = [i for i in sample_1_genes, sample_2_genes, sample_3_genes, sample_4_genes, sample_5_genes, sample_6_genes]
sample_final_list = [sorted(zip(sg, mean_values)) for sg in sample_genes_list]
# Create multiple dictionaries from normalized values for each dataset.
class BreakIt(Exception): pass
try:
count = 1
for index, items in enumerate(sample_final_list):
sample_1_dict_normalized = {}
for index, (genes, values) in enumerate(items):
sample_1_dict_normalized[genes] = values
count = count + 1
if count == 29595:
raise BreakIt
except BreakIt:
pass
...
try:
count = 1
for index, items in enumerate(sample_final_list):
sample_6_dict_normalized = {}
for index, (genes, values) in enumerate(items):
if count > 147975:
sample_6_dict_normalized[genes] = values
count = count + 1
if count == 177570:
raise BreakIt
except BreakIt:
pass
# Pull expression values to qualify overexpressed proteins.
print 'ERG values:'
print 'Sample 1:', round(sample_1_dict_normalized.get('ERG'), 3)
print 'Sample 6:', round(sample_6_dict_normalized.get('ERG'), 3)
Your code is too long for me to give exact answer. I will answer very generally.
First, you are using enumerate for no reason. if you don't need both index and value, you probably don't need enumerate.
This part:
with open('genes.csv') as f:
cread_1 = list(csv.reader(f, delimiter = '\t'))
sample_1_dict = {i: float(j) for i, j in cread_1}
sample_1_list = [x for x in sample_1_dict.items()]
sample_1_values_sorted = sorted(sample_1_list, key=lambda expvalues: expvalues[1])
sample_1_genes = [i for i, j in sample_1_values_sorted]
sample_1_values = [j for i, j in sample_1_values_sorted]
sample_1_graph_raw = [float(j) for i, j in cread_1]
should be (a) using a list named samples and (b) much shorter, since you don't really need to extract all this information from sample_1_dict and move it around right now. It can be something like:
samples = [None] * 6
for k in range(6):
with open('genes.csv') as f: #but something specific to k
cread = list(csv.reader(f, delimiter = '\t'))
samples[k] = {i: float(j) for i, j in cread}
after that, calculating the sum and mean will be way more natural.
In this part:
class BreakIt(Exception): pass
try:
count = 1
for index, items in enumerate(sample_final_list):
sample_1_dict_normalized = {}
for index, (genes, values) in enumerate(items):
sample_1_dict_normalized[genes] = values
count = count + 1
if count == 29595:
raise BreakIt
except BreakIt:
pass
you should be (a) iterating of the samples list mentioned earlier, and (b) not using count at all, since you can iterate naturally over samples or sample[i].list or something like that.
Your code has several problems. You should put your code in functions that preferably do one thing each. Than you can call a function for each sample without repeating the same code six times (I assume that is what the ellipsis is hiding.). Give each function a self-describing name and a doc string that explains what it does. There is quite a bit unnecessary code. Some of this might become obvious once you have it in functions. Since functions take arguments you can hand in your 29595, for example.
I have an 2 dimensional array. Each of the row vectors, in this case, is considered a quantity of interest. What I want to do is return all the rows that appear exactly once as one array, and all the rows that appear more than once as a second array.
For example, if the array was:
a=[[1,1,1,0], [1,1,1,0], [5,1,6,0], [3,2,1,0], [4,4,1,0], [5,1,6,0]]
I would like to return two arrays:
nonsingles=[[1,1,1,0], [1,1,1,0], [5,1,6,0], [5,1,6,0]]
singles= [[3,2,1,0], [4,4,1,0]]
It is important that the order stay preserved. The code I have written to do this is as follows:
def singles_nonsingles(array):
#returns the elements that occur only once, and the elements
#that occur more than once in the array
singles=[]
nonsingles=[]
arrayhash=map(tuple, array)
for x in arrayhash:
if (arrayhash.count(x)==1):
singles.append(x)
if (arrayhash.count(x)>1):
nonsingles.append(x)
nonsingles=array(nonsingles)
singles=array(singles)
return {'singles':singles, 'nonsingles':nonsingles}
Now, I am happy to say that this works, but unhappy to say that it is extremely slow, as a typical array i have is 30000(rows)x10 elements/row=300000 elements. Can anyone give me some tips about how to speed this up?? I apologize if this question is very simple, I am new to Python. Also, I am using Numpy/Scipy with Python 2.7, if that is any help.
In Python 2.7 or above, you can use collections.Counter to count the number of occurrences:
def unique_items(iterable):
tuples = map(tuple, iterable)
counts = collections.Counter(tuples)
unique = []
non_unique = []
for t in tuples:
if counts[t] == 1:
unique.append(t)
else:
non_unique.append(t)
return unique, non_unique
I think your problem is that you are doing an in test on a list. This has O(n) performance.
It should be faster to build a dict and then use that to figure out what to do with each row.
EDIT: The code had an unnecessary enumerate() in it; I stripped it out.
from collections import defaultdict
def singles_nonsingles(array):
#returns the elements that occur only once, and the elements
#that occur more than once in the array
singles=[]
nonsingles=[]
d = defaultdict(int)
t = [tuple(row) for row in array]
for row in t:
d[row] += 1
for row in t:
if d[row] == 1:
singles.append(row)
else:
nonsingles.append(row)
return {'singles':singles, 'nonsingles':nonsingles}
Here's a version that only returns unique rows:
from collections import defaultdict
def singles_nonsingles(array):
#returns the elements that occur only once, and the elements
#that occur more than once in the array
singles=[]
nonsingles=[]
d = defaultdict(int)
already_seen = set()
t = [tuple(row) for row in array]
for row in t:
d[row] += 1
for row in t:
if row in already_seen:
continue
if d[row] == 1:
singles.append(row)
else:
nonsingles.append(row)
already_seen.add(row)
return {'singles':singles, 'nonsingles':nonsingles}
a=[[1,1,1,0], [1,1,1,0], [5,1,6,0], [3,2,1,0], [4,4,1,0], [5,1,6,0]]
x = singles_nonsingles(a)
print("Array: " + str(a))
print(x)
The first return only the list of the single/no single arrays without repetitions, the second with repetitions
def comp (multi):
from collections import defaultdict
res = defaultdict(int)
for vect in multi:
res[tuple(vect)] += 1
singles = []
no_singles = []
for k in res:
if res[k] > 1:
no_singles.append(list(k))
elif res[k] == 1:
singles.append(list(k))
return singles, no_singles
def count_w_repetitions(multi):
from collections import defaultdict
res = defaultdict(int)
for vect in multi:
res[tuple(vect)] += 1
singles = []
no_singles = []
for k in res:
if res[k] == 1:
singles.append(list(k))
else:
for i in xrange(res[k]):
no_singles.append(list(k))
return singles, no_singles
from itertools import compress,imap
def has_all_unique(a):
return len(a) == len(frozenset(a))
uniq = map( has_all_unique,a)
singles = list(compress(a,uniq))
notuniq = imap(lambda x: not x,uniq)
nonsingles = list(compress(a,notuniq))