append dictionaries from csv file - python

The following code applies one update to my project:
tagPath = ["Package_PLC/Tags/CCN_CNV01_MX001_A_FLT"]
alarmConfig = {"BLD_CHN01_VS001_A_FLT_C":[["enabled","Value","0"]]}
system.tag.editAlarmConfig(tagPath, alarmConfig)
I need to do this hundreds of times.
I am trying to build tagPath and alarmConfig dictionaries from a csv file.
Sample csv:
Equipment,Item Name,Alarm Tag,Alarm Name,Cluster Name,Category,Alarm Desc,Delay,Help,Comment,Variable Tag A,Variable Tag B,Custom 1,Custom 2,Custom 3,Custom 4,Custom 5,Custom 6,Custom 7,Custom 8,Paging,Paging Group,Area,Privilege,Historize,Project,SEQUENCE,TAGGENLINK,EDITCODE,LINKED
"","","BLD_CHN01_VS001_A_FLT_C","BLD_CHN01_VS001_A_FLT_C","","","Catch-up Conveyor / Chain Comms Fault","00:00:00","","BLD_CHN01_VS001_A_FLT_C","BLD_CHN01_VS001_A_FLT_C","KFS_ZNE02_WRM","STUN","","","","","","","","","","1","","","","","","",""
"","","BLD_CHN01_VS001_A_FLT_V","BLD_CHN01_VS001_A_FLT_V","","","Catch-up Conveyor / Chain VSD Fault","00:00:00","","BLD_CHN01_VS001_A_FLT_V","BLD_CHN01_VS001_A_FLT_V","","STUN","","","","","","","","","","1","","","","","","",""
"","","BLD_CHN01_VS001_S_HTY","BLD_CHN01_VS001_S_HTY","","","Catch-up Conveyor / Chain Cicuit Breaker","00:00:00","","BLD_CHN01_VS001_S_HTY","NOT BLD_CHN01_VS001_S_HTY","KFS_ZNE02_WRM","STUN","","","","","","","","","","1","","","","","","",""
This is what I have so far:
import system
import csv

path = system.file.openFile('csv')
if path != None:
    print "path found"
    f = open(path)
    reader = csv.DictReader(f)
    path1 = "Package_PLC/Tags/"
    tagpath = []
    alarmConfig = []
    state = 0
    comment = ""
    for i in reader:
        if row['Alarm Tag'] == 'ECN*' || 'FCN*' || 'PAC*':
            tagpath.append(path1 + int(row['Alarm Tag']))
            alarmname = row[Alarm Tag]
            if row[Variable Tag A] == "NOT*":
                state = 0
            else:
                state = 1
            comment = row[Alarm Desc]
            alarmConfig.append({alarmname: [["setpointA","Value",state],
                                            ["displayPath","Value","Packing"],
                                            ["notes","Value",comment]]
                               })
    system.tag.editAlarmConfig(tagPaths, alarmConfig)
    f.close()
The following error gets thrown.
Traceback (most recent call last):
File "<buffer>", line 28, in <module>
TypeError: list indices must be integers

This worked (switching to "in" substring tests instead of comparing against wildcard strings, quoting the column names, and using the loop variable consistently):
import string
import system
import csv

path = system.file.openFile('csv')
if path != None:
    print "path found"
    f = open(path)
    reader = csv.DictReader(f)
    path1 = "Package_PLC/Tags/"
    tagpath = []
    alarmConfig = {}
    state = 0
    readerlist = list(reader)
    for stuff in readerlist:
        if "PAC" in stuff['Alarm Tag'] or "ECN" in stuff['Alarm Tag'] or "CCN" in stuff['Alarm Tag'] or "FCN" in stuff['Alarm Tag']:
            tagpath = []
            tagpath.append(str(path1 + stuff['Alarm Tag']))
            if "NOT" in stuff['Variable Tag A']:
                state = 0
            else:
                state = 1
            display = ['displayPath', 'Value', 'Packing']
            notes = ['notes', 'Value', str(stuff['Alarm Desc'])]
            setpointA = ['setpointA', 'Value', str(state)]
            alarmConfig = {}
            alarmConfig[stuff['Alarm Tag']] = [display, notes, setpointA]
            system.tag.editAlarmConfig(tagpath, alarmConfig)
    f.close()

It's difficult to help you because:
the sample file doesn't trigger anything (none of its Alarm Tag values start with ECN, FCN or PAC)
you didn't provide the system module
But still, here's my attempt:
import os.path
import csv

input_file_name = 'Sample.csv'
if os.path.exists(input_file_name):
    with open(input_file_name, newline='') as input_file:
        events = csv.DictReader(input_file)
        data_extracted = [
            (
                current_event['Alarm Tag'],
                0 if current_event['Variable Tag A'].startswith('NOT') else 1,
                current_event['Alarm Desc']
            )
            for current_event in events
            if current_event['Alarm Tag'][:3] in ('ECN', 'FCN', 'PAC')
        ]
        tag_paths = [f'Package_PLC/Tags/{x[0]}' for x in data_extracted]
        alarm_config = {
            alarm_name: [
                ['setpointA', 'Value', state],
                ['displayPath', 'Value', 'Packing'],
                ['notes', 'Value', comment]
            ]
            for (alarm_name, state, comment) in data_extracted
        }
        system.tag.editAlarmConfig(tag_paths, alarm_config)

Related

No values given for wildcard, expand issue

I'm new to Python and Snakemake. I'm trying to create a bed file from trf output. I'm reusing code from GitHub, but I don't need to do everything the GitHub code does. I've pulled the relevant trf sections from here:
https://github.com/mrvollger/assembly_workflows/blob/master/workflows/mask.smk
When I run the script, I get an error:
No values given for wildcard 'ID,\\d+'.
File "/project/90daydata/cotton_genomics/genomes/GB0085/reference/trf.smk", line 107, in <module>
From what I've read in other posts, the issue isn't really the wildcard constraint, but rather the expand function on line 107. I'm still figuring out how expand works; I'm missing something and don't understand how to fix it. It's still mind-bending to me. Any help is appreciated.
import os
import sys
import re
import pysam
import pandas as pd
from datetime import date
from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider
from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider

FTP = FTPRemoteProvider()
HTTP = HTTPRemoteProvider()

today = date.today()
DATE = today.strftime("%Y/%m/%d")

SDIR = os.path.realpath(os.path.dirname(srcdir("env.cfg")) + "/..")
shell.prefix(f"source {SDIR}/env.cfg ; set -eo pipefail; ")

# delete if not debug
DEBUG = True

def tempd(fname):
    if DEBUG:
        return fname
    return temp(fname)

FASTA = os.path.abspath(config["fasta"])
FAI = FASTA + ".fai"
assert os.path.exists(FAI), f"Index must exist. Try: samtools faidx {FASTA}"

# WILDCARDS
NIDS = min(200, len(open(FAI).readlines()))
IDS = ["{:03}".format(ID + 1) for ID in range(NIDS)]
# IDS = [ 1 ]

#x = range(6)
#for n in x:
#    print(n)
for y in range(len(IDS)):
    print(IDS[y], "+++")
print(open(FAI).readlines())

SM = "asm"
if "sample" in config:
    SM = config["sample"]
SPECIES = "human"
if "species" in config:
    SPECIES = config["species"]
THREADS = 16
if "threads" in config:
    THREADS = config["threads"]
SMS = [SM]

wildcard_constraints:
    SM="|".join(SMS),
    ID="\d+",

# FASTA_FMT = f"Masked/temp/{SM}_{{ID}}.fasta"
FASTA_FMT = f"temp/{SM}_{{ID}}.fa"
TRFBED = os.path.abspath(f"{SM}_{{ID}}.trf.bed")

rule split_fasta:
    input:
        fasta = FASTA,
    output:
        fastas = tempd(expand(FASTA_FMT, ID=IDS)),
    threads: 1
    resources:
        mem=8
    run:
        fasta = pysam.FastaFile(input["fasta"])
        outs = [open(f, "w+") for f in output.fastas]
        outidx = 0
        for name in fasta.references:
            seq = fasta.fetch(name)
            outs[outidx].write(">{}\n{}\n".format(name, seq))
            outidx += 1
            if outidx == NIDS:
                outidx = 0
        for out in outs:
            out.close()

# This runs trf on the temp output
rule run_trf:
    input:
        fasta = FASTA_FMT,
    output:
        dat = tempd(FASTA_FMT + ".dat")
    benchmark:
        FASTA_FMT + ".bench"
    resources:
        mem=24,
    threads: 1
    shell: """
trf {input.fasta} 2 7 7 80 10 50 15 -l 25 -h -ngs > {output.dat}
"""

#for yy in range(len(IDS)):
#    print(IDS[yy], "++")

rule trf_bed:
    input:
        dats = expand(rules.run_trf.output.dat, ID=IDS, SM=SM),
    output:
        bed = TRFBED,
    resources:
        mem=8,
    threads: 1
    run:
        trf = []
        header = '#chr start end PeriodSize CopyNumber ConsensusSize PercentMatches PercentIndels Score A C G T Entropy Motif Sequence'.split()
        for datf in input.dats:
            chrom = None
            sys.stderr.write("\r" + datf)
            with open(datf, 'r') as dat:
                for line in dat:
                    splitline = line.split()
                    if line.startswith("Sequence:"):
                        chrom = int(line.split()[1].strip())
                        #sys.stderr.write(chrom + "\n")
                    elif line.startswith("#"):
                        chrom = splitline[0][1:].strip()  # grab everything after the # in the first word
                    else:
                        # Catch index errors when line is blank
                        try:
                            # Check if in header sequence (all non-header lines start with an int: start pos)
                            try:
                                int(splitline[0])
                            except ValueError:
                                continue
                            trf.append([chrom] + splitline[0:(len(header) - 1)])
                        except IndexError:
                            pass
        trf = pd.DataFrame(trf, columns=header)
        print(trf.shape)
        trf["start"] = trf["start"].astype(int)
        trf.sort_values(by=["#chr", "start"], inplace=True)
        print("done sorting trf")
        trf.to_csv(output.bed, sep="\t", index=False)

rule trf:
    input:
        bed = rules.trf_bed.output.bed
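
Regarding how expand works: it takes a pattern containing {wildcard} placeholders plus keyword lists of values, and returns the list of strings produced by filling in every combination of those values. A minimal sketch, runnable outside a Snakefile (the pattern and values here are made up for illustration):

from snakemake.io import expand

# expand() substitutes every combination of the supplied values into the
# pattern's wildcards and returns the resulting list of strings.
files = expand("temp/{SM}_{ID}.fa", SM="asm", ID=["001", "002", "003"])
print(files)
# ['temp/asm_001.fa', 'temp/asm_002.fa', 'temp/asm_003.fa']

The error message names the wildcard 'ID,\d+', which suggests the pattern being expanded (rules.run_trf.output.dat) carries the global wildcard constraint embedded in it as {ID,\d+}, so expand looks for values for a wildcard literally named ID,\d+ and finds none. If that is what's happening, expanding the plain pattern string instead, e.g. expand(FASTA_FMT + ".dat", ID=IDS), may avoid it, since SM is already baked into FASTA_FMT by the f-string.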

JSONDecodeError: Expecting value: line 1 column 1 (char 0) when using Pushift API to scrape Reddit Data

import pandas as pd
import requests
import json
import datetime
import csv

def get_pushshift_data(after, before, sub):
    url = 'https://api.pushshift.io/reddit/search/submission/?&after=' + str(after) + '&before=' + str(before) + '&subreddit=' + str(sub) + '&sort=asc&sort_type=created_utc&size=400'
    print(url)
    r = requests.get(url).json()
    # data = json.loads(r.text, strict=False)
    return r['data']

def collect_subData(subm):
    subData = list()  # list to store data points
    title = subm['title']
    url = subm['url']
    try:
        flair = subm['link_flair_text']
    except KeyError:
        flair = "NaN"
    try:
        # returns the body of the posts
        body = subm['selftext']
    except KeyError:
        body = ''
    author = subm['author']
    subId = subm['id']
    score = subm['score']
    created = datetime.datetime.fromtimestamp(subm['created_utc'])  # 1520561700.0
    numComms = subm['num_comments']
    permalink = subm['permalink']
    subData.append((subId, title, body, url, author, score, created, numComms, permalink, flair))
    subStats[subId] = subData

def update_subFile():
    upload_count = 0
    location = "subreddit_data_uncleaned/"
    print("Input filename of submission file, please add .csv")
    filename = input()
    file = location + filename
    with open(file, 'w', newline='', encoding='utf-8') as file:
        a = csv.writer(file, delimiter=',')
        headers = ["Post ID", "Title", "Body", "Url", "Author", "Score", "Publish Date", "Total No. of Comments", "Permalink", "Flair"]
        a.writerow(headers)
        for sub in subStats:
            a.writerow(subStats[sub][0])
            upload_count += 1
        print(str(upload_count) + " submissions have been uploaded into a csv file")

# global dictionary to hold 'subData'
subStats = {}
# tracks no. of submissions
subCount = 0
# Subreddit to query
sub = 'politics'
# Unix timestamp of date to crawl from.
before = int(datetime.datetime(2021, 5, 17, 0, 0).timestamp())
after = int(datetime.datetime(2014, 1, 1, 0, 0).timestamp())

data = get_pushshift_data(after, before, sub)
while len(data) > 0:
    for submission in data:
        collect_subData(submission)
        subCount += 1
    # Calls getPushshiftData() with the created date of the last submission
    print(len(data))
    print(str(datetime.datetime.fromtimestamp(data[-1]['created_utc'])))
    after = data[-1]['created_utc']
    data = get_pushshift_data(after, before, sub)

print(len(data))
update_subFile()
The first call to get_pushshift_data(after, before, sub) scrapes the data with no error. But when I do the same thing again (the call at the end of the while loop) with a different int value for the after variable, the program raises JSONDecodeError: Expecting value: line 1 column 1 (char 0).
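A likely cause, though it can't be confirmed from the code alone: JSONDecodeError at line 1 column 1 means the response body wasn't JSON at all, which Pushshift often produces (an empty body or an HTML error page) when rapid repeated requests hit its rate limit. Below is a hedged sketch of a hardened get_pushshift_data that checks the status code and retries with backoff; max_retries and the sleep schedule are arbitrary choices, not part of the original code:

import time
import requests

def get_pushshift_data(after, before, sub, max_retries=5):
    url = ('https://api.pushshift.io/reddit/search/submission/'
           '?&after=' + str(after) + '&before=' + str(before) +
           '&subreddit=' + str(sub) + '&sort=asc&sort_type=created_utc&size=400')
    for attempt in range(max_retries):
        r = requests.get(url)
        if r.status_code == 200:
            try:
                return r.json()['data']
            except ValueError:
                # body was not valid JSON; fall through and retry
                pass
        time.sleep(2 ** attempt)  # back off before retrying
    raise RuntimeError('no JSON response from ' + url)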

CS50 PSET6 - DNA - Works fine on SMALL but not for LARGE database

I'm taking CS50 and got stuck on pset6.
I made this code and it works fine for the given 'small' database, but on the 'large' one I get wrong values in my DNA sequence.
For example, using debug50 I found that the Albus sequence should be 15, 49, 38..., but my seq is 21, 55, 64...
What's wrong? And why does it work fine on the small database but not on the large one?
Thanks for the help!
# Import ARGV and CSV library
from sys import argv, exit
import pandas as pd
import csv

# Check if argv has 3 arguments (program name, csv file and dna sequence)
while True:
    if len(argv) != 3:
        print("Usage: python dna.py data.csv sequence.txt")
        exit(1)
    else:
        break

with open(argv[2], 'r', encoding="UTF-8") as txt:
    dna_seq = txt.read()

# Find the number of STR - AGATC,TTTTTTCT,AATG,TCTAG,GATA,TATC,GAAA,TCTG
AGATC = dna_seq.count("AGATC")
TTTTTTCT = dna_seq.count("TTTTTTCT")
AATG = dna_seq.count("AATG")
TCTAG = dna_seq.count("TCTAG")
GATA = dna_seq.count("GATA")
TATC = dna_seq.count("TATC")
GAAA = dna_seq.count("GAAA")
TCTG = dna_seq.count("TCTG")

name = 0
if argv[1] == "databases/small.csv":
    with open(argv[1], 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            dna_db = row['name'], row['AGATC'], row['AATG'], row['TATC']
            dna_db = list(dna_db)
            seq = [AGATC, AATG, TATC]
            seq = [str(x) for x in seq]
            if dna_db[1:4] == seq:
                name = dna_db[:1]
                break
            else:
                name = "No match"
elif argv[1] == "databases/large.csv":
    with open(argv[1], 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            dna_db = (row['name'], row['AGATC'], row['TTTTTTCT'], row['AATG'], row['TCTAG'],
                      row['GATA'], row['TATC'], row['GAAA'], row['TCTG'])
            dna_db = list(dna_db)
            seq = [AGATC, TTTTTTCT, AATG, TCTAG, GATA, TATC, GAAA, TCTG]
            seq = [str(x) for x in seq]
            if dna_db[1:9] == seq:
                name = dna_db[:1]
                break
            else:
                name = "No match"
print(name)
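
A probable cause, judging from the code rather than the full problem spec: str.count counts every non-overlapping occurrence of the substring anywhere in the sequence, but the DNA problem matches people on the longest run of consecutive repeats of each STR. Short patterns such as AATG also occur incidentally throughout the long sequences in the large database, inflating count() well past the true run length, which is exactly the 21 vs 15 style mismatch described above. A sketch of a longest-consecutive-run counter (longest_run is a made-up helper name):

def longest_run(sequence, pattern):
    """Return the highest number of back-to-back repeats of pattern."""
    longest = 0
    for i in range(len(sequence)):
        count = 0
        # extend the run for as long as pattern repeats contiguously from i
        while sequence[i + count * len(pattern): i + (count + 1) * len(pattern)] == pattern:
            count += 1
        longest = max(longest, count)
    return longest

Each count line would then become, e.g., AGATC = longest_run(dna_seq, "AGATC").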

List of tuples to an xls Python

Sorry if I'm doing something wrong, I'm new here.
I have a problem with my Python code.
I have a sorted list built from a dictionary. The sorted list looks like:
sorted_Dict = [('158124', 26708), ('146127', 12738), ('21068', 9949),
               ('274186', 8255), ('189509', 6550), ('165758', 5346), ...]
I now want to print it to an xls file which should look like:
x y
'158124' 26708
I have to plot it in Excel, but I would also like to plot it in Python (which is not necessary, but cool), and I don't get how to do this. Here is my whole code. Thank you for any help!
Cheers,
Sven
# -*- coding: iso-8859-1 -*-
from __future__ import division
import csv
import operator

def computeSoldProducts():
    catalog = csv.reader(open("data/catalog.csv", "r"))
    sales = csv.reader(open("data/sales_3yr.csv", "r"))
    output = open("output.csv", "a")

    catalogIDs = set()
    lineNumber = 0
    # read catalog
    for line in catalog:
        id = line[0]
        if lineNumber <> 0:
            catalogIDs.add(eval(id))
        lineNumber = 1

    soldItems = set()
    lineNumber = 0
    # read sales
    for line in sales:
        id = line[6]
        if lineNumber <> 0:
            soldItems.add(eval(id))
        lineNumber = 1

    print "number of products:", len(catalogIDs)
    print "products sold:", len(soldItems)
    notSoldIDs = catalogIDs - soldItems
    print len(notSoldIDs)

    catalog = csv.reader(open("data/catalog.csv", "r"))
    sales = csv.reader(open("data/sales_3yr.csv", "r"))

    soldDict = {}
    for k in catalog:
        soldDict[str(k[0])] = 0
    for item in sales:
        if str(item[6]) in soldDict:
            soldDict[str(item[6])] += 1

    sorted_soldDict = sorted(soldDict.iteritems(), key=operator.itemgetter(1), reverse=True)
    print sorted_soldDict

    for k in sorted_soldDict:
        output.write(sorted_soldDict[k])
    print "done"

computeSoldProducts()
Straight from the docs for the csv module:
import csv

with open('text.csv', 'wb') as csvfile:
    fwriter = csv.writer(csvfile)
    for x in sorted_list:
        fwriter.writerow(x)
You can then open this csv file in Excel.
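
If an actual .xls file is wanted rather than a csv that Excel can open, the xlwt library can write one. A minimal sketch, assuming sorted_Dict is the list of tuples shown in the question:

import xlwt

book = xlwt.Workbook()
sheet = book.add_sheet('sorted')
# header row
sheet.write(0, 0, 'x')
sheet.write(0, 1, 'y')
# one tuple per row: column 0 gets the key, column 1 the count
for rownum, (key, count) in enumerate(sorted_Dict, start=1):
    sheet.write(rownum, 0, key)
    sheet.write(rownum, 1, count)
book.save('output.xls')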
One alternative is to use my library pyexcel, documentation is here: http://pythonhosted.org//pyexcel/
import pyexcel
sorted_list_of_sets = ....
writer = pyexcel.Writer("output.csv")
writer.write_array(sorted_list_of_sets)
writer.close()
Your original solution becomes the following if pyexcel is used:
import pyexcel
import operator

def computeSoldProducts():
    catalog = pyexcel.SeriesReader("data/catalog.csv")
    sales = pyexcel.SeriesReader("data/sales_3yr.csv")
    print "number of products:", catalog.number_of_rows()
    print "products sold:", sales.number_of_rows()
    product_list = catalog.column_at(0)
    solditem_list = sales.column_at(6)
    soldOnes = []
    for item in solditem_list:
        if item not in soldOnes:
            soldOnes.append(item)
    notSoldIDs = catalog.number_of_rows() - len(soldOnes)
    print notSoldIDs
    print product_list
    print solditem_list
    # initialize the soldDict
    zeros_array = [0] * len(product_list)
    soldDict = dict(zip(product_list, zeros_array))
    for item in solditem_list:
        if item in product_list:
            soldDict[item] += 1
    sorted_soldDict = sorted(soldDict.iteritems(), key=operator.itemgetter(1), reverse=True)
    print sorted_soldDict
    writer = pyexcel.Writer("output.csv")
    writer.write_row(["product", "number"])
    writer.write_array(sorted_soldDict)
    writer.close()
    print "done"

computeSoldProducts()

Making columns of data lists in python

So I have a program that reads through a bunch of files and appends the data I need. I now need to take that data and display it as a list. To be more specific, these are the parameters I have:
a = Source, b = luminosity, c = luminosity error, d = HST, e = XRS, f = gmag, g = z, and h = rh
I want to display this as a list, with each parameter defining a particular column. I just don't know where exactly I should insert the print statement among the various for loops I've used to do this.
I would appreciate any help! Here's the program (the main focus is the for loops and how they iterate through the data; don't worry about the indentation, the program works so far, I just need to display the appended data in columns):
import sys
import os
import re
import urllib
import urllib2
from os.path import basename
import urlparse
import shutil

base_dirname = '/projects/XRB_Web/apmanuel/499/'
base_sourcefile = base_dirname + 'Sources.txt'
try:
    file = open(base_sourcefile, 'r')
except IOError:
    print 'Cannot open: '+base_sourcefile

Source = []
Finallist = []
ACS = []
SRC = []

for line in file:
    data_line_check = (line.strip())
    if data_line_check:
        line = re.sub(r'\s+', ' ', line)
        point = line.split('|')

        temp_source = (point[0]).strip()
        if temp_source and len(point) == 3:
            Source = (point[0]).strip()
            Source = re.sub(r'\s', '_', Source)
            print Source+"\n"

        temp_finallist = (point[1]).strip()
        if temp_finallist:
            Finallistaddress = (point[1]).strip()
            Finallistaddress = re.sub(r'\s', '_', Finallistaddress)
            Luminositybase_dirname1 = '/projects/XRB_Web/apmanuel/499/Lists/' + Finallistaddress
            try:
                file2 = open(Luminositybase_dirname1, 'r')
            except IOError:
                print 'Cannot open: '+Luminositybase_dirname1
            source = []
            luminosity = []
            luminosityerr = []
            for line in file2:
                pointy = line.split()
                a = int(pointy[0])
                b = float(pointy[5])
                c = float(pointy[6])
                source.append(a)
                luminosity.append(b)
                luminosityerr.append(c)

        temp_HST = (point[2]).strip()
        if temp_HST:
            HSTaddress = (point[2]).strip()
            HSTaddress = re.sub(r'\s', '_', HSTaddress)
            HSTbase_dirname2 = '/projects/XRB_Web/apmanuel/499/Lists/' + HSTaddress
            try:
                file3 = open(HSTbase_dirname2, 'r')
            except IOError:
                print 'Cannot open: '+HSTbase_dirname2
            HST = []
            for line in file3:
                pointy2 = line.split()
                d = int(pointy2[0])
                HST.append(d)

        temp_XRS = (point[3]).strip()
        if temp_XRS:
            XRSaddress = (point[3]).strip()
            XRSaddress = re.sub(r'\s', '_', XRSaddress)
            XRSbase_dirname3 = '/projects/XRB_Web/apmanuel/499/Lists/' + XRSaddress
            try:
                file4 = open(XRSbase_dirname3, 'r')
            except IOError:
                print 'Cannot open: '+XRSbase_dirname3
            XRS = []
            for line in file4:
                pointy3 = line.split()
                e = int(pointy3[0])
                XRS.append(e)

        temp_others = (point[4]).strip()
        if temp_others:
            othersaddress = (point[4]).strip()
            othersaddress = re.sub(r'\s', '_', othersaddress)
            othersbase_dirname4 = '/projects/XRB_Web/apmanuel/499/Lists/' + othersaddress
            try:
                file5 = open(othersbase_dirname4, 'r')
            except IOError:
                print 'Cannot open: '+othersbase_dirname4
            gmag = []
            z = []
            rh = []
            for line in file5:
                pointy4 = line.split()
                f = float(pointy4[3])
                g = float(pointy4[5])
                h = float(pointy4[7])
                rh.append(f)
                gmag.append(g)
                z.append(h)
This function will return a column from a list of rows. Note that it requires every row to have an element at the column index you are trying to access, though it would be relatively simple to change that if you need to.
def getcolumn(matrix, index):
    # index specifies which column of the matrix you want. Note that, like
    # all other list indexes, this starts from 0, not 1.
    column = []
    for row in matrix:
        column.append(row[index])
    return column
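
For example, assuming the parallel lists built in the question (the variable names below just mirror the question's code), the lists can be zipped into rows and a single column pulled back out by index:

# build rows out of the parallel lists collected above
rows = zip(source, luminosity, luminosityerr)

# recover individual columns by position
sources = getcolumn(rows, 0)
luminosities = getcolumn(rows, 1)
print luminosities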
