List index out of range error: python

import MySQLdb
import pyodbc
typesFile = open('servinfo.txt', 'r').readlines()
dataTypes = dict((row.split(',')[0].strip(),row.split(',')[1].strip()) for row in typesFile)
conn = pyodbc.connect('DRIVER={FreeTDS}; SERVER=prsoft; DATABASE=blueseas; UID=sa; PWD=tiger')
msCursor = conn.cursor()
db = MySQLdb.connect(passwd="pr", db="tenable")
myCursor = db.cursor()
msCursor.execute("SELECT * FROM airlinemaster WHERE type='U'")
dbTables = msCursor.fetchall()
noLength = [56, 58, 61]
for tbl in dbTables:
msCursor.execute("SELECT * FROM airlinemaster WHERE airline = air india('%s')" % tbl[0]) #syscolumns: see sysobjects above.
columns = msCursor.fetchall()
attr = ""
for col in columns:
colType = dataTypes[str(col.xtype)]
if col.xtype == 60:
colType = "float"
attr += col.name +" "+ colType + "(" + str(col.length) + "),"
elif col.xtype in noLength:
attr += col.name +" "+ colType + ","
else:
attr += col.name +" "+ colType + "(" + str(col.length) + "),"
attr = attr[:-1]
myCursor.execute("CREATE TABLE " + tbl[0] + " (" + attr + ");") #create the new table and all columns
msCursor.execute("select * from %s" % tbl[0])
tblData = msCursor.fetchall()
#populate the new MySQL table with the data from MSSQL
for row in tblData:
fieldList = ""
for field in row:
if field == None:
fieldList += "NULL,"
else:
field = MySQLdb.escape_string(str(field))
fieldList += "'"+ field + "',"
fieldList = fieldList[:-1]
myCursor.execute("INSERT INTO " + tbl[0] + " VALUES (" + fieldList + ")" )
I've tried the above code in many ways to import the data, but I still keep getting an error: list index out of range. I don't know where I'm going wrong. How do I solve this?
Traceback (most recent call last):
  File "C:\Python27\programs new\codes.py", line 10, in <module>
    dataTypes = dict((row.split(',')[0].strip(),row.split(',')[1].strip()) for row in typesFile)
  File "C:\Python27\programs new\codes.py", line 10, in <genexpr>
    dataTypes = dict((row.split(',')[0].strip(),row.split(',')[1].strip()) for row in typesFile)
IndexError: list index out of range
Please help. Thanks in advance.

You have at least one row in typesFile that does not have a comma (,) in it:
>>> 'line without a comma'.split(',')
['line without a comma']
>>> 'line without a comma'.split(',')[1]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
IndexError: list index out of range
If servinfo.txt is supposed to be a comma-separated file with two columns, why not use the csv module to read it instead?
import csv
csvreader = csv.reader(open('servinfo.txt', 'rb'))
dataTypes = dict(csvreader)
That won't solve the issue of the file containing a line with no comma, though; you'll have to fix that first.
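If fixing the file isn't an option, a rough alternative is to skip such lines while building the dictionary. A minimal sketch, assuming servinfo.txt keeps the two-column layout described above:
dataTypes = {}
for row in open('servinfo.txt', 'r'):
    parts = row.split(',')
    if len(parts) < 2:        # line without a comma: ignore it
        continue
    dataTypes[parts[0].strip()] = parts[1].strip()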

Related

Matching regex in Python from an Excel file

I'm using regex to match the following Excel file, and I'm struggling with how to separate each row into
the timestamp [0:00:48],
the ID 20052A,
and the content content (more content).
This is the Excel row (one of many, so the ID can vary from row to row, and so can the timestamp and the content):
[0:00:48] 20052A: content (more content)
I get an error,
AttributeError: 'NoneType' object has no attribute 'group'
for matching my ID, where I have
(r"^(.+:)(.+)|(r(\w+)?\s*\[(.*)\]\s*(\w+))", c)
Keep in mind that from time to time the ID looks something like this:
[0:00:33] 30091aA: (content) 
My whole script is below (with the database connection details blanked out):
import os
import re
import pymysql
pymysql.install_as_MySQLdb()
import pandas as pd
import sqlalchemy
def insert_or_update(engine, pd_table, table_name):
    inserts = 0
    updates = 0
    for i in range(len(pd_table)):
        vals_with_quotes = ["'" + str(x) + "'" for x in pd_table.loc[i, :].values]
        # print(vals_with_quotes)
        update_pairs = [str(c) + " = '" + str(v) + "'" for c, v in zip(pd_table.columns, pd_table.loc[i, :])]
        query = f"INSERT INTO {table_name} ({', '.join(list(pd_table.columns.values))}) " \
                f"VALUES ({', '.join(vals_with_quotes)}) " \
                f"ON DUPLICATE KEY UPDATE {', '.join(update_pairs)}"
        print(query)
        result = engine.execute(query)
        if result.lastrowid == 0:
            updates += 1
        else:
            inserts += 1
    print(f"Inserted {inserts} rows and updated {updates} rows.")

schema = '---'
alchemy_connect = "---"
engine = sqlalchemy.create_engine(alchemy_connect)  # connect to server
engine.execute(f"USE {schema}")  # select new db
# engine.execute("SET NAMES UTF8MB4;")
query = "SELECT * FROM .... where ...=..."
pm = pd.read_sql(query, engine)
rootpath = "path/"
for root, dirs, files in os.walk(rootpath):
    for file in files:
        print(root, dirs, files, file)
        d = pd.read_excel(root + file, header=None)
        d.drop(columns=[0], inplace=True)
        d.rename(columns={1: "content"}, inplace=True)
        participants = []
        for ix, row in d.iterrows():
            c = row["content"]
            match = re.search(r"^(.+:)(.+)|(r(\w+)?\s*\[(.*)\]\s*(\w+))", c)
            prefix = match.group(1)
            only_content = match.group(2)
            try:
                timestamp = re.search(r"\[(\d{1,2}:\d{1,2}:\d{1,2})\]", prefix).group(1)
            except:
                timestamp = "-99"
            # print(timestamp)
            if re.search(r"\s(Versuchsleiter|ersuchsleiter|Versuchsleit|Versuch):", prefix):
                id_code = "Versuchsleiter"
            else:
                starting_digits = re.search(r"^(\d+)", prefix)
                id_code = re.search(r"(\d{2,4}.{1,3}):", prefix).group(1)
                if hasattr(starting_digits, 'group'):
                    id_code = starting_digits.group(1) + id_code
            # get pid
            participant = pm.loc[pm["id_code"] == id_code, "pid"]
            try:
                pid = participant.values[0]
            except:
                pid = "Versuchsleiter"
            # print(ix, pid, id_code, only_content, timestamp)
            if pid and pid not in participants and pid != "Versuchsleiter":
                participants.append(pid)
            d.loc[ix, "pid"] = pid
            d.loc[ix, "timestamp"] = timestamp
            d.loc[ix, "content"] = only_content.strip()
            d.loc[ix, "is_participant"] = 0 if pid == "Versuchsleiter" else 1
        d = d[["pid", "is_participant", "content", "timestamp"]]
        d.loc[(d['pid'] == "Versuchsleiter"), "pid"] = participants[0]
        d.loc[(d['pid'] == None), "pid"] = participants[0]
        insert_or_update(engine, d, "table of sql")
I need "Versuchsleiter" since some of the ID's are "Versuchsleiter"
Thank you!
You should take advantage of capturing groups.
All the initial regex matching (after c = row["content"] and before # get pid) can be done with
match = re.search(r"^\[(\d{1,2}:\d{1,2}:\d{1,2})]\s+(\w+):\s*(.*)", c)
if match:
    timestamp = match.group(1)
    id_code = match.group(2)
    only_content = match.group(3)
    if re.search(r"(?:Versuch(?:sleit(?:er)?)?|ersuchsleiter)", id_code):
        id_code = "Versuchsleiter"
Your timestamp will be 0:00:33, only_content will hold (content) and id_code will contain 30091aA.
See the regex demo
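As a quick sanity check, here is the suggested pattern run against the two sample lines from the question (expected output in the comments):
import re

pattern = r"^\[(\d{1,2}:\d{1,2}:\d{1,2})]\s+(\w+):\s*(.*)"
samples = ["[0:00:48] 20052A: content (more content)",
           "[0:00:33] 30091aA: (content)"]
for c in samples:
    match = re.search(pattern, c)
    if match:
        print(match.group(1), match.group(2), match.group(3))
# 0:00:48 20052A content (more content)
# 0:00:33 30091aA (content)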
Thank you for your help but this gives me the following error
Traceback (most recent call last):
  File "C:/Users/.../PycharmProjects/.../.../....py", line 80, in <module>
    insert_or_update(engine, d, "sql table")
TypeError: not enough arguments for format string

Inserting a Python List into an SQL Table

I have code that reads from a socket and creates a list called i. The socket is read, the list is created from the socket, the list gets printed and then deleted. This gets repeated in a while True loop. Instead of just printing the list, I'd like to insert it into a table in my DB. I already have the cursor and connection established in the code. I was messing around with some other approaches but kept getting errors. I would like to use REPLACE INTO instead of INSERT INTO. Thank you very much for the help.
This is an example of what the list will look like:
['Dec-11-2018,', '12:28:43,', 'iPhone,', 'alpha,', 'lib,', 'lib,', '(45.67.67)\n']
My table name is StudentPrototype and it has 7 columns
Columns - (Date,Time,Device,ID,AP,APGroup,MACAdd)
#!/bin/python
import socket
import os, os.path
import MySQLdb as mdb
con = mdb.connect('localhost', 'user', 'pass',
                  'StudentTacker');
cur = con.cursor()
cur.execute("SELECT VERSION()")
i = []

def ParseArray(l):  # parses line in socket
    i.append(l.split()[+0] + '-')   # Gets Day
    i.append(l.split()[+1] + '-')   # Gets Month
    i.append(l.split()[+3] + ',')   # Gets Year
    i.append(l.split()[+2] + ',')   # Gets Time
    i.append(l.split()[-2] + ',')   # Gets Device
    i.append(l.split()[+9] + ',')   # Gets ID
    i.append(l.split()[+18] + ',')  # Gets AP
    i.append(l.split()[+19] + ',')  # Gets AP Group
    i.append(l.split()[+16] + '\n') # Gets MAC
    # This is where I want to REPLACE INTO my table called StudentTest using list i
    print(i)
    del i[:]

if os.path.exists("/-socket"):
    os.remove("/-socket")
sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
sock.bind("/home/socket")
infile = sock.makefile('r')
while True:
    l = sock.recv(4096).decode()
    ParseArray(l)
Update: I tried another method that I found on this site for inserting Python lists into a DB.
Here is my new code that I used inside my function:
def ParseArray(l):  # parses line in socket
    i.append(l.split()[+0] + '-')   # Gets Day
    i.append(l.split()[+1] + '-')   # Gets Month
    i.append(l.split()[+3] + ',')   # Gets Year
    i.append(l.split()[+2] + ',')   # Gets Time
    i.append(l.split()[-2] + ',')   # Gets Device
    i.append(l.split()[+9] + ',')   # Gets FSU ID
    i.append(l.split()[+18] + ',')  # Gets AP
    i.append(l.split()[+19] + ',')  # Gets AP Group
    i.append(l.split()[+16] + '\n') # Gets MAC
    # insert line into db else by primary key mac address
    # update line to db if mac address doesn't exist
    params = ['?' for item in i]
    sql = 'REPLACE INTO SocketTest (month, day, year, time, device, Id, ap, ApGroup, MacAdd) VALUES (%s); ' % ', '.join(params)
    cur.execute(sql, i)
Using that I'm getting an error:
Traceback (most recent call last):
  File "./UnixSocketReader9.py", line 55, in <module>
    ParseArray(l)
  File "./UnixSocketReader9.py", line 28, in ParseArray
    cur.execute(sql, i)
  File "/usr/lib64/python2.7/site-packages/MySQLdb/cursors.py", line 187, in execute
    query = query % tuple([db.literal(item) for item in args])
TypeError: not all arguments converted during string formatting
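For what it's worth, MySQLdb uses %s rather than ? as its parameter placeholder, which is what the "not all arguments converted during string formatting" error is pointing at. A minimal sketch of the parameterised REPLACE INTO, assuming the cur, con, and nine-element list i from the code above:
placeholders = ', '.join(['%s'] * len(i))   # one %s per field in the list
sql = ('REPLACE INTO SocketTest (month, day, year, time, device, '
       'Id, ap, ApGroup, MacAdd) VALUES (%s)' % placeholders)
cur.execute(sql, i)     # the driver escapes and quotes each value itself
con.commit()            # MySQLdb does not autocommit by default
Note that with parameters the driver adds the separators, so the trailing ',' and '\n' appended to each element in ParseArray are no longer needed.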

Python SQLite3 - cursor.execute - no error

This is a piece of code which needs to perform the following functionality:
Dump all table names in a database
From each table, search for a column with either Latitude or Longitude in it
Store these co-ords as a JSON file
The code was tested and working on a single database. However, once it was put into another piece of code which calls it with different databases, it no longer enters line 49. There is no error either, so I am struggling to see what the issue is, as I have not changed anything.
Code snippet (line 48 is the bottom line):
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print (cursor)
for tablerow in cursor.fetchall():
I am running this in the /tmp/ dir due to an earlier error with SQLite not working outside the temp directory.
If you have any questions, please ask.
Thanks!
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sqlite3
import os
import sys

filename = sys.argv[1]

def validateFile(filename):
    filename, fileExt = os.path.splitext(filename)
    print ("[Jconsole] Python: Filename being tested - " + filename)
    if fileExt == '.db':
        databases(filename)
    elif fileExt == '.json':
        jsons(fileExt)
    elif fileExt == '':
        blank()
    else:
        print ('Unsupported format')
        print (fileExt)

def validate(number):
    try:
        number = float(number)
        if -90 <= number <= 180:
            return True
        else:
            return False
    except ValueError:
        pass

def databases(filename):
    dbName = sys.argv[2]
    print (dbName)
    idCounter = 0
    mainList = []
    lat = 0
    lon = 0
    with sqlite3.connect(filename) as conn:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        print (cursor)
        for tablerow in cursor.fetchall():
            print ("YAY1")
            table = tablerow[0]
            cursor.execute('SELECT * FROM {t}'.format(t=table))
            for row in cursor:
                print(row)
                print ("YAY")
                tempList = []
                for field in row.keys():
                    tempList.append(str(field))
                    tempList.append(str(row[field]))
                for i in tempList:
                    if i in ('latitude', 'Latitude'):
                        index = tempList.index(i)
                        if validate(tempList[index + 1]):
                            idCounter += 1
                            tempList.append(idCounter)
                            (current_item, next_item) = \
                                (tempList[index], tempList[index + 1])
                            lat = next_item
                    if i in ('longitude', 'Longitude'):
                        index = tempList.index(i)
                        if validate(tempList[index + 1]):
                            (current_item, next_item) = \
                                (tempList[index], tempList[index + 1])
                            lon = next_item
                result = '{ "id": ' + str(idCounter) \
                    + ', "content": "' + dbName + '", "title": "' \
                    + str(lat) + '", "className": "' + str(lon) \
                    + '", "type": "box"},'
                mainList.append(result)
    file = open('appData.json', 'a')
    for item in mainList:
        file.write('%s\n' % item)
    file.close()

# {
# ...."id": 1,
# ...."content": "<a class='thumbnail' href='./img/thumbs/thumb_IMG_20161102_151122.jpg'>IMG_20161102_151122.jpg</><span><img src='./img/thumbs/thumb_IMG_20161102_151122.jpg' border='0' /></span></a>",
# ...."title": "50.7700721944444",
# ...."className": "-0.8727045",
# ...."start": "2016-11-02 15:11:22",
# ...."type": "box"
# },

def jsons(filename):
    print ('JSON')

def blank():
    print ('blank')

validateFile(filename)
Fixed.
The issue was up here
filename, fileExt = os.path.splitext(filename)
The filename variable was being overwritten with the extension-less name, so when SQLite looked for the file it didn't find it.
Strangely, no error appeared, but it is fixed now by changing the filename variable to filename1.
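A tiny illustration of that fix (the path 'evidence.db' is just a made-up example): keeping the split result in a new variable means the full path is still available for sqlite3.connect:
import os

filename = 'evidence.db'                 # hypothetical input path
filename1, fileExt = os.path.splitext(filename)
print(filename1, fileExt)                # evidence .db
# sqlite3.connect(filename) still receives 'evidence.db', not the
# extension-less 'evidence' that silently created an empty database
# and left the sqlite_master query with nothing to return.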

Python IndexError: list index out of range while finding SNPs in vcf

Hi all, I am supposed to use a Python script to identify possible SNPs in VCF files at positions specified in a CSV file.
I just started using Python and sadly I always get the following error:
Traceback (most recent call last):
  File "getSNPs.py", line 20, in <module>
    oo = line[2] + "_" + line[3]
IndexError: list index out of range
from the following script, which is run as: python getSNPs.py your.vcf PhenoSNPs.csv
Code:
#!/bin/python
import sys
import gzip

SNPs = {}
for i in gzip.open(sys.argv[1], "r"):
    if '#' not in i:
        line = i.split("\t")
        oo = line[0] + "_" + line[1]
        SNPs[oo] = i

pp = sys.argv[1] + ".captureSNPs"
out = open(pp, "w")
for i in open(sys.argv[2], "r"):
    line = i.split(",")
    oo = line[2] + "_" + line[3]
    try:
        out.write(SNPs[oo])
    except KeyError:
        ow = line[2] + "\t" + line[3] + "\t" + "not covered" + "\n"
        out.write(ow)
If, for instance, i = 'aa' and you do line = i.split(","), then line = ['aa'], and you will get an IndexError when you do line[2] + "_" + line[3] because line has no elements at index 2 and 3.
Use try/except or re-think the logic of your code.
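One way to act on that, as a rough sketch (assuming the CSV from the question keeps chromosome and position in its 3rd and 4th columns), is to skip rows that are too short before indexing into them:
import sys

for i in open(sys.argv[2], "r"):
    line = i.split(",")
    if len(line) < 4:                 # header or malformed row: skip it
        continue
    oo = line[2] + "_" + line[3]
    print(oo)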

ValueError: too many values to unpack (Python)

So this is pretty much a web scraping program. I feel as if it is nearly finished, but I have no clue how to fix this!
Traceback (most recent call last):
  File "AgriMet4.py", line 424, in <module>
    orig_column = convert(station, webpage, data_type)
  File "AgriMet4.py", line 244, in convert
    ag_date, ag_time, ag_data_str = line.split()
ValueError: too many values to unpack
Here are the parts that are flagged:
# Break line into components
ag_date, ag_time, ag_data_str = line.split()
ag_datetime = ag_date + " " + ag_time
ag_datetime = datetime.datetime.strptime(ag_datetime, "%m/%d/%Y %H:%M")
ag_data = float(ag_data_str)
and
columns = []
for data_type in data_types:
    webpage = download(station, data_type, effective_begin_date, effective_end_date)
    orig_column = convert(station, webpage, data_type)
    std_column = fill_gaps(orig_column, interval, data_type,
                           effective_begin_date, effective_end_date)
    adjusted_column = adjust_datetimes(station, time_zone_offset, std_column)
    columns.append(adjusted_column)
EDIT: I've made the code shorter. Sorry!
As the error indicates, there are too many values on the right-hand side of the assignment (line.split()).
You can find the documentation for string splitting. You seem to have more than 3 values to assign. If you don't know how many values you will get, you can use the * syntax to hold any remaining values.
line = "Hello there world"
word1, *remaining_words = line.split()
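Applied to the line from the traceback, that could look like the following (the sample line is made up; it assumes date and time are always the first two whitespace-separated fields):
line = "12/11/2018 12:30 42.0 unexpected-extra-field"   # hypothetical AgriMet line
ag_date, ag_time, *rest = line.split()
ag_data_str = rest[0]                                    # keep only the first data field
print(ag_date, ag_time, ag_data_str)                     # 12/11/2018 12:30 42.0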
I've figured out my problem!
My download definition had the incorrect data_types, which caused it to gather all six or seven data types at once instead of one at a time.
def download(station, data_types, begin_date, end_date):
    query_string = ('station=' + station
                    + '&year=' + str(begin_date.year)
                    + '&month=' + str(begin_date.month)
                    + '&day=' + str(begin_date.day)
                    + '&year=' + str(end_date.year)
                    + '&month=' + str(end_date.month)
                    + '&day=' + str(end_date.day)
                    + '&pcode=' + data_types)
    url = AGRIMET_WEBSITE + '?' + query_string
