I'm having trouble iterating through a DictReader object. I'm completely stumped because the object gets returned from another function with the same properties (size and fieldnames), but I cannot figure out for the life of me how to iterate through it. I know there's been some changes on the object type, so I'll mention that this is on Python 3.7. Is there another way to access data in a DictReader object?
Anywho, here's relevant code. I cleaned it up to post here, so hope it's not too confusing:
def schedchange():
data = otherfunction.data_get(referer_url=setup.url)
logging.info(type(data)) # <class 'csv.DictReader'>
import sys
logging.info(sys.getsizeof(data)) # logs a value of 56
logging.info(data.fieldnames) # logs the fieldnames properly
count = 0
for row in data:
count += 1
logging.info('aspen count: ')
logging.info(count) # logs a value of: 37337
return data
def main():
rosters = client.schedchange()
logger.info(rosters.fieldnames) # Prints/logs fieldname properly
import sys
logging.info(sys.getsizeof(rosters)) # Logs 56, the same size as in schedchange
logging.info(type(rosters)) # <class 'csv.DictReader'>
count = 0
for i in rosters: #I've also tried enumerate(rosters) here
count += 1
logger.info(count) #nothing gets logged
logger.info('end count: ')
logger.info(count) # Count never gets incremented so a 0 is logged here
def data_get(self, referer_url):
url = self.baseurl + 'quickReportMenu.do' + ';jessionid={}'.format(self.sessionid)
r1 = get_retry(self.s, url, headers={'Referer': referer_url},
params={'format': 'simple',
'extension': '0',
'deploymentId': 'did'})
url = self.baseurl + 'quickReportMenu.do'
self.payload['org.apache.struts.taglib.html.TOKEN'] = findstruts(r1.text)
self.payload['userEvent'] = '930'
self.payload['deploymentId'] = 'did'
self.payload['extension'] = '0'
p1 = post_retry(self.s, url, headers={'Referer': r1.url}, data=self.payload)
# Extract the url for the csv from the text of the response
popupid = findurl(p1.text)
# Get csv
url = self.baseurl + popupid
d1 = get_retry_simple(self.s, url, headers={'Referer': p1.url})
# Break text into iterable lines for each /n
d1.encoding = 'utf-8-sig'
iterdat = str.splitlines(d1.text)
# Parse csv
data = csv.DictReader(iterdat)
return data
How to check item is exists and renew value only, if not exists and add new one?
For example:
I have an item 1 and value 1 already in my sheet, and then I get new value of item 1, I want to renew value 1 only, otherwise, if I get new item 2 and value 2, I want to add this in new columns.
I don't know how to write code, I search it long time but cannot found, could anyone help me? Many thanks!
The script below, the steps are:
first step, check my gmail get keyword 1
second, use keyword search datas in website (beautifulsoup module)
the last step, upload datas to google sheet (gspread module)
def Check_emailbox(box='Inbox', lab='SUBJECT', title='[PASS]'):
global email_content, report_info1, my_msg, report_info
dirpath = 'XXX'
with open(dirpath) as act:
content = act.read()
my_act = yaml.load(content, Loader=yaml.FullLoader)
user, password = my_act['user'], my_act['password']
imapUrl = 'imap.gmail.com'
my_mail = imaplib.IMAP4_SSL(imapUrl)
my_mail.login(user, password)
print('Login gmail account seccess.')
key = lab
value = title
_, data = my_mail.search(None, key, value)
mail_id_list = data[0].split()
msg_id = mail_id_list[-1]
res, data = my_mail.fetch(msg_id, '(RFC822)')
report_info = []
if res == 'OK':
raw_msg_txt = data[0][1]
my_msg = email.message_from_bytes(raw_msg_txt)
print('Subject: ', my_msg['subject'])
print('From: ', my_msg['from'])
print('Time: ', my_msg['date'])
for part in my_msg.walk():
email_content = part.get_payload()
report_info1 = ''.join('%s' % id for id in report_info)
print(report_info1, type(report_info1))
# print('Hide info, if want to see detail, unmark previous code')
# my_mail.store(msg_id, '-FLAGS', '\SEEN')
except AttributeError:
my_msg = email.message_from_string(raw_msg_txt)
print('AttributeError: ', my_msg)
return email_content, my_msg, report_info, report_info1
keyName = re.findall(r'Daily Report : (.*?)$', report_info1)
fwName = ''.join(keyName)
# ↑ This data will be upload to sheet, and this is main item for check:
# if "feName" is exists, renew below datas only, if not exists, add new one in next row.
fwVersion = ''.join(re.findall(r'\d-(.*?)-', fwName)).rsplit('.',1)[0]
# connect to the website and use beautifulsoup
ele = requests.get('XXXXXX')
felement = BeautifulSoup(ele.text, 'html.parser')
# print(felement.prettify())
fwinfo = felement.find(['a'], text = fwName)
fwhref = fwinfo.get('href')
print('Info: ', fwinfo)
rowid = ''.join(re.findall(r'data/(.*?)$', fwhref))
print('Download id is: ', rowid)
fwlink = 'XXXXXXXXX' + rowid
print('Download link: ', fwlink)
json_key = "XXXXXXX"
spread_url = ['https://spreadsheets.google.com/feeds']
connect_auth = SAC.from_json_keyfile_name(json_key, spread_url)
google_sheets = gspread.authorize(connect_auth)
sheet = google_sheets.open_by_key('XXXXXXXXX').worksheet('Pass Data')
Sheets = sheet
upload = []
print('==== Uplod to Google Sheet Done. ====')
In your situation, how about the following modification?
Modified script:
In this case, please use your google_sheets.
# Please set your values here.
fwName = "###"
fwVersion = "###"
rowid = "###"
fwlink = "###"
sheet = google_sheets.open_by_key('XXXXXXXXX').worksheet("Pass Data")
values = sheet.get_all_values()[2:]
obj = {}
for i, r in enumerate(values):
obj[r[0]] = i + 3
if obj.get(fwName):
sheet.update("B" + str(obj.get(fwName)), [[fwVersion, rowid, fwlink]], value_input_option="USER_ENTERED")
When this script is run, first, the values are retrieve from the sheet. And, by searching the value of column "A", new value is put to the searched row.
I prepared this modified script using your sample image. In your sample image, the 1st 2 rows are header rows. And, the search column is the column "A". I used them. So, when you change your Spreadsheet, this script might not be able to be used. Please be careful about this.
update(range_name, values=None, **kwargs)
I am currently having an issue where I am trying to store data in a list (using dataclasses). When I print the data inside the list in the function (PullIncursionData()) it responded with a certain amount of numbers (never the same, not possible due to it's nature). When printing it after it being called to store it's return in a Var it somehow prints only the same number.
I cannot share the numbers, as they update with EVE Online's API, so the only way is to run it locally and read the first list yourself.
The repository is Here: https://github.com/AtherActive/EVEAPI-Demo
Heads up! Inside the main.py (the file with issues) (a snippet of code is down below) are more functions. All functions from line 90 and forward are important, the rest can be ignored for this question, as they do not interact with the other functions.
def PullIncursionData():
#Pulls data from URL and converts it into JSON
url = 'https://esi.evetech.net/latest/incursions/?datasource=tranquility'
data = rq.get(url)
jsData = data.json()
#Init var to store incursions
incursions = []
#Set lenght for loop. yay
length = len(jsData)
# Every loop incursion data will be read by __parseIncursionData(). It then gets added to var Incursions.
for i in range(length):
# Add data to var Incursion.
incursions.append(__parseIncursionData(jsData, i))
# If Dev mode, print some debug. Can be toggled in settings.py
if settings.developerMode == 1:
return incursions
# Basically parses the input data in a decent manner. No comments needed really.
def __parseIncursionData(jsData, i):
icstruct = stru.Incursion
icstruct.constellation_id = jsData[i]['constellation_id']
icstruct.constellation_name = 'none'
icstruct.staging = jsData[i]['staging_solar_system_id']
icstruct.region_name = ResolveSystemNames(icstruct.constellation_id, 'con-reg')
icstruct.status = jsData[i]['state']
icstruct.systems_id = jsData[i]['infested_solar_systems']
icstruct.systems_names = ResolveSystemNames(jsData[i]['infested_solar_systems'], 'system')
return icstruct
# Resolves names for systems, regions and constellations. Still WIP.
def ResolveSystemNames(id, mode='constellation'):
#init value
output_name = 'none'
# If constellation, pull data and find region name.
if mode == 'con-reg':
url = 'https://www.fuzzwork.co.uk/api/mapdata.php?constellationid={}&format=json'.format(id)
data = rq.get(url)
jsData = data.json()
output_name = jsData[0]['regionname']
# Pulls system name form Fuzzwork.co.uk.
elif mode == 'system':
#Convert output to a list.
output_name = []
lenght = len(id)
# Pulls system name from Fuzzwork. Not that hard.
for i in range(lenght):
url = 'https://www.fuzzwork.co.uk/api/mapdata.php?solarsystemid={}&format=json'.format(id[i])
data = rq.get(url)
jsData = data.json()
return output_name
icdata = PullIncursionData()
print('external data check:')
length = len(icdata)
for i in range(length):
structures.py (custom file)
class Incursion:
constellation_id = int
constellation_name = str
staging = int
staging_name = str
systems_id = list
systems_names = list
region_name = str
status = str
def ___init___(self):
self.constellation_id = -1
self.constellation_name = 'undefined'
self.staging = -1
self.staging_name = 'undefined'
self.systems_id = []
self.systems_names = []
self.region_name = 'undefined'
self.status = 'unknown'
Background: I'm attempting to create a dataframe using data called from Twitch's API. They only allow 100 records per call so with each pull a new Pagination Cursor is offered in order to move on to the next page. I'm using the following code to try and efficiently pull this data rather than manually adjusting the after=(pagination value) in the get response. Right now the variable I'm trying to make dynamic is the 'Pagination' variable but it only gets updated once the loop finishes - not helpful! Take a look below and see if you notice anything I can change to achieve this goal. Any help is appreciated!
TwitchTopGamesDataFrame = [] #This is our Data List
BaseURL = 'https://api.twitch.tv/helix/games/top?first=100'
Headers = {'client-id':'lqctse0orgdbs5gdf5faz665api03r','Authorization': 'Bearer a1yl09mwmnwetp6ovocilheias8pzt'}
Indent = 2
Pagination = ''
FullURL = BaseURL + Pagination
Response = requests.get(FullURL,headers=Headers)
iterations = 1 # Data records returned are equivalent to iterations x100
#Loop: Response, Convert JSON data, Append to Data List, Get Pagination & Replace String in Variable - Iterate until 300 records
while count <= 3:
#Grab JSON Data, Convert, & Append
ResponseJSONData = Response.json()
#print(pgn) - Debug
pd.set_option('display.max_rows', None)
TopGamesDF = pd.DataFrame(ResponseJSONData['data'])
TopGamesDF = TopGamesDF[['id','name']]
TopGamesDF = TopGamesDF.rename(columns={'id':'GameID','name':'GameName'})
TopGamesDF['Rank'] = TopGamesDF.index + 1
#print(FullURL) - Debug
#Grab & Replace Pagination Value
RPagination = pd.DataFrame(ResponseJSONData['pagination'],index=[0])
pgn = str('&after='+RPagination.to_string(index=False,header=False).strip())
Pagination = pgn
#print(FullURL) - Debug
iterations += 1
Figured it out:
def top_games(page_count):
from time import gmtime, strftime
strftime("%Y-%m-%d %H:%M:%S", gmtime())
print("Time of Execution:", strftime("%Y-%m-%d %H:%M:%S", gmtime()))
#In order to condense the code above and be more efficient, a while/for loop would work great.
#Goal: Run a While Loop to create a larger DataFrame through Pagination as the Twitch API only allows for 100 records per call.
baseURL = 'https://api.twitch.tv/helix/games/top?first=100' #Base URL
Headers = {'client-id':'lqctse0orgdbs5gdf5faz665api03r','Authorization': 'Bearer a1yl09mwmnwetp6ovocilheias8pzt'}
Indent = 2
Pagination = ''
FullURL = BaseURL + Pagination
Response = requests.get(FullURL,headers=Headers)
start_count = 0
count = 0 # Data records returned are equivalent to iterations x100
max_count = page_count
#Loop: Response, Convert JSON data, Append to Data List, Get Pagination & Replace String in Variable - Iterate until 300 records
while count <= max_count:
#Grab JSON Data, Extend List
FullURL = baseURL + Pagination
Response = requests.get(FullURL,headers=Headers)
ResponseJSONData = Response.json()
pd.set_option('display.max_rows', None)
if count == start_count:
TopGamesDFL = ResponseJSONData['data']
if count > start_count:
i = ResponseJSONData['data']
#Grab & Replace Pagination Value
RPagination = pd.DataFrame(ResponseJSONData['pagination'],index=[0])
pgn = str('&after='+RPagination.to_string(index=False,header=False).strip())
Pagination = pgn
count += 1
if count == max_count:
FinalDataFrame = pd.DataFrame(TopGamesDFL)
FinalDataFrame = FinalDataFrame[['id','name']]
FinalDataFrame = FinalDataFrame.rename(columns={'id':'GameID','name':'GameName'})
FinalDataFrame['Rank'] = FinalDataFrame.index + 1
return FinalDataFrame
I want to put it in data,And I'm going to insert MySQL,But the resulting content field is repeated,I think it's my for circular logic error,So how do I change?
def get_user_data(self,start_url):
html = requests.get(url=start_url,headers=self.headers,cookies=self.cookies).content
selector = etree.fromstring(html,etree.HTMLParser(encoding='utf-8'))
contents = selector.xpath('//span[#class="ctt"]/text()')
times = selector.xpath('//span[#class="ct"]/text()')
data = {}
for each_text in contents:
data['content'] = each_text
for each_time in times:
month_day, time, device = each_time.split(maxsplit=2)
data['mobile_phone'] = device
data['create_time'] = month_day + '\n' + time
data['crawl_time'] = datetime.strftime(datetime.now(),'%Y-%m-%d %H:%M:%S')
I have looping script returning different filtered results, I can make this data return as an array for each of the different filter classes. However I am unsure of the best method to join all of these arrays together.
import mechanize
import urllib
import json
import re
import random
import datetime
from sched import scheduler
from time import time, sleep
from sets import Set
##### Code to loop the script and set up scheduling time
s = scheduler(time, sleep)
##### Code to stop duplicates part 1
userset = set ()
def run_periodically(start, end, interval, func):
event_time = start
while event_time < end:
s.enterabs(event_time, 0, func, ())
event_time += interval + random.randrange(-5, 10)
##### Code to get the data required from the URL desired
def getData():
post_url = "URL OF INTEREST"
browser = mechanize.Browser()
browser.addheaders = [('User-agent', 'Firefox')]
##### These are the parameters you've got from checking with the aforementioned tools
parameters = {'page' : '1',
'rp' : '250',
'sortname' : 'race_time',
'sortorder' : 'asc'
##### Encode the parameters
data = urllib.urlencode(parameters)
trans_array = browser.open(post_url,data).read().decode('UTF-8')
xmlload1 = json.loads(trans_array)
pattern2 = re.compile('/control/profile/view/(.*)\' title=')
pattern4 = re.compile('title=\'posted: (.*) strikes:')
pattern5 = re.compile('strikes: (.*)\'><img src=')
for row in xmlload1['rows']:
cell = row["cell"]
##### defining the Keys (key is the area from which data is pulled in the XML) for use in the pattern finding/regex
user_delimiter = cell['username']
selection_delimiter = cell['race_horse']
user_numberofselections = float(re.findall(pattern4, user_delimiter)[0])
user_numberofstrikes = float(re.findall(pattern5, user_delimiter)[0])
strikeratecalc1 = user_numberofstrikes/user_numberofselections
strikeratecalc2 = strikeratecalc1*100
userid_delimiter_results = (re.findall(pattern2, user_delimiter)[0])
##### Code to stop duplicates throughout the day part 2 (skips if the id is already in the userset)
if userid_delimiter_results in userset: continue;
arraym = ""
arrayna = ""
if strikeratecalc2 > 50 and strikeratecalc2 < 100):
arraym0 = "System M"
arraym1 = "user id = ",userid_delimiter_results
arraym2 = "percantage = ",strikeratecalc2,"%"
arraym3 = ""
arraym = [arraym0, arraym1, arraym2, arraym3]
if strikeratecalc2 > 0 and strikeratecalc2 < 50):
arrayna0 = "System NA"
arrayna1 = "user id = ",userid_delimiter_results
arrayna2 = "percantage = ",strikeratecalc2,"%"
arrayna3 = ""
arrayna = [arrayna0, arrayna1, arrayna2, arrayna3]
run_periodically(time()+5, time()+1000000, 10, getData)
What I want to be able to do, is return both the 'arraym' and the 'arrayna' as one final Array, however due to the looping nature of the script upon each loop of the script the old 'arraym'/'arrayna' are overwritten, currently my attempts to yield one array containing all of the data has resulted in the last userid for 'systemm' and the last userid for 'sustemna'. This is obviously because, upon each run of the loop it overwrites the old 'arraym' and the 'arrayna' however I do not know of a way to get around this, so that all of my data can be accumulated in one array. Please note, I have been coding for cumulatively two weeks now, so there may well be some simple function to overcome this problem.
Kind regards AEA
Without looking at that huge code segment, typically you can do something like:
my_array = [] # Create an empty list
for <some loop>:
# At this point, my_array is a list containing some_value for each loop iteration
Look into python's list.append()
So your code might look something like:
arraym = []
arrayna = []
for row in xmlload1['rows']:
if strikeratecalc2 > 50 and strikeratecalc2 < 100):
arraym.append("System M")
arraym.append("user id = %s" % userid_delimiter_results)
arraym.append("percantage = %s%%" % strikeratecalc2)
if strikeratecalc2 > 0 and strikeratecalc2 < 50):
arrayna.append("System NA")
arrayna.append("user id = %s" % userid_delimiter_results)
arrayna.append("percantage = %s%%" % strikeratecalc2)