I'm having some issues with updating my code. I can run my code and it works fine, but when I make adjustments to the code, the output stays the same as when I originally ran the file. If I create a new .py file and just copy and paste the updated code, it produces the desired output with the updates. Why is my original file not reflecting the changes in the output?
My specific example is in the code below. The code ran and produced output as expected. Then I updated it to add the "sector" and "close" variables. However, the new output did not include the data, just the names in the header. Does it have to do with the .pyc file?
import multiprocessing
import datetime
import re
from progressbar import ProgressBar
import csv
import urllib2
from lxml import etree

def mp_worker(s):
    url1 = "https://research.tdameritrade.com/grid/public/research/stocks/fundamentals?symbol=" + s
    url2 = "https://research.tdameritrade.com/grid/public/research/stocks/summary?symbol=" + s
    url3 = "https://research.tdameritrade.com/grid/public/research/stocks/industryposition?symbol=" + s
    htmlparser = etree.HTMLParser()
    try:
        response3 = urllib2.urlopen(url3)
        tree3 = etree.parse(response3, htmlparser)
        perf = tree3.xpath("""//*[@id="stock-industrypositionmodule"]/div/div/table/tbody[1]/tr[3]/td[1]/text()""")
        if len(perf) > 0:
            EPS5yr = tree3.xpath("""//*[@id="stock-industrypositionmodule"]/div/div/table/tbody[2]/tr[4]/td[1]/text()""")
        else:
            response1 = urllib2.urlopen(url1)
            tree1 = etree.parse(response1, htmlparser)
            EPS5yr = tree1.xpath("""//*[@id="layout-full"]/div[3]/div/div[3]/section/div/div/div[1]/div/dl/dd[1]/div/label/span/text()""")
        response2 = urllib2.urlopen(url2)
        tree2 = etree.parse(response2, htmlparser)
        EPSttm = tree2.xpath("""//*[@id="stock-summarymodule"]/div/div/div[1]/div/div[2]/dl/ul/li[3]/dd/text()""")
        sector = tree2.xpath("""//*[@id="layout-header"]/div[1]/div/text()""")
        indy = tree2.xpath("""//*[@id="layout-header"]/div[1]/div/a[1]/text()""")
        close = tree2.xpath("""//*[@id="stock-quotebar"]/div/div/table/tbody/tr/td[1]/dl/dd/text()""")
    except Exception as e:
        EPS5yr = 'Error'
        EPSttm = 'Error'
        perf = 'Error'
        indy = 'Error'
        close = 'Error'
        sector = 'Error'
    return s, close, EPS5yr, EPSttm, perf, sector, indy

def mp_handler():
    now = datetime.datetime.now()
    date = now.strftime("%Y-%m-%d_%H%M")
    file = 'total_market' + '_' + date + '.csv'
    p = multiprocessing.Pool(16)
    symbols = {'AABA',
               'AAOI',
               'AAPL',
               'AAWC'}
    with open(file, "ab") as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow(['Symbol', 'Price', '5 Yr EPS', 'EPS TTM', '52 Wk Perf', 'Sector', 'Industry'])
        for result in p.imap(mp_worker, symbols):
            # each result is the tuple returned by mp_worker
            writer.writerow(result)

if __name__ == '__main__':
    mp_handler()
Though this is an old question, I'd like to provide another possible solution. If the module is something you installed (e.g. you have a setup.py file and you've pip-installed your project), then uninstalling the module could fix it: Python may be importing the stale installed copy instead of the file you're editing. This turned out to solve my problem when I ran into it.
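To check whether this is what's happening, you can print the path Python actually imported. A minimal sketch, where mymodule is a placeholder for your own module's name:

import mymodule  # placeholder: the module whose edits aren't showing up

# If this prints a path inside site-packages instead of your project
# directory, Python is importing the stale installed copy, not the
# file you are editing.
print(mymodule.__file__)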
I have code like this:
data_to_string = data_from_table.to_xml(path_or_buffer=file_name + ".xml", index=False,
                                        root_name="convoy", row_name="vehicle", xml_declaration=False)
It works fine with files containing several rows, but if there is nothing to export, I get only
<convoy/>
in my .xml.
How can I fix the code so that I get:
<convoy>
</convoy>
in all cases?
This behavior is hard-coded into the XML library.
Your requirement can be fulfilled by checking whether there are rows in your DataFrame and, if not, serializing an empty element yourself (using lxml's HTML output method, which writes an explicit closing tag) and writing that to the output instead.
Be aware that <convoy/> and <convoy></convoy> are equivalent in XML.
Here is a fully working example:
import pandas as pd
from lxml import etree

file_name = "test"
path = file_name + ".xml"
root_name = "convoy"
row_name = "vehicle"

data_from_table = pd.DataFrame()

if len(data_from_table.index) > 0:
    data_to_string = data_from_table.to_xml(
        path_or_buffer=path,
        index=False,
        root_name=root_name,
        row_name=row_name,
        xml_declaration=False,
    )
else:
    # serialize an empty root element; method="html" forces an
    # explicit closing tag instead of the self-closing form
    elem = etree.Element(root_name)
    empty_xml = etree.tostring(elem, method="html", encoding="unicode")
    with open(path, "w") as f:
        f.write(empty_xml)
Hello, I'm new to web development and have been using Python to make my web app. Here's my code:
def forecastvalues():
    import fileinput
    import csv
    from pyexcel_xlsx import get_data
    xlsxfile = "test.xlsx"
    import json
    with open(xlsxfile, "rb") as f:
    content = f.read()
    r = pe.get_book(file_type="xlsx", file_content=content, start_row=1)
    for i in records:
        columns = sheet.row[i]
        for j in columns:
            rem = sheet.column[0]
            sold1 = sheet.column[1]
            sold2 = sheet.column[2]
    return '<h1>Result: %s</h1>' % result
I installed pyexcel properly, but when I import pyexcel I get a syntax error. How do I fix this?
The line content = f.read() is not indented under the with block, and neither is the line after it.
It should be:
with open(xlsxfile, "rb") as f:
    content = f.read()
    r = pe.get_book(file_type="xlsx", file_content=content, start_row=1)
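Note also that the snippet calls pe.get_book without defining pe anywhere. Assuming the intent was the top-level pyexcel package (an assumption, since only pyexcel_xlsx is imported here), you would also need:

# assumption: pe is meant to be the top-level pyexcel package
import pyexcel as pe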
The following code is failing to write the name and address variables to a CSV file. When I test it using numbers or words, or the "Write" variable, these will be recorded in the CSV, but "WriteAddress" and "WriteName" will not*. (Also, using the original sources for these variables will also leave blanks.)
import requests, sys, pyperclip, bs4, csv

StationList = open('CTA Station Addresses.csv', 'w', newline='')
StationWrite = csv.writer(StationList)
for i in range(149):
    id = str(i)
    res = requests.get('http://www.transitchicago.com/travel_information/station.aspx?StopId=' + id)
    res.raise_for_status()
    Station = bs4.BeautifulSoup(res.text)
    Name = Station.select('.rtehdng')
    Address = Station.select('#ctl07_divAddress')
    Write = 0
    if Name == []:
        print('missing name')
        Write = 1
    else:
        #print(Name[0].getText())
        WriteName = Name[0].getText()
        pass
    if Address == []:
        print('missing address')
        Write = 1
    else:
        #print(Address[0].getText())
        WriteAddress = Address[0].getText()
        pass
    if Write == 0:
        StationWrite.writerow([Write, WriteName, WriteAddress])
    Write = 0
StationList.close()
*(I can do writerow([3, Write, WriteName]) and the CSV row will be "3, 0, ".)
I couldn't reproduce your error, but the data you get has embedded newlines and spaces, which can make the CSV look odd. I've cleaned up the script and scrubbed the data before writing the CSV, ending up with station,address entries. I didn't see a need to write Write: it was always 0 in your script, and it doesn't exist at all in mine, since I leverage exception handling instead.
import requests, sys, pyperclip, bs4, csv

with open('CTA Station Addresses.csv', 'w', newline='') as StationList:
    StationWrite = csv.writer(StationList)
    for i in range(149):
        _id = str(i)
        res = requests.get('http://www.transitchicago.com/travel_information/station.aspx?StopId=' + _id)
        res.raise_for_status()
        Station = bs4.BeautifulSoup(res.text, 'lxml')
        try:
            name = Station.select('.rtehdng')[0].getText().strip()
            address = Station.select('#ctl07_divAddress')[0].getText().splitlines()[-1].strip()
        except IndexError as e:
            print("No data for station", _id)
            continue
        if not name or not address:
            print('Empty elements for station', _id)
            continue
        print(repr(name), repr(address))
        StationWrite.writerow([name, address])
An mp3 is accessible via two different URLs. I'm trying to use Python to figure out which URL is fastest to download from.
For example, I want to time how long https://cpx.podbean.com/mf/download/a6bxxa/LAF_15min_044_mindfulness.mp3 takes to download and compare that to how long http://cpx.podbean.com/mf/play/a6bxxa/LAF_15min_044_mindfulness.mp3 takes.
To download the mp3 I'm currently using:
urllib.request.urlretrieve(mp3_url, mp3_filename)
You could essentially do something like:
from datetime import datetime

starttime = datetime.now()
urllib.request.urlretrieve(mp3_url, mp3_filename)  # whatever code you're using...
finishtime = datetime.now()
runtime = finishtime - starttime
print(runtime)
This will print a timedelta like 0:03:19.356798, in the format [hours]:[minutes]:[seconds.microseconds].
My bad... I didn't realize you were trying to figure out which link was the fastest. I have no clue how you're storing your mp3_url and mp3_filename elements, but try something like this (adjust accordingly):
import urllib.request
from datetime import datetime

mp3_list = {
    'file1.mp3': 'http://www.url1.com',
    'file2.mp3': 'http://www.url2.com',
    'file3.mp3': 'http://www.url3.com',
}

runtimes = []
# the dict maps filename -> url; adjust this loop to however and
# wherever you are storing mp3_url and mp3_filename...
for mp3_filename, mp3_url in mp3_list.items():
    starttime = datetime.now()
    urllib.request.urlretrieve(mp3_url, mp3_filename)  # whatever code you're using...
    finishtime = datetime.now()
    runtime = finishtime - starttime
    runtimes.append({'runtime': runtime, 'url': mp3_url, 'filename': mp3_filename})

fastest = sorted(runtimes, key=lambda k: k['runtime'])[0]
print(fastest['url'])
print(fastest['filename'])
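A single timed download can be noisy (caching, transient congestion), so if you want a more stable comparison, here is a minimal sketch that repeats each download a few times and averages the timings. It assumes the same mp3_list shape as above; the repeats value of 3 is arbitrary:

import urllib.request
from datetime import datetime

def average_download_time(url, filename, repeats=3):
    # Download the same file several times and average the elapsed
    # time to smooth out per-run network variation.
    total = None
    for _ in range(repeats):
        start = datetime.now()
        urllib.request.urlretrieve(url, filename)
        elapsed = datetime.now() - start
        total = elapsed if total is None else total + elapsed
    return total / repeats

You could then feed these averaged timedeltas into the same sorted-by-runtime comparison as above.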
It's simple; there are plenty of ways to do this (Python 3.x).
Using win64pyinstaller, with progress:
from win64pyinstaller import install

install("your_url", "destination_folder_with_file_name")
Using urllib.request, with progress, modifying PabloG's solution (which is in Python 2.x) from How to download a file over HTTP?
from sys import stdout
from urllib.request import urlopen

def _restart_line():
    # return the cursor to the start of the line so the next status
    # string overwrites the previous one
    stdout.write('\r')
    stdout.flush()

url = "your_url"
file_name = url.split('/')[-1]
u = urlopen(url)
f = open(file_name, 'wb')
meta = u.info()
file_size = int(meta.get("Content-Length"))
print(f"Downloading: {file_name} Bytes: {file_size}")

file_size_dl = 0
block_sz = 8192
while True:
    buffer = u.read(block_sz)
    if not buffer:
        break
    file_size_dl += len(buffer)
    f.write(buffer)
    status = f"done - {(file_size_dl/1000000):.2f} MB, {(file_size_dl * 100 / file_size):.2f} %"
    status = status + chr(8)*(len(status)+1)
    stdout.write(status)
    stdout.flush()
    _restart_line()
f.close()
There are more ways to do it; hope you got your answer. Thank you!
I'm trying to iterate through tables in HTML by a search label, then update the found value to a dictionary, then write those values to a CSV. The output currently works for both the url and the headline, but the name output will either be blank or show "None." If I print the output of blog["name"], however, it is correctly pulling the information I want. I suspect that it's an indentation error, but I can't figure out where to line things up. I've tried moving things around, but nothing seems to work to get the name assignment to work inside that loop.
import os
from bs4 import BeautifulSoup
import my_csv_writer

def td_finder(tr, searchLabel):
    value = ""
    index = tr.text.find(searchLabel)
    if index > -1:
        tds = tr.findAll('td')
        if len(tds) > 1:
            value = tds[1].text
    return value

def main():
    topdir = 'some_directory'
    writer = my_csv_writer.CsvWriter("output.csv")
    writer.writeLine(["url", "headline", "name"])
    """Main Function"""
    blog = []
    for root, dirs, files in os.walk(topdir):
        for f in files:
            url = os.path.join(root, f)
            url = os.path.dirname(url).split('some_file')[1]
            if f.lower().endswith(".html"):
                file_new = open(os.path.join(root, f), "r").read()
                soup = BeautifulSoup(file_new)
                blog = {}
                # Blog Title
                blog["title"] = soup.find('title').text
                for table in soup.findAll("table"):
                    for tr in table.findAll("tr"):
                        # name
                        blog["name"] = td_finder(tr, "name:")
                seq = [url, unicode(blog["title"]), unicode(blog.get("name"))]
                writer.writeLine(seq)
                #return ""

if __name__ == '__main__':
    main()
    print "Finished main"
You're writing unicode strings to a CSV file, and according to the official docs, "The csv module doesn't directly support reading and writing Unicode...".
The docs do offer alternative classes to enable different encodings, via the UnicodeWriter example. The following answer from Boud on SO highlights the need to set the desired encoding in the CSV file.
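Since this is Python 2, a minimal workaround (a sketch, not the full UnicodeWriter recipe) is to encode each unicode field to UTF-8 just before writing, since the Python 2 csv module handles byte strings fine:

# -*- coding: utf-8 -*-
# Python 2 sketch: encode unicode fields to UTF-8 byte strings
# before handing the row to csv.writer.
import csv

def write_unicode_row(writer, row):
    writer.writerow([field.encode("utf-8") if isinstance(field, unicode) else field
                     for field in row])

with open("output.csv", "wb") as f:
    writer = csv.writer(f)
    write_unicode_row(writer, [u"url", u"headline", u"name"])

In your script, that would mean replacing the writer.writeLine(seq) call with something like write_unicode_row applied to seq, or doing the .encode("utf-8") inside your my_csv_writer helper.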