#!/usr/bin/env python
from urllib import request
import requests
from urllib.request import urlopen
import threading # import threadding
import json # import json
READ_API_KEY= "ZG0YZXYKP9LOMMB9"
CHANNEL_ID= "1370649"
while True:
    TS = urllib3.urlopen("http://api.thingspeak.com/channels/%s/feeds/last.json?api_key=%s"
                         % (CHANNEL_ID, READ_API_KEY))
    response = TS.read()
    data = json.loads(response)
    b = data['field1']
    c = data['field2']
    print(b)
    print(c)
    time.sleep(15)
    TS.close()
I uploaded the data from my Raspberry Pi to ThingSpeak, but in order to use it in the Folium map I need to access the data instantly. The code here seems like it should work, but I keep getting errors. Can you help me?
I would recommend you research Python imports a bit more.
In the meantime, this change should fix the issue: you forgot to import time, and you imported urlopen from urllib.request, not urllib3.
import time  # you forgot to import this

TS = urllib3.urlopen(url)
# should be
TS = urlopen(url)
The full loop then becomes:
while True:
    TS = urlopen("http://api.thingspeak.com/channels/1370649/feeds/last.json?api_key=ZG0YZXYKP9LOMMB9")
    response = TS.read()
    data = json.loads(response)
    b = data['field1']
    c = data['field2']
    print(b)
    print(c)
    time.sleep(15)
    TS.close()
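Since your script already imports requests, a minimal sketch of the same polling loop using that library instead (same channel ID and read key as above) would be:

import time
import requests

READ_API_KEY = "ZG0YZXYKP9LOMMB9"
CHANNEL_ID = "1370649"
url = "https://api.thingspeak.com/channels/%s/feeds/last.json?api_key=%s" % (CHANNEL_ID, READ_API_KEY)

while True:
    data = requests.get(url).json()   # fetch and decode the latest feed entry in one step
    print(data['field1'])
    print(data['field2'])
    time.sleep(15)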
data=json.loads(response)
^
SyntaxError: invalid syntax
I'm searching, but I haven't found much.
I have made a simple chat system with python-requests. There are two different files: one is the sender and the other is the receiver. The main idea of the two files is:
1. The sender file contains a while loop that keeps taking a message as input; after a message is entered, it sends that message to a website.
2. The receiver file also contains a while loop that fetches new messages from the website every 5 seconds.
Now I want to run these two jobs in the same window with Tkinter. How do I do that? Thanks in advance.
The Sender.py code is here:
import configme as con
import requests
import datetime
from cryptography.fernet import Fernet

nam = con.my_name
cookies_dict = con.cookie
key = con.crypto_key
url = con.base_url + '/config.php'

def makeID():
    return datetime.datetime.now().timestamp()

# encryption stuff
fernet = Fernet(key)

# member joining message
if len(nam) != 0:
    requests.get(url + f"?iD={makeID()}&name=<<<>>>&msg={nam} join the room.", cookies=cookies_dict)

with requests.Session() as r:
    while True:
        msg = input("Enter your Message: ")
        if msg == ".exit":
            # r.get(url + f"?iD={makeID()}&name=<<<>>>&msg={nam} has left the room.", cookies=cookies_dict)
            break
        else:
            encMessage = fernet.encrypt(msg.encode())
            messenger = {'iD': makeID(), 'name': nam, 'msg': encMessage}
            if msg != "":
                r.get(url, params=messenger, cookies=cookies_dict)
The Receiver.py code is here:
import configme as con
import requests
import json
from cryptography.fernet import Fernet
from time import sleep
from datetime import datetime
from pytz import timezone
import pytz

cookies_dict = con.cookie
ozone = con.my_timezone
key = con.crypto_key
time_format = con.date_time_format
url = con.base_url + '/log.json'
t = con.receive_time

# encryption stuff
fernet = Fernet(key)
timezone = timezone(ozone)

def setTime(t):
    stamptime = int(float(t))
    GMT0 = pytz.utc.localize(datetime.utcfromtimestamp(stamptime))
    return GMT0.astimezone(timezone).strftime(time_format)

j = 0
while True:
    r = requests.get(url, cookies=cookies_dict).text
    message = json.loads(r)
    message_sz = len(message)
    if message_sz == 0:
        print("Looks like there are no messages")
        break
    for msg in message[j:]:
        local_time = setTime(msg['id'])
        if msg['nam'] == '<<<>>>':
            print(f"{local_time} :: {msg['nam']} :: {msg['msg']}")
        else:
            decMessage = fernet.decrypt(bytes(msg['msg'], "utf-8")).decode()
            print(f"{local_time} :: {msg['nam']} :: {decMessage}")
    j = message_sz
    sleep(t)
I would not suggest this poll-the-website approach in general, but you could run the while loops in threads so they go at the same time, and update Tk whenever you want using tk.update(). The threaded loops can store what they receive in variables (or a queue), and your single Tk window can read and display them, as in the sketch below.
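A rough sketch of that idea, assuming a placeholder receiver_loop standing in for your real Receiver.py logic, and using a queue.Queue plus root.after() rather than tk.update() so that only the main thread touches the widgets:

import queue
import threading
import time
import tkinter as tk

incoming = queue.Queue()            # thread-safe hand-off between the worker and the GUI

def receiver_loop():
    # Background thread: stands in for the Receiver.py while loop.
    while True:
        msg = "new message"         # placeholder; your real code would call requests.get(...) here
        incoming.put(msg)
        time.sleep(5)

root = tk.Tk()
box = tk.Text(root)
box.pack()

def poll_queue():
    # Runs on the Tk main loop: drain the queue and update the widget.
    while not incoming.empty():
        box.insert(tk.END, incoming.get() + "\n")
    root.after(200, poll_queue)     # check again in 200 ms

threading.Thread(target=receiver_loop, daemon=True).start()
poll_queue()
root.mainloop()

The sender loop can be handled the same way, or replaced with an Entry widget whose callback sends the message.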
Use multithreading, or else load the data separately.
So I have been using the YouTube API to scrape a channel. Everything was working fine until 3 days ago (03/15/2019), when the results stopped being sorted. It seems that no matter what I put in the order parameter, the results are all the same. Can anyone tell me why it isn't working? Here's the code snippet:
import re
import os
import json
import MySQLdb
from pytube import YouTube
import urllib
import isodate
import sys
def get_all_video_in_channel(channel_id):
    api_key = '<MY KEY>'
    video_url = 'https://www.googleapis.com/youtube/v3/videos?part=snippet,contentDetails&id={}&key={}'
    first_url = 'https://www.googleapis.com/youtube/v3/search?key={}&channelId={}&part=snippet,id&order=date&maxResults=50'.format(api_key, channel_id)  # order by date but won't work
    res = []
    url = first_url
    while True:
        inp = urllib.urlopen(url)
        resp = json.load(inp)
        vidIds = []
        for jobject in resp['items']:
            if jobject['id']['kind'] == "youtube#video":
                vidIds.append(jobject['id']['videoId'])
        vidreq = urllib.urlopen(video_url.format(",".join(vidIds), api_key))
        vidres = json.load(vidreq)
        for vidjson in vidres['items']:
            res.append(vidjson)
        if (len(res) >= 50):
            break
        try:
            next_page_token = resp['nextPageToken']
            url = first_url + '&pageToken={}'.format(next_page_token)
        except:
            break
    return res
c_id = 'UCycyxZMoPwg9cuRDMyQE7PQ'
episodes = get_all_video_in_channel(c_id)
Edit: I did some more research, and people say the API is indeed not working properly because YouTube is doing something about removing the New Zealand shooting video, and that it should be working properly again soon.
I recommend you read this answer: https://stackoverflow.com/a/55220182/8327971. This is a known and acknowledged issue by Google: https://issuetracker.google.com/issues/128673552.
I have a lot of images on my Apache server that I want to move to Azure.
I cannot afford to do it sequentially, so I will add threading afterwards. I can access those images from a given URL and build a list from that. Easy.
But I do not have enough disk space to download each image, upload it, and then delete it. I would like something cleaner.
Is there a method to do that? Something like:
block_blob_service.AZURECOMMAND(container, source_URL, target_blob_name)
If not, is there a workaround?
Here is the complete code I have today (download and then upload, which I want to avoid):
EDIT: Thanks to Gaurav Mantri, I got it now. I have updated the code.
import requests
from bs4 import BeautifulSoup
from os.path import basename
import os
import sys
import urllib
import urllib2
import urlparse
import argparse
import json
import config
import random
import base64
import datetime
import time
import string
from azure.storage import CloudStorageAccount, AccessPolicy
from azure.storage.blob import BlockBlobService, PageBlobService, AppendBlobService
from azure.storage.models import CorsRule, Logging, Metrics, RetentionPolicy, ResourceTypes, AccountPermissions
from azure.storage.blob.models import BlobBlock, ContainerPermissions, ContentSettings
#from azure.storage.blob import BlobService
from azure.storage import *
#from azure.storage.blob.blobservice import BlobService
CURRENT_DIR = os.getcwd()
STORING_DIRECTORY_NAME = "stroage_scrapped_images"
STORING_DIRECTORY = CURRENT_DIR+"/"+STORING_DIRECTORY_NAME
if not os.path.exists(STORING_DIRECTORY):
    os.makedirs(STORING_DIRECTORY)

def randomword(length):
    letters = string.ascii_lowercase
    return ''.join(random.choice(letters) for i in range(length))

startdate = time.clock()
metadata_loaded = {'Owner': 'ToBeAddedSoon', 'Date_Of_Upload': startdate, 'VAR_2': 'VAL_VAR_2', 'VAR_3': 'VAL_VAR_3', 'VAR_4': 'VAL_VAR_4'}

with open("credentials.json", 'r') as f:
    data = json.loads(f.read())

StoAcc_var_name = data["storagacc"]["Accountname"]
StoAcc_var_key = data["storagacc"]["AccountKey"]
StoAcc_var_container = data["storagacc"]["Container"]
#print StoAcc_var_name, StoAcc_var_key, StoAcc_var_container

def copy_azure_files(source_url, destination_object, destination_container):
    blob_service = BlockBlobService(account_name=StoAcc_var_name, account_key=StoAcc_var_key)
    blob_service.copy_blob(destination_container, destination_object, source_url)

block_blob_service = BlockBlobService(account_name=StoAcc_var_name, account_key=StoAcc_var_key)

def upload_func(container, blobname, filename):
    start = time.clock()
    block_blob_service.create_blob_from_path(
        container,
        blobname,
        filename)
    elapsed = time.clock()
    elapsed = elapsed - start
    print "*** DEBUG *** Time spent uploading API ", filename, " is : ", elapsed, " in Bucket/container : ", container
#URL_TARGET = "https://mouradcloud.westeurope.cloudapp.azure.com/blog/blog/category/food/"
URL_TARGET = "https://www.cdiscount.com/search/10/telephone.html"
base_url = URL_TARGET
out_folder = '/tmp'
r = requests.get(URL_TARGET)
data = r.text
soup = BeautifulSoup(data, "lxml")
for link in soup.find_all('img'):
    src = link
    image_url = link.get("src")
    while image_url is not None:
        if 'http' in image_url:
            blocks = []
            if image_url.endswith(('.png', '.jpg', '.jpeg')):
                print " ->>>>>>>>>>>>>> THIS IS AN IMAGE ... PROCESSING "
                file_name_downloaded = basename(image_url)
                file_name_path_local = STORING_DIRECTORY + "/" + file_name_downloaded
                with open(file_name_path_local, "wb") as f:
                    f.write(requests.get(image_url).content)
                filename_in_clouddir = "uploads" + "/" + file_name_downloaded
                #upload_func(StoAcc_var_container, filename_in_clouddir, file_name_path_local)
                copy_azure_files(image_url, filename_in_clouddir, StoAcc_var_container)
                break
            else:
                print " ->>>>>>>>>>>>>> THIS NOT AN IMAGE ... SKIPPING "
                break
        else:
            print " ->>>>>>>>>>>>>> THIS IS A LOCAL IMAGE ... SKIPPING "
            break
    continue
Indeed there's something exactly like this: copy_blob
block_blob_service.copy_blob(container, target_blob_name, source_URL)
Please keep in mind that this copy operation is asynchronous, server-side copying, thus:
The source of the copy should be publicly accessible.
You must wait for the copy operation to finish before deleting the source items (see the sketch after this list).
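For that second point, a rough sketch of waiting on the copy before deleting the source, assuming the same legacy azure-storage BlockBlobService client used in the code above (the copy status is exposed through get_blob_properties):

import time

# Kick off the server-side copy, then poll until it is no longer pending
# before removing anything from the Apache server.
block_blob_service.copy_blob(container, target_blob_name, source_URL)

props = block_blob_service.get_blob_properties(container, target_blob_name)
while props.properties.copy.status == 'pending':
    time.sleep(1)
    props = block_blob_service.get_blob_properties(container, target_blob_name)

if props.properties.copy.status == 'success':
    print("copy finished; it is now safe to remove the source image")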
UPDATE
Modified code (I have not tried running it)
import requests
from bs4 import BeautifulSoup
from os.path import basename
import os
import sys
import urllib
import urllib2
import urlparse
import argparse
import json
import config
import random
import base64
import datetime
import time
import string
from azure.storage import CloudStorageAccount, AccessPolicy
from azure.storage.blob import BlockBlobService, PageBlobService, AppendBlobService
from azure.storage.models import CorsRule, Logging, Metrics, RetentionPolicy, ResourceTypes, AccountPermissions
from azure.storage.blob.models import BlobBlock, ContainerPermissions, ContentSettings
CURRENT_DIR = os.getcwd()
STORING_DIRECTORY_NAME = "stroage_scrapped_images"
STORING_DIRECTORY = CURRENT_DIR+"/"+STORING_DIRECTORY_NAME
if not os.path.exists(STORING_DIRECTORY):
    os.makedirs(STORING_DIRECTORY)

def randomword(length):
    letters = string.ascii_lowercase
    return ''.join(random.choice(letters) for i in range(length))

startdate = time.clock()
metadata_loaded = {'Owner': 'ToBeAddedSoon', 'Date_Of_Upload': startdate, 'VAR_2': 'VAL_VAR_2', 'VAR_3': 'VAL_VAR_3', 'VAR_4': 'VAL_VAR_4'}

with open("credentials.json", 'r') as f:
    data = json.loads(f.read())

StoAcc_var_name = data["storagacc"]["Accountname"]
StoAcc_var_key = data["storagacc"]["AccountKey"]
StoAcc_var_container = data["storagacc"]["Container"]
#print StoAcc_var_name, StoAcc_var_key, StoAcc_var_container

block_blob_service = BlockBlobService(account_name=StoAcc_var_name, account_key=StoAcc_var_key)

def upload_func(container, blobname, sourceurl):
    start = time.clock()
    block_blob_service.copy_blob(
        container,
        blobname,
        sourceurl)
    elapsed = time.clock()
    elapsed = elapsed - start
    print "*** DEBUG *** Time spent copying ", blobname, " is : ", elapsed, " in Bucket/container : ", container
#URL_TARGET = "https://mouradcloud.westeurope.cloudapp.azure.com/blog/blog/category/food/"
URL_TARGET = "https://www.cdiscount.com/search/10/telephone.html"
base_url = URL_TARGET
out_folder = '/tmp'
r = requests.get(URL_TARGET)
data = r.text
soup = BeautifulSoup(data, "lxml")
for link in soup.find_all('img'):
    src = link
    image_url = link.get("src")
    while image_url is not None:
        if 'http' in image_url:
            blocks = []
            if image_url.endswith(('.png', '.jpg', '.jpeg')):
                print " ->>>>>>>>>>>>>> THIS IS AN IMAGE ... PROCESSING "
                file_name_downloaded = basename(image_url)
                filename_in_clouddir = "uploads" + "/" + file_name_downloaded
                upload_func(StoAcc_var_container, filename_in_clouddir, image_url)
                break
            else:
                print " ->>>>>>>>>>>>>> THIS NOT AN IMAGE ... SKIPPING "
                break
        else:
            print " ->>>>>>>>>>>>>> THIS IS A LOCAL IMAGE ... SKIPPING "
            break
    continue
I have been working on a program where ICAO codes, along with their latitudes and longitudes, are parsed from an online text file using urllib. The program takes the ICAO codes and plugs them into the URL of a different website. So far I've been able to see which URLs work and which do not, but when I try to print the latitude for the URLs that do work, I end up getting a false latitude.
Here is my code...
import re
import cookielib
from cookielib import CookieJar
import time
import scipy.interpolate
import numpy as np
import matplotlib.pyplot as plt
from itertools import product
from string import *
import urllib2
from urllib2 import urlopen
from urllib2 import Request,HTTPError, URLError
import time
import csv
from StringIO import StringIO
from mpl_toolkits.basemap import Basemap
import scipy
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor)
opener.addheaders = [('User-agent','mr_anderson')]
keywords = map(''.join, product(ascii_lowercase, repeat=3))
keywords = ["k"+a+b+c for a,b,c in product(ascii_lowercase, repeat=3)]
start_time = time.time()
print("--- %s seconds ---" % (time.time() - start_time))
text_file = open("nws_gov.txt","a")
try:
    a = 1
    b = 1
    c = 0
    List = [""]
    for element in range(1, 10):
        i = 1
        i += 1
        a += 1
        b += 1
        c += 1
        keywargs = str(keywords[a]).upper()
        argument = 'http://weather.rap.ucar.edu/surface/stations.txt'
        sourceCode = opener.open(argument).read()
        airportcode = re.findall(r'\K\w\w\w.*?', str(sourceCode))
        lat = re.findall(r'\d{1,2}\s\d{1,2}\N', str(sourceCode))
        lata = lat[a]
        arg = 'http://w1.weather.gov/xml/current_obs/' + str(airportcode[a]) + '.rss'
        try:
            page_open = opener.open(arg)
        except:
            None
        else:
            print(arg + str(lata))
except Exception, e:
    print(str(e))
Thanks,
Scott Reinhardt
I want to get many pages from a website, like
curl "http://farmsubsidy.org/DE/browse?page=[0000-3603]" -o "de.#1"
but get the pages' data in Python rather than in disk files.
Can someone please post pycurl code to do this, or fast urllib2 (not one-at-a-time) if that's possible, or else say "forget it, curl is faster and more robust"? Thanks.
So you have two problems; let me show you in one example. Notice that pycurl already handles the multiple-connections / not-one-at-a-time part for you, without extra work on your part.
#! /usr/bin/env python
import sys, select, time
import pycurl,StringIO
c1 = pycurl.Curl()
c2 = pycurl.Curl()
c3 = pycurl.Curl()
c1.setopt(c1.URL, "http://www.python.org")
c2.setopt(c2.URL, "http://curl.haxx.se")
c3.setopt(c3.URL, "http://slashdot.org")
s1 = StringIO.StringIO()
s2 = StringIO.StringIO()
s3 = StringIO.StringIO()
c1.setopt(c1.WRITEFUNCTION, s1.write)
c2.setopt(c2.WRITEFUNCTION, s2.write)
c3.setopt(c3.WRITEFUNCTION, s3.write)
m = pycurl.CurlMulti()
m.add_handle(c1)
m.add_handle(c2)
m.add_handle(c3)
# Number of seconds to wait for a timeout to happen
SELECT_TIMEOUT = 1.0
# Stir the state machine into action
while 1:
    ret, num_handles = m.perform()
    if ret != pycurl.E_CALL_MULTI_PERFORM:
        break

# Keep going until all the connections have terminated
while num_handles:
    # The select method uses fdset internally to determine which file descriptors to check.
    m.select(SELECT_TIMEOUT)
    while 1:
        ret, num_handles = m.perform()
        if ret != pycurl.E_CALL_MULTI_PERFORM:
            break
# Cleanup
m.remove_handle(c3)
m.remove_handle(c2)
m.remove_handle(c1)
m.close()
c1.close()
c2.close()
c3.close()
print "http://www.python.org is ",s1.getvalue()
print "http://curl.haxx.se is ",s2.getvalue()
print "http://slashdot.org is ",s3.getvalue()
Finally, this code is mainly based on an example from the pycurl site. Maybe you should really read the docs; people spend a huge amount of time on them.
Here is a solution based on urllib2 and threads.
import urllib2
from threading import Thread
BASE_URL = 'http://farmsubsidy.org/DE/browse?page='
NUM_RANGE = range(0000, 3603)
THREADS = 2
def main():
    for nums in split_seq(NUM_RANGE, THREADS):
        t = Spider(BASE_URL, nums)
        t.start()

def split_seq(seq, num_pieces):
    start = 0
    for i in xrange(num_pieces):
        stop = start + len(seq[i::num_pieces])
        yield seq[start:stop]
        start = stop

class Spider(Thread):
    def __init__(self, base_url, nums):
        Thread.__init__(self)
        self.base_url = base_url
        self.nums = nums

    def run(self):
        for num in self.nums:
            url = '%s%s' % (self.base_url, num)
            data = urllib2.urlopen(url).read()
            print data

if __name__ == '__main__':
    main()
You can just put that into a bash script inside a for loop.
However, you may have better success parsing each page using Python:
http://www.securitytube.net/Crawling-the-Web-for-Fun-and-Profit-video.aspx
You will be able to get at the exact data and save it into a database at the same time:
http://www.securitytube.net/Storing-Mined-Data-from-the-Web-for-Fun-and-Profit-video.aspx
If you want to crawl a website using Python, you should have a look at Scrapy: http://scrapy.org
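For this particular job, a minimal Scrapy spider could look roughly like this (a sketch only; the page range comes from the question, and parse() just records the URL and body size where you would put real extraction):

import scrapy

class FarmSubsidySpider(scrapy.Spider):
    name = "farmsubsidy"
    # The 0000-3603 page range from the question, zero-padded.
    start_urls = ["http://farmsubsidy.org/DE/browse?page=%04d" % n
                  for n in range(0, 3604)]

    def parse(self, response):
        # Replace this with real extraction; here we just record the URL and size.
        yield {"url": response.url, "size": len(response.body)}

You would run it with something like: scrapy runspider farmsubsidy_spider.py -o pages.json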
Using BeautifulSoup4 and requests -
Grab the head page:
from bs4 import BeautifulSoup as Soup
import requests

page = Soup(requests.get(url='http://rootpage.htm').text)
Create an array of requests:
from requests import async
requests = [async.get(url.get('href')) for url in page('a')]
responses = async.map(requests)
[dosomething(response.text) for response in responses]
Requests requires gevent to do this btw.
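Note that the bundled requests.async module was later removed from Requests; the same gevent-based pattern now lives in the separate grequests package. A rough sketch of the equivalent (the URL range is just an illustration borrowed from the question):

import grequests

urls = ["http://farmsubsidy.org/DE/browse?page=%04d" % n for n in range(0, 3604)]
pending = (grequests.get(u) for u in urls)      # build the request objects lazily
responses = grequests.map(pending, size=20)     # run them concurrently, 20 at a time
texts = [r.text for r in responses if r is not None]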
I can recommend the async module of human_curl. Here is an example:
from urlparse import urljoin
from datetime import datetime
from human_curl.async import AsyncClient
from human_curl.utils import stdout_debug
def success_callback(response, **kwargs):
    """This function is called when the response succeeded."""
    print("success callback")
    print(response, response.request)
    print(response.headers)
    print(response.content)
    print(kwargs)

def fail_callback(request, opener, **kwargs):
    """Collect errors."""
    print("fail callback")
    print(request, opener)
    print(kwargs)

with AsyncClient(success_callback=success_callback,
                 fail_callback=fail_callback) as async_client:
    for x in xrange(10000):
        async_client.get('http://google.com/', params=(("x", str(x)),))
        async_client.get('http://google.com/', params=(("x", str(x)),),
                         success_callback=success_callback, fail_callback=fail_callback)
Usage is very simple. When a page is successfully loaded or fails, async_client calls your callback. You can also specify the number of parallel connections.