telethon.errors.rpcerrorlist.UserAlreadyParticipantError: The authenticated user is already a participant of the chat (caused by ImportChatInviteRequest)
My only request is for the script to ignore this error and continue processing when it occurs.
with open('numaralar.csv', 'r') as f:
    str_list = [row[0] for row in csv.reader(f)]

po = 0
for pphone in str_list:
    phone = utils.parse_phone(pphone)
    po += 1
    print(Style.BRIGHT + Fore.GREEN + f"Logging in {phone}")
    client = TelegramClient(f"sessions/{phone}", 2392599, '7e14b38d250953c8c1e94fd7b2d63550')
    client.connect()
    if not client.is_user_authorized():
        try:
            client.send_code_request(pphone)
            client.sign_in(pphone, input('Enter the code: '))
            print('')
            client.sign_in(pphone)
        except SessionPasswordNeededError:
            password = input('Enter the 2FA password: ')
            print('')
            client.sign_in(password=password)
        except:
            traceback.print_exc()
            print(Style.BRIGHT + Fore.RED + "You have already signed in with this one")
            continue
    gplink = 'qDPUgvuTiCliNzdk'
    client(ImportChatInviteRequest(gplink))
    print(Style.BRIGHT + Fore.GREEN + "Joined the group")
I tried something like this but failed.
How can I do this? It seems easy, but I am just starting to learn.
The error message states that the error is caused by ImportChatInviteRequest(), so you have to surround that part with a try/except block.
try:
    gplink = 'qDPUgvuTiCliNzdk'
    client(ImportChatInviteRequest(gplink))
    print(Style.BRIGHT + Fore.GREEN + "Joined the group")
except:
    pass  # When there is an exception, do nothing
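If you would rather ignore only this specific error and still see any other failures, you could catch the exception class from your traceback explicitly instead of using a bare except. A minimal variation of the snippet above (the printed messages are only placeholders):

from telethon.errors.rpcerrorlist import UserAlreadyParticipantError

try:
    gplink = 'qDPUgvuTiCliNzdk'
    client(ImportChatInviteRequest(gplink))
    print(Style.BRIGHT + Fore.GREEN + "Joined the group")
except UserAlreadyParticipantError:
    # This account is already in the chat, so just move on to the next number
    print(Style.BRIGHT + Fore.YELLOW + "Already a participant, skipping")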
I have the following code that iterates over a list like ['ABC','AAA','BBB'], sending HTTP requests to an API; the received data is saved to another list and sent via email and Telegram.
Currently the list is processed sequentially, which is slow.
I am trying to process this data in parallel and am now using asyncio, but on execution I get a type error on the array - TypeError: 'async for' requires an object with __aiter__ method, got Series
Can you advise how best to solve this problem or how to correctly convert the array type?
Current code:
for value in alldata:
    print('Processing', value)
    today = str(datetime.today().strftime('%d.%m.%Y'))
    if debug_mode is True:
        start = "18.09.2020"
        end = "18.09.2020"
    else:
        start = today
        end = today
    ########
    periods = {'tick': 1, 'min': 2, '5min': 3, '10min': 4, '15min': 5, '30min': 6, 'hour': 7, 'daily': 8, 'week': 9, 'month': 10}
    print("value=" + value + "; period=" + str(period) + "; start=" + start + "; end=" + end)
    try:
        Ids = str((urlopen('https://testapi.dev/export.js').readlines())[0])
        Codes = str((urlopen('https://testapi.dev/export.js').readlines())[2])
    except Exception:
        print('Cannot get Ids & Codes')
    try:
        index = Codes.index(value)
        symbol_code = str(Ids[index])
    except Exception:
        try:
            Ids = str((urlopen('https://testapi.dev/import').readlines())[0])
            Codes = str((urlopen('https://testapi.dev/import').readlines())[2])
            index = Codes.index(value)
            symbol_code = str(Ids[index])
        except Exception:
            print("value not in list", value)
    region = 0
    start_date = datetime.strptime(start, "%d.%m.%Y").date()
    start_date_rev = datetime.strptime(start, '%d.%m.%Y').strftime('%Y%m%d')
    end_date = datetime.strptime(end, "%d.%m.%Y").date()
    end_date_rev = datetime.strptime(end, '%d.%m.%Y').strftime('%Y%m%d')
    params = urlencode([
        ('region', region),
        ('symbol', symbol_code),
        ('code', value),
        ('df', start_date.day),
        ('mf', start_date.month - 1),
        ('yf', start_date.year),
        ('from', start_date),
        ('dt', end_date.day),
        ('mt', end_date.month - 1),
        ('yt', end_date.year),
        ('to', end_date),
        ('p', period),
        ('f', value + "_" + start_date_rev + "_" + end_date_rev)
    ])
    url = FULL_URL + value + "_" + start_date_rev + "_" + end_date_rev + params
    try:
        txt = urlopen(url).readlines()
    except Exception:
        try:
            time.sleep(random.randint(1, 10))
            txt = urlopen(url).readlines()
        except Exception:
            time.sleep(random.randint(1, 10))
            txt = urlopen(url).readlines()
    try:
        imported_data = []
        for line in txt:
            imported_data.append(line.strip().decode("utf-8").replace(',', ";"))
    except Exception:
        print("Cannot get data ")
    try:
        current_value = (imported_data[1].split(";")[0])
        first_price = float(imported_data[1].split(";")[5])
        last_price = float(imported_data[-1].split(";")[5])
        percent_difference = float((last_price / first_price) * 100 - 100)
        time.sleep(int(request_delay))
        if percent_difference > percent_trigger:
            trigger = True
            if (str(value) + ',' + str(today)) in already_found:
                print('Value ' + str(value) + ' already found')
            else:
                take_profit = last_price * (1 + 5 / 100)
                found_tickers.append(str(current_value + ',' + str(first_price) + ',' + str(last_price) + ',' + str(take_profit)))
                already_found.append(str(value) + ',' + str(today))
                if send_immediately == 'yes':
                    try:
                        subject = str(value)
                        mail_content = (str(current_value + ',' + str(first_price) + ',' + str(last_price) + ',' + str(take_profit)))
                        # The mail addresses and password
                        # Setup the MIME
                        message = MIMEMultipart()
                        message['From'] = sender_address
                        message['To'] = receiver_address
                        message['Subject'] = subject  # The subject line
                        message['X-Priority'] = '1'
                        # The body and the attachments for the mail
                        message.attach(MIMEText(mail_content, 'plain'))
                        # Create SMTP session for sending the mail
                        session = smtplib.SMTP(smtp_server, smtp_port)  # use gmail with port
                        session.starttls()  # enable security
                        session.login(sender_address, sender_pass)  # login with mail_id and password
                        text = message.as_string()
                        session.sendmail(sender_address, receiver_address, text)
                        session.quit()
                    except Exception:
                        print("Cannot send Email")
                    # Send to telegram
                    try:
                        telegram_bot_sendtext((str(current_value) + ' ' + str(first_price) + ' ' + str(last_price) + ' ' + str(take_profit)))
                    except Exception:
                        print("Cannot send message to Telegram")
        else:
            trigger = False
    except Exception:
        print("Processing error for value", value)
Parallel code:
async def main(alldata):
    for value in alldata:
        # ... the loop body here is identical to the sequential code above ...

asyncio.run(main(alldata))
Async in Python
Let's begin by clarifying that asynchronous code and multiprocessing are two different approaches to concurrency, so an async approach will not be executed in parallel.
If I'm not mistaken, your parallel main function, apart from the async def line, does not have any trace of asynchronicity. Async, at least in Python, usually requires some serious restructuring of the code base: every piece of code that is to be executed asynchronously (e.g. network requests) has to be refactored and declared as async.
On the other hand, multiprocessing in Python is much simpler: import multiprocessing, create a pool, and apply your function, as sketched right below.
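For illustration only, a minimal sketch of that multiprocessing route, assuming the body of your current for value in alldata: loop is moved into a function (process_value is a name invented here) that depends only on its value argument:

from multiprocessing import Pool

def process_value(value):
    # ... the body of your current `for value in alldata:` loop goes here ...
    return value

if __name__ == '__main__':
    with Pool(processes=8) as pool:  # number of worker processes, tune as needed
        results = pool.map(process_value, list(alldata))  # list(...) also works if alldata is a pandas Series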
Async Example
Since your code is quite extensive and I do not know which steps actually are to be executed asynchronously, here is an example of how async can be used in Python:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import aiohttp
import asyncio
from aiohttp.typedefs import URL
from typing import List, Union, Tuple

TIMEOUT: int = 5

def print_job_started(job_name: str) -> None:
    print(job_name, "STARTED")

def print_job_finished(job_name: str) -> None:
    print(job_name, "FINISHED")

async def async_request_content(
        session: aiohttp.ClientSession,
        method: str,
        url: Union[str, URL],
        timeout: int = TIMEOUT,
        **kwargs
) -> Tuple[str, int]:
    """
    Abstract asynchronous request. Returns the text content & status code.
    """
    async with session.request(method=method, url=url, timeout=timeout, **kwargs) as response:
        return await response.text(), response.status

async def fun(session: aiohttp.ClientSession, url: str) -> Tuple[str, int]:
    print_job_started("fun")
    response = await async_request_content(session=session, method="get", url=url)
    print_job_finished("fun")
    return response

async def _main(url_list: List[str]):
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in url_list:
            tasks.append(asyncio.ensure_future(fun(session=session, url=url)))
        return await asyncio.gather(*tasks)

def main():
    url_list = [
        "https://example.com" for _ in range(10)
    ]
    loop = asyncio.get_event_loop()
    future = asyncio.ensure_future(_main(url_list=url_list))
    return loop.run_until_complete(future)

if __name__ == '__main__':
    res = main()
    # fun STARTED
    # fun STARTED
    # fun STARTED
    # fun STARTED
    # fun STARTED
    # fun STARTED
    # fun STARTED
    # fun STARTED
    # fun STARTED
    # fun STARTED
    # fun FINISHED
    # fun FINISHED
    # fun FINISHED
    # fun FINISHED
    # fun FINISHED
    # fun FINISHED
    # fun FINISHED
    # fun FINISHED
    # fun FINISHED
    # fun FINISHED
    for r in res:
        print(r[1])
    # 200
    # 200
    # 200
    # 200
    # 200
    # 200
    # 200
    # 200
    # 200
    # 200
The example is fairly elementary but suffices for demonstration purposes. When I deal with async, my typical workflow is as follows:
1) define the async functions (async_request_content & fun)
2) create a first async wrapper where the tasks are defined (_main)
3) finalize with a second sync wrapper where the loop is defined (main)
This is just one way of implementing it - there are many other alternatives. However, regardless of which alternative you choose, step 1 will always need to be done (and it is usually the most time-consuming one).
Closing Note
For an async approach it seems to me you would still have to deal with step 1, which can be quite tedious: your code involves many network requests. If you want fully async code, all lines with requests would need to be refactored. If your goal is a partially async approach, then you'd have much less refactoring, but all remaining synchronous requests would seriously bottleneck your code (such an approach is usually discouraged).
On the other hand, implementing a multiprocessing approach would be extremely fast (to develop), as you can reuse your code pretty much as-is.
Finally, async could still make a lot of sense (e.g. not enough CPUs/threads for significant parallel processing, as is often the case on servers; more efficient/scalable; ...), but it definitely requires more work.
Q) Can someone help me get the values inserted into the MySQL database? I am just confused about where to place the mydb part.
Reason: only once I manually press Ctrl+C to stop the .py script do the values get inserted into the MySQL database.
Everything below is used in the same .py file.
Here is the complete code; where should I place the mydb function?
Table values are not inserted into the MySQL database until Ctrl+C is pressed to close the Python file on Linux.
import os
import re
from builtins import len, Exception
import slack
import logging
from subprocess import check_output
import datetime
import mysql.connector
import time
import json
import requests

#user_threads_info = {}
#thread_ts = ""

@slack.RTMClient.run_on(event='message')
def say_hello(**payload):
    try:
        ##0 get clients and payload
        logging.info('msg received')
        data = payload['data']
        web_client = payload['web_client']
        rtm_client = payload['rtm_client']
        ##0-1 Check if it is the first msg, not a msg already replied to by me
        # print(data)
        if data.get('text') == None:
            logging.info('This msg is my replied msg.')
            return False
        ##0-2 Get channel info
        channel_id = data['channel']
        thread_ts = data['ts']
        global user
        user = data['user']
        #user_info = get_userinfo(user)
        #print(user_info)
        msg = data['text']
        ##1 get scenario submsg
        retVal = analysis_msg(msg)
        # print(retVal)
        response = web_client.users_list()
        assert(response['ok'])
        user_map = {x['id']: x['name'] for x in response['members']}
        global user_name
        user_name = user_map[user] if user in user_map else None
        print(user_name)
        if retVal[0] == False:
            retMsg = retVal[1] + "\nI can create the following orders. \n" \
                                 "a) spu - store pickup \n" \
                                 "b) sth - ship to home \n" \
                                 "c) del - delivery \n" \
                                 "d) digitalAsGuest - Digital item \n" \
                                 " \n" \
                                 "Please provide information as mentioned in below example.\n" \
                                 " \n" \
                                 "Example: spu:3646989:sftqa3:AMEX\n" \
                                 "\n" \
                                 "Sample SKUS:\n" \
                                 "spu - [3646989,8862011]\n" \
                                 "sth - [2592015,6140094]\n" \
                                 "del - [5592005,8862011]\n" \
                                 "digitalAsGuest - [2810037,5057400]"
            send_msg(web_client, channel_id, thread_ts, user, retMsg)
            return False
        ##2 form cmd
        retVal = form_cmd(retVal[1])
        print(retVal)
        if retVal == False:
            return False
        ##3 execute cmd
        # inform the start of test
        retMsg = "Creating an order, please wait for the result."
        send_msg(web_client, channel_id, thread_ts, user, retMsg)
        global res
        try:
            res1 = os.popen(retVal).read()
            print("Printing result...")
            print(res1)
            print("end of print")
            res = reg_result_new(res1)
            if res == False:
                print("reg_function failure")
                retMsg = "The test order placement failed."
            else:
                retMsg = "Order Id - " + res['id'] + "\nFirst Name - " + res['firstName'] + "\nLast Name - " + res['lastName'] + "\n PhoneNumber - " + res['dayPhoneNumber'] + "\n Email - " + res['email'] + "\n"
        except Exception as ee:
            retMsg = "The test scenario has a failure. Please check the feature file."
        ##4 send result to slack
        # retMsg = "Order Id - " + res['id'] + "\nFirst Name - " + res['firstName'] + "\nLast Name - " + res['lastName'] + "\n PhoneNumber - " + res['day PhoneNumber'] + "\n Email - " + res['email'] + "\n"
        create_result_file(user, res)
        send_msg(web_client, channel_id, thread_ts, user, retMsg)
        print(retVal)
    except Exception as e:
        print("error")
        logging.critical(str(e))

############################ My handlers ##############################

def create_result_file(user, res):
    try:
        cur_time = datetime.datetime.now()
        file_name = user + str(cur_time.year) + str(cur_time.month) + str(cur_time.day) + str(cur_time.hour) + str(
            cur_time.minute) + str(cur_time.second) + '.txt'
        file = open(file_name, 'w')
        file.write(res)
        file.close()
    except Exception as e:
        print(str(e))

def send_msg(web_client, channel_id, thread_ts, user, mgs):
    print("thread_ts value is:" + thread_ts)
    web_client.chat_postMessage(
        channel=channel_id,
        text=f"```Hi <#{user}>! \n " + mgs + "```",
        thread_ts=thread_ts
    )

#def get_userinfo(user):
#    payload = {'token': slack_token, 'user': user}
#    r = requests.get('https://slack.com/api/users.info', params=payload)
#    print(r.text)
#    return json.loads(r.text)["user"]

# error code mgmt.
def error_code(code):
    # reserved
    print(code)
    return [False, code]

# break down msg to the test scenario submsgs
def analysis_msg(msg):
    global submsg
    submsg = msg.split(":")
    for value in submsg:
        print(value)
    if len(submsg) != 4:
        logging.warning("This msg not test scenario")
        return error_code("Please check the format")
    res = {}
    res["feature"] = submsg[0]
    res["sku"] = submsg[1]
    res["env"] = submsg[2]
    res["payment"] = submsg[3]
    ###check
    if validate_sku(res["sku"]) == False:
        return error_code("INVALID_SKU \n")
    if validate_env(res["env"]) == False:
        return error_code("INVALID_ENV \n")
    if validate_payment(res["payment"]) == False:
        return error_code("INVALID_payment \n")
    if check_specialCharacter(res["feature"]) == False:
        return error_code("INVALID_PROFILE_WITH_SPECIAL_CHARACTER")
    return [True, res]

# form cmd for test bat files ! reserved
def form_cmd(submsg):
    cmd = 'sh /home/iptbot/iptautobot/test.sh ' + submsg['env'] + ' ' + submsg['feature'] + ' ' + submsg["sku"] + ' ' + submsg["payment"]
    return cmd

# code to print user details
def reg_result_new(res):
    start = 'COP Order Response :'
    end = 'isGuestMode'
    start_index = res.find(start) + len(start)
    res = res[start_index:]
    end_index = res.find(end) + 22
    global data
    data = res[:end_index]
    try:
        print('Data -> ' + str(data))
        data = json.loads(data.strip())
        new_data = {}
        new_data['id'] = data['id']
        new_data['firstName'] = data['lineItems'][0]['fulfillmentInfo']['storeInfo']['agentInfo']['firstName']
        new_data['lastName'] = data['lineItems'][0]['fulfillmentInfo']['storeInfo']['agentInfo']['lastName']
        new_data['dayPhoneNumber'] = data['lineItems'][0]['fulfillmentInfo']['storeInfo']['agentInfo']['dayPhoneNumber']
        new_data['email'] = data['lineItems'][0]['fulfillmentInfo']['storeInfo']['agentInfo']['email']
        #new_data['firstName'] = data['paymentInfo']['billingAddressInfo']['firstName']
        return new_data
    except Exception as e:
        print('Here error -> ' + str(e))
        return False

#def reg_result(res):
#    "COP Order Response"
#    lines = res.split('\n')
#    for line in lines:
#        pattern = "COP Order Response*"
#        prog = re.compile(pattern)
#        result = prog.search(line)
#        if result == None:
#            continue
#        res1 = result.string.split('{')
#        if len(res1) < 2:
#            continue
#        res2 = res1[1].split(',')
#        if len(res2) < 2:
#            continue
#        res3 = res2[0].split(':')
#        if len(res3) < 2:
#            continue
#        return res3[1]
#        # COP Order Response : {"id":"BBY01-200001878853"
#    return False

# return val is Boolean
# True/False
# Input type: String
# for positive integer only
# alternative way: Handle exception for int(d)
def validate_sku(sku_val):
    return sku_val.isnumeric()

# input val : string
# return val: Boolean
def validate_env(env_val):
    env_list = [
        "sftqa1", "sftqa2", "sftqa3", "sftqa4"
    ]
    if env_val in env_list:
        return True
    else:
        return False

def validate_payment(payment_val):
    env_payment = [
        "AMEX", "VISA"
    ]
    if payment_val in env_payment:
        return True
    else:
        return False

# input val : string
# return val: Boolean
def check_specialCharacter(s):
    if s == "":
        return False
    if s.isspace():
        return False
    return s.isalnum()

slack_token = os.environ["SLACK_API_TOKEN"]
rtm_client = slack.RTMClient(token=slack_token)
rtm_client.start()

# database connection
mydb = mysql.connector.connect(
    host="host",
    user="user",
    passwd="pass",
    database="db"
)
mycursor = mydb.cursor()

for value in submsg:
    print(value)
fulfilment = submsg[0]
sku = submsg[1]
environment = submsg[2]
payment = submsg[3]
ts = time.time()
date = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
orderNumber = data['id']
username = user_name
print(fulfilment)
print(sku)
print(environment)
print(payment)
print(username)
print(orderNumber)
sqlformula = "INSERT INTO orderDetails (fulfilment,sku,environment,payment,orderNumber,date,user) VALUES (%s,%s,%s,%s,%s,%s,%s)"
#order = ("sth", 3643387, "sftqa2", "AMEX")
#mycursor.execute(sqlformula, order)
mycursor.execute(sqlformula, (fulfilment, sku, environment, payment, orderNumber, date, username))
mydb.commit()
mydb.close()
Output
sh /home/iptbot/iptautobot/test.sh sftqa3 spu 3646989 AMEX
error
CRITICAL:root:'user'
error
CRITICAL:root:'user'   // after pressing Ctrl+C the values get inserted
^CWARNING:slack.rtm.client:Websocket was closed.
3646989
sftqa3
AMEX
spu
3646989
sftqa3
AMEX
a6002043
BBY01-200002091354
You are stuck at this point because rtm_client.start() is a synchronous call.
If you want it to be asynchronous (non-blocking) then you should run:
rtm_client.start(run_async=True)
Here is a good walk-through on how to set up async usage of the library. Also have a look at the method signature for RTMClient to get an idea of how it works.
Here's a good example detailing a lot of what you would need in your case.
Then you will hit your db execution code where you will need to have a while loop to go through the data you want to add to the DB.
I would recommend that you use a Queue for this, as it is synchronised and will be easier to manage than a global list which is overwritten on every order. Preferably you could use asyncio.Queue, with an example of implementation here (an asyncio variant is also sketched after the pseudo code below).
When an order has passed the validation steps, add it to the queue. Here is some pseudo code describing the flow with a basic (not asyncio) Queue:
import queue

q = queue.Queue()

def validate_order(order):
    valid_order_data = ......
    q.put(valid_order_data)

while True:
    valid_order = q.get()  # Will wait until there is a value on the queue
    mycursor.execute(sqlformula, (valid_order))
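If you go with asyncio.Queue instead, as suggested above, the same producer/consumer flow might look roughly like the sketch below; handle_valid_order and db_writer are hypothetical names, both coroutines are assumed to run in the same event loop as the async RTM client, and mycursor/sqlformula/mydb come from your existing code:

import asyncio

order_queue = asyncio.Queue()

async def handle_valid_order(order):
    # Producer: call this once an order has passed the validation steps
    await order_queue.put(order)

async def db_writer():
    # Consumer: waits for validated orders and inserts them one by one
    while True:
        valid_order = await order_queue.get()  # assuming valid_order is the tuple of column values
        mycursor.execute(sqlformula, valid_order)
        mydb.commit()
        order_queue.task_done()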
1) I have a list of product links and it contains 3385 links.
2) I have a function get_pro_info(link) that takes a product link and appends the item to a JSON file.
3) I want Selenium to open 5 browsers with 5 links in parallel, get the product information, and append it to a file or list,
or 3) Selenium opens 1 browser with 5 tabs (5 links) and appends to the file.
Question: how can I apply threading to my code?
My code:
new_url = ''

def get_pro_info(pro_url):
    driver = webdriver.Chrome(executable_path=r'C:\Users\Beenu\PycharmProjects/chromedriver.exe')
    try:
        new_url = 'https://pk.studiobytcs.com' + pro_url
        print('new product URL: ' + new_url)
        driver.execute_script("window.open('');")
        sleep(1)
        # use to switch control
        driver.switch_to.window(driver.window_handles[0])
        # sleep(1)
        driver.get(new_url)
    except (WebDriverException, selenium.common.exceptions.TimeoutException, Exception) as e:
        print('There is error in getting Product by URL in get_pro_info()! \n' + str(e.stacktrace))
        pass
    description_source_code = ''
    # description_soup = BeautifulSoup()
    description_soup: BeautifulSoup = object
    # global description_soup
    try:
        # description_soup = BeautifulSoup('html.parser')
        description: WebElement = driver.find_element_by_xpath(
            '//*[@id="shopify-section-product-template"]/div[2]/div[1]/div/div[2]')
        description_source_code = description.get_attribute("innerHTML")
        description_soup: BeautifulSoup = BeautifulSoup(description_source_code, 'html.parser')
    except NoSuchElementException as e:
        print('Product description tag not found! \n' + str(e.stacktrace))
        pass
    # 179 here
    # This is for getting heading product name
    head = ''
    r_j_title = ''
    try:
        head = description_soup.find_all("h1", class_="product_name")
        # print(head)
        r_j_title = head[0].string.strip()
        print("Title: " + r_j_title)
    except (HTMLParser, IndexError):
        print('Fail to get heading/title Tag! \n' + str(HTMLParser))
    # This is for getting brand name from heading/title
    r_j_brand_and_designer = ''
    try:
        brand_and_designer = head[0].string.strip().split("-")[0]
        r_j_brand_and_designer = str(brand_and_designer).strip()
        print('Brand and designer: ' + r_j_brand_and_designer)
    except (IndexError, ValueError) as e:
        print('Fail to Split Brand from heading/title ! \n' + str(e.stacktrace))
    # This is for getting price as an integer
    r_j_price_in_int = ''
    try:
        price = description_soup.find_all("span", class_="money")
        # print(price)
        price_new = price[0].string.strip()
        print("New price: " + price_new)
        # this is for getting price from string
        r_c_price = price[0].string.strip().split(".")[1]
        r_j_price_in_int = str(r_c_price).replace(",", "")
        # price could have a ,
        print('Price: ' + r_j_price_in_int)
    except (HTMLParser, IndexError, ValueError) as e:
        print('Fail to get Tag or failed to Split Brand from heading/title ! \n' + str(e.stacktrace))
    # this is for getting the full description
    description_all = ''
    r_j_desc = ''
    try:
        description_all = description_soup.find_all("div", class_="description")
        final_des = str(description_all[0].get_text())
        ch = final_des.split()
        r_j_desc = str(' '.join(ch))
        print("with split ch : " + r_j_desc)  # addition of .string.strip()
    except (HTMLParser, IndexError, ValueError) as e:
        print('Fail to get all description Tag or failed to Split and removing endline chr from description ! \n' + str(
            e.stacktrace))
    # This is for trying if the fabric tag is not available
    try:
        get_split_fibric = description_all[0].get_text().split("Fabric", 1)[1]
        get_split_des = get_split_fibric.split("Disclaimer")[0]
        r_j_fabric = str(get_split_des).strip()
        print("getting fabric: " + r_j_fabric)
    except IndexError as e:
        r_j_fabric = 'N/A'
        print('Fabric is not available: ' + r_j_fabric)
    item['brand_name'] = str(r_j_brand_and_designer)
    item['designer'] = str(r_j_brand_and_designer)
    item['title'] = str(r_j_title)
    item['description'] = str(r_j_desc)
    item['price'] = int(r_j_price_in_int)
    item['currency'] = "PKR"
    item['product_id'] = str(r_j_title)
    item['source'] = str(new_url)
    item['fabric'] = str(r_j_fabric)
    item['gender'] = "woman"
    print(item)
    cloth = {
        "cloth": item
    }
    # instruction
    print(cloth)
    list_before_dump.append(cloth)
    driver.close()
    driver.quit()

with open('product_link_read.txt', 'r') as file:
    data = file.readlines()
    # rd_pro_link_list = rd_pro_link_list + data.replace('\n', '')
    print(data)

for line in data:
    # fap=
    rd_pro_link_list.append(str(line).strip())

print(rd_pro_link_list)
print(len(rd_pro_link_list))

for pro_link in rd_pro_link_list:
    get_pro_info(pro_link)
    print('Pro count = ' + str(pro_count))
    pro_count = pro_count + 1

list_before_dump_file.write(json.dumps(list_before_dump))
driver.close()
list_before_dump_file.close()
If you want to iterate the list and always take 20 links at a time, then you can use range(start, stop, step) with step=20:
all_t = []

for i in range(0, len(list_of_product_link), 20):
    twenty_links = list_of_product_link[i:i+20]
    t = threading.Thread(target=get_product_info, args=(twenty_links,))
    t.start()
    all_t.append(t)

# --- later ---

for t in all_t:
    t.join()
or
for i in range(0, len(list_of_product_link), 20):
    twenty_links = list_of_product_link[i:i+20]

    all_t = []
    for link in twenty_links:
        t = threading.Thread(target=get_product_info, args=(link,))
        t.start()
        all_t.append(t)

    # --- inside first `for` loop ---
    for t in all_t:
        t.join()
The other method is good if you will not need your list later:
all_t = []

while list_of_product_link:
    twenty_links = list_of_product_link[:20]
    list_of_product_link = list_of_product_link[20:]
    t = threading.Thread(target=get_product_info, args=(twenty_links,))
    t.start()
    all_t.append(t)

# --- later ---

for t in all_t:
    t.join()
or
while list_of_product_link:
    twenty_links = list_of_product_link[:20]
    list_of_product_link = list_of_product_link[20:]

    all_t = []
    for link in twenty_links:
        t = threading.Thread(target=get_product_info, args=(link,))
        t.start()
        all_t.append(t)

    # --- inside first `while` loop ---
    for t in all_t:
        t.join()
BTW: args= needs a tuple, even if you have only one argument, so you need a , inside ( ) to create a tuple with one element.
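For example, reusing the names from the snippets above, the difference is just the trailing comma:

t = threading.Thread(target=get_product_info, args=(link,))  # OK: (link,) is a one-element tuple
t = threading.Thread(target=get_product_info, args=(link))   # wrong: (link) is just link, not a tuple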
BTW: If you want only 20 threads running at any moment, then it is better to look at multiprocessing and Pool(20):
from multiprocessing import Pool

def get_product_info(link):
    result = ....
    return result

if __name__ == '__main__':
    with Pool(20) as p:
        all_results = p.map(get_product_info, list_of_product_link)
import ldap

try:
    l = ldap.initialize("ldap://ldap.xxxxx.com:389")
    username = raw_input("Enter the username : ")
    password = raw_input("Enter the password :")
    if(username == "" or password == ""):
        print "Login Error : Username or password can't be blank"
    else:
        l.simple_bind(username, password)
        print "Contact..."
except ldap.LDAPError, e:
    print e

baseDn = "ou=active, ou=employees, ou=people, o=xxxxx.com"
searchScope = ldap.SCOPE_ONELEVEL
# retrieve all attributes
retrieveAttributes = None
search_query = raw_input("Enter the query :")
searchFilter = "cn=" + search_query

try:
    ldap_result_id = l.search(baseDn, searchScope, searchFilter, retrieveAttributes)
    result_set = []
    while 1:
        result_type, result_data = l.result(ldap_result_id, 0)
        if(result_data == []):
            break
        else:
            if result_type == ldap.RES_SEARCH_ENTRY:
                result_set.append(result_data)
                #print result_set
    print len(result_set)
except ldap.LDAPError, e:
    print e
#print result_set[0]
The above code uses python-ldap to access LDAP services. The type of result_set is displayed as a list, but the number of items reported by len() turns out to be zero. I need to perform operations on the retrieved string.