Keep a log file when pushing data from Python to a Kafka topic - python

# importing the required libraries
from time import sleep
from json import dumps
from kafka import KafkaProducer

# initializing the Kafka producer
my_producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=lambda x: dumps(x).encode('utf-8')
)

# producing the numbers 0 through 9
for n in range(10):
    my_data = {'num': n}
    my_producer.send('testnum', value=my_data)
    sleep(1)

# producing only the even numbers to a second topic
for n in range(10):
    if n % 2 == 0:
        json_data = {'num': n}
        my_producer.send('testnum1', value=json_data)
        sleep(1)
Could anyone help me set up logging for this script?

You haven't printed anything, but if you want a log file, use the Python logging module, or simple shell redirection:
python producer.py > out.log
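
If you want a persistent log rather than redirection, here is a minimal sketch using the standard logging module (the file name and log format are my choices, not from the question):

import logging
from json import dumps
from kafka import KafkaProducer

# Log to producer.log; INFO level is enough to record each send.
logging.basicConfig(
    filename='producer.log',
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(message)s',
)

my_producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=lambda x: dumps(x).encode('utf-8'),
)

for n in range(10):
    my_data = {'num': n}
    my_producer.send('testnum', value=my_data)
    logging.info('sent %s to topic testnum', my_data)

Every send is then recorded in producer.log with a timestamp, and anything reported with logging.exception() lands in the same file.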

Sensor data with Python does not get written to file

I'm currently working on a script for the sensor on my Raspberry Pi. The code below should read the values from my sensor and write them into the data.json file. My problem is that if I run the script in the Thonny editor everything works, but if I add the script to my crontab the data does not get written to the data.json file.
The Code:
import time
import board
import adafruit_dht
import psutil
import io
import json
import os
from gpiozero import LED
from datetime import date
from datetime import datetime

# We first check if a libgpiod process is running. If yes, we kill it!
for proc in psutil.process_iter():
    if proc.name() == "libgpiod_pulsein" or proc.name() == "libgpiod_pulsei":
        proc.kill()

sensor = adafruit_dht.DHT11(board.D23)

# init
temp_values = [10]
hum_values = [10]
counter = 0
dataLED = LED(13)
dataList = []

def errSignal():
    for i in range(0, 3):
        dataLED.on()
        time.sleep(0.1)
        dataLED.off()
        time.sleep(0.1)

# on startup
def runSignal():
    for i in range(0, 5):
        dataLED.on()
        time.sleep(0.2)
        dataLED.off()
        time.sleep(0.2)

def getExistingData():
    with open('data.json') as fp:
        dataList = json.load(fp)
    print(dataList)

def startupCheck():
    if os.path.isfile("data.json") and os.access("data.json", os.R_OK):
        # checks if file exists
        print("File exists and is readable.")
        # get json data and push into arr on startup
        getExistingData()
    else:
        print("Either file is missing or is not readable, creating file...")
        # create json file
        with open("data.json", "w") as f:
            print("The json file is created.")

def calc_avgValue(values):
    sum = 0
    for iterator in values:
        sum += iterator
    return sum / len(values)

def onOFF():
    dataLED.on()
    time.sleep(0.7)
    dataLED.off()

# data led blinking on startup
runSignal()
# checks if file exists
startupCheck()

while True:
    try:
        temp_values.insert(counter, sensor.temperature)
        hum_values.insert(counter, sensor.humidity)
        counter += 1
        time.sleep(6)
        if counter >= 10:
            print(
                "Temperature: {}*C Humidity: {}% ".format(
                    round(calc_avgValue(temp_values), 2),
                    round(calc_avgValue(hum_values), 2)
                )
            )
            # get time
            today = date.today()
            now = datetime.now()
            # create json obj
            data = {
                "temperature": round(calc_avgValue(temp_values), 2),
                "humidity": round(calc_avgValue(hum_values), 2),
                "fullDate": str(today),
                "fullDate2": str(today.strftime("%d/%m/%Y")),
                "fullDate3": str(today.strftime("%B %d, %Y")),
                "fullDate4": str(today.strftime("%b-%d-%Y")),
                "date_time": str(now.strftime("%d/%m/%Y %H:%M:%S"))
            }
            # push data into list
            dataList.append(data)
            # writing to data.json
            with open("data.json", "w") as f:
                json.dump(dataList, f, indent=4, separators=(',', ': '))
            # if data is written signal appears
            onOFF()
            print("Data has been written to data.json...")
            counter = 0
    except RuntimeError as error:
        continue
    except Exception as error:
        sensor.exit()
        while True:
            errSignal()
        raise error
    time.sleep(0.2)
Crontab menu (screenshot not included): the line in the center is the script.
Investigation areas:
Do not put & in the crontab entry; it serves no purpose.
You should capture the output of your scripts to see what is going on. Do this by adding >/tmp/stats.out 2>/tmp/stats.err (and similar for the other two lines). You will then see what output and errors your scripts produce.
cron does not run your scripts in the same environment, or from the same directory, as your interactive shell. Load what you require inside the script, and use absolute paths (see the sketch after this list).
cron might not have permission to write data.json in the directory it is running from. Specify a full path, and ensure cron can write in that directory.
Look at https://unix.stackexchange.com/questions/109804/crontabs-reboot-only-works-for-root for usage of @reboot. Things that should occur at startup should be configured through systemd or init.d (I do not know which one the Raspberry Pi distro uses). cron is for scheduling jobs, not for running things at startup.
It could be as simple as python3 not being on the PATH that cron is configured with.
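
To address the working-directory point above, a minimal sketch (BASE_DIR and DATA_FILE are illustrative names, not from the question):

import os

# Resolve data.json relative to the script file itself, so it no longer
# matters which directory cron starts the process in.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_FILE = os.path.join(BASE_DIR, "data.json")

with open(DATA_FILE, "w") as f:
    f.write("[]")  # placeholder; the real script writes its JSON here

With that change, every open("data.json", ...) in the script becomes open(DATA_FILE, ...), and cron's working directory stops mattering.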

Discord: one script run multiple times

Friends, I am making a Discord script in Python. I have to run this script multiple times with different parameters, and I have tried doing that with os, threading, and multiprocessing. Whichever I try, it works only for the first row of data and then gets stuck. My code is below; please advise me.
Note: I am logged in as a user.
CSV file demo
auth-token1,channel-id1-1,channelid1-2
auth-token2,channel-id2-1,channelid2-2
...
...
main.py
import os
import csv
from time import sleep
import threading
import multiprocessing

rows = []
with open('data.csv', 'r') as csvfile:
    # creating a csv reader object
    csvreader = csv.reader(csvfile)
    # extracting each data row one by one
    for row in csvreader:
        rows.append(row)

for _ in rows:
    li = _[1:]
    cmd = _[0]
    for i in li:
        cmd = cmd + " " + str(i)
    print(f'python3 script.py {cmd}')
    os.system(f'python3 script.py {cmd}')
    sleep(10)
script.py
import time
import os
import sys
from time import sleep
import logging
import discord
from discord import Permissions, message

argumentList = sys.argv
print(argumentList[1:])
TOKEN_AUTH = argumentList[1]

os.environ['TZ'] = 'Asia/Kolkata'
time.tzset()

logging.basicConfig(handlers=[logging.FileHandler(filename="./discord.txt",
                                                  encoding='utf-8', mode='a+')],
                    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
                    datefmt="%F %A %T",
                    level=logging.INFO)

channel = None
client = discord.Client()
ids = argumentList[2:]
sleep_time = 121
message = "enter your message"

@client.event
async def on_ready():
    global channel
    while True:
        for _ in ids:
            try:
                channel1 = client.get_channel(int(_))
                await channel1.send(message)
                logging.info('1')
                print('sleeping')
                sleep(sleep_time * 60)
            except:
                client.run(TOKEN_AUTH, bot=False)

client.run(TOKEN_AUTH, bot=False)
It is against Discord's TOS to use bot=False, otherwise known as a self-bot (user account).
All I will say is that discord.py is asynchronous, and parts of your code are synchronous, which blocks.
client.run() is also a blocking method, which is why your main.py stops running after the first row. Everything you do with Discord bots needs to be async and within the client loop.
I recommend you rethink exactly what you are trying to do and do something that does not break their Terms of Service.
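
As an illustration of the blocking point only (not an endorsement of self-bots): os.system waits for each child to exit, and since client.run() never returns, main.py never reaches the second row. A minimal sketch of a launcher that does not wait, using subprocess (file names taken from the question):

import csv
import subprocess

procs = []
with open('data.csv', 'r') as csvfile:
    for row in csv.reader(csvfile):
        # Popen returns immediately, unlike os.system, which blocks.
        procs.append(subprocess.Popen(['python3', 'script.py', *row]))

# Optionally wait for all children at the end.
for p in procs:
    p.wait()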
The reason is that you are using client.run(); this method blocks the lines after it.
If you want to send messages from anywhere in your code,
check this solution ^_^ :
client.run("TOKEN") blocking issue (python & discord.py)
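
For the same reason, time.sleep inside an async handler freezes discord.py's event loop; asyncio.sleep yields control instead. A minimal sketch of the loop from script.py rewritten that way (client, ids, message, and sleep_time as defined in the question):

import asyncio

@client.event
async def on_ready():
    while True:
        for channel_id in ids:
            channel = client.get_channel(int(channel_id))
            if channel is not None:
                await channel.send(message)
            # Non-blocking sleep: the event loop keeps running meanwhile.
            await asyncio.sleep(sleep_time * 60)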

Is it possible to detect corrupt Python dictionaries

I have a data file saved using the shelve module in python 2.7 which is somehow corrupt. I can load it with db = shelve.open('file.db') but when I call len(db) or even bool(db) it hangs, and I have to kill the process.
However, I am able to loop through the entire thing and create a new non-corrupt file:
db = shelve.open('orig.db')
db2 = shelve.open('copy.db')
for k, v in db.items():
    db2[k] = v
db2.close()  # copy.db will now be a fully working copy
The question is, how can I test the dict and avoid the hang?
BTW, I still have the original file, and it exhibits the same behaviour when copied to other machines, in case someone also wants to help me get to the bottom of what's actually wrong with the file in the first place!
I'm unaware of any inspection methods other than dbm.whichdb(). To debug a possible pickle protocol mismatch in a way that lets you time out long-running tests, maybe try:
import shelve
import pickle
import dbm
import multiprocessing
import time
import psutil

def protocol_check():
    print('orig.db is', dbm.whichdb('orig.db'))
    print('copy.db is', dbm.whichdb('copy.db'))
    for p in range(pickle.HIGHEST_PROTOCOL + 1):
        print('trying protocol', p)
        db = shelve.open('orig.db', protocol=p)
        db2 = shelve.open('copy.db')
        try:
            for k, v in db.items():
                db2[k] = v
        finally:
            db2.close()
            db.close()
        print('great success on', p)

def terminate(grace_period=2):
    procs = psutil.Process().children()
    for p in procs:
        p.terminate()
    gone, still_alive = psutil.wait_procs(procs, timeout=grace_period)
    for p in still_alive:
        p.kill()

process = multiprocessing.Process(target=protocol_check)
process.start()
time.sleep(10)
terminate()
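
To answer the "how can I test the dict and avoid the hang" part directly, the same idea in a smaller form: run the suspect operation in a child process and give up after a timeout (the file name and 10-second timeout are illustrative):

import multiprocessing
import shelve

def probe(path):
    # Hangs on the corrupt file, so it runs in a child we can abandon.
    db = shelve.open(path)
    len(db)
    db.close()

if __name__ == '__main__':
    p = multiprocessing.Process(target=probe, args=('orig.db',))
    p.start()
    p.join(timeout=10)
    if p.is_alive():
        p.terminate()
        print('probe timed out: file is likely corrupt')
    else:
        print('probe finished: file looks healthy')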

Looking for a way to de-reference a bash var wrapped in a python command call

I'm trying to find a way to de-reference goldenClusterID so I can use it in an AWS CLI command to terminate my cluster. This program compensates for the dynamic job-flow numbers generated each day, so that a normal scheduled Data Pipeline shutdown is applicable. I can os.system("less goldenClusterID") all day and it gives me the right answer. However, a straight de-reference won't give up the goodies. Suggestions?
from __future__ import print_function
import json
import urllib
import boto3
import commands
import os
import re
import datetime
import awscli
foundCluster = ""
rawClusterNum = ""
mainClusterNum = ""
goldenClusterID = ""
# Next, we populate the list file with clusters currently active
os.system("aws emr list-clusters --active >> foundCluster")
# We search for a specific Cluster Name
os.system("fgrep 'AnAWSEMRCluster' foundCluster")
os.system("grep -B 1 DrMikesEMRCluster foundCluster >> rawClusterNum")
# Look for the specific Cluster ID in context with it's Cluster Name
os.system("fgrep 'j-' rawClusterNum >> mainClusterNum")
# Regex the Cluster ID from the line
os.system("grep -o '\j-[0-9a-zA-Z]*' mainClusterNum >> goldenClusterID")
# Read the Cluster ID from the file and run AWS Terminate on it
os.system("aws emr describe-cluster --cluster-id %s" % goldenClusterID")
os.system("aws emr terminate-clusters --cluster-ids goldenClusterID")
os.system("rm *")
Never mind, I figured it out. Too much coffee and not enough sleep. The answer is to use:
goldkeyID = open('goldenClusterID', 'r').read()
os.system("aws emr describe-cluster --cluster-id %s" % goldkeyID)

How to specify 'logger' for apscheduler

I'm trying to learn how to use Python's apscheduler package, but periodically, it throws the following error:
No handlers could be found for logger "apscheduler.scheduler"
This message seems to be associated with errors in the scheduled jobs. For example, with jobTester as the scheduled job, the following code, which uses an undefined variable (nameStr0) in jobTester, gives the above error message:
from apscheduler.scheduler import Scheduler
from apscheduler.jobstores.shelve_store import ShelveJobStore
from datetime import datetime, timedelta
from schedJob import toyJob

def jobTester(nameStr):
    outFileName = nameStr0 + '.txt'
    outFile = open(outFileName, 'w')
    outFile.write(nameStr)
    outFile.close()

def schedTester(jobList):
    scheduler = Scheduler()
    scheduler.add_jobstore(ShelveJobStore('example.db'), 'shelve')
    refTime = datetime.now()
    for index, currJob in enumerate(jobList):
        runTime = refTime + timedelta(seconds=15)
        jobName = currJob.name + '_' + str(index)
        scheduler.add_date_job(jobTester, runTime, name=jobName,
                               jobstore='shelve', args=[jobName])
    scheduler.start()
    stopTime = datetime.now() + timedelta(seconds=45)
    print "Starting wait loop .....",
    while stopTime > datetime.now():
        pass
    print "Done"

def doit():
    names = ['Alan', 'Barbara', 'Charlie', 'Dana']
    jobList = [toyJob(n) for n in names]
    schedTester(jobList)
This may be seen by running this code (stored in the file schedTester.py) as follows:
>>> import schedTester
>>> schedTester.doit()
No handlers could be found for logger "apscheduler.scheduler"
Starting wait loop ..... Done
However, when I replace nameStr0 with nameStr (i.e. the proper spelling of the variable name), the code runs fine without the error message.
How do I create a logger for apscheduler.scheduler? Am I missing something in the section of the docs dealing with configuring the scheduler?
Am I correct in thinking of this logger as some sort of stderr? If so, where should I look for its output (if that is not determined by the way I set it up)?
You can just create a default logger and everything should go to it:
import logging
logging.basicConfig()
The reason you only have a problem when you use a variable that hasn't been defined is that this makes the jobTester function throw an error, which apscheduler catches and tries to report with logging.error(). Since you haven't set up the logger, it complains.
If you read up on Python logging you will see that there are many ways to configure it. You could have it log everything to a file or print it to stdout.
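
If the goal is a dedicated log file rather than console output, a minimal sketch (the file name and format are arbitrary choices):

import logging

# Route apscheduler's messages (and all other loggers) to a file.
logging.basicConfig(
    filename='scheduler.log',
    level=logging.DEBUG,
    format='%(asctime)s %(name)s %(levelname)s: %(message)s',
)

Any uncaught error in a scheduled job then shows up in scheduler.log instead of the "No handlers could be found" warning.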
